{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 12304, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 4.273040591428128, "learning_rate": 5.405405405405406e-08, "loss": 0.8615, "step": 1 }, { "epoch": 0.0, "grad_norm": 4.14660204198603, "learning_rate": 1.0810810810810812e-07, "loss": 0.8351, "step": 2 }, { "epoch": 0.0, "grad_norm": 3.5136363685645717, "learning_rate": 1.6216216216216218e-07, "loss": 0.7338, "step": 3 }, { "epoch": 0.0, "grad_norm": 0.84314659649236, "learning_rate": 2.1621621621621625e-07, "loss": 0.3097, "step": 4 }, { "epoch": 0.0, "grad_norm": 4.3116383284825615, "learning_rate": 2.702702702702703e-07, "loss": 0.9433, "step": 5 }, { "epoch": 0.0, "grad_norm": 4.151887895391798, "learning_rate": 3.2432432432432436e-07, "loss": 0.8352, "step": 6 }, { "epoch": 0.0, "grad_norm": 4.359720301871594, "learning_rate": 3.7837837837837843e-07, "loss": 0.8869, "step": 7 }, { "epoch": 0.0, "grad_norm": 4.069900304039918, "learning_rate": 4.324324324324325e-07, "loss": 0.8074, "step": 8 }, { "epoch": 0.0, "grad_norm": 4.221531701629939, "learning_rate": 4.864864864864865e-07, "loss": 0.8024, "step": 9 }, { "epoch": 0.0, "grad_norm": 4.223144226951507, "learning_rate": 5.405405405405406e-07, "loss": 0.793, "step": 10 }, { "epoch": 0.0, "grad_norm": 4.3223820324853826, "learning_rate": 5.945945945945947e-07, "loss": 0.9239, "step": 11 }, { "epoch": 0.0, "grad_norm": 3.8854492475804197, "learning_rate": 6.486486486486487e-07, "loss": 0.7622, "step": 12 }, { "epoch": 0.0, "grad_norm": 3.5468659118607886, "learning_rate": 7.027027027027028e-07, "loss": 0.8072, "step": 13 }, { "epoch": 0.0, "grad_norm": 3.239336624444215, "learning_rate": 7.567567567567569e-07, "loss": 0.8146, "step": 14 }, { "epoch": 0.0, "grad_norm": 3.476905400596688, "learning_rate": 8.108108108108109e-07, "loss": 0.7157, "step": 15 }, { "epoch": 0.0, "grad_norm": 2.8515112912848704, "learning_rate": 8.64864864864865e-07, "loss": 0.6896, "step": 16 }, { "epoch": 0.0, "grad_norm": 3.363378808068876, "learning_rate": 9.189189189189191e-07, "loss": 0.7681, "step": 17 }, { "epoch": 0.0, "grad_norm": 3.646198456699034, "learning_rate": 9.72972972972973e-07, "loss": 0.8277, "step": 18 }, { "epoch": 0.0, "grad_norm": 2.958244139103593, "learning_rate": 1.027027027027027e-06, "loss": 0.707, "step": 19 }, { "epoch": 0.0, "grad_norm": 2.4628987030897243, "learning_rate": 1.0810810810810812e-06, "loss": 0.8058, "step": 20 }, { "epoch": 0.0, "grad_norm": 3.0814254976956748, "learning_rate": 1.1351351351351352e-06, "loss": 0.796, "step": 21 }, { "epoch": 0.0, "grad_norm": 2.134834005763383, "learning_rate": 1.1891891891891893e-06, "loss": 0.7277, "step": 22 }, { "epoch": 0.0, "grad_norm": 1.9432058635310248, "learning_rate": 1.2432432432432434e-06, "loss": 0.6734, "step": 23 }, { "epoch": 0.0, "grad_norm": 2.2219674117937425, "learning_rate": 1.2972972972972974e-06, "loss": 0.7524, "step": 24 }, { "epoch": 0.0, "grad_norm": 1.6618761393680126, "learning_rate": 1.3513513513513515e-06, "loss": 0.6931, "step": 25 }, { "epoch": 0.0, "grad_norm": 1.7996160383043525, "learning_rate": 1.4054054054054056e-06, "loss": 0.7208, "step": 26 }, { "epoch": 0.0, "grad_norm": 1.7728768994451718, "learning_rate": 1.4594594594594596e-06, "loss": 0.6902, "step": 27 }, { "epoch": 0.0, "grad_norm": 1.8967993526053564, "learning_rate": 1.5135135135135137e-06, "loss": 0.7391, "step": 28 }, { "epoch": 0.0, "grad_norm": 1.7443476331916143, "learning_rate": 1.5675675675675678e-06, "loss": 0.7357, "step": 29 }, { "epoch": 0.0, "grad_norm": 1.789052299834985, "learning_rate": 1.6216216216216219e-06, "loss": 0.6854, "step": 30 }, { "epoch": 0.0, "grad_norm": 1.584750261961582, "learning_rate": 1.675675675675676e-06, "loss": 0.6375, "step": 31 }, { "epoch": 0.0, "grad_norm": 1.789236566080917, "learning_rate": 1.72972972972973e-06, "loss": 0.7515, "step": 32 }, { "epoch": 0.0, "grad_norm": 1.5893620129109587, "learning_rate": 1.783783783783784e-06, "loss": 0.6829, "step": 33 }, { "epoch": 0.0, "grad_norm": 1.7943146599443252, "learning_rate": 1.8378378378378381e-06, "loss": 0.7003, "step": 34 }, { "epoch": 0.0, "grad_norm": 1.57691567184717, "learning_rate": 1.8918918918918922e-06, "loss": 0.6645, "step": 35 }, { "epoch": 0.0, "grad_norm": 1.5392384888398358, "learning_rate": 1.945945945945946e-06, "loss": 0.7122, "step": 36 }, { "epoch": 0.0, "grad_norm": 0.8831190046317889, "learning_rate": 2.0000000000000003e-06, "loss": 0.3036, "step": 37 }, { "epoch": 0.0, "grad_norm": 1.3157264544284732, "learning_rate": 2.054054054054054e-06, "loss": 0.6175, "step": 38 }, { "epoch": 0.0, "grad_norm": 1.432915059081968, "learning_rate": 2.1081081081081085e-06, "loss": 0.7022, "step": 39 }, { "epoch": 0.0, "grad_norm": 1.4613657485470233, "learning_rate": 2.1621621621621623e-06, "loss": 0.6661, "step": 40 }, { "epoch": 0.0, "grad_norm": 1.5547873171988644, "learning_rate": 2.2162162162162166e-06, "loss": 0.6233, "step": 41 }, { "epoch": 0.0, "grad_norm": 1.6877086684917586, "learning_rate": 2.2702702702702705e-06, "loss": 0.6675, "step": 42 }, { "epoch": 0.0, "grad_norm": 1.7184407982106682, "learning_rate": 2.3243243243243247e-06, "loss": 0.7379, "step": 43 }, { "epoch": 0.0, "grad_norm": 1.7731563332171092, "learning_rate": 2.3783783783783786e-06, "loss": 0.6577, "step": 44 }, { "epoch": 0.0, "grad_norm": 1.801920902456086, "learning_rate": 2.432432432432433e-06, "loss": 0.7479, "step": 45 }, { "epoch": 0.0, "grad_norm": 1.6573379957461731, "learning_rate": 2.4864864864864867e-06, "loss": 0.6584, "step": 46 }, { "epoch": 0.0, "grad_norm": 1.4084165272724114, "learning_rate": 2.540540540540541e-06, "loss": 0.6129, "step": 47 }, { "epoch": 0.0, "grad_norm": 1.6525233813389781, "learning_rate": 2.594594594594595e-06, "loss": 0.704, "step": 48 }, { "epoch": 0.0, "grad_norm": 1.3782860124442546, "learning_rate": 2.648648648648649e-06, "loss": 0.6154, "step": 49 }, { "epoch": 0.0, "grad_norm": 1.4337365267953683, "learning_rate": 2.702702702702703e-06, "loss": 0.6052, "step": 50 }, { "epoch": 0.0, "grad_norm": 1.2702372133594575, "learning_rate": 2.7567567567567573e-06, "loss": 0.5269, "step": 51 }, { "epoch": 0.0, "grad_norm": 1.2788406696387828, "learning_rate": 2.810810810810811e-06, "loss": 0.559, "step": 52 }, { "epoch": 0.0, "grad_norm": 1.5336812095086203, "learning_rate": 2.8648648648648654e-06, "loss": 0.688, "step": 53 }, { "epoch": 0.0, "grad_norm": 1.3528711974952585, "learning_rate": 2.9189189189189193e-06, "loss": 0.6331, "step": 54 }, { "epoch": 0.0, "grad_norm": 1.333332096964607, "learning_rate": 2.9729729729729736e-06, "loss": 0.6096, "step": 55 }, { "epoch": 0.0, "grad_norm": 1.4784282183701558, "learning_rate": 3.0270270270270274e-06, "loss": 0.6252, "step": 56 }, { "epoch": 0.0, "grad_norm": 1.4658057231898225, "learning_rate": 3.0810810810810817e-06, "loss": 0.6609, "step": 57 }, { "epoch": 0.0, "grad_norm": 1.4561817591061057, "learning_rate": 3.1351351351351356e-06, "loss": 0.6048, "step": 58 }, { "epoch": 0.0, "grad_norm": 1.355095612044604, "learning_rate": 3.1891891891891894e-06, "loss": 0.5902, "step": 59 }, { "epoch": 0.0, "grad_norm": 1.3067364183441053, "learning_rate": 3.2432432432432437e-06, "loss": 0.5688, "step": 60 }, { "epoch": 0.0, "grad_norm": 1.3751486378946132, "learning_rate": 3.2972972972972976e-06, "loss": 0.6159, "step": 61 }, { "epoch": 0.01, "grad_norm": 1.2862193884453008, "learning_rate": 3.351351351351352e-06, "loss": 0.593, "step": 62 }, { "epoch": 0.01, "grad_norm": 1.4789049778412198, "learning_rate": 3.4054054054054057e-06, "loss": 0.6427, "step": 63 }, { "epoch": 0.01, "grad_norm": 1.4546804071157595, "learning_rate": 3.45945945945946e-06, "loss": 0.632, "step": 64 }, { "epoch": 0.01, "grad_norm": 1.4395735150129738, "learning_rate": 3.513513513513514e-06, "loss": 0.6129, "step": 65 }, { "epoch": 0.01, "grad_norm": 1.4346828370676308, "learning_rate": 3.567567567567568e-06, "loss": 0.6517, "step": 66 }, { "epoch": 0.01, "grad_norm": 1.4377476761384835, "learning_rate": 3.621621621621622e-06, "loss": 0.6353, "step": 67 }, { "epoch": 0.01, "grad_norm": 0.8803533563697833, "learning_rate": 3.6756756756756763e-06, "loss": 0.3199, "step": 68 }, { "epoch": 0.01, "grad_norm": 1.3957121329522453, "learning_rate": 3.72972972972973e-06, "loss": 0.6205, "step": 69 }, { "epoch": 0.01, "grad_norm": 1.4125486289001528, "learning_rate": 3.7837837837837844e-06, "loss": 0.6586, "step": 70 }, { "epoch": 0.01, "grad_norm": 1.431189021990164, "learning_rate": 3.837837837837838e-06, "loss": 0.6508, "step": 71 }, { "epoch": 0.01, "grad_norm": 1.2515213521059998, "learning_rate": 3.891891891891892e-06, "loss": 0.5511, "step": 72 }, { "epoch": 0.01, "grad_norm": 1.4889319759544897, "learning_rate": 3.945945945945947e-06, "loss": 0.6802, "step": 73 }, { "epoch": 0.01, "grad_norm": 1.3850309722543301, "learning_rate": 4.000000000000001e-06, "loss": 0.5624, "step": 74 }, { "epoch": 0.01, "grad_norm": 1.5352306040196768, "learning_rate": 4.0540540540540545e-06, "loss": 0.6158, "step": 75 }, { "epoch": 0.01, "grad_norm": 1.306638038154721, "learning_rate": 4.108108108108108e-06, "loss": 0.5853, "step": 76 }, { "epoch": 0.01, "grad_norm": 1.4703554455302463, "learning_rate": 4.162162162162163e-06, "loss": 0.6308, "step": 77 }, { "epoch": 0.01, "grad_norm": 1.3157754223842182, "learning_rate": 4.216216216216217e-06, "loss": 0.6677, "step": 78 }, { "epoch": 0.01, "grad_norm": 1.2784647086503598, "learning_rate": 4.270270270270271e-06, "loss": 0.5917, "step": 79 }, { "epoch": 0.01, "grad_norm": 1.4109447882750359, "learning_rate": 4.324324324324325e-06, "loss": 0.635, "step": 80 }, { "epoch": 0.01, "grad_norm": 1.793412137191073, "learning_rate": 4.378378378378379e-06, "loss": 0.6547, "step": 81 }, { "epoch": 0.01, "grad_norm": 1.4467943130716234, "learning_rate": 4.432432432432433e-06, "loss": 0.6441, "step": 82 }, { "epoch": 0.01, "grad_norm": 1.5009537855152142, "learning_rate": 4.486486486486487e-06, "loss": 0.6814, "step": 83 }, { "epoch": 0.01, "grad_norm": 1.298628336993122, "learning_rate": 4.540540540540541e-06, "loss": 0.6049, "step": 84 }, { "epoch": 0.01, "grad_norm": 1.599083383122989, "learning_rate": 4.594594594594596e-06, "loss": 0.7032, "step": 85 }, { "epoch": 0.01, "grad_norm": 1.3744837239712366, "learning_rate": 4.6486486486486495e-06, "loss": 0.5911, "step": 86 }, { "epoch": 0.01, "grad_norm": 1.4225318479566327, "learning_rate": 4.702702702702703e-06, "loss": 0.5956, "step": 87 }, { "epoch": 0.01, "grad_norm": 1.3958885743685878, "learning_rate": 4.756756756756757e-06, "loss": 0.5795, "step": 88 }, { "epoch": 0.01, "grad_norm": 1.3249125605774177, "learning_rate": 4.810810810810811e-06, "loss": 0.5479, "step": 89 }, { "epoch": 0.01, "grad_norm": 1.3298464814569155, "learning_rate": 4.864864864864866e-06, "loss": 0.5754, "step": 90 }, { "epoch": 0.01, "grad_norm": 1.4683698015671058, "learning_rate": 4.91891891891892e-06, "loss": 0.5928, "step": 91 }, { "epoch": 0.01, "grad_norm": 1.3388444498030598, "learning_rate": 4.9729729729729735e-06, "loss": 0.6335, "step": 92 }, { "epoch": 0.01, "grad_norm": 1.3594399027949904, "learning_rate": 5.027027027027027e-06, "loss": 0.602, "step": 93 }, { "epoch": 0.01, "grad_norm": 0.8636527147025339, "learning_rate": 5.081081081081082e-06, "loss": 0.3149, "step": 94 }, { "epoch": 0.01, "grad_norm": 1.2168309642609405, "learning_rate": 5.135135135135135e-06, "loss": 0.5471, "step": 95 }, { "epoch": 0.01, "grad_norm": 1.2351839782392957, "learning_rate": 5.18918918918919e-06, "loss": 0.5424, "step": 96 }, { "epoch": 0.01, "grad_norm": 1.292604070171166, "learning_rate": 5.243243243243244e-06, "loss": 0.5823, "step": 97 }, { "epoch": 0.01, "grad_norm": 1.311816649798154, "learning_rate": 5.297297297297298e-06, "loss": 0.6077, "step": 98 }, { "epoch": 0.01, "grad_norm": 0.8143613433965735, "learning_rate": 5.351351351351351e-06, "loss": 0.3432, "step": 99 }, { "epoch": 0.01, "grad_norm": 1.3116387212090366, "learning_rate": 5.405405405405406e-06, "loss": 0.6164, "step": 100 }, { "epoch": 0.01, "grad_norm": 1.3535109082047654, "learning_rate": 5.45945945945946e-06, "loss": 0.6392, "step": 101 }, { "epoch": 0.01, "grad_norm": 1.3443165530744305, "learning_rate": 5.513513513513515e-06, "loss": 0.568, "step": 102 }, { "epoch": 0.01, "grad_norm": 1.414863407793237, "learning_rate": 5.567567567567568e-06, "loss": 0.6686, "step": 103 }, { "epoch": 0.01, "grad_norm": 1.3009721885803063, "learning_rate": 5.621621621621622e-06, "loss": 0.5804, "step": 104 }, { "epoch": 0.01, "grad_norm": 1.2774240010244946, "learning_rate": 5.675675675675676e-06, "loss": 0.6211, "step": 105 }, { "epoch": 0.01, "grad_norm": 0.8344894256527007, "learning_rate": 5.729729729729731e-06, "loss": 0.3314, "step": 106 }, { "epoch": 0.01, "grad_norm": 1.3879213164427744, "learning_rate": 5.783783783783784e-06, "loss": 0.656, "step": 107 }, { "epoch": 0.01, "grad_norm": 1.375044697622862, "learning_rate": 5.837837837837839e-06, "loss": 0.5651, "step": 108 }, { "epoch": 0.01, "grad_norm": 1.4877329295170625, "learning_rate": 5.8918918918918924e-06, "loss": 0.6075, "step": 109 }, { "epoch": 0.01, "grad_norm": 1.2983195899347304, "learning_rate": 5.945945945945947e-06, "loss": 0.5463, "step": 110 }, { "epoch": 0.01, "grad_norm": 1.2218725662034848, "learning_rate": 6e-06, "loss": 0.6036, "step": 111 }, { "epoch": 0.01, "grad_norm": 1.3436025991798024, "learning_rate": 6.054054054054055e-06, "loss": 0.6227, "step": 112 }, { "epoch": 0.01, "grad_norm": 1.2977100495834533, "learning_rate": 6.108108108108109e-06, "loss": 0.5586, "step": 113 }, { "epoch": 0.01, "grad_norm": 1.3507646576725383, "learning_rate": 6.162162162162163e-06, "loss": 0.5956, "step": 114 }, { "epoch": 0.01, "grad_norm": 1.399931221889171, "learning_rate": 6.2162162162162164e-06, "loss": 0.6347, "step": 115 }, { "epoch": 0.01, "grad_norm": 1.4071118149538326, "learning_rate": 6.270270270270271e-06, "loss": 0.6294, "step": 116 }, { "epoch": 0.01, "grad_norm": 1.5751588266470815, "learning_rate": 6.324324324324325e-06, "loss": 0.5842, "step": 117 }, { "epoch": 0.01, "grad_norm": 1.5903449377258876, "learning_rate": 6.378378378378379e-06, "loss": 0.6512, "step": 118 }, { "epoch": 0.01, "grad_norm": 1.6710624509177674, "learning_rate": 6.432432432432433e-06, "loss": 0.5931, "step": 119 }, { "epoch": 0.01, "grad_norm": 1.1494354077899656, "learning_rate": 6.486486486486487e-06, "loss": 0.5491, "step": 120 }, { "epoch": 0.01, "grad_norm": 1.3044888172319544, "learning_rate": 6.540540540540541e-06, "loss": 0.615, "step": 121 }, { "epoch": 0.01, "grad_norm": 5.268346144656469, "learning_rate": 6.594594594594595e-06, "loss": 0.5698, "step": 122 }, { "epoch": 0.01, "grad_norm": 1.3160275141042568, "learning_rate": 6.648648648648649e-06, "loss": 0.5061, "step": 123 }, { "epoch": 0.01, "grad_norm": 1.246078708770176, "learning_rate": 6.702702702702704e-06, "loss": 0.5999, "step": 124 }, { "epoch": 0.01, "grad_norm": 1.427653128474058, "learning_rate": 6.7567567567567575e-06, "loss": 0.6213, "step": 125 }, { "epoch": 0.01, "grad_norm": 1.303477151542448, "learning_rate": 6.810810810810811e-06, "loss": 0.6192, "step": 126 }, { "epoch": 0.01, "grad_norm": 1.3710171885376965, "learning_rate": 6.864864864864865e-06, "loss": 0.5732, "step": 127 }, { "epoch": 0.01, "grad_norm": 1.3631429826418746, "learning_rate": 6.91891891891892e-06, "loss": 0.5732, "step": 128 }, { "epoch": 0.01, "grad_norm": 1.4099354465501006, "learning_rate": 6.972972972972973e-06, "loss": 0.6389, "step": 129 }, { "epoch": 0.01, "grad_norm": 1.4281747107854714, "learning_rate": 7.027027027027028e-06, "loss": 0.6006, "step": 130 }, { "epoch": 0.01, "grad_norm": 1.4595708181113594, "learning_rate": 7.0810810810810815e-06, "loss": 0.6472, "step": 131 }, { "epoch": 0.01, "grad_norm": 1.489743891332324, "learning_rate": 7.135135135135136e-06, "loss": 0.6437, "step": 132 }, { "epoch": 0.01, "grad_norm": 1.2738442793830842, "learning_rate": 7.189189189189189e-06, "loss": 0.5813, "step": 133 }, { "epoch": 0.01, "grad_norm": 1.6295009674146617, "learning_rate": 7.243243243243244e-06, "loss": 0.7095, "step": 134 }, { "epoch": 0.01, "grad_norm": 1.5620836204569906, "learning_rate": 7.297297297297298e-06, "loss": 0.5914, "step": 135 }, { "epoch": 0.01, "grad_norm": 1.5060790781694255, "learning_rate": 7.3513513513513525e-06, "loss": 0.6339, "step": 136 }, { "epoch": 0.01, "grad_norm": 1.388913535992189, "learning_rate": 7.4054054054054055e-06, "loss": 0.6277, "step": 137 }, { "epoch": 0.01, "grad_norm": 1.4863516194848927, "learning_rate": 7.45945945945946e-06, "loss": 0.5646, "step": 138 }, { "epoch": 0.01, "grad_norm": 1.3445803127161098, "learning_rate": 7.513513513513514e-06, "loss": 0.6292, "step": 139 }, { "epoch": 0.01, "grad_norm": 1.3384584779109399, "learning_rate": 7.567567567567569e-06, "loss": 0.5435, "step": 140 }, { "epoch": 0.01, "grad_norm": 1.4926838153450348, "learning_rate": 7.621621621621622e-06, "loss": 0.6428, "step": 141 }, { "epoch": 0.01, "grad_norm": 1.3869777102360936, "learning_rate": 7.675675675675676e-06, "loss": 0.5966, "step": 142 }, { "epoch": 0.01, "grad_norm": 1.3409434328050722, "learning_rate": 7.72972972972973e-06, "loss": 0.6088, "step": 143 }, { "epoch": 0.01, "grad_norm": 1.2762128491212468, "learning_rate": 7.783783783783784e-06, "loss": 0.6029, "step": 144 }, { "epoch": 0.01, "grad_norm": 1.314550003433995, "learning_rate": 7.837837837837838e-06, "loss": 0.5568, "step": 145 }, { "epoch": 0.01, "grad_norm": 1.4580077217392413, "learning_rate": 7.891891891891894e-06, "loss": 0.6383, "step": 146 }, { "epoch": 0.01, "grad_norm": 1.460486772621834, "learning_rate": 7.945945945945946e-06, "loss": 0.6107, "step": 147 }, { "epoch": 0.01, "grad_norm": 1.3790848419120394, "learning_rate": 8.000000000000001e-06, "loss": 0.6367, "step": 148 }, { "epoch": 0.01, "grad_norm": 1.3986888028757734, "learning_rate": 8.054054054054055e-06, "loss": 0.5709, "step": 149 }, { "epoch": 0.01, "grad_norm": 1.5066361422827574, "learning_rate": 8.108108108108109e-06, "loss": 0.5565, "step": 150 }, { "epoch": 0.01, "grad_norm": 1.231260969143522, "learning_rate": 8.162162162162163e-06, "loss": 0.5974, "step": 151 }, { "epoch": 0.01, "grad_norm": 1.3662141363352507, "learning_rate": 8.216216216216217e-06, "loss": 0.5617, "step": 152 }, { "epoch": 0.01, "grad_norm": 1.379757692211869, "learning_rate": 8.27027027027027e-06, "loss": 0.6216, "step": 153 }, { "epoch": 0.01, "grad_norm": 1.4032029868039735, "learning_rate": 8.324324324324326e-06, "loss": 0.5659, "step": 154 }, { "epoch": 0.01, "grad_norm": 1.246102895886861, "learning_rate": 8.378378378378378e-06, "loss": 0.6069, "step": 155 }, { "epoch": 0.01, "grad_norm": 1.3129486277734963, "learning_rate": 8.432432432432434e-06, "loss": 0.5649, "step": 156 }, { "epoch": 0.01, "grad_norm": 1.5949807239779763, "learning_rate": 8.486486486486488e-06, "loss": 0.6402, "step": 157 }, { "epoch": 0.01, "grad_norm": 1.2367723511070507, "learning_rate": 8.540540540540542e-06, "loss": 0.5507, "step": 158 }, { "epoch": 0.01, "grad_norm": 1.3732449159415614, "learning_rate": 8.594594594594595e-06, "loss": 0.6039, "step": 159 }, { "epoch": 0.01, "grad_norm": 1.3522671789714542, "learning_rate": 8.64864864864865e-06, "loss": 0.583, "step": 160 }, { "epoch": 0.01, "grad_norm": 1.3857986224694943, "learning_rate": 8.702702702702703e-06, "loss": 0.6238, "step": 161 }, { "epoch": 0.01, "grad_norm": 1.6041881805697953, "learning_rate": 8.756756756756759e-06, "loss": 0.5787, "step": 162 }, { "epoch": 0.01, "grad_norm": 1.3844553479323647, "learning_rate": 8.810810810810811e-06, "loss": 0.6455, "step": 163 }, { "epoch": 0.01, "grad_norm": 1.372775499931938, "learning_rate": 8.864864864864866e-06, "loss": 0.5469, "step": 164 }, { "epoch": 0.01, "grad_norm": 1.3096092196718552, "learning_rate": 8.91891891891892e-06, "loss": 0.6066, "step": 165 }, { "epoch": 0.01, "grad_norm": 1.3873598772944997, "learning_rate": 8.972972972972974e-06, "loss": 0.6354, "step": 166 }, { "epoch": 0.01, "grad_norm": 1.4261461525288062, "learning_rate": 9.027027027027028e-06, "loss": 0.5119, "step": 167 }, { "epoch": 0.01, "grad_norm": 1.3805251800039318, "learning_rate": 9.081081081081082e-06, "loss": 0.5846, "step": 168 }, { "epoch": 0.01, "grad_norm": 0.8240477026882622, "learning_rate": 9.135135135135136e-06, "loss": 0.3422, "step": 169 }, { "epoch": 0.01, "grad_norm": 1.3817497767181797, "learning_rate": 9.189189189189191e-06, "loss": 0.5916, "step": 170 }, { "epoch": 0.01, "grad_norm": 1.4599858495694384, "learning_rate": 9.243243243243243e-06, "loss": 0.5474, "step": 171 }, { "epoch": 0.01, "grad_norm": 1.3796151016083495, "learning_rate": 9.297297297297299e-06, "loss": 0.5706, "step": 172 }, { "epoch": 0.01, "grad_norm": 1.409777572587561, "learning_rate": 9.351351351351353e-06, "loss": 0.6052, "step": 173 }, { "epoch": 0.01, "grad_norm": 1.4002947829604957, "learning_rate": 9.405405405405407e-06, "loss": 0.5698, "step": 174 }, { "epoch": 0.01, "grad_norm": 1.405204434110899, "learning_rate": 9.45945945945946e-06, "loss": 0.596, "step": 175 }, { "epoch": 0.01, "grad_norm": 1.462514585076768, "learning_rate": 9.513513513513514e-06, "loss": 0.62, "step": 176 }, { "epoch": 0.01, "grad_norm": 1.7499683079184325, "learning_rate": 9.567567567567568e-06, "loss": 0.5936, "step": 177 }, { "epoch": 0.01, "grad_norm": 1.4505838663306836, "learning_rate": 9.621621621621622e-06, "loss": 0.6323, "step": 178 }, { "epoch": 0.01, "grad_norm": 1.2193208841128076, "learning_rate": 9.675675675675676e-06, "loss": 0.5516, "step": 179 }, { "epoch": 0.01, "grad_norm": 1.5461709008460212, "learning_rate": 9.729729729729732e-06, "loss": 0.6565, "step": 180 }, { "epoch": 0.01, "grad_norm": 1.3783256914341504, "learning_rate": 9.783783783783785e-06, "loss": 0.6273, "step": 181 }, { "epoch": 0.01, "grad_norm": 1.3009129274113025, "learning_rate": 9.83783783783784e-06, "loss": 0.5709, "step": 182 }, { "epoch": 0.01, "grad_norm": 1.4331042802705694, "learning_rate": 9.891891891891893e-06, "loss": 0.6054, "step": 183 }, { "epoch": 0.01, "grad_norm": 1.6637836990959596, "learning_rate": 9.945945945945947e-06, "loss": 0.5736, "step": 184 }, { "epoch": 0.02, "grad_norm": 1.4268744416818444, "learning_rate": 1e-05, "loss": 0.5935, "step": 185 }, { "epoch": 0.02, "grad_norm": 1.325262107647321, "learning_rate": 1.0054054054054055e-05, "loss": 0.5971, "step": 186 }, { "epoch": 0.02, "grad_norm": 1.477582803403984, "learning_rate": 1.0108108108108109e-05, "loss": 0.6255, "step": 187 }, { "epoch": 0.02, "grad_norm": 1.3006442007786023, "learning_rate": 1.0162162162162164e-05, "loss": 0.5756, "step": 188 }, { "epoch": 0.02, "grad_norm": 1.3884979600616545, "learning_rate": 1.0216216216216216e-05, "loss": 0.6581, "step": 189 }, { "epoch": 0.02, "grad_norm": 1.2998097970689109, "learning_rate": 1.027027027027027e-05, "loss": 0.5781, "step": 190 }, { "epoch": 0.02, "grad_norm": 1.3950495411925943, "learning_rate": 1.0324324324324324e-05, "loss": 0.5968, "step": 191 }, { "epoch": 0.02, "grad_norm": 1.36884172166217, "learning_rate": 1.037837837837838e-05, "loss": 0.5746, "step": 192 }, { "epoch": 0.02, "grad_norm": 1.3242805937768347, "learning_rate": 1.0432432432432433e-05, "loss": 0.6482, "step": 193 }, { "epoch": 0.02, "grad_norm": 1.387103904758459, "learning_rate": 1.0486486486486487e-05, "loss": 0.5908, "step": 194 }, { "epoch": 0.02, "grad_norm": 1.3226413426613004, "learning_rate": 1.0540540540540541e-05, "loss": 0.5772, "step": 195 }, { "epoch": 0.02, "grad_norm": 1.4041121660539917, "learning_rate": 1.0594594594594597e-05, "loss": 0.5884, "step": 196 }, { "epoch": 0.02, "grad_norm": 1.4683717432308583, "learning_rate": 1.0648648648648649e-05, "loss": 0.6196, "step": 197 }, { "epoch": 0.02, "grad_norm": 1.3287374075081768, "learning_rate": 1.0702702702702703e-05, "loss": 0.5308, "step": 198 }, { "epoch": 0.02, "grad_norm": 1.31673514609701, "learning_rate": 1.0756756756756757e-05, "loss": 0.5923, "step": 199 }, { "epoch": 0.02, "grad_norm": 1.2248739732041203, "learning_rate": 1.0810810810810812e-05, "loss": 0.5463, "step": 200 }, { "epoch": 0.02, "grad_norm": 1.4582091435807332, "learning_rate": 1.0864864864864866e-05, "loss": 0.5624, "step": 201 }, { "epoch": 0.02, "grad_norm": 1.3974633462736783, "learning_rate": 1.091891891891892e-05, "loss": 0.5518, "step": 202 }, { "epoch": 0.02, "grad_norm": 1.3098255750598122, "learning_rate": 1.0972972972972974e-05, "loss": 0.5978, "step": 203 }, { "epoch": 0.02, "grad_norm": 1.4020937292739093, "learning_rate": 1.102702702702703e-05, "loss": 0.6133, "step": 204 }, { "epoch": 0.02, "grad_norm": 1.3006764141808833, "learning_rate": 1.1081081081081081e-05, "loss": 0.5437, "step": 205 }, { "epoch": 0.02, "grad_norm": 1.3121807130049612, "learning_rate": 1.1135135135135135e-05, "loss": 0.5422, "step": 206 }, { "epoch": 0.02, "grad_norm": 1.1807616133678631, "learning_rate": 1.1189189189189189e-05, "loss": 0.551, "step": 207 }, { "epoch": 0.02, "grad_norm": 1.411361256781282, "learning_rate": 1.1243243243243245e-05, "loss": 0.594, "step": 208 }, { "epoch": 0.02, "grad_norm": 1.4437358882210654, "learning_rate": 1.1297297297297298e-05, "loss": 0.6544, "step": 209 }, { "epoch": 0.02, "grad_norm": 1.359156252589416, "learning_rate": 1.1351351351351352e-05, "loss": 0.6272, "step": 210 }, { "epoch": 0.02, "grad_norm": 1.29652118501433, "learning_rate": 1.1405405405405404e-05, "loss": 0.6323, "step": 211 }, { "epoch": 0.02, "grad_norm": 1.3097621304296205, "learning_rate": 1.1459459459459462e-05, "loss": 0.6287, "step": 212 }, { "epoch": 0.02, "grad_norm": 1.2169668382664947, "learning_rate": 1.1513513513513514e-05, "loss": 0.5423, "step": 213 }, { "epoch": 0.02, "grad_norm": 1.4669873874010895, "learning_rate": 1.1567567567567568e-05, "loss": 0.6193, "step": 214 }, { "epoch": 0.02, "grad_norm": 1.2954066617571018, "learning_rate": 1.1621621621621622e-05, "loss": 0.5883, "step": 215 }, { "epoch": 0.02, "grad_norm": 1.160631713695103, "learning_rate": 1.1675675675675677e-05, "loss": 0.5929, "step": 216 }, { "epoch": 0.02, "grad_norm": 1.34874266847612, "learning_rate": 1.1729729729729731e-05, "loss": 0.6387, "step": 217 }, { "epoch": 0.02, "grad_norm": 1.439317708700643, "learning_rate": 1.1783783783783785e-05, "loss": 0.5649, "step": 218 }, { "epoch": 0.02, "grad_norm": 1.502437313426451, "learning_rate": 1.1837837837837837e-05, "loss": 0.6514, "step": 219 }, { "epoch": 0.02, "grad_norm": 1.3337609956026828, "learning_rate": 1.1891891891891894e-05, "loss": 0.5162, "step": 220 }, { "epoch": 0.02, "grad_norm": 1.2481859007998901, "learning_rate": 1.1945945945945946e-05, "loss": 0.6038, "step": 221 }, { "epoch": 0.02, "grad_norm": 1.3067393315538185, "learning_rate": 1.2e-05, "loss": 0.5858, "step": 222 }, { "epoch": 0.02, "grad_norm": 1.5757684424914125, "learning_rate": 1.2054054054054054e-05, "loss": 0.6355, "step": 223 }, { "epoch": 0.02, "grad_norm": 1.5277737917360357, "learning_rate": 1.210810810810811e-05, "loss": 0.6498, "step": 224 }, { "epoch": 0.02, "grad_norm": 1.3038105939520328, "learning_rate": 1.2162162162162164e-05, "loss": 0.5666, "step": 225 }, { "epoch": 0.02, "grad_norm": 1.3439900623746683, "learning_rate": 1.2216216216216217e-05, "loss": 0.6461, "step": 226 }, { "epoch": 0.02, "grad_norm": 1.4499492648511714, "learning_rate": 1.227027027027027e-05, "loss": 0.5793, "step": 227 }, { "epoch": 0.02, "grad_norm": 1.1895262665587139, "learning_rate": 1.2324324324324327e-05, "loss": 0.615, "step": 228 }, { "epoch": 0.02, "grad_norm": 1.4385484255224155, "learning_rate": 1.2378378378378379e-05, "loss": 0.5863, "step": 229 }, { "epoch": 0.02, "grad_norm": 1.2568226128615418, "learning_rate": 1.2432432432432433e-05, "loss": 0.5336, "step": 230 }, { "epoch": 0.02, "grad_norm": 1.4055308076396922, "learning_rate": 1.2486486486486487e-05, "loss": 0.5927, "step": 231 }, { "epoch": 0.02, "grad_norm": 1.3604519277780607, "learning_rate": 1.2540540540540542e-05, "loss": 0.6365, "step": 232 }, { "epoch": 0.02, "grad_norm": 1.3976912250958944, "learning_rate": 1.2594594594594596e-05, "loss": 0.6016, "step": 233 }, { "epoch": 0.02, "grad_norm": 1.3870555370772695, "learning_rate": 1.264864864864865e-05, "loss": 0.6367, "step": 234 }, { "epoch": 0.02, "grad_norm": 1.1506464171651247, "learning_rate": 1.2702702702702702e-05, "loss": 0.3983, "step": 235 }, { "epoch": 0.02, "grad_norm": 1.105610354688429, "learning_rate": 1.2756756756756758e-05, "loss": 0.4861, "step": 236 }, { "epoch": 0.02, "grad_norm": 1.2922528747586577, "learning_rate": 1.2810810810810812e-05, "loss": 0.5429, "step": 237 }, { "epoch": 0.02, "grad_norm": 1.4117421331128992, "learning_rate": 1.2864864864864865e-05, "loss": 0.7149, "step": 238 }, { "epoch": 0.02, "grad_norm": 1.1561171907937822, "learning_rate": 1.291891891891892e-05, "loss": 0.5846, "step": 239 }, { "epoch": 0.02, "grad_norm": 1.2823597420925605, "learning_rate": 1.2972972972972975e-05, "loss": 0.6354, "step": 240 }, { "epoch": 0.02, "grad_norm": 1.331199921173987, "learning_rate": 1.3027027027027029e-05, "loss": 0.6929, "step": 241 }, { "epoch": 0.02, "grad_norm": 1.3632982831874763, "learning_rate": 1.3081081081081083e-05, "loss": 0.6142, "step": 242 }, { "epoch": 0.02, "grad_norm": 1.30032739139169, "learning_rate": 1.3135135135135135e-05, "loss": 0.6053, "step": 243 }, { "epoch": 0.02, "grad_norm": 1.2669107246418034, "learning_rate": 1.318918918918919e-05, "loss": 0.6165, "step": 244 }, { "epoch": 0.02, "grad_norm": 1.4024543498429707, "learning_rate": 1.3243243243243244e-05, "loss": 0.5811, "step": 245 }, { "epoch": 0.02, "grad_norm": 1.2282938927207665, "learning_rate": 1.3297297297297298e-05, "loss": 0.5824, "step": 246 }, { "epoch": 0.02, "grad_norm": 1.2230011147853819, "learning_rate": 1.3351351351351352e-05, "loss": 0.5773, "step": 247 }, { "epoch": 0.02, "grad_norm": 1.4405143504114788, "learning_rate": 1.3405405405405407e-05, "loss": 0.5856, "step": 248 }, { "epoch": 0.02, "grad_norm": 1.407496997500501, "learning_rate": 1.3459459459459461e-05, "loss": 0.6758, "step": 249 }, { "epoch": 0.02, "grad_norm": 1.3481381140295823, "learning_rate": 1.3513513513513515e-05, "loss": 0.6091, "step": 250 }, { "epoch": 0.02, "grad_norm": 1.3772848449020698, "learning_rate": 1.3567567567567567e-05, "loss": 0.6378, "step": 251 }, { "epoch": 0.02, "grad_norm": 1.3399199799458354, "learning_rate": 1.3621621621621623e-05, "loss": 0.6966, "step": 252 }, { "epoch": 0.02, "grad_norm": 1.418137767899028, "learning_rate": 1.3675675675675677e-05, "loss": 0.6543, "step": 253 }, { "epoch": 0.02, "grad_norm": 1.3872559456216722, "learning_rate": 1.372972972972973e-05, "loss": 0.6297, "step": 254 }, { "epoch": 0.02, "grad_norm": 1.4057293135350568, "learning_rate": 1.3783783783783784e-05, "loss": 0.6444, "step": 255 }, { "epoch": 0.02, "grad_norm": 1.2832126600113147, "learning_rate": 1.383783783783784e-05, "loss": 0.5735, "step": 256 }, { "epoch": 0.02, "grad_norm": 1.1665636697135973, "learning_rate": 1.3891891891891894e-05, "loss": 0.606, "step": 257 }, { "epoch": 0.02, "grad_norm": 1.2594265086154244, "learning_rate": 1.3945945945945946e-05, "loss": 0.5554, "step": 258 }, { "epoch": 0.02, "grad_norm": 1.2758336481579695, "learning_rate": 1.4e-05, "loss": 0.5898, "step": 259 }, { "epoch": 0.02, "grad_norm": 1.300264913132061, "learning_rate": 1.4054054054054055e-05, "loss": 0.6402, "step": 260 }, { "epoch": 0.02, "grad_norm": 1.3315419378572004, "learning_rate": 1.410810810810811e-05, "loss": 0.548, "step": 261 }, { "epoch": 0.02, "grad_norm": 1.219427993604917, "learning_rate": 1.4162162162162163e-05, "loss": 0.5622, "step": 262 }, { "epoch": 0.02, "grad_norm": 1.1897819364789977, "learning_rate": 1.4216216216216217e-05, "loss": 0.6132, "step": 263 }, { "epoch": 0.02, "grad_norm": 1.333457147167597, "learning_rate": 1.4270270270270272e-05, "loss": 0.6269, "step": 264 }, { "epoch": 0.02, "grad_norm": 1.1467620301405113, "learning_rate": 1.4324324324324326e-05, "loss": 0.5014, "step": 265 }, { "epoch": 0.02, "grad_norm": 1.2858858581704378, "learning_rate": 1.4378378378378378e-05, "loss": 0.6116, "step": 266 }, { "epoch": 0.02, "grad_norm": 1.425258049531029, "learning_rate": 1.4432432432432432e-05, "loss": 0.5601, "step": 267 }, { "epoch": 0.02, "grad_norm": 1.1845124721882538, "learning_rate": 1.4486486486486488e-05, "loss": 0.5994, "step": 268 }, { "epoch": 0.02, "grad_norm": 1.2324371046729103, "learning_rate": 1.4540540540540542e-05, "loss": 0.5734, "step": 269 }, { "epoch": 0.02, "grad_norm": 1.2291795518358182, "learning_rate": 1.4594594594594596e-05, "loss": 0.5702, "step": 270 }, { "epoch": 0.02, "grad_norm": 1.2848681461436653, "learning_rate": 1.464864864864865e-05, "loss": 0.6426, "step": 271 }, { "epoch": 0.02, "grad_norm": 1.3483627165006886, "learning_rate": 1.4702702702702705e-05, "loss": 0.6642, "step": 272 }, { "epoch": 0.02, "grad_norm": 1.4277089296969299, "learning_rate": 1.4756756756756759e-05, "loss": 0.5671, "step": 273 }, { "epoch": 0.02, "grad_norm": 1.3093511771695634, "learning_rate": 1.4810810810810811e-05, "loss": 0.5995, "step": 274 }, { "epoch": 0.02, "grad_norm": 1.2312759429949, "learning_rate": 1.4864864864864865e-05, "loss": 0.6446, "step": 275 }, { "epoch": 0.02, "grad_norm": 1.398767031842918, "learning_rate": 1.491891891891892e-05, "loss": 0.6307, "step": 276 }, { "epoch": 0.02, "grad_norm": 1.2914932022710714, "learning_rate": 1.4972972972972974e-05, "loss": 0.5712, "step": 277 }, { "epoch": 0.02, "grad_norm": 1.2827552834296592, "learning_rate": 1.5027027027027028e-05, "loss": 0.6311, "step": 278 }, { "epoch": 0.02, "grad_norm": 1.17125498981903, "learning_rate": 1.5081081081081082e-05, "loss": 0.5277, "step": 279 }, { "epoch": 0.02, "grad_norm": 1.2532330404418561, "learning_rate": 1.5135135135135138e-05, "loss": 0.6122, "step": 280 }, { "epoch": 0.02, "grad_norm": 1.1771467390533292, "learning_rate": 1.5189189189189191e-05, "loss": 0.5551, "step": 281 }, { "epoch": 0.02, "grad_norm": 1.3178622763909202, "learning_rate": 1.5243243243243244e-05, "loss": 0.6077, "step": 282 }, { "epoch": 0.02, "grad_norm": 1.2923685907272977, "learning_rate": 1.5297297297297297e-05, "loss": 0.5549, "step": 283 }, { "epoch": 0.02, "grad_norm": 1.2778488850748242, "learning_rate": 1.5351351351351353e-05, "loss": 0.6552, "step": 284 }, { "epoch": 0.02, "grad_norm": 1.8710331713669297, "learning_rate": 1.540540540540541e-05, "loss": 0.6568, "step": 285 }, { "epoch": 0.02, "grad_norm": 1.288199139989406, "learning_rate": 1.545945945945946e-05, "loss": 0.5902, "step": 286 }, { "epoch": 0.02, "grad_norm": 1.452547686281634, "learning_rate": 1.5513513513513513e-05, "loss": 0.591, "step": 287 }, { "epoch": 0.02, "grad_norm": 1.2416649407225506, "learning_rate": 1.556756756756757e-05, "loss": 0.6087, "step": 288 }, { "epoch": 0.02, "grad_norm": 1.251564791733786, "learning_rate": 1.5621621621621624e-05, "loss": 0.6281, "step": 289 }, { "epoch": 0.02, "grad_norm": 1.2801045493742762, "learning_rate": 1.5675675675675676e-05, "loss": 0.5922, "step": 290 }, { "epoch": 0.02, "grad_norm": 1.2655122609805658, "learning_rate": 1.572972972972973e-05, "loss": 0.604, "step": 291 }, { "epoch": 0.02, "grad_norm": 1.1773901975659047, "learning_rate": 1.5783783783783787e-05, "loss": 0.5544, "step": 292 }, { "epoch": 0.02, "grad_norm": 1.1546196570757032, "learning_rate": 1.583783783783784e-05, "loss": 0.5362, "step": 293 }, { "epoch": 0.02, "grad_norm": 1.3076851584325124, "learning_rate": 1.589189189189189e-05, "loss": 0.5621, "step": 294 }, { "epoch": 0.02, "grad_norm": 1.2826279531334102, "learning_rate": 1.5945945945945947e-05, "loss": 0.5728, "step": 295 }, { "epoch": 0.02, "grad_norm": 1.282623769972093, "learning_rate": 1.6000000000000003e-05, "loss": 0.5627, "step": 296 }, { "epoch": 0.02, "grad_norm": 1.3408958446169106, "learning_rate": 1.6054054054054055e-05, "loss": 0.611, "step": 297 }, { "epoch": 0.02, "grad_norm": 1.2348334073578877, "learning_rate": 1.610810810810811e-05, "loss": 0.6068, "step": 298 }, { "epoch": 0.02, "grad_norm": 1.2975389815390468, "learning_rate": 1.6162162162162163e-05, "loss": 0.6597, "step": 299 }, { "epoch": 0.02, "grad_norm": 1.2096404489865318, "learning_rate": 1.6216216216216218e-05, "loss": 0.5774, "step": 300 }, { "epoch": 0.02, "grad_norm": 1.1813743688009573, "learning_rate": 1.6270270270270274e-05, "loss": 0.5426, "step": 301 }, { "epoch": 0.02, "grad_norm": 1.1681240565989068, "learning_rate": 1.6324324324324326e-05, "loss": 0.5872, "step": 302 }, { "epoch": 0.02, "grad_norm": 1.3077141773660117, "learning_rate": 1.6378378378378378e-05, "loss": 0.5684, "step": 303 }, { "epoch": 0.02, "grad_norm": 1.302528145958617, "learning_rate": 1.6432432432432434e-05, "loss": 0.6124, "step": 304 }, { "epoch": 0.02, "grad_norm": 1.0758705697342368, "learning_rate": 1.648648648648649e-05, "loss": 0.6116, "step": 305 }, { "epoch": 0.02, "grad_norm": 1.4131860721941674, "learning_rate": 1.654054054054054e-05, "loss": 0.609, "step": 306 }, { "epoch": 0.02, "grad_norm": 1.2851791287758079, "learning_rate": 1.6594594594594597e-05, "loss": 0.6528, "step": 307 }, { "epoch": 0.03, "grad_norm": 1.366338400597577, "learning_rate": 1.6648648648648652e-05, "loss": 0.6228, "step": 308 }, { "epoch": 0.03, "grad_norm": 1.497776907617428, "learning_rate": 1.6702702702702704e-05, "loss": 0.6945, "step": 309 }, { "epoch": 0.03, "grad_norm": 1.1602743781779306, "learning_rate": 1.6756756756756757e-05, "loss": 0.5984, "step": 310 }, { "epoch": 0.03, "grad_norm": 1.4018561667642373, "learning_rate": 1.6810810810810812e-05, "loss": 0.6094, "step": 311 }, { "epoch": 0.03, "grad_norm": 1.4040754275179608, "learning_rate": 1.6864864864864868e-05, "loss": 0.6365, "step": 312 }, { "epoch": 0.03, "grad_norm": 1.2110215218082543, "learning_rate": 1.691891891891892e-05, "loss": 0.5932, "step": 313 }, { "epoch": 0.03, "grad_norm": 1.2478904633212702, "learning_rate": 1.6972972972972975e-05, "loss": 0.5906, "step": 314 }, { "epoch": 0.03, "grad_norm": 1.219674052199782, "learning_rate": 1.7027027027027028e-05, "loss": 0.5978, "step": 315 }, { "epoch": 0.03, "grad_norm": 1.1334469527342586, "learning_rate": 1.7081081081081083e-05, "loss": 0.3721, "step": 316 }, { "epoch": 0.03, "grad_norm": 1.2986498696320505, "learning_rate": 1.7135135135135135e-05, "loss": 0.6508, "step": 317 }, { "epoch": 0.03, "grad_norm": 1.432617075086687, "learning_rate": 1.718918918918919e-05, "loss": 0.6134, "step": 318 }, { "epoch": 0.03, "grad_norm": 1.3291322605886884, "learning_rate": 1.7243243243243243e-05, "loss": 0.5531, "step": 319 }, { "epoch": 0.03, "grad_norm": 1.250068899737447, "learning_rate": 1.72972972972973e-05, "loss": 0.6449, "step": 320 }, { "epoch": 0.03, "grad_norm": 1.2273279396721362, "learning_rate": 1.7351351351351354e-05, "loss": 0.63, "step": 321 }, { "epoch": 0.03, "grad_norm": 1.288258537341939, "learning_rate": 1.7405405405405406e-05, "loss": 0.614, "step": 322 }, { "epoch": 0.03, "grad_norm": 1.207319610961464, "learning_rate": 1.745945945945946e-05, "loss": 0.6219, "step": 323 }, { "epoch": 0.03, "grad_norm": 1.1602660996710994, "learning_rate": 1.7513513513513517e-05, "loss": 0.5795, "step": 324 }, { "epoch": 0.03, "grad_norm": 1.3070055487401167, "learning_rate": 1.756756756756757e-05, "loss": 0.7466, "step": 325 }, { "epoch": 0.03, "grad_norm": 1.1048493267116712, "learning_rate": 1.7621621621621622e-05, "loss": 0.5529, "step": 326 }, { "epoch": 0.03, "grad_norm": 1.2579563285152189, "learning_rate": 1.7675675675675677e-05, "loss": 0.6631, "step": 327 }, { "epoch": 0.03, "grad_norm": 1.3235404147237428, "learning_rate": 1.7729729729729733e-05, "loss": 0.6216, "step": 328 }, { "epoch": 0.03, "grad_norm": 1.1827495846841425, "learning_rate": 1.7783783783783785e-05, "loss": 0.6543, "step": 329 }, { "epoch": 0.03, "grad_norm": 1.2601381738043989, "learning_rate": 1.783783783783784e-05, "loss": 0.6269, "step": 330 }, { "epoch": 0.03, "grad_norm": 1.2430301530941215, "learning_rate": 1.7891891891891893e-05, "loss": 0.5677, "step": 331 }, { "epoch": 0.03, "grad_norm": 1.2692760414700273, "learning_rate": 1.7945945945945948e-05, "loss": 0.6412, "step": 332 }, { "epoch": 0.03, "grad_norm": 1.2001150074501838, "learning_rate": 1.8e-05, "loss": 0.5676, "step": 333 }, { "epoch": 0.03, "grad_norm": 1.3075487794158436, "learning_rate": 1.8054054054054056e-05, "loss": 0.6472, "step": 334 }, { "epoch": 0.03, "grad_norm": 1.456133317177465, "learning_rate": 1.8108108108108108e-05, "loss": 0.6338, "step": 335 }, { "epoch": 0.03, "grad_norm": 1.1939888671400902, "learning_rate": 1.8162162162162164e-05, "loss": 0.6184, "step": 336 }, { "epoch": 0.03, "grad_norm": 1.1981881909785101, "learning_rate": 1.821621621621622e-05, "loss": 0.5323, "step": 337 }, { "epoch": 0.03, "grad_norm": 1.20091981296182, "learning_rate": 1.827027027027027e-05, "loss": 0.618, "step": 338 }, { "epoch": 0.03, "grad_norm": 1.41989949023168, "learning_rate": 1.8324324324324324e-05, "loss": 0.6336, "step": 339 }, { "epoch": 0.03, "grad_norm": 1.3139662867417505, "learning_rate": 1.8378378378378383e-05, "loss": 0.5965, "step": 340 }, { "epoch": 0.03, "grad_norm": 1.333158999654752, "learning_rate": 1.8432432432432435e-05, "loss": 0.6264, "step": 341 }, { "epoch": 0.03, "grad_norm": 1.4531220569740995, "learning_rate": 1.8486486486486487e-05, "loss": 0.5439, "step": 342 }, { "epoch": 0.03, "grad_norm": 1.3131403063055023, "learning_rate": 1.8540540540540542e-05, "loss": 0.6107, "step": 343 }, { "epoch": 0.03, "grad_norm": 1.3701905301886614, "learning_rate": 1.8594594594594598e-05, "loss": 0.6123, "step": 344 }, { "epoch": 0.03, "grad_norm": 1.3653791912501723, "learning_rate": 1.864864864864865e-05, "loss": 0.6613, "step": 345 }, { "epoch": 0.03, "grad_norm": 1.1142972006714638, "learning_rate": 1.8702702702702706e-05, "loss": 0.5526, "step": 346 }, { "epoch": 0.03, "grad_norm": 1.4299690140309247, "learning_rate": 1.8756756756756758e-05, "loss": 0.6966, "step": 347 }, { "epoch": 0.03, "grad_norm": 1.2593444176121449, "learning_rate": 1.8810810810810813e-05, "loss": 0.5774, "step": 348 }, { "epoch": 0.03, "grad_norm": 1.1765124465821895, "learning_rate": 1.8864864864864866e-05, "loss": 0.6137, "step": 349 }, { "epoch": 0.03, "grad_norm": 1.3065008361798192, "learning_rate": 1.891891891891892e-05, "loss": 0.5976, "step": 350 }, { "epoch": 0.03, "grad_norm": 1.4087498976721118, "learning_rate": 1.8972972972972973e-05, "loss": 0.6289, "step": 351 }, { "epoch": 0.03, "grad_norm": 1.2341841476607793, "learning_rate": 1.902702702702703e-05, "loss": 0.6139, "step": 352 }, { "epoch": 0.03, "grad_norm": 1.328079281513009, "learning_rate": 1.9081081081081084e-05, "loss": 0.6233, "step": 353 }, { "epoch": 0.03, "grad_norm": 1.2477408998174193, "learning_rate": 1.9135135135135137e-05, "loss": 0.6108, "step": 354 }, { "epoch": 0.03, "grad_norm": 1.2816918475775614, "learning_rate": 1.918918918918919e-05, "loss": 0.6245, "step": 355 }, { "epoch": 0.03, "grad_norm": 1.2016024948487902, "learning_rate": 1.9243243243243244e-05, "loss": 0.5771, "step": 356 }, { "epoch": 0.03, "grad_norm": 1.217828445501427, "learning_rate": 1.92972972972973e-05, "loss": 0.578, "step": 357 }, { "epoch": 0.03, "grad_norm": 1.9731305095764395, "learning_rate": 1.9351351351351352e-05, "loss": 0.4656, "step": 358 }, { "epoch": 0.03, "grad_norm": 1.3567485383775508, "learning_rate": 1.9405405405405408e-05, "loss": 0.6423, "step": 359 }, { "epoch": 0.03, "grad_norm": 1.5361091360932055, "learning_rate": 1.9459459459459463e-05, "loss": 0.6508, "step": 360 }, { "epoch": 0.03, "grad_norm": 1.1645320502210965, "learning_rate": 1.9513513513513515e-05, "loss": 0.5934, "step": 361 }, { "epoch": 0.03, "grad_norm": 1.0958112327792273, "learning_rate": 1.956756756756757e-05, "loss": 0.575, "step": 362 }, { "epoch": 0.03, "grad_norm": 1.3740750676813598, "learning_rate": 1.9621621621621623e-05, "loss": 0.6114, "step": 363 }, { "epoch": 0.03, "grad_norm": 1.262910997447116, "learning_rate": 1.967567567567568e-05, "loss": 0.6542, "step": 364 }, { "epoch": 0.03, "grad_norm": 1.3570416080679524, "learning_rate": 1.972972972972973e-05, "loss": 0.6802, "step": 365 }, { "epoch": 0.03, "grad_norm": 1.1127799363489526, "learning_rate": 1.9783783783783786e-05, "loss": 0.5859, "step": 366 }, { "epoch": 0.03, "grad_norm": 1.2463121588632178, "learning_rate": 1.983783783783784e-05, "loss": 0.6215, "step": 367 }, { "epoch": 0.03, "grad_norm": 1.2379252903825178, "learning_rate": 1.9891891891891894e-05, "loss": 0.6024, "step": 368 }, { "epoch": 0.03, "grad_norm": 1.1412956122460987, "learning_rate": 1.994594594594595e-05, "loss": 0.5442, "step": 369 }, { "epoch": 0.03, "grad_norm": 1.1359940245417317, "learning_rate": 2e-05, "loss": 0.6442, "step": 370 }, { "epoch": 0.03, "grad_norm": 1.1800301173417773, "learning_rate": 1.9999999653504437e-05, "loss": 0.6515, "step": 371 }, { "epoch": 0.03, "grad_norm": 1.2781092218781143, "learning_rate": 1.9999998614017768e-05, "loss": 0.6085, "step": 372 }, { "epoch": 0.03, "grad_norm": 1.172788213355845, "learning_rate": 1.999999688154006e-05, "loss": 0.5846, "step": 373 }, { "epoch": 0.03, "grad_norm": 1.2016265117946634, "learning_rate": 1.999999445607144e-05, "loss": 0.6501, "step": 374 }, { "epoch": 0.03, "grad_norm": 1.1201465902325103, "learning_rate": 1.9999991337612076e-05, "loss": 0.6075, "step": 375 }, { "epoch": 0.03, "grad_norm": 1.27326710987819, "learning_rate": 1.9999987526162182e-05, "loss": 0.7154, "step": 376 }, { "epoch": 0.03, "grad_norm": 1.2675834152794216, "learning_rate": 1.9999983021722023e-05, "loss": 0.6373, "step": 377 }, { "epoch": 0.03, "grad_norm": 1.3527705851821266, "learning_rate": 1.999997782429191e-05, "loss": 0.6465, "step": 378 }, { "epoch": 0.03, "grad_norm": 1.258748188700293, "learning_rate": 1.99999719338722e-05, "loss": 0.6348, "step": 379 }, { "epoch": 0.03, "grad_norm": 1.213322715499661, "learning_rate": 1.999996535046331e-05, "loss": 0.6418, "step": 380 }, { "epoch": 0.03, "grad_norm": 1.3321652569073361, "learning_rate": 1.999995807406569e-05, "loss": 0.722, "step": 381 }, { "epoch": 0.03, "grad_norm": 1.245916973083892, "learning_rate": 1.9999950104679847e-05, "loss": 0.6258, "step": 382 }, { "epoch": 0.03, "grad_norm": 1.1799491737624703, "learning_rate": 1.9999941442306328e-05, "loss": 0.6077, "step": 383 }, { "epoch": 0.03, "grad_norm": 1.187391126708189, "learning_rate": 1.9999932086945735e-05, "loss": 0.641, "step": 384 }, { "epoch": 0.03, "grad_norm": 1.2541712864332293, "learning_rate": 1.9999922038598724e-05, "loss": 0.5921, "step": 385 }, { "epoch": 0.03, "grad_norm": 1.316135226338434, "learning_rate": 1.9999911297265987e-05, "loss": 0.5714, "step": 386 }, { "epoch": 0.03, "grad_norm": 1.2810307718824059, "learning_rate": 1.999989986294826e-05, "loss": 0.612, "step": 387 }, { "epoch": 0.03, "grad_norm": 1.2079749802503408, "learning_rate": 1.999988773564635e-05, "loss": 0.6461, "step": 388 }, { "epoch": 0.03, "grad_norm": 1.221100438003557, "learning_rate": 1.9999874915361083e-05, "loss": 0.5395, "step": 389 }, { "epoch": 0.03, "grad_norm": 1.2321815633471245, "learning_rate": 1.999986140209336e-05, "loss": 0.626, "step": 390 }, { "epoch": 0.03, "grad_norm": 1.2602679527635106, "learning_rate": 1.9999847195844104e-05, "loss": 0.6293, "step": 391 }, { "epoch": 0.03, "grad_norm": 1.1795010152484604, "learning_rate": 1.999983229661431e-05, "loss": 0.5701, "step": 392 }, { "epoch": 0.03, "grad_norm": 1.2089494729842096, "learning_rate": 1.999981670440501e-05, "loss": 0.6006, "step": 393 }, { "epoch": 0.03, "grad_norm": 1.3261960086679885, "learning_rate": 1.9999800419217285e-05, "loss": 0.5857, "step": 394 }, { "epoch": 0.03, "grad_norm": 1.258321316496881, "learning_rate": 1.999978344105226e-05, "loss": 0.6662, "step": 395 }, { "epoch": 0.03, "grad_norm": 1.1705754710200273, "learning_rate": 1.9999765769911108e-05, "loss": 0.647, "step": 396 }, { "epoch": 0.03, "grad_norm": 1.1891364177409702, "learning_rate": 1.9999747405795057e-05, "loss": 0.5929, "step": 397 }, { "epoch": 0.03, "grad_norm": 1.2102667886877647, "learning_rate": 1.9999728348705386e-05, "loss": 0.6013, "step": 398 }, { "epoch": 0.03, "grad_norm": 1.2641137026960239, "learning_rate": 1.9999708598643405e-05, "loss": 0.6163, "step": 399 }, { "epoch": 0.03, "grad_norm": 1.2937634579523318, "learning_rate": 1.999968815561049e-05, "loss": 0.6061, "step": 400 }, { "epoch": 0.03, "grad_norm": 1.3553502126852537, "learning_rate": 1.9999667019608058e-05, "loss": 0.6649, "step": 401 }, { "epoch": 0.03, "grad_norm": 1.1871908937053395, "learning_rate": 1.999964519063757e-05, "loss": 0.5435, "step": 402 }, { "epoch": 0.03, "grad_norm": 1.2660823944720954, "learning_rate": 1.999962266870054e-05, "loss": 0.6907, "step": 403 }, { "epoch": 0.03, "grad_norm": 1.2083772436641733, "learning_rate": 1.9999599453798523e-05, "loss": 0.6537, "step": 404 }, { "epoch": 0.03, "grad_norm": 1.1642945571775338, "learning_rate": 1.999957554593314e-05, "loss": 0.6076, "step": 405 }, { "epoch": 0.03, "grad_norm": 1.2599482071313974, "learning_rate": 1.9999550945106038e-05, "loss": 0.6471, "step": 406 }, { "epoch": 0.03, "grad_norm": 1.3562495151348162, "learning_rate": 1.999952565131893e-05, "loss": 0.7162, "step": 407 }, { "epoch": 0.03, "grad_norm": 1.0991869227445243, "learning_rate": 1.999949966457356e-05, "loss": 0.6373, "step": 408 }, { "epoch": 0.03, "grad_norm": 1.4761791840477894, "learning_rate": 1.9999472984871734e-05, "loss": 0.681, "step": 409 }, { "epoch": 0.03, "grad_norm": 1.2778912175636392, "learning_rate": 1.99994456122153e-05, "loss": 0.6335, "step": 410 }, { "epoch": 0.03, "grad_norm": 1.3259903218416293, "learning_rate": 1.9999417546606153e-05, "loss": 0.6059, "step": 411 }, { "epoch": 0.03, "grad_norm": 1.30223892918819, "learning_rate": 1.9999388788046238e-05, "loss": 0.6614, "step": 412 }, { "epoch": 0.03, "grad_norm": 1.2694327949012507, "learning_rate": 1.9999359336537552e-05, "loss": 0.5949, "step": 413 }, { "epoch": 0.03, "grad_norm": 1.2617795135910619, "learning_rate": 1.9999329192082132e-05, "loss": 0.6396, "step": 414 }, { "epoch": 0.03, "grad_norm": 1.2451258095266837, "learning_rate": 1.999929835468207e-05, "loss": 0.6391, "step": 415 }, { "epoch": 0.03, "grad_norm": 1.276674414464419, "learning_rate": 1.9999266824339502e-05, "loss": 0.7109, "step": 416 }, { "epoch": 0.03, "grad_norm": 1.3589387839394185, "learning_rate": 1.999923460105661e-05, "loss": 0.6363, "step": 417 }, { "epoch": 0.03, "grad_norm": 1.2702756200783802, "learning_rate": 1.999920168483563e-05, "loss": 0.6659, "step": 418 }, { "epoch": 0.03, "grad_norm": 1.2708618223956263, "learning_rate": 1.9999168075678842e-05, "loss": 0.6351, "step": 419 }, { "epoch": 0.03, "grad_norm": 1.2391455379672414, "learning_rate": 1.999913377358858e-05, "loss": 0.6541, "step": 420 }, { "epoch": 0.03, "grad_norm": 1.1599886274827143, "learning_rate": 1.999909877856721e-05, "loss": 0.665, "step": 421 }, { "epoch": 0.03, "grad_norm": 1.288184833339696, "learning_rate": 1.999906309061717e-05, "loss": 0.6596, "step": 422 }, { "epoch": 0.03, "grad_norm": 1.2045342383158713, "learning_rate": 1.999902670974092e-05, "loss": 0.6208, "step": 423 }, { "epoch": 0.03, "grad_norm": 1.1790516976577494, "learning_rate": 1.9998989635940996e-05, "loss": 0.6316, "step": 424 }, { "epoch": 0.03, "grad_norm": 1.1881008635767092, "learning_rate": 1.9998951869219954e-05, "loss": 0.6216, "step": 425 }, { "epoch": 0.03, "grad_norm": 1.2987033612753498, "learning_rate": 1.9998913409580418e-05, "loss": 0.641, "step": 426 }, { "epoch": 0.03, "grad_norm": 1.1570461964852865, "learning_rate": 1.9998874257025055e-05, "loss": 0.6407, "step": 427 }, { "epoch": 0.03, "grad_norm": 1.1249351566142112, "learning_rate": 1.999883441155657e-05, "loss": 0.6047, "step": 428 }, { "epoch": 0.03, "grad_norm": 1.1065769182994274, "learning_rate": 1.999879387317773e-05, "loss": 0.6333, "step": 429 }, { "epoch": 0.03, "grad_norm": 1.2437237331960347, "learning_rate": 1.9998752641891347e-05, "loss": 0.6552, "step": 430 }, { "epoch": 0.04, "grad_norm": 1.3090764868276303, "learning_rate": 1.9998710717700272e-05, "loss": 0.6934, "step": 431 }, { "epoch": 0.04, "grad_norm": 1.4015060478983412, "learning_rate": 1.9998668100607414e-05, "loss": 0.6688, "step": 432 }, { "epoch": 0.04, "grad_norm": 1.1350594119836854, "learning_rate": 1.9998624790615722e-05, "loss": 0.5912, "step": 433 }, { "epoch": 0.04, "grad_norm": 1.1184799612791112, "learning_rate": 1.9998580787728207e-05, "loss": 0.6357, "step": 434 }, { "epoch": 0.04, "grad_norm": 1.2121276795334905, "learning_rate": 1.9998536091947907e-05, "loss": 0.6295, "step": 435 }, { "epoch": 0.04, "grad_norm": 1.243908237449138, "learning_rate": 1.999849070327793e-05, "loss": 0.6512, "step": 436 }, { "epoch": 0.04, "grad_norm": 1.1423620436885662, "learning_rate": 1.9998444621721413e-05, "loss": 0.5638, "step": 437 }, { "epoch": 0.04, "grad_norm": 1.195628182158961, "learning_rate": 1.9998397847281548e-05, "loss": 0.6488, "step": 438 }, { "epoch": 0.04, "grad_norm": 1.245661950095741, "learning_rate": 1.9998350379961583e-05, "loss": 0.6223, "step": 439 }, { "epoch": 0.04, "grad_norm": 1.2069261086286653, "learning_rate": 1.9998302219764806e-05, "loss": 0.5926, "step": 440 }, { "epoch": 0.04, "grad_norm": 1.1260911895558972, "learning_rate": 1.9998253366694555e-05, "loss": 0.6861, "step": 441 }, { "epoch": 0.04, "grad_norm": 1.1454069464798424, "learning_rate": 1.9998203820754213e-05, "loss": 0.6685, "step": 442 }, { "epoch": 0.04, "grad_norm": 1.1441632874889192, "learning_rate": 1.9998153581947216e-05, "loss": 0.5732, "step": 443 }, { "epoch": 0.04, "grad_norm": 1.2796985229602802, "learning_rate": 1.9998102650277046e-05, "loss": 0.6827, "step": 444 }, { "epoch": 0.04, "grad_norm": 1.259238092044345, "learning_rate": 1.9998051025747223e-05, "loss": 0.6325, "step": 445 }, { "epoch": 0.04, "grad_norm": 1.1999964484464798, "learning_rate": 1.999799870836134e-05, "loss": 0.6663, "step": 446 }, { "epoch": 0.04, "grad_norm": 1.2378447884686448, "learning_rate": 1.999794569812301e-05, "loss": 0.6847, "step": 447 }, { "epoch": 0.04, "grad_norm": 1.180576325266745, "learning_rate": 1.9997891995035914e-05, "loss": 0.6204, "step": 448 }, { "epoch": 0.04, "grad_norm": 1.1858681540882434, "learning_rate": 1.9997837599103772e-05, "loss": 0.6072, "step": 449 }, { "epoch": 0.04, "grad_norm": 1.1677804559635043, "learning_rate": 1.9997782510330352e-05, "loss": 0.6739, "step": 450 }, { "epoch": 0.04, "grad_norm": 1.1158809705378172, "learning_rate": 1.9997726728719468e-05, "loss": 0.5778, "step": 451 }, { "epoch": 0.04, "grad_norm": 1.2044311500249878, "learning_rate": 1.9997670254274992e-05, "loss": 0.6658, "step": 452 }, { "epoch": 0.04, "grad_norm": 1.1467332394365353, "learning_rate": 1.9997613087000833e-05, "loss": 0.6158, "step": 453 }, { "epoch": 0.04, "grad_norm": 1.1366647161281065, "learning_rate": 1.9997555226900957e-05, "loss": 0.6127, "step": 454 }, { "epoch": 0.04, "grad_norm": 1.3568063589428707, "learning_rate": 1.9997496673979375e-05, "loss": 0.6049, "step": 455 }, { "epoch": 0.04, "grad_norm": 1.2692474254544384, "learning_rate": 1.9997437428240136e-05, "loss": 0.6066, "step": 456 }, { "epoch": 0.04, "grad_norm": 1.359777571955867, "learning_rate": 1.999737748968735e-05, "loss": 0.7016, "step": 457 }, { "epoch": 0.04, "grad_norm": 1.260913107073566, "learning_rate": 1.9997316858325177e-05, "loss": 0.6022, "step": 458 }, { "epoch": 0.04, "grad_norm": 1.2523330977441332, "learning_rate": 1.9997255534157814e-05, "loss": 0.6213, "step": 459 }, { "epoch": 0.04, "grad_norm": 1.1390397288959015, "learning_rate": 1.9997193517189505e-05, "loss": 0.6717, "step": 460 }, { "epoch": 0.04, "grad_norm": 1.3578840331768134, "learning_rate": 1.9997130807424556e-05, "loss": 0.6383, "step": 461 }, { "epoch": 0.04, "grad_norm": 1.441823833716857, "learning_rate": 1.999706740486731e-05, "loss": 0.5689, "step": 462 }, { "epoch": 0.04, "grad_norm": 1.2290512975605155, "learning_rate": 1.999700330952216e-05, "loss": 0.5764, "step": 463 }, { "epoch": 0.04, "grad_norm": 1.1972836304503411, "learning_rate": 1.9996938521393542e-05, "loss": 0.6016, "step": 464 }, { "epoch": 0.04, "grad_norm": 1.190460722585969, "learning_rate": 1.9996873040485957e-05, "loss": 0.6477, "step": 465 }, { "epoch": 0.04, "grad_norm": 1.1123058896187992, "learning_rate": 1.9996806866803937e-05, "loss": 0.615, "step": 466 }, { "epoch": 0.04, "grad_norm": 1.0935102246732809, "learning_rate": 1.9996740000352068e-05, "loss": 0.5393, "step": 467 }, { "epoch": 0.04, "grad_norm": 1.2682103650008913, "learning_rate": 1.9996672441134987e-05, "loss": 0.7129, "step": 468 }, { "epoch": 0.04, "grad_norm": 1.1267973119694383, "learning_rate": 1.999660418915737e-05, "loss": 0.5966, "step": 469 }, { "epoch": 0.04, "grad_norm": 1.2916713629818193, "learning_rate": 1.9996535244423947e-05, "loss": 0.5527, "step": 470 }, { "epoch": 0.04, "grad_norm": 1.1237915877887323, "learning_rate": 1.99964656069395e-05, "loss": 0.5915, "step": 471 }, { "epoch": 0.04, "grad_norm": 1.2113708220129957, "learning_rate": 1.9996395276708856e-05, "loss": 0.6178, "step": 472 }, { "epoch": 0.04, "grad_norm": 1.2780784931519782, "learning_rate": 1.9996324253736884e-05, "loss": 0.6967, "step": 473 }, { "epoch": 0.04, "grad_norm": 1.2129306566189202, "learning_rate": 1.999625253802851e-05, "loss": 0.6319, "step": 474 }, { "epoch": 0.04, "grad_norm": 1.2176291595097406, "learning_rate": 1.99961801295887e-05, "loss": 0.602, "step": 475 }, { "epoch": 0.04, "grad_norm": 1.1928293808417556, "learning_rate": 1.9996107028422474e-05, "loss": 0.6077, "step": 476 }, { "epoch": 0.04, "grad_norm": 1.4041430903030365, "learning_rate": 1.9996033234534895e-05, "loss": 0.6844, "step": 477 }, { "epoch": 0.04, "grad_norm": 1.1082709009194491, "learning_rate": 1.9995958747931083e-05, "loss": 0.6288, "step": 478 }, { "epoch": 0.04, "grad_norm": 1.2591220583252387, "learning_rate": 1.9995883568616195e-05, "loss": 0.624, "step": 479 }, { "epoch": 0.04, "grad_norm": 1.1472894687965085, "learning_rate": 1.9995807696595442e-05, "loss": 0.6267, "step": 480 }, { "epoch": 0.04, "grad_norm": 1.282837857275939, "learning_rate": 1.9995731131874082e-05, "loss": 0.6904, "step": 481 }, { "epoch": 0.04, "grad_norm": 1.2078256390579016, "learning_rate": 1.9995653874457418e-05, "loss": 0.6489, "step": 482 }, { "epoch": 0.04, "grad_norm": 1.6494002270659436, "learning_rate": 1.9995575924350813e-05, "loss": 0.6999, "step": 483 }, { "epoch": 0.04, "grad_norm": 1.2895378872657652, "learning_rate": 1.9995497281559658e-05, "loss": 0.689, "step": 484 }, { "epoch": 0.04, "grad_norm": 1.350853543232127, "learning_rate": 1.9995417946089407e-05, "loss": 0.6829, "step": 485 }, { "epoch": 0.04, "grad_norm": 1.1943843703640955, "learning_rate": 1.999533791794556e-05, "loss": 0.6556, "step": 486 }, { "epoch": 0.04, "grad_norm": 1.3439418508960037, "learning_rate": 1.999525719713366e-05, "loss": 0.4694, "step": 487 }, { "epoch": 0.04, "grad_norm": 1.5174981490776334, "learning_rate": 1.9995175783659304e-05, "loss": 0.5545, "step": 488 }, { "epoch": 0.04, "grad_norm": 1.1823217060352558, "learning_rate": 1.999509367752813e-05, "loss": 0.58, "step": 489 }, { "epoch": 0.04, "grad_norm": 1.3439366064000626, "learning_rate": 1.999501087874583e-05, "loss": 0.6784, "step": 490 }, { "epoch": 0.04, "grad_norm": 1.1052186698208823, "learning_rate": 1.9994927387318142e-05, "loss": 0.6275, "step": 491 }, { "epoch": 0.04, "grad_norm": 1.1759748514990198, "learning_rate": 1.9994843203250853e-05, "loss": 0.6861, "step": 492 }, { "epoch": 0.04, "grad_norm": 1.3358621470858025, "learning_rate": 1.9994758326549794e-05, "loss": 0.6388, "step": 493 }, { "epoch": 0.04, "grad_norm": 1.2111472045537026, "learning_rate": 1.9994672757220845e-05, "loss": 0.5908, "step": 494 }, { "epoch": 0.04, "grad_norm": 1.3290851723488153, "learning_rate": 1.9994586495269944e-05, "loss": 0.6672, "step": 495 }, { "epoch": 0.04, "grad_norm": 1.1800279999404348, "learning_rate": 1.9994499540703062e-05, "loss": 0.6758, "step": 496 }, { "epoch": 0.04, "grad_norm": 1.187891209226948, "learning_rate": 1.9994411893526226e-05, "loss": 0.699, "step": 497 }, { "epoch": 0.04, "grad_norm": 1.4327628101310725, "learning_rate": 1.9994323553745515e-05, "loss": 0.4593, "step": 498 }, { "epoch": 0.04, "grad_norm": 1.2353310670768305, "learning_rate": 1.9994234521367043e-05, "loss": 0.6498, "step": 499 }, { "epoch": 0.04, "grad_norm": 1.217220834744078, "learning_rate": 1.9994144796396985e-05, "loss": 0.637, "step": 500 }, { "epoch": 0.04, "grad_norm": 1.1012119323943392, "learning_rate": 1.9994054378841557e-05, "loss": 0.5268, "step": 501 }, { "epoch": 0.04, "grad_norm": 1.109010150374482, "learning_rate": 1.999396326870702e-05, "loss": 0.6481, "step": 502 }, { "epoch": 0.04, "grad_norm": 1.3104432477385424, "learning_rate": 1.99938714659997e-05, "loss": 0.6435, "step": 503 }, { "epoch": 0.04, "grad_norm": 1.0225947862526552, "learning_rate": 1.9993778970725953e-05, "loss": 0.5475, "step": 504 }, { "epoch": 0.04, "grad_norm": 1.1807483240933856, "learning_rate": 1.9993685782892184e-05, "loss": 0.6164, "step": 505 }, { "epoch": 0.04, "grad_norm": 1.1036461749929372, "learning_rate": 1.9993591902504854e-05, "loss": 0.6136, "step": 506 }, { "epoch": 0.04, "grad_norm": 1.2879988587362776, "learning_rate": 1.9993497329570473e-05, "loss": 0.6551, "step": 507 }, { "epoch": 0.04, "grad_norm": 1.1963291092976445, "learning_rate": 1.999340206409559e-05, "loss": 0.6794, "step": 508 }, { "epoch": 0.04, "grad_norm": 1.1047181103321142, "learning_rate": 1.9993306106086808e-05, "loss": 0.5978, "step": 509 }, { "epoch": 0.04, "grad_norm": 1.1150390655450892, "learning_rate": 1.9993209455550773e-05, "loss": 0.5817, "step": 510 }, { "epoch": 0.04, "grad_norm": 1.233783965559147, "learning_rate": 1.999311211249419e-05, "loss": 0.6413, "step": 511 }, { "epoch": 0.04, "grad_norm": 1.2297400715670368, "learning_rate": 1.9993014076923803e-05, "loss": 0.6877, "step": 512 }, { "epoch": 0.04, "grad_norm": 1.2022050419955803, "learning_rate": 1.9992915348846403e-05, "loss": 0.6793, "step": 513 }, { "epoch": 0.04, "grad_norm": 1.3398503987350763, "learning_rate": 1.9992815928268832e-05, "loss": 0.7163, "step": 514 }, { "epoch": 0.04, "grad_norm": 1.0694563998559343, "learning_rate": 1.999271581519798e-05, "loss": 0.6376, "step": 515 }, { "epoch": 0.04, "grad_norm": 1.0402449566393575, "learning_rate": 1.999261500964079e-05, "loss": 0.6029, "step": 516 }, { "epoch": 0.04, "grad_norm": 1.227880870571574, "learning_rate": 1.999251351160424e-05, "loss": 0.5998, "step": 517 }, { "epoch": 0.04, "grad_norm": 1.1310312614228673, "learning_rate": 1.9992411321095366e-05, "loss": 0.614, "step": 518 }, { "epoch": 0.04, "grad_norm": 1.0086715856563895, "learning_rate": 1.9992308438121253e-05, "loss": 0.5827, "step": 519 }, { "epoch": 0.04, "grad_norm": 1.240841306459145, "learning_rate": 1.999220486268903e-05, "loss": 0.5864, "step": 520 }, { "epoch": 0.04, "grad_norm": 1.1996928015356496, "learning_rate": 1.9992100594805866e-05, "loss": 0.6422, "step": 521 }, { "epoch": 0.04, "grad_norm": 1.4282724911086893, "learning_rate": 1.9991995634479e-05, "loss": 0.6399, "step": 522 }, { "epoch": 0.04, "grad_norm": 1.1202283620976718, "learning_rate": 1.9991889981715696e-05, "loss": 0.5842, "step": 523 }, { "epoch": 0.04, "grad_norm": 1.206185140830073, "learning_rate": 1.9991783636523282e-05, "loss": 0.6642, "step": 524 }, { "epoch": 0.04, "grad_norm": 1.090229502573641, "learning_rate": 1.9991676598909124e-05, "loss": 0.6654, "step": 525 }, { "epoch": 0.04, "grad_norm": 1.3103010963837258, "learning_rate": 1.999156886888064e-05, "loss": 0.6399, "step": 526 }, { "epoch": 0.04, "grad_norm": 1.243129736124134, "learning_rate": 1.9991460446445297e-05, "loss": 0.5981, "step": 527 }, { "epoch": 0.04, "grad_norm": 1.2535060220321637, "learning_rate": 1.9991351331610606e-05, "loss": 0.6845, "step": 528 }, { "epoch": 0.04, "grad_norm": 1.0397220665110378, "learning_rate": 1.999124152438413e-05, "loss": 0.6322, "step": 529 }, { "epoch": 0.04, "grad_norm": 1.119272269521874, "learning_rate": 1.9991131024773478e-05, "loss": 0.6278, "step": 530 }, { "epoch": 0.04, "grad_norm": 1.176775643440753, "learning_rate": 1.9991019832786308e-05, "loss": 0.625, "step": 531 }, { "epoch": 0.04, "grad_norm": 1.161217955544425, "learning_rate": 1.9990907948430327e-05, "loss": 0.6539, "step": 532 }, { "epoch": 0.04, "grad_norm": 1.1368426586875748, "learning_rate": 1.999079537171329e-05, "loss": 0.6275, "step": 533 }, { "epoch": 0.04, "grad_norm": 1.338855766945942, "learning_rate": 1.9990682102642987e-05, "loss": 0.6615, "step": 534 }, { "epoch": 0.04, "grad_norm": 1.1168425819506358, "learning_rate": 1.9990568141227284e-05, "loss": 0.6824, "step": 535 }, { "epoch": 0.04, "grad_norm": 1.1617301433270104, "learning_rate": 1.9990453487474067e-05, "loss": 0.6133, "step": 536 }, { "epoch": 0.04, "grad_norm": 1.1564343856241175, "learning_rate": 1.9990338141391284e-05, "loss": 0.6341, "step": 537 }, { "epoch": 0.04, "grad_norm": 1.2885256022165874, "learning_rate": 1.9990222102986935e-05, "loss": 0.6475, "step": 538 }, { "epoch": 0.04, "grad_norm": 1.2400580863381165, "learning_rate": 1.999010537226905e-05, "loss": 0.5882, "step": 539 }, { "epoch": 0.04, "grad_norm": 1.2885791998719807, "learning_rate": 1.9989987949245725e-05, "loss": 0.6527, "step": 540 }, { "epoch": 0.04, "grad_norm": 1.177901799429232, "learning_rate": 1.9989869833925094e-05, "loss": 0.6551, "step": 541 }, { "epoch": 0.04, "grad_norm": 1.7818280325226765, "learning_rate": 1.9989751026315347e-05, "loss": 0.723, "step": 542 }, { "epoch": 0.04, "grad_norm": 1.3058762683856173, "learning_rate": 1.9989631526424716e-05, "loss": 0.6873, "step": 543 }, { "epoch": 0.04, "grad_norm": 1.150498168071998, "learning_rate": 1.998951133426148e-05, "loss": 0.7088, "step": 544 }, { "epoch": 0.04, "grad_norm": 1.1648183483500902, "learning_rate": 1.9989390449833968e-05, "loss": 0.634, "step": 545 }, { "epoch": 0.04, "grad_norm": 1.242716598701499, "learning_rate": 1.998926887315056e-05, "loss": 0.6164, "step": 546 }, { "epoch": 0.04, "grad_norm": 1.1423813392922217, "learning_rate": 1.998914660421968e-05, "loss": 0.6478, "step": 547 }, { "epoch": 0.04, "grad_norm": 1.3749975258731517, "learning_rate": 1.99890236430498e-05, "loss": 0.618, "step": 548 }, { "epoch": 0.04, "grad_norm": 1.1455085795035111, "learning_rate": 1.9988899989649438e-05, "loss": 0.5982, "step": 549 }, { "epoch": 0.04, "grad_norm": 1.2543874159946773, "learning_rate": 1.9988775644027172e-05, "loss": 0.633, "step": 550 }, { "epoch": 0.04, "grad_norm": 1.174490222564184, "learning_rate": 1.9988650606191614e-05, "loss": 0.6266, "step": 551 }, { "epoch": 0.04, "grad_norm": 1.1832096627499953, "learning_rate": 1.9988524876151425e-05, "loss": 0.6144, "step": 552 }, { "epoch": 0.04, "grad_norm": 1.21550652390854, "learning_rate": 1.9988398453915322e-05, "loss": 0.659, "step": 553 }, { "epoch": 0.05, "grad_norm": 1.1543957372759528, "learning_rate": 1.998827133949207e-05, "loss": 0.6467, "step": 554 }, { "epoch": 0.05, "grad_norm": 1.0967112499183245, "learning_rate": 1.998814353289047e-05, "loss": 0.6141, "step": 555 }, { "epoch": 0.05, "grad_norm": 1.0267622312276812, "learning_rate": 1.9988015034119385e-05, "loss": 0.6064, "step": 556 }, { "epoch": 0.05, "grad_norm": 1.1774243726685896, "learning_rate": 1.9987885843187717e-05, "loss": 0.6174, "step": 557 }, { "epoch": 0.05, "grad_norm": 1.0625061356459073, "learning_rate": 1.9987755960104418e-05, "loss": 0.6006, "step": 558 }, { "epoch": 0.05, "grad_norm": 1.084560871180758, "learning_rate": 1.9987625384878493e-05, "loss": 0.6063, "step": 559 }, { "epoch": 0.05, "grad_norm": 1.1889027689490759, "learning_rate": 1.9987494117518986e-05, "loss": 0.6729, "step": 560 }, { "epoch": 0.05, "grad_norm": 1.1335356972818682, "learning_rate": 1.9987362158034996e-05, "loss": 0.6896, "step": 561 }, { "epoch": 0.05, "grad_norm": 1.4050654834507526, "learning_rate": 1.9987229506435666e-05, "loss": 0.5667, "step": 562 }, { "epoch": 0.05, "grad_norm": 1.1973334017795196, "learning_rate": 1.998709616273019e-05, "loss": 0.6441, "step": 563 }, { "epoch": 0.05, "grad_norm": 1.1948117517144539, "learning_rate": 1.998696212692781e-05, "loss": 0.7147, "step": 564 }, { "epoch": 0.05, "grad_norm": 1.1921590585442832, "learning_rate": 1.998682739903781e-05, "loss": 0.6434, "step": 565 }, { "epoch": 0.05, "grad_norm": 1.243769075857278, "learning_rate": 1.9986691979069532e-05, "loss": 0.6608, "step": 566 }, { "epoch": 0.05, "grad_norm": 1.268764077048501, "learning_rate": 1.9986555867032357e-05, "loss": 0.6149, "step": 567 }, { "epoch": 0.05, "grad_norm": 1.0901318091600034, "learning_rate": 1.998641906293572e-05, "loss": 0.6869, "step": 568 }, { "epoch": 0.05, "grad_norm": 1.1808960933093477, "learning_rate": 1.99862815667891e-05, "loss": 0.6034, "step": 569 }, { "epoch": 0.05, "grad_norm": 1.1064884659907313, "learning_rate": 1.9986143378602026e-05, "loss": 0.6145, "step": 570 }, { "epoch": 0.05, "grad_norm": 1.2629968487327867, "learning_rate": 1.998600449838407e-05, "loss": 0.6354, "step": 571 }, { "epoch": 0.05, "grad_norm": 1.1098239795799647, "learning_rate": 1.998586492614486e-05, "loss": 0.6446, "step": 572 }, { "epoch": 0.05, "grad_norm": 1.1172317251983954, "learning_rate": 1.998572466189407e-05, "loss": 0.6059, "step": 573 }, { "epoch": 0.05, "grad_norm": 1.1776210955533486, "learning_rate": 1.9985583705641418e-05, "loss": 0.6194, "step": 574 }, { "epoch": 0.05, "grad_norm": 1.1421240125909013, "learning_rate": 1.9985442057396675e-05, "loss": 0.5688, "step": 575 }, { "epoch": 0.05, "grad_norm": 1.3033078081535983, "learning_rate": 1.9985299717169654e-05, "loss": 0.6404, "step": 576 }, { "epoch": 0.05, "grad_norm": 1.1946419016655512, "learning_rate": 1.9985156684970214e-05, "loss": 0.577, "step": 577 }, { "epoch": 0.05, "grad_norm": 1.2188327655634794, "learning_rate": 1.9985012960808275e-05, "loss": 0.6336, "step": 578 }, { "epoch": 0.05, "grad_norm": 1.2471832078397793, "learning_rate": 1.9984868544693795e-05, "loss": 0.6152, "step": 579 }, { "epoch": 0.05, "grad_norm": 1.2827823919534669, "learning_rate": 1.9984723436636785e-05, "loss": 0.6819, "step": 580 }, { "epoch": 0.05, "grad_norm": 1.154753356121878, "learning_rate": 1.9984577636647292e-05, "loss": 0.6452, "step": 581 }, { "epoch": 0.05, "grad_norm": 1.2501698234663974, "learning_rate": 1.9984431144735426e-05, "loss": 0.6386, "step": 582 }, { "epoch": 0.05, "grad_norm": 1.3338990753512137, "learning_rate": 1.998428396091134e-05, "loss": 0.6606, "step": 583 }, { "epoch": 0.05, "grad_norm": 1.1873704742882054, "learning_rate": 1.9984136085185232e-05, "loss": 0.591, "step": 584 }, { "epoch": 0.05, "grad_norm": 1.166414930342365, "learning_rate": 1.9983987517567348e-05, "loss": 0.6735, "step": 585 }, { "epoch": 0.05, "grad_norm": 1.1362047762422045, "learning_rate": 1.998383825806799e-05, "loss": 0.5989, "step": 586 }, { "epoch": 0.05, "grad_norm": 1.1785883612942007, "learning_rate": 1.9983688306697488e-05, "loss": 0.6682, "step": 587 }, { "epoch": 0.05, "grad_norm": 1.1575253422468532, "learning_rate": 1.9983537663466244e-05, "loss": 0.5574, "step": 588 }, { "epoch": 0.05, "grad_norm": 1.2171303979561474, "learning_rate": 1.9983386328384696e-05, "loss": 0.6653, "step": 589 }, { "epoch": 0.05, "grad_norm": 1.1779477719716205, "learning_rate": 1.998323430146333e-05, "loss": 0.6468, "step": 590 }, { "epoch": 0.05, "grad_norm": 1.0520298733113895, "learning_rate": 1.9983081582712684e-05, "loss": 0.5669, "step": 591 }, { "epoch": 0.05, "grad_norm": 1.2413891317205863, "learning_rate": 1.9982928172143337e-05, "loss": 0.6552, "step": 592 }, { "epoch": 0.05, "grad_norm": 1.0805023983063284, "learning_rate": 1.9982774069765923e-05, "loss": 0.5755, "step": 593 }, { "epoch": 0.05, "grad_norm": 1.2462092352478134, "learning_rate": 1.9982619275591124e-05, "loss": 0.6539, "step": 594 }, { "epoch": 0.05, "grad_norm": 1.2311934123337847, "learning_rate": 1.998246378962966e-05, "loss": 0.6769, "step": 595 }, { "epoch": 0.05, "grad_norm": 1.2862807486957082, "learning_rate": 1.9982307611892314e-05, "loss": 0.7153, "step": 596 }, { "epoch": 0.05, "grad_norm": 1.1501963851498185, "learning_rate": 1.9982150742389897e-05, "loss": 0.586, "step": 597 }, { "epoch": 0.05, "grad_norm": 1.0185449240780169, "learning_rate": 1.9981993181133297e-05, "loss": 0.6049, "step": 598 }, { "epoch": 0.05, "grad_norm": 1.390711835950105, "learning_rate": 1.998183492813342e-05, "loss": 0.6899, "step": 599 }, { "epoch": 0.05, "grad_norm": 1.4417654309382693, "learning_rate": 1.9981675983401234e-05, "loss": 0.6118, "step": 600 }, { "epoch": 0.05, "grad_norm": 1.0392600350000416, "learning_rate": 1.9981516346947757e-05, "loss": 0.5494, "step": 601 }, { "epoch": 0.05, "grad_norm": 1.124642591279403, "learning_rate": 1.998135601878405e-05, "loss": 0.5698, "step": 602 }, { "epoch": 0.05, "grad_norm": 1.1218889954717481, "learning_rate": 1.9981194998921226e-05, "loss": 0.6356, "step": 603 }, { "epoch": 0.05, "grad_norm": 1.189137467170877, "learning_rate": 1.9981033287370443e-05, "loss": 0.6391, "step": 604 }, { "epoch": 0.05, "grad_norm": 1.1882587920699188, "learning_rate": 1.9980870884142906e-05, "loss": 0.6681, "step": 605 }, { "epoch": 0.05, "grad_norm": 1.255753276002731, "learning_rate": 1.9980707789249866e-05, "loss": 0.6781, "step": 606 }, { "epoch": 0.05, "grad_norm": 1.2023510062253613, "learning_rate": 1.9980544002702635e-05, "loss": 0.7198, "step": 607 }, { "epoch": 0.05, "grad_norm": 1.054688112742724, "learning_rate": 1.998037952451255e-05, "loss": 0.6138, "step": 608 }, { "epoch": 0.05, "grad_norm": 1.107499007509298, "learning_rate": 1.9980214354691022e-05, "loss": 0.5985, "step": 609 }, { "epoch": 0.05, "grad_norm": 1.0589520103441592, "learning_rate": 1.998004849324949e-05, "loss": 0.6205, "step": 610 }, { "epoch": 0.05, "grad_norm": 1.1975815219590236, "learning_rate": 1.997988194019945e-05, "loss": 0.5926, "step": 611 }, { "epoch": 0.05, "grad_norm": 1.2336481921456244, "learning_rate": 1.9979714695552444e-05, "loss": 0.6479, "step": 612 }, { "epoch": 0.05, "grad_norm": 1.0394534716312003, "learning_rate": 1.997954675932006e-05, "loss": 0.5713, "step": 613 }, { "epoch": 0.05, "grad_norm": 1.1298571096569328, "learning_rate": 1.997937813151394e-05, "loss": 0.606, "step": 614 }, { "epoch": 0.05, "grad_norm": 1.1594385853901574, "learning_rate": 1.9979208812145766e-05, "loss": 0.6047, "step": 615 }, { "epoch": 0.05, "grad_norm": 1.0480989117397763, "learning_rate": 1.9979038801227273e-05, "loss": 0.6042, "step": 616 }, { "epoch": 0.05, "grad_norm": 1.11743684858994, "learning_rate": 1.9978868098770244e-05, "loss": 0.629, "step": 617 }, { "epoch": 0.05, "grad_norm": 1.0789247877309214, "learning_rate": 1.9978696704786505e-05, "loss": 0.7054, "step": 618 }, { "epoch": 0.05, "grad_norm": 1.214704874693797, "learning_rate": 1.9978524619287937e-05, "loss": 0.6837, "step": 619 }, { "epoch": 0.05, "grad_norm": 1.2762010798985541, "learning_rate": 1.997835184228646e-05, "loss": 0.6524, "step": 620 }, { "epoch": 0.05, "grad_norm": 1.036839257758854, "learning_rate": 1.9978178373794055e-05, "loss": 0.6761, "step": 621 }, { "epoch": 0.05, "grad_norm": 1.0783286513719492, "learning_rate": 1.9978004213822736e-05, "loss": 0.6654, "step": 622 }, { "epoch": 0.05, "grad_norm": 1.2133745178374113, "learning_rate": 1.997782936238458e-05, "loss": 0.6946, "step": 623 }, { "epoch": 0.05, "grad_norm": 1.2098030928382038, "learning_rate": 1.9977653819491696e-05, "loss": 0.5995, "step": 624 }, { "epoch": 0.05, "grad_norm": 1.1151817187068271, "learning_rate": 1.9977477585156252e-05, "loss": 0.586, "step": 625 }, { "epoch": 0.05, "grad_norm": 1.3347118405295793, "learning_rate": 1.9977300659390463e-05, "loss": 0.6752, "step": 626 }, { "epoch": 0.05, "grad_norm": 1.1815468771856832, "learning_rate": 1.997712304220659e-05, "loss": 0.5802, "step": 627 }, { "epoch": 0.05, "grad_norm": 1.1188855784750988, "learning_rate": 1.9976944733616935e-05, "loss": 0.63, "step": 628 }, { "epoch": 0.05, "grad_norm": 1.0985965917225944, "learning_rate": 1.9976765733633866e-05, "loss": 0.5863, "step": 629 }, { "epoch": 0.05, "grad_norm": 1.2046569696689249, "learning_rate": 1.9976586042269776e-05, "loss": 0.4522, "step": 630 }, { "epoch": 0.05, "grad_norm": 1.1672364801364987, "learning_rate": 1.9976405659537123e-05, "loss": 0.6388, "step": 631 }, { "epoch": 0.05, "grad_norm": 1.1689539536673572, "learning_rate": 1.9976224585448407e-05, "loss": 0.6218, "step": 632 }, { "epoch": 0.05, "grad_norm": 1.2637252555860425, "learning_rate": 1.9976042820016176e-05, "loss": 0.6143, "step": 633 }, { "epoch": 0.05, "grad_norm": 1.1814014645815236, "learning_rate": 1.997586036325303e-05, "loss": 0.6382, "step": 634 }, { "epoch": 0.05, "grad_norm": 1.222151385844291, "learning_rate": 1.9975677215171606e-05, "loss": 0.6453, "step": 635 }, { "epoch": 0.05, "grad_norm": 1.0914309372643383, "learning_rate": 1.9975493375784598e-05, "loss": 0.6333, "step": 636 }, { "epoch": 0.05, "grad_norm": 1.201970236345254, "learning_rate": 1.997530884510475e-05, "loss": 0.6586, "step": 637 }, { "epoch": 0.05, "grad_norm": 1.054938735797658, "learning_rate": 1.9975123623144847e-05, "loss": 0.6288, "step": 638 }, { "epoch": 0.05, "grad_norm": 1.3866748760527043, "learning_rate": 1.9974937709917722e-05, "loss": 0.6907, "step": 639 }, { "epoch": 0.05, "grad_norm": 1.1652988956147512, "learning_rate": 1.9974751105436266e-05, "loss": 0.659, "step": 640 }, { "epoch": 0.05, "grad_norm": 1.2370818741683758, "learning_rate": 1.9974563809713406e-05, "loss": 0.4051, "step": 641 }, { "epoch": 0.05, "grad_norm": 1.2982928662518747, "learning_rate": 1.9974375822762117e-05, "loss": 0.6157, "step": 642 }, { "epoch": 0.05, "grad_norm": 1.0479843603076193, "learning_rate": 1.9974187144595433e-05, "loss": 0.4918, "step": 643 }, { "epoch": 0.05, "grad_norm": 1.2429258136089865, "learning_rate": 1.9973997775226424e-05, "loss": 0.653, "step": 644 }, { "epoch": 0.05, "grad_norm": 1.2192984610891346, "learning_rate": 1.9973807714668224e-05, "loss": 0.6706, "step": 645 }, { "epoch": 0.05, "grad_norm": 1.1251401683601023, "learning_rate": 1.997361696293399e-05, "loss": 0.6229, "step": 646 }, { "epoch": 0.05, "grad_norm": 1.0708970840194627, "learning_rate": 1.9973425520036948e-05, "loss": 0.5686, "step": 647 }, { "epoch": 0.05, "grad_norm": 1.1553407807232856, "learning_rate": 1.9973233385990364e-05, "loss": 0.6734, "step": 648 }, { "epoch": 0.05, "grad_norm": 1.1466860232715055, "learning_rate": 1.997304056080755e-05, "loss": 0.5551, "step": 649 }, { "epoch": 0.05, "grad_norm": 1.350493965217335, "learning_rate": 1.9972847044501876e-05, "loss": 0.5949, "step": 650 }, { "epoch": 0.05, "grad_norm": 1.2454267382525472, "learning_rate": 1.9972652837086746e-05, "loss": 0.6507, "step": 651 }, { "epoch": 0.05, "grad_norm": 1.029352302353856, "learning_rate": 1.997245793857562e-05, "loss": 0.5958, "step": 652 }, { "epoch": 0.05, "grad_norm": 1.1497968726198924, "learning_rate": 1.9972262348982e-05, "loss": 0.67, "step": 653 }, { "epoch": 0.05, "grad_norm": 1.088989161085879, "learning_rate": 1.997206606831945e-05, "loss": 0.5854, "step": 654 }, { "epoch": 0.05, "grad_norm": 1.1790833944364503, "learning_rate": 1.997186909660157e-05, "loss": 0.6825, "step": 655 }, { "epoch": 0.05, "grad_norm": 1.0384747944909554, "learning_rate": 1.9971671433842e-05, "loss": 0.6763, "step": 656 }, { "epoch": 0.05, "grad_norm": 1.1213533648567533, "learning_rate": 1.9971473080054445e-05, "loss": 0.6894, "step": 657 }, { "epoch": 0.05, "grad_norm": 1.1609933428042643, "learning_rate": 1.9971274035252653e-05, "loss": 0.5707, "step": 658 }, { "epoch": 0.05, "grad_norm": 1.2081990423276776, "learning_rate": 1.9971074299450414e-05, "loss": 0.6806, "step": 659 }, { "epoch": 0.05, "grad_norm": 1.0724208806076496, "learning_rate": 1.9970873872661567e-05, "loss": 0.5806, "step": 660 }, { "epoch": 0.05, "grad_norm": 1.0610754332454024, "learning_rate": 1.997067275490001e-05, "loss": 0.6022, "step": 661 }, { "epoch": 0.05, "grad_norm": 1.1629971401024213, "learning_rate": 1.997047094617967e-05, "loss": 0.6024, "step": 662 }, { "epoch": 0.05, "grad_norm": 1.2436199105527355, "learning_rate": 1.9970268446514543e-05, "loss": 0.6317, "step": 663 }, { "epoch": 0.05, "grad_norm": 1.1442220153552487, "learning_rate": 1.997006525591865e-05, "loss": 0.6532, "step": 664 }, { "epoch": 0.05, "grad_norm": 1.3021218257030307, "learning_rate": 1.9969861374406086e-05, "loss": 0.6593, "step": 665 }, { "epoch": 0.05, "grad_norm": 1.1111386090671853, "learning_rate": 1.9969656801990967e-05, "loss": 0.6495, "step": 666 }, { "epoch": 0.05, "grad_norm": 1.1031034022345894, "learning_rate": 1.9969451538687474e-05, "loss": 0.6161, "step": 667 }, { "epoch": 0.05, "grad_norm": 1.0933845844784758, "learning_rate": 1.9969245584509832e-05, "loss": 0.5477, "step": 668 }, { "epoch": 0.05, "grad_norm": 1.1712771182304644, "learning_rate": 1.9969038939472315e-05, "loss": 0.6338, "step": 669 }, { "epoch": 0.05, "grad_norm": 1.2155964784926518, "learning_rate": 1.9968831603589243e-05, "loss": 0.6716, "step": 670 }, { "epoch": 0.05, "grad_norm": 1.1244215186538657, "learning_rate": 1.9968623576874984e-05, "loss": 0.6159, "step": 671 }, { "epoch": 0.05, "grad_norm": 1.11184462359373, "learning_rate": 1.996841485934395e-05, "loss": 0.5756, "step": 672 }, { "epoch": 0.05, "grad_norm": 1.136388764225708, "learning_rate": 1.996820545101061e-05, "loss": 0.6362, "step": 673 }, { "epoch": 0.05, "grad_norm": 1.2594325771585668, "learning_rate": 1.9967995351889476e-05, "loss": 0.6782, "step": 674 }, { "epoch": 0.05, "grad_norm": 1.077894031554686, "learning_rate": 1.9967784561995103e-05, "loss": 0.5933, "step": 675 }, { "epoch": 0.05, "grad_norm": 1.085391720228807, "learning_rate": 1.9967573081342103e-05, "loss": 0.6247, "step": 676 }, { "epoch": 0.06, "grad_norm": 1.092634209209323, "learning_rate": 1.996736090994513e-05, "loss": 0.5373, "step": 677 }, { "epoch": 0.06, "grad_norm": 1.198269526162391, "learning_rate": 1.9967148047818884e-05, "loss": 0.5686, "step": 678 }, { "epoch": 0.06, "grad_norm": 1.2183730301752702, "learning_rate": 1.996693449497812e-05, "loss": 0.6678, "step": 679 }, { "epoch": 0.06, "grad_norm": 1.1399175734048983, "learning_rate": 1.9966720251437635e-05, "loss": 0.6017, "step": 680 }, { "epoch": 0.06, "grad_norm": 1.0273161496250076, "learning_rate": 1.996650531721228e-05, "loss": 0.5534, "step": 681 }, { "epoch": 0.06, "grad_norm": 1.047089911204804, "learning_rate": 1.9966289692316944e-05, "loss": 0.5956, "step": 682 }, { "epoch": 0.06, "grad_norm": 1.217569905753707, "learning_rate": 1.9966073376766575e-05, "loss": 0.6095, "step": 683 }, { "epoch": 0.06, "grad_norm": 1.1145304786124874, "learning_rate": 1.9965856370576163e-05, "loss": 0.613, "step": 684 }, { "epoch": 0.06, "grad_norm": 1.1977722179345607, "learning_rate": 1.9965638673760738e-05, "loss": 0.6071, "step": 685 }, { "epoch": 0.06, "grad_norm": 1.2160224513488096, "learning_rate": 1.9965420286335397e-05, "loss": 0.6376, "step": 686 }, { "epoch": 0.06, "grad_norm": 1.3450712803411724, "learning_rate": 1.996520120831527e-05, "loss": 0.6046, "step": 687 }, { "epoch": 0.06, "grad_norm": 1.1398839262388, "learning_rate": 1.9964981439715532e-05, "loss": 0.5862, "step": 688 }, { "epoch": 0.06, "grad_norm": 1.1615547015849625, "learning_rate": 1.9964760980551428e-05, "loss": 0.5632, "step": 689 }, { "epoch": 0.06, "grad_norm": 1.1095424326717258, "learning_rate": 1.996453983083822e-05, "loss": 0.6577, "step": 690 }, { "epoch": 0.06, "grad_norm": 1.1245069632159537, "learning_rate": 1.9964317990591243e-05, "loss": 0.6358, "step": 691 }, { "epoch": 0.06, "grad_norm": 1.0739036008737486, "learning_rate": 1.9964095459825866e-05, "loss": 0.6257, "step": 692 }, { "epoch": 0.06, "grad_norm": 1.0889267792227462, "learning_rate": 1.9963872238557516e-05, "loss": 0.6612, "step": 693 }, { "epoch": 0.06, "grad_norm": 1.101405073966227, "learning_rate": 1.9963648326801653e-05, "loss": 0.6636, "step": 694 }, { "epoch": 0.06, "grad_norm": 1.061467009295685, "learning_rate": 1.99634237245738e-05, "loss": 0.4699, "step": 695 }, { "epoch": 0.06, "grad_norm": 1.0916752453273684, "learning_rate": 1.9963198431889523e-05, "loss": 0.5637, "step": 696 }, { "epoch": 0.06, "grad_norm": 1.0034377408449018, "learning_rate": 1.996297244876443e-05, "loss": 0.5266, "step": 697 }, { "epoch": 0.06, "grad_norm": 1.1009331610859694, "learning_rate": 1.9962745775214187e-05, "loss": 0.6557, "step": 698 }, { "epoch": 0.06, "grad_norm": 1.006040563282083, "learning_rate": 1.9962518411254493e-05, "loss": 0.571, "step": 699 }, { "epoch": 0.06, "grad_norm": 1.2442846642824592, "learning_rate": 1.996229035690111e-05, "loss": 0.6507, "step": 700 }, { "epoch": 0.06, "grad_norm": 1.0822018700447882, "learning_rate": 1.9962061612169844e-05, "loss": 0.648, "step": 701 }, { "epoch": 0.06, "grad_norm": 1.2097081161212462, "learning_rate": 1.9961832177076544e-05, "loss": 0.6196, "step": 702 }, { "epoch": 0.06, "grad_norm": 1.2030315390368393, "learning_rate": 1.996160205163711e-05, "loss": 0.6099, "step": 703 }, { "epoch": 0.06, "grad_norm": 1.1297382460923315, "learning_rate": 1.9961371235867494e-05, "loss": 0.6596, "step": 704 }, { "epoch": 0.06, "grad_norm": 1.1674304329946574, "learning_rate": 1.9961139729783683e-05, "loss": 0.6333, "step": 705 }, { "epoch": 0.06, "grad_norm": 1.2304187056475613, "learning_rate": 1.9960907533401722e-05, "loss": 0.6619, "step": 706 }, { "epoch": 0.06, "grad_norm": 1.1425239340299234, "learning_rate": 1.996067464673771e-05, "loss": 0.5948, "step": 707 }, { "epoch": 0.06, "grad_norm": 1.2514604903606674, "learning_rate": 1.9960441069807778e-05, "loss": 0.6098, "step": 708 }, { "epoch": 0.06, "grad_norm": 1.1006930003492308, "learning_rate": 1.996020680262811e-05, "loss": 0.6225, "step": 709 }, { "epoch": 0.06, "grad_norm": 1.0897684407070278, "learning_rate": 1.9959971845214953e-05, "loss": 0.6381, "step": 710 }, { "epoch": 0.06, "grad_norm": 1.0351947993478245, "learning_rate": 1.9959736197584577e-05, "loss": 0.5927, "step": 711 }, { "epoch": 0.06, "grad_norm": 1.2403363792215993, "learning_rate": 1.9959499859753317e-05, "loss": 0.647, "step": 712 }, { "epoch": 0.06, "grad_norm": 1.1981011259553083, "learning_rate": 1.995926283173755e-05, "loss": 0.6881, "step": 713 }, { "epoch": 0.06, "grad_norm": 0.9841149549694405, "learning_rate": 1.9959025113553706e-05, "loss": 0.6494, "step": 714 }, { "epoch": 0.06, "grad_norm": 1.0600192658138001, "learning_rate": 1.9958786705218254e-05, "loss": 0.6265, "step": 715 }, { "epoch": 0.06, "grad_norm": 1.2811772952743912, "learning_rate": 1.9958547606747715e-05, "loss": 0.5239, "step": 716 }, { "epoch": 0.06, "grad_norm": 1.1521958975518616, "learning_rate": 1.9958307818158662e-05, "loss": 0.6341, "step": 717 }, { "epoch": 0.06, "grad_norm": 1.1179962043127805, "learning_rate": 1.995806733946771e-05, "loss": 0.6329, "step": 718 }, { "epoch": 0.06, "grad_norm": 1.0146153054249425, "learning_rate": 1.995782617069152e-05, "loss": 0.5976, "step": 719 }, { "epoch": 0.06, "grad_norm": 1.1645750270250357, "learning_rate": 1.9957584311846814e-05, "loss": 0.576, "step": 720 }, { "epoch": 0.06, "grad_norm": 1.2491529995294561, "learning_rate": 1.9957341762950346e-05, "loss": 0.7102, "step": 721 }, { "epoch": 0.06, "grad_norm": 1.1764586868629787, "learning_rate": 1.9957098524018925e-05, "loss": 0.6875, "step": 722 }, { "epoch": 0.06, "grad_norm": 1.1389846638956975, "learning_rate": 1.995685459506941e-05, "loss": 0.6148, "step": 723 }, { "epoch": 0.06, "grad_norm": 1.057344796664354, "learning_rate": 1.9956609976118704e-05, "loss": 0.5285, "step": 724 }, { "epoch": 0.06, "grad_norm": 1.246786646040238, "learning_rate": 1.9956364667183755e-05, "loss": 0.5909, "step": 725 }, { "epoch": 0.06, "grad_norm": 1.2410890769714888, "learning_rate": 1.9956118668281568e-05, "loss": 0.6763, "step": 726 }, { "epoch": 0.06, "grad_norm": 1.1142586997360597, "learning_rate": 1.9955871979429188e-05, "loss": 0.6469, "step": 727 }, { "epoch": 0.06, "grad_norm": 1.1276135732050507, "learning_rate": 1.9955624600643712e-05, "loss": 0.6592, "step": 728 }, { "epoch": 0.06, "grad_norm": 1.066854522714197, "learning_rate": 1.9955376531942278e-05, "loss": 0.5611, "step": 729 }, { "epoch": 0.06, "grad_norm": 1.08373527229625, "learning_rate": 1.9955127773342086e-05, "loss": 0.5927, "step": 730 }, { "epoch": 0.06, "grad_norm": 1.2196226278884847, "learning_rate": 1.9954878324860365e-05, "loss": 0.6232, "step": 731 }, { "epoch": 0.06, "grad_norm": 1.1409979528374246, "learning_rate": 1.995462818651441e-05, "loss": 0.6322, "step": 732 }, { "epoch": 0.06, "grad_norm": 1.074608252737145, "learning_rate": 1.9954377358321547e-05, "loss": 0.5813, "step": 733 }, { "epoch": 0.06, "grad_norm": 1.0325700029995777, "learning_rate": 1.9954125840299165e-05, "loss": 0.4502, "step": 734 }, { "epoch": 0.06, "grad_norm": 1.1297723614367152, "learning_rate": 1.995387363246469e-05, "loss": 0.6199, "step": 735 }, { "epoch": 0.06, "grad_norm": 1.0916886554647816, "learning_rate": 1.9953620734835603e-05, "loss": 0.6186, "step": 736 }, { "epoch": 0.06, "grad_norm": 1.1978421868431721, "learning_rate": 1.995336714742943e-05, "loss": 0.6573, "step": 737 }, { "epoch": 0.06, "grad_norm": 1.146234644774168, "learning_rate": 1.9953112870263737e-05, "loss": 0.6158, "step": 738 }, { "epoch": 0.06, "grad_norm": 1.0833865452731306, "learning_rate": 1.9952857903356155e-05, "loss": 0.6236, "step": 739 }, { "epoch": 0.06, "grad_norm": 1.0277210942729749, "learning_rate": 1.9952602246724348e-05, "loss": 0.6171, "step": 740 }, { "epoch": 0.06, "grad_norm": 1.1046435282049112, "learning_rate": 1.995234590038603e-05, "loss": 0.6509, "step": 741 }, { "epoch": 0.06, "grad_norm": 1.187550009575104, "learning_rate": 1.995208886435897e-05, "loss": 0.6832, "step": 742 }, { "epoch": 0.06, "grad_norm": 1.1013173491692863, "learning_rate": 1.995183113866098e-05, "loss": 0.6326, "step": 743 }, { "epoch": 0.06, "grad_norm": 1.0349313608491895, "learning_rate": 1.9951572723309918e-05, "loss": 0.5911, "step": 744 }, { "epoch": 0.06, "grad_norm": 1.13623690688545, "learning_rate": 1.9951313618323696e-05, "loss": 0.6131, "step": 745 }, { "epoch": 0.06, "grad_norm": 1.05623891586562, "learning_rate": 1.9951053823720267e-05, "loss": 0.6311, "step": 746 }, { "epoch": 0.06, "grad_norm": 1.098775832907385, "learning_rate": 1.9950793339517632e-05, "loss": 0.6321, "step": 747 }, { "epoch": 0.06, "grad_norm": 1.0964712573130224, "learning_rate": 1.9950532165733847e-05, "loss": 0.6329, "step": 748 }, { "epoch": 0.06, "grad_norm": 1.1245652087511098, "learning_rate": 1.995027030238701e-05, "loss": 0.6432, "step": 749 }, { "epoch": 0.06, "grad_norm": 1.2375929509587618, "learning_rate": 1.9950007749495263e-05, "loss": 0.611, "step": 750 }, { "epoch": 0.06, "grad_norm": 1.080590540348563, "learning_rate": 1.9949744507076806e-05, "loss": 0.6639, "step": 751 }, { "epoch": 0.06, "grad_norm": 1.106532099604561, "learning_rate": 1.994948057514988e-05, "loss": 0.6232, "step": 752 }, { "epoch": 0.06, "grad_norm": 1.0356971574762561, "learning_rate": 1.994921595373278e-05, "loss": 0.5809, "step": 753 }, { "epoch": 0.06, "grad_norm": 1.2054321702962159, "learning_rate": 1.9948950642843836e-05, "loss": 0.6959, "step": 754 }, { "epoch": 0.06, "grad_norm": 1.1866427603796048, "learning_rate": 1.9948684642501433e-05, "loss": 0.5915, "step": 755 }, { "epoch": 0.06, "grad_norm": 1.1568127923932405, "learning_rate": 1.9948417952724014e-05, "loss": 0.6201, "step": 756 }, { "epoch": 0.06, "grad_norm": 1.0679478432065364, "learning_rate": 1.9948150573530054e-05, "loss": 0.6118, "step": 757 }, { "epoch": 0.06, "grad_norm": 1.2389782363966395, "learning_rate": 1.994788250493808e-05, "loss": 0.6178, "step": 758 }, { "epoch": 0.06, "grad_norm": 1.1320990625121634, "learning_rate": 1.9947613746966678e-05, "loss": 0.5965, "step": 759 }, { "epoch": 0.06, "grad_norm": 1.055659570175554, "learning_rate": 1.9947344299634464e-05, "loss": 0.6197, "step": 760 }, { "epoch": 0.06, "grad_norm": 1.0158590766537203, "learning_rate": 1.9947074162960113e-05, "loss": 0.6052, "step": 761 }, { "epoch": 0.06, "grad_norm": 1.213723272660111, "learning_rate": 1.9946803336962346e-05, "loss": 0.5871, "step": 762 }, { "epoch": 0.06, "grad_norm": 1.1870814088915298, "learning_rate": 1.994653182165993e-05, "loss": 0.6191, "step": 763 }, { "epoch": 0.06, "grad_norm": 1.2498400851275093, "learning_rate": 1.994625961707168e-05, "loss": 0.7248, "step": 764 }, { "epoch": 0.06, "grad_norm": 1.1683672596182015, "learning_rate": 1.9945986723216463e-05, "loss": 0.5657, "step": 765 }, { "epoch": 0.06, "grad_norm": 1.1686459028088645, "learning_rate": 1.9945713140113188e-05, "loss": 0.6563, "step": 766 }, { "epoch": 0.06, "grad_norm": 1.174814353567215, "learning_rate": 1.9945438867780814e-05, "loss": 0.5865, "step": 767 }, { "epoch": 0.06, "grad_norm": 1.1195506080858777, "learning_rate": 1.9945163906238347e-05, "loss": 0.6169, "step": 768 }, { "epoch": 0.06, "grad_norm": 1.2211466830279876, "learning_rate": 1.9944888255504846e-05, "loss": 0.6599, "step": 769 }, { "epoch": 0.06, "grad_norm": 1.098094186565308, "learning_rate": 1.994461191559941e-05, "loss": 0.5711, "step": 770 }, { "epoch": 0.06, "grad_norm": 1.0342907512334356, "learning_rate": 1.9944334886541184e-05, "loss": 0.6, "step": 771 }, { "epoch": 0.06, "grad_norm": 1.1118274230511065, "learning_rate": 1.9944057168349374e-05, "loss": 0.5939, "step": 772 }, { "epoch": 0.06, "grad_norm": 1.0468608125121714, "learning_rate": 1.9943778761043223e-05, "loss": 0.5908, "step": 773 }, { "epoch": 0.06, "grad_norm": 1.0957138400050503, "learning_rate": 1.994349966464202e-05, "loss": 0.6157, "step": 774 }, { "epoch": 0.06, "grad_norm": 1.1635606740555189, "learning_rate": 1.9943219879165113e-05, "loss": 0.6674, "step": 775 }, { "epoch": 0.06, "grad_norm": 1.0570904827261942, "learning_rate": 1.9942939404631893e-05, "loss": 0.5812, "step": 776 }, { "epoch": 0.06, "grad_norm": 1.1047314581338863, "learning_rate": 1.9942658241061785e-05, "loss": 0.6725, "step": 777 }, { "epoch": 0.06, "grad_norm": 1.1209323619076677, "learning_rate": 1.9942376388474282e-05, "loss": 0.6099, "step": 778 }, { "epoch": 0.06, "grad_norm": 1.0531072607859158, "learning_rate": 1.9942093846888912e-05, "loss": 0.654, "step": 779 }, { "epoch": 0.06, "grad_norm": 1.1245284813161793, "learning_rate": 1.9941810616325262e-05, "loss": 0.6879, "step": 780 }, { "epoch": 0.06, "grad_norm": 1.2806531327448978, "learning_rate": 1.994152669680295e-05, "loss": 0.6672, "step": 781 }, { "epoch": 0.06, "grad_norm": 1.1367446672914951, "learning_rate": 1.994124208834166e-05, "loss": 0.6449, "step": 782 }, { "epoch": 0.06, "grad_norm": 1.123488436414863, "learning_rate": 1.9940956790961108e-05, "loss": 0.6283, "step": 783 }, { "epoch": 0.06, "grad_norm": 1.1621898434373996, "learning_rate": 1.9940670804681068e-05, "loss": 0.6227, "step": 784 }, { "epoch": 0.06, "grad_norm": 1.1273964782620118, "learning_rate": 1.994038412952136e-05, "loss": 0.6902, "step": 785 }, { "epoch": 0.06, "grad_norm": 1.1176649212729894, "learning_rate": 1.994009676550185e-05, "loss": 0.6174, "step": 786 }, { "epoch": 0.06, "grad_norm": 1.1072199332026769, "learning_rate": 1.993980871264245e-05, "loss": 0.6046, "step": 787 }, { "epoch": 0.06, "grad_norm": 1.0486363905651077, "learning_rate": 1.993951997096312e-05, "loss": 0.5663, "step": 788 }, { "epoch": 0.06, "grad_norm": 1.033014159421138, "learning_rate": 1.9939230540483873e-05, "loss": 0.5875, "step": 789 }, { "epoch": 0.06, "grad_norm": 1.1065308629200603, "learning_rate": 1.9938940421224768e-05, "loss": 0.6169, "step": 790 }, { "epoch": 0.06, "grad_norm": 1.0786912963350155, "learning_rate": 1.9938649613205907e-05, "loss": 0.6306, "step": 791 }, { "epoch": 0.06, "grad_norm": 1.129046654293702, "learning_rate": 1.9938358116447444e-05, "loss": 0.631, "step": 792 }, { "epoch": 0.06, "grad_norm": 0.9863221302179781, "learning_rate": 1.9938065930969578e-05, "loss": 0.5559, "step": 793 }, { "epoch": 0.06, "grad_norm": 1.089501085757703, "learning_rate": 1.993777305679256e-05, "loss": 0.6775, "step": 794 }, { "epoch": 0.06, "grad_norm": 1.2022727340828454, "learning_rate": 1.993747949393668e-05, "loss": 0.6469, "step": 795 }, { "epoch": 0.06, "grad_norm": 1.1368424852076264, "learning_rate": 1.993718524242229e-05, "loss": 0.6475, "step": 796 }, { "epoch": 0.06, "grad_norm": 1.0820497403961937, "learning_rate": 1.9936890302269773e-05, "loss": 0.5831, "step": 797 }, { "epoch": 0.06, "grad_norm": 1.0898352110514833, "learning_rate": 1.9936594673499578e-05, "loss": 0.6381, "step": 798 }, { "epoch": 0.06, "grad_norm": 1.2294389908535341, "learning_rate": 1.993629835613218e-05, "loss": 0.4407, "step": 799 }, { "epoch": 0.07, "grad_norm": 1.0918414115098216, "learning_rate": 1.993600135018812e-05, "loss": 0.6081, "step": 800 }, { "epoch": 0.07, "grad_norm": 0.9537633730917028, "learning_rate": 1.9935703655687982e-05, "loss": 0.5166, "step": 801 }, { "epoch": 0.07, "grad_norm": 1.0660382716002714, "learning_rate": 1.993540527265239e-05, "loss": 0.6609, "step": 802 }, { "epoch": 0.07, "grad_norm": 1.1379226647047838, "learning_rate": 1.9935106201102032e-05, "loss": 0.6515, "step": 803 }, { "epoch": 0.07, "grad_norm": 1.0589300124561416, "learning_rate": 1.993480644105762e-05, "loss": 0.5814, "step": 804 }, { "epoch": 0.07, "grad_norm": 1.0175478881295723, "learning_rate": 1.9934505992539934e-05, "loss": 0.5841, "step": 805 }, { "epoch": 0.07, "grad_norm": 1.1396476742438806, "learning_rate": 1.99342048555698e-05, "loss": 0.6405, "step": 806 }, { "epoch": 0.07, "grad_norm": 1.1621349884764827, "learning_rate": 1.9933903030168075e-05, "loss": 0.7015, "step": 807 }, { "epoch": 0.07, "grad_norm": 1.0632458504231408, "learning_rate": 1.9933600516355684e-05, "loss": 0.6147, "step": 808 }, { "epoch": 0.07, "grad_norm": 1.06885761464636, "learning_rate": 1.9933297314153593e-05, "loss": 0.6383, "step": 809 }, { "epoch": 0.07, "grad_norm": 1.0962532335479325, "learning_rate": 1.99329934235828e-05, "loss": 0.5765, "step": 810 }, { "epoch": 0.07, "grad_norm": 1.1464346102920349, "learning_rate": 1.993268884466438e-05, "loss": 0.5723, "step": 811 }, { "epoch": 0.07, "grad_norm": 1.2509669034212627, "learning_rate": 1.9932383577419432e-05, "loss": 0.5771, "step": 812 }, { "epoch": 0.07, "grad_norm": 1.0414712734658653, "learning_rate": 1.9932077621869112e-05, "loss": 0.6552, "step": 813 }, { "epoch": 0.07, "grad_norm": 1.2035876318057512, "learning_rate": 1.993177097803462e-05, "loss": 0.6744, "step": 814 }, { "epoch": 0.07, "grad_norm": 1.118292884144844, "learning_rate": 1.993146364593721e-05, "loss": 0.6673, "step": 815 }, { "epoch": 0.07, "grad_norm": 1.2133348072449912, "learning_rate": 1.993115562559818e-05, "loss": 0.6326, "step": 816 }, { "epoch": 0.07, "grad_norm": 1.054058689703446, "learning_rate": 1.9930846917038873e-05, "loss": 0.6147, "step": 817 }, { "epoch": 0.07, "grad_norm": 1.2012351073159693, "learning_rate": 1.9930537520280684e-05, "loss": 0.6592, "step": 818 }, { "epoch": 0.07, "grad_norm": 1.164198742439426, "learning_rate": 1.9930227435345053e-05, "loss": 0.6111, "step": 819 }, { "epoch": 0.07, "grad_norm": 1.0662066688946203, "learning_rate": 1.992991666225347e-05, "loss": 0.5349, "step": 820 }, { "epoch": 0.07, "grad_norm": 0.999433802047172, "learning_rate": 1.9929605201027468e-05, "loss": 0.6664, "step": 821 }, { "epoch": 0.07, "grad_norm": 1.1356669429058914, "learning_rate": 1.9929293051688634e-05, "loss": 0.5944, "step": 822 }, { "epoch": 0.07, "grad_norm": 1.0915185271313848, "learning_rate": 1.9928980214258597e-05, "loss": 0.6321, "step": 823 }, { "epoch": 0.07, "grad_norm": 1.1512780011586605, "learning_rate": 1.992866668875904e-05, "loss": 0.6711, "step": 824 }, { "epoch": 0.07, "grad_norm": 1.211306666748613, "learning_rate": 1.992835247521169e-05, "loss": 0.6296, "step": 825 }, { "epoch": 0.07, "grad_norm": 1.1157941995912357, "learning_rate": 1.9928037573638316e-05, "loss": 0.6819, "step": 826 }, { "epoch": 0.07, "grad_norm": 1.1529125372179114, "learning_rate": 1.9927721984060747e-05, "loss": 0.6221, "step": 827 }, { "epoch": 0.07, "grad_norm": 1.172281554294927, "learning_rate": 1.992740570650085e-05, "loss": 0.662, "step": 828 }, { "epoch": 0.07, "grad_norm": 1.150850811381565, "learning_rate": 1.992708874098054e-05, "loss": 0.683, "step": 829 }, { "epoch": 0.07, "grad_norm": 1.0835533671026325, "learning_rate": 1.992677108752179e-05, "loss": 0.6373, "step": 830 }, { "epoch": 0.07, "grad_norm": 1.1311275755980512, "learning_rate": 1.9926452746146605e-05, "loss": 0.6024, "step": 831 }, { "epoch": 0.07, "grad_norm": 1.0663614825308445, "learning_rate": 1.992613371687705e-05, "loss": 0.4717, "step": 832 }, { "epoch": 0.07, "grad_norm": 1.122966022638328, "learning_rate": 1.9925813999735238e-05, "loss": 0.6189, "step": 833 }, { "epoch": 0.07, "grad_norm": 1.2502857431361363, "learning_rate": 1.992549359474332e-05, "loss": 0.6694, "step": 834 }, { "epoch": 0.07, "grad_norm": 0.9883676453300628, "learning_rate": 1.992517250192349e-05, "loss": 0.6629, "step": 835 }, { "epoch": 0.07, "grad_norm": 1.0843568044317529, "learning_rate": 1.9924850721298017e-05, "loss": 0.5797, "step": 836 }, { "epoch": 0.07, "grad_norm": 1.1302426860122095, "learning_rate": 1.992452825288919e-05, "loss": 0.6683, "step": 837 }, { "epoch": 0.07, "grad_norm": 1.1279702636357174, "learning_rate": 1.992420509671936e-05, "loss": 0.6624, "step": 838 }, { "epoch": 0.07, "grad_norm": 1.1548522444197566, "learning_rate": 1.9923881252810917e-05, "loss": 0.6606, "step": 839 }, { "epoch": 0.07, "grad_norm": 1.2189793417281167, "learning_rate": 1.9923556721186308e-05, "loss": 0.6906, "step": 840 }, { "epoch": 0.07, "grad_norm": 1.0057515507994603, "learning_rate": 1.9923231501868018e-05, "loss": 0.6407, "step": 841 }, { "epoch": 0.07, "grad_norm": 1.03480201906667, "learning_rate": 1.992290559487859e-05, "loss": 0.6212, "step": 842 }, { "epoch": 0.07, "grad_norm": 1.2312157692437504, "learning_rate": 1.9922579000240602e-05, "loss": 0.6539, "step": 843 }, { "epoch": 0.07, "grad_norm": 1.1510651239803475, "learning_rate": 1.9922251717976697e-05, "loss": 0.6423, "step": 844 }, { "epoch": 0.07, "grad_norm": 1.228234587897638, "learning_rate": 1.992192374810954e-05, "loss": 0.6523, "step": 845 }, { "epoch": 0.07, "grad_norm": 1.3035456156567933, "learning_rate": 1.9921595090661872e-05, "loss": 0.6438, "step": 846 }, { "epoch": 0.07, "grad_norm": 1.125010510695676, "learning_rate": 1.9921265745656466e-05, "loss": 0.6128, "step": 847 }, { "epoch": 0.07, "grad_norm": 1.0702335652144521, "learning_rate": 1.9920935713116144e-05, "loss": 0.6311, "step": 848 }, { "epoch": 0.07, "grad_norm": 1.1455341862560653, "learning_rate": 1.9920604993063777e-05, "loss": 0.6406, "step": 849 }, { "epoch": 0.07, "grad_norm": 1.203676850522986, "learning_rate": 1.992027358552228e-05, "loss": 0.7314, "step": 850 }, { "epoch": 0.07, "grad_norm": 1.0271676262087595, "learning_rate": 1.991994149051463e-05, "loss": 0.5971, "step": 851 }, { "epoch": 0.07, "grad_norm": 1.205644875995286, "learning_rate": 1.9919608708063826e-05, "loss": 0.6107, "step": 852 }, { "epoch": 0.07, "grad_norm": 1.2046978136025066, "learning_rate": 1.991927523819294e-05, "loss": 0.6406, "step": 853 }, { "epoch": 0.07, "grad_norm": 1.0945845324462102, "learning_rate": 1.991894108092508e-05, "loss": 0.5819, "step": 854 }, { "epoch": 0.07, "grad_norm": 1.1130735560807379, "learning_rate": 1.99186062362834e-05, "loss": 0.6191, "step": 855 }, { "epoch": 0.07, "grad_norm": 1.0389898930015686, "learning_rate": 1.9918270704291104e-05, "loss": 0.6716, "step": 856 }, { "epoch": 0.07, "grad_norm": 1.2716474232262376, "learning_rate": 1.991793448497145e-05, "loss": 0.6004, "step": 857 }, { "epoch": 0.07, "grad_norm": 1.0389131172541453, "learning_rate": 1.991759757834773e-05, "loss": 0.5947, "step": 858 }, { "epoch": 0.07, "grad_norm": 1.0950374146231157, "learning_rate": 1.9917259984443295e-05, "loss": 0.6049, "step": 859 }, { "epoch": 0.07, "grad_norm": 1.0364117726822706, "learning_rate": 1.991692170328154e-05, "loss": 0.6233, "step": 860 }, { "epoch": 0.07, "grad_norm": 1.1011938895926205, "learning_rate": 1.9916582734885906e-05, "loss": 0.7002, "step": 861 }, { "epoch": 0.07, "grad_norm": 1.061748573780894, "learning_rate": 1.991624307927989e-05, "loss": 0.5697, "step": 862 }, { "epoch": 0.07, "grad_norm": 1.0460051979027172, "learning_rate": 1.991590273648702e-05, "loss": 0.607, "step": 863 }, { "epoch": 0.07, "grad_norm": 1.0497730613084617, "learning_rate": 1.9915561706530882e-05, "loss": 0.5858, "step": 864 }, { "epoch": 0.07, "grad_norm": 1.0650382321114669, "learning_rate": 1.9915219989435117e-05, "loss": 0.6258, "step": 865 }, { "epoch": 0.07, "grad_norm": 1.0583106709875194, "learning_rate": 1.9914877585223403e-05, "loss": 0.6135, "step": 866 }, { "epoch": 0.07, "grad_norm": 1.073826426367363, "learning_rate": 1.9914534493919464e-05, "loss": 0.6618, "step": 867 }, { "epoch": 0.07, "grad_norm": 1.0532675508825666, "learning_rate": 1.991419071554708e-05, "loss": 0.6091, "step": 868 }, { "epoch": 0.07, "grad_norm": 1.126468696862115, "learning_rate": 1.9913846250130074e-05, "loss": 0.6568, "step": 869 }, { "epoch": 0.07, "grad_norm": 1.2196257672975, "learning_rate": 1.9913501097692312e-05, "loss": 0.6105, "step": 870 }, { "epoch": 0.07, "grad_norm": 1.1475489520819309, "learning_rate": 1.9913155258257724e-05, "loss": 0.5912, "step": 871 }, { "epoch": 0.07, "grad_norm": 1.1808552294384747, "learning_rate": 1.9912808731850265e-05, "loss": 0.6991, "step": 872 }, { "epoch": 0.07, "grad_norm": 1.1584108242615976, "learning_rate": 1.991246151849396e-05, "loss": 0.6977, "step": 873 }, { "epoch": 0.07, "grad_norm": 1.0289436915103616, "learning_rate": 1.9912113618212852e-05, "loss": 0.6501, "step": 874 }, { "epoch": 0.07, "grad_norm": 1.0439752354711855, "learning_rate": 1.991176503103107e-05, "loss": 0.6131, "step": 875 }, { "epoch": 0.07, "grad_norm": 1.1188632994545695, "learning_rate": 1.9911415756972764e-05, "loss": 0.6592, "step": 876 }, { "epoch": 0.07, "grad_norm": 1.2134771338604364, "learning_rate": 1.9911065796062137e-05, "loss": 0.6363, "step": 877 }, { "epoch": 0.07, "grad_norm": 1.1070321810757595, "learning_rate": 1.9910715148323438e-05, "loss": 0.6432, "step": 878 }, { "epoch": 0.07, "grad_norm": 1.1159201260679672, "learning_rate": 1.9910363813780975e-05, "loss": 0.5572, "step": 879 }, { "epoch": 0.07, "grad_norm": 1.082698575626692, "learning_rate": 1.9910011792459086e-05, "loss": 0.5655, "step": 880 }, { "epoch": 0.07, "grad_norm": 1.0551268128565945, "learning_rate": 1.9909659084382172e-05, "loss": 0.5494, "step": 881 }, { "epoch": 0.07, "grad_norm": 1.1463091432748589, "learning_rate": 1.9909305689574672e-05, "loss": 0.6349, "step": 882 }, { "epoch": 0.07, "grad_norm": 1.0274960646007008, "learning_rate": 1.9908951608061078e-05, "loss": 0.6146, "step": 883 }, { "epoch": 0.07, "grad_norm": 1.0541607414609717, "learning_rate": 1.9908596839865927e-05, "loss": 0.5856, "step": 884 }, { "epoch": 0.07, "grad_norm": 1.416168177554952, "learning_rate": 1.9908241385013804e-05, "loss": 0.4781, "step": 885 }, { "epoch": 0.07, "grad_norm": 1.068463683699775, "learning_rate": 1.990788524352934e-05, "loss": 0.5868, "step": 886 }, { "epoch": 0.07, "grad_norm": 1.1117140695290186, "learning_rate": 1.990752841543722e-05, "loss": 0.681, "step": 887 }, { "epoch": 0.07, "grad_norm": 1.108831597329571, "learning_rate": 1.9907170900762164e-05, "loss": 0.6391, "step": 888 }, { "epoch": 0.07, "grad_norm": 1.1082692126241613, "learning_rate": 1.9906812699528956e-05, "loss": 0.6302, "step": 889 }, { "epoch": 0.07, "grad_norm": 1.084315446777715, "learning_rate": 1.9906453811762415e-05, "loss": 0.5957, "step": 890 }, { "epoch": 0.07, "grad_norm": 1.0997233557871868, "learning_rate": 1.990609423748741e-05, "loss": 0.6264, "step": 891 }, { "epoch": 0.07, "grad_norm": 1.1273421993059851, "learning_rate": 1.9905733976728862e-05, "loss": 0.6095, "step": 892 }, { "epoch": 0.07, "grad_norm": 1.1057839655029467, "learning_rate": 1.990537302951174e-05, "loss": 0.571, "step": 893 }, { "epoch": 0.07, "grad_norm": 1.0514643831580244, "learning_rate": 1.9905011395861048e-05, "loss": 0.5988, "step": 894 }, { "epoch": 0.07, "grad_norm": 1.0361450310583142, "learning_rate": 1.9904649075801852e-05, "loss": 0.5558, "step": 895 }, { "epoch": 0.07, "grad_norm": 1.150559012228703, "learning_rate": 1.9904286069359263e-05, "loss": 0.6765, "step": 896 }, { "epoch": 0.07, "grad_norm": 1.005249808253615, "learning_rate": 1.9903922376558432e-05, "loss": 0.5878, "step": 897 }, { "epoch": 0.07, "grad_norm": 0.9205219013292413, "learning_rate": 1.9903557997424565e-05, "loss": 0.5287, "step": 898 }, { "epoch": 0.07, "grad_norm": 1.0380631233474868, "learning_rate": 1.9903192931982916e-05, "loss": 0.635, "step": 899 }, { "epoch": 0.07, "grad_norm": 1.1151631244071423, "learning_rate": 1.9902827180258778e-05, "loss": 0.6371, "step": 900 }, { "epoch": 0.07, "grad_norm": 1.1385580867661342, "learning_rate": 1.99024607422775e-05, "loss": 0.6381, "step": 901 }, { "epoch": 0.07, "grad_norm": 1.1749410505707398, "learning_rate": 1.9902093618064483e-05, "loss": 0.676, "step": 902 }, { "epoch": 0.07, "grad_norm": 1.0927232181593396, "learning_rate": 1.9901725807645154e-05, "loss": 0.6469, "step": 903 }, { "epoch": 0.07, "grad_norm": 1.0974191506180526, "learning_rate": 1.990135731104501e-05, "loss": 0.5542, "step": 904 }, { "epoch": 0.07, "grad_norm": 1.124203680324317, "learning_rate": 1.9900988128289593e-05, "loss": 0.6122, "step": 905 }, { "epoch": 0.07, "grad_norm": 1.0088657889858017, "learning_rate": 1.990061825940447e-05, "loss": 0.6634, "step": 906 }, { "epoch": 0.07, "grad_norm": 1.0044706318253844, "learning_rate": 1.990024770441529e-05, "loss": 0.5614, "step": 907 }, { "epoch": 0.07, "grad_norm": 1.1291401742975522, "learning_rate": 1.9899876463347727e-05, "loss": 0.5792, "step": 908 }, { "epoch": 0.07, "grad_norm": 1.1340186318427607, "learning_rate": 1.9899504536227505e-05, "loss": 0.6871, "step": 909 }, { "epoch": 0.07, "grad_norm": 1.1230863530970432, "learning_rate": 1.98991319230804e-05, "loss": 0.6134, "step": 910 }, { "epoch": 0.07, "grad_norm": 1.1644758801324757, "learning_rate": 1.989875862393223e-05, "loss": 0.5701, "step": 911 }, { "epoch": 0.07, "grad_norm": 1.094310361279521, "learning_rate": 1.989838463880887e-05, "loss": 0.6342, "step": 912 }, { "epoch": 0.07, "grad_norm": 1.0246166924294686, "learning_rate": 1.9898009967736236e-05, "loss": 0.5564, "step": 913 }, { "epoch": 0.07, "grad_norm": 1.029422191382958, "learning_rate": 1.989763461074029e-05, "loss": 0.5958, "step": 914 }, { "epoch": 0.07, "grad_norm": 1.107913272959802, "learning_rate": 1.989725856784704e-05, "loss": 0.5847, "step": 915 }, { "epoch": 0.07, "grad_norm": 1.096318807524696, "learning_rate": 1.9896881839082554e-05, "loss": 0.6037, "step": 916 }, { "epoch": 0.07, "grad_norm": 1.2018583652830437, "learning_rate": 1.9896504424472936e-05, "loss": 0.6224, "step": 917 }, { "epoch": 0.07, "grad_norm": 1.1961833371889385, "learning_rate": 1.9896126324044338e-05, "loss": 0.6426, "step": 918 }, { "epoch": 0.07, "grad_norm": 1.2817682573246538, "learning_rate": 1.9895747537822965e-05, "loss": 0.6955, "step": 919 }, { "epoch": 0.07, "grad_norm": 1.14955929893788, "learning_rate": 1.989536806583506e-05, "loss": 0.6328, "step": 920 }, { "epoch": 0.07, "grad_norm": 1.1540859308447506, "learning_rate": 1.9894987908106933e-05, "loss": 0.6459, "step": 921 }, { "epoch": 0.07, "grad_norm": 1.0375748859320604, "learning_rate": 1.9894607064664914e-05, "loss": 0.6201, "step": 922 }, { "epoch": 0.08, "grad_norm": 1.1298354214589668, "learning_rate": 1.9894225535535407e-05, "loss": 0.6913, "step": 923 }, { "epoch": 0.08, "grad_norm": 1.0021066744486649, "learning_rate": 1.9893843320744845e-05, "loss": 0.5619, "step": 924 }, { "epoch": 0.08, "grad_norm": 1.0168328583368194, "learning_rate": 1.9893460420319716e-05, "loss": 0.6607, "step": 925 }, { "epoch": 0.08, "grad_norm": 0.9896091925300838, "learning_rate": 1.9893076834286557e-05, "loss": 0.5867, "step": 926 }, { "epoch": 0.08, "grad_norm": 1.2502531797998122, "learning_rate": 1.9892692562671944e-05, "loss": 0.718, "step": 927 }, { "epoch": 0.08, "grad_norm": 1.0353809825535139, "learning_rate": 1.9892307605502514e-05, "loss": 0.5753, "step": 928 }, { "epoch": 0.08, "grad_norm": 1.0514084405024644, "learning_rate": 1.9891921962804942e-05, "loss": 0.5604, "step": 929 }, { "epoch": 0.08, "grad_norm": 1.0772823283544717, "learning_rate": 1.9891535634605954e-05, "loss": 0.5712, "step": 930 }, { "epoch": 0.08, "grad_norm": 1.0807441080391822, "learning_rate": 1.989114862093232e-05, "loss": 0.6179, "step": 931 }, { "epoch": 0.08, "grad_norm": 1.0458888781155307, "learning_rate": 1.9890760921810856e-05, "loss": 0.6274, "step": 932 }, { "epoch": 0.08, "grad_norm": 1.0511812832675538, "learning_rate": 1.9890372537268433e-05, "loss": 0.6443, "step": 933 }, { "epoch": 0.08, "grad_norm": 1.078728574604796, "learning_rate": 1.988998346733197e-05, "loss": 0.6147, "step": 934 }, { "epoch": 0.08, "grad_norm": 1.1166802869475498, "learning_rate": 1.9889593712028422e-05, "loss": 0.6403, "step": 935 }, { "epoch": 0.08, "grad_norm": 1.0624234559060666, "learning_rate": 1.9889203271384803e-05, "loss": 0.5563, "step": 936 }, { "epoch": 0.08, "grad_norm": 1.0577128119712451, "learning_rate": 1.9888812145428172e-05, "loss": 0.673, "step": 937 }, { "epoch": 0.08, "grad_norm": 1.1039049361248896, "learning_rate": 1.9888420334185627e-05, "loss": 0.6499, "step": 938 }, { "epoch": 0.08, "grad_norm": 1.2272573165212521, "learning_rate": 1.9888027837684326e-05, "loss": 0.6061, "step": 939 }, { "epoch": 0.08, "grad_norm": 1.1160443934529196, "learning_rate": 1.9887634655951464e-05, "loss": 0.6287, "step": 940 }, { "epoch": 0.08, "grad_norm": 1.104634072856478, "learning_rate": 1.988724078901429e-05, "loss": 0.6851, "step": 941 }, { "epoch": 0.08, "grad_norm": 1.105786043699008, "learning_rate": 1.9886846236900102e-05, "loss": 0.7122, "step": 942 }, { "epoch": 0.08, "grad_norm": 1.0171741574616147, "learning_rate": 1.9886450999636243e-05, "loss": 0.5775, "step": 943 }, { "epoch": 0.08, "grad_norm": 1.2819694827608143, "learning_rate": 1.9886055077250092e-05, "loss": 0.7037, "step": 944 }, { "epoch": 0.08, "grad_norm": 1.1271597552993238, "learning_rate": 1.9885658469769094e-05, "loss": 0.5979, "step": 945 }, { "epoch": 0.08, "grad_norm": 1.1455028853874523, "learning_rate": 1.9885261177220737e-05, "loss": 0.5962, "step": 946 }, { "epoch": 0.08, "grad_norm": 1.0639290716254242, "learning_rate": 1.9884863199632546e-05, "loss": 0.6347, "step": 947 }, { "epoch": 0.08, "grad_norm": 1.0144440835622914, "learning_rate": 1.9884464537032103e-05, "loss": 0.6397, "step": 948 }, { "epoch": 0.08, "grad_norm": 1.036460767711225, "learning_rate": 1.9884065189447036e-05, "loss": 0.6134, "step": 949 }, { "epoch": 0.08, "grad_norm": 1.0006656100539892, "learning_rate": 1.9883665156905015e-05, "loss": 0.6241, "step": 950 }, { "epoch": 0.08, "grad_norm": 1.058228820011543, "learning_rate": 1.988326443943377e-05, "loss": 0.6417, "step": 951 }, { "epoch": 0.08, "grad_norm": 1.1734905485423708, "learning_rate": 1.988286303706106e-05, "loss": 0.626, "step": 952 }, { "epoch": 0.08, "grad_norm": 1.1270162617667199, "learning_rate": 1.9882460949814716e-05, "loss": 0.5621, "step": 953 }, { "epoch": 0.08, "grad_norm": 1.1408949836430282, "learning_rate": 1.988205817772259e-05, "loss": 0.6351, "step": 954 }, { "epoch": 0.08, "grad_norm": 1.1396736021976535, "learning_rate": 1.9881654720812594e-05, "loss": 0.6667, "step": 955 }, { "epoch": 0.08, "grad_norm": 1.0169098488256985, "learning_rate": 1.9881250579112694e-05, "loss": 0.6072, "step": 956 }, { "epoch": 0.08, "grad_norm": 1.0907079731731821, "learning_rate": 1.9880845752650896e-05, "loss": 0.5655, "step": 957 }, { "epoch": 0.08, "grad_norm": 1.169059792113472, "learning_rate": 1.988044024145525e-05, "loss": 0.6416, "step": 958 }, { "epoch": 0.08, "grad_norm": 1.1156233349996894, "learning_rate": 1.9880034045553858e-05, "loss": 0.6269, "step": 959 }, { "epoch": 0.08, "grad_norm": 1.1084136434129432, "learning_rate": 1.9879627164974868e-05, "loss": 0.6429, "step": 960 }, { "epoch": 0.08, "grad_norm": 1.1176885510223709, "learning_rate": 1.9879219599746486e-05, "loss": 0.6685, "step": 961 }, { "epoch": 0.08, "grad_norm": 0.8867395514211373, "learning_rate": 1.987881134989694e-05, "loss": 0.4933, "step": 962 }, { "epoch": 0.08, "grad_norm": 0.990682541637075, "learning_rate": 1.9878402415454534e-05, "loss": 0.6724, "step": 963 }, { "epoch": 0.08, "grad_norm": 1.0056387200473818, "learning_rate": 1.9877992796447604e-05, "loss": 0.5682, "step": 964 }, { "epoch": 0.08, "grad_norm": 1.1467756136126264, "learning_rate": 1.9877582492904533e-05, "loss": 0.695, "step": 965 }, { "epoch": 0.08, "grad_norm": 1.1088054224503603, "learning_rate": 1.987717150485376e-05, "loss": 0.6608, "step": 966 }, { "epoch": 0.08, "grad_norm": 1.0436562408251457, "learning_rate": 1.9876759832323756e-05, "loss": 0.6355, "step": 967 }, { "epoch": 0.08, "grad_norm": 1.024013032934522, "learning_rate": 1.9876347475343062e-05, "loss": 0.6029, "step": 968 }, { "epoch": 0.08, "grad_norm": 1.123425037307103, "learning_rate": 1.9875934433940248e-05, "loss": 0.6084, "step": 969 }, { "epoch": 0.08, "grad_norm": 1.0661739928360128, "learning_rate": 1.9875520708143933e-05, "loss": 0.6411, "step": 970 }, { "epoch": 0.08, "grad_norm": 1.0218598836399087, "learning_rate": 1.9875106297982798e-05, "loss": 0.6291, "step": 971 }, { "epoch": 0.08, "grad_norm": 1.1134477148440822, "learning_rate": 1.987469120348555e-05, "loss": 0.6534, "step": 972 }, { "epoch": 0.08, "grad_norm": 1.1517210895453271, "learning_rate": 1.9874275424680966e-05, "loss": 0.6704, "step": 973 }, { "epoch": 0.08, "grad_norm": 1.1140417226147281, "learning_rate": 1.987385896159785e-05, "loss": 0.621, "step": 974 }, { "epoch": 0.08, "grad_norm": 1.015703441507511, "learning_rate": 1.987344181426507e-05, "loss": 0.6387, "step": 975 }, { "epoch": 0.08, "grad_norm": 1.031557022872771, "learning_rate": 1.987302398271153e-05, "loss": 0.619, "step": 976 }, { "epoch": 0.08, "grad_norm": 1.179749176771803, "learning_rate": 1.987260546696618e-05, "loss": 0.62, "step": 977 }, { "epoch": 0.08, "grad_norm": 1.1224651935196714, "learning_rate": 1.987218626705803e-05, "loss": 0.677, "step": 978 }, { "epoch": 0.08, "grad_norm": 1.0949746162974316, "learning_rate": 1.9871766383016127e-05, "loss": 0.6365, "step": 979 }, { "epoch": 0.08, "grad_norm": 1.0828147418781917, "learning_rate": 1.9871345814869575e-05, "loss": 0.6582, "step": 980 }, { "epoch": 0.08, "grad_norm": 1.0417686332976923, "learning_rate": 1.9870924562647512e-05, "loss": 0.6457, "step": 981 }, { "epoch": 0.08, "grad_norm": 1.2736955707692121, "learning_rate": 1.9870502626379127e-05, "loss": 0.6502, "step": 982 }, { "epoch": 0.08, "grad_norm": 1.0004273125024994, "learning_rate": 1.9870080006093674e-05, "loss": 0.578, "step": 983 }, { "epoch": 0.08, "grad_norm": 1.1096665328492759, "learning_rate": 1.9869656701820424e-05, "loss": 0.5603, "step": 984 }, { "epoch": 0.08, "grad_norm": 1.2125446185211894, "learning_rate": 1.9869232713588724e-05, "loss": 0.464, "step": 985 }, { "epoch": 0.08, "grad_norm": 1.1221372062657058, "learning_rate": 1.9868808041427948e-05, "loss": 0.5887, "step": 986 }, { "epoch": 0.08, "grad_norm": 0.996001225280882, "learning_rate": 1.9868382685367533e-05, "loss": 0.6238, "step": 987 }, { "epoch": 0.08, "grad_norm": 1.1367037471800612, "learning_rate": 1.9867956645436944e-05, "loss": 0.6152, "step": 988 }, { "epoch": 0.08, "grad_norm": 1.1129390935514154, "learning_rate": 1.9867529921665713e-05, "loss": 0.6287, "step": 989 }, { "epoch": 0.08, "grad_norm": 1.2269397679391247, "learning_rate": 1.9867102514083415e-05, "loss": 0.6655, "step": 990 }, { "epoch": 0.08, "grad_norm": 1.26827046275962, "learning_rate": 1.9866674422719666e-05, "loss": 0.6604, "step": 991 }, { "epoch": 0.08, "grad_norm": 1.0594102839215263, "learning_rate": 1.9866245647604128e-05, "loss": 0.6356, "step": 992 }, { "epoch": 0.08, "grad_norm": 1.0773242595576222, "learning_rate": 1.9865816188766516e-05, "loss": 0.6223, "step": 993 }, { "epoch": 0.08, "grad_norm": 1.1461188807936942, "learning_rate": 1.9865386046236597e-05, "loss": 0.6325, "step": 994 }, { "epoch": 0.08, "grad_norm": 1.2375769131659358, "learning_rate": 1.9864955220044175e-05, "loss": 0.6041, "step": 995 }, { "epoch": 0.08, "grad_norm": 1.1529673106809384, "learning_rate": 1.9864523710219107e-05, "loss": 0.6545, "step": 996 }, { "epoch": 0.08, "grad_norm": 1.0715029734939707, "learning_rate": 1.986409151679129e-05, "loss": 0.567, "step": 997 }, { "epoch": 0.08, "grad_norm": 0.9652259721065576, "learning_rate": 1.9863658639790686e-05, "loss": 0.622, "step": 998 }, { "epoch": 0.08, "grad_norm": 1.1205944623699782, "learning_rate": 1.9863225079247286e-05, "loss": 0.6425, "step": 999 }, { "epoch": 0.08, "grad_norm": 1.251314945813738, "learning_rate": 1.9862790835191137e-05, "loss": 0.6876, "step": 1000 }, { "epoch": 0.08, "grad_norm": 0.9323839167997477, "learning_rate": 1.9862355907652332e-05, "loss": 0.5943, "step": 1001 }, { "epoch": 0.08, "grad_norm": 0.9863978580378259, "learning_rate": 1.986192029666101e-05, "loss": 0.5605, "step": 1002 }, { "epoch": 0.08, "grad_norm": 1.0486628796809216, "learning_rate": 1.9861484002247357e-05, "loss": 0.5606, "step": 1003 }, { "epoch": 0.08, "grad_norm": 1.1196085211902809, "learning_rate": 1.9861047024441614e-05, "loss": 0.6215, "step": 1004 }, { "epoch": 0.08, "grad_norm": 1.0658424922162064, "learning_rate": 1.9860609363274056e-05, "loss": 0.6589, "step": 1005 }, { "epoch": 0.08, "grad_norm": 1.0050420253517978, "learning_rate": 1.9860171018775018e-05, "loss": 0.5239, "step": 1006 }, { "epoch": 0.08, "grad_norm": 1.0584283466966222, "learning_rate": 1.9859731990974867e-05, "loss": 0.6244, "step": 1007 }, { "epoch": 0.08, "grad_norm": 1.0585855584799557, "learning_rate": 1.9859292279904043e-05, "loss": 0.5561, "step": 1008 }, { "epoch": 0.08, "grad_norm": 0.9990564994313156, "learning_rate": 1.9858851885593004e-05, "loss": 0.4815, "step": 1009 }, { "epoch": 0.08, "grad_norm": 1.0178981238404532, "learning_rate": 1.9858410808072278e-05, "loss": 0.58, "step": 1010 }, { "epoch": 0.08, "grad_norm": 0.9590816481827416, "learning_rate": 1.9857969047372422e-05, "loss": 0.5601, "step": 1011 }, { "epoch": 0.08, "grad_norm": 1.0622275660970883, "learning_rate": 1.985752660352406e-05, "loss": 0.5345, "step": 1012 }, { "epoch": 0.08, "grad_norm": 1.1863129467779967, "learning_rate": 1.9857083476557846e-05, "loss": 0.6909, "step": 1013 }, { "epoch": 0.08, "grad_norm": 1.052063234705519, "learning_rate": 1.9856639666504492e-05, "loss": 0.5748, "step": 1014 }, { "epoch": 0.08, "grad_norm": 1.002701005360324, "learning_rate": 1.9856195173394754e-05, "loss": 0.589, "step": 1015 }, { "epoch": 0.08, "grad_norm": 0.938763483372456, "learning_rate": 1.985574999725943e-05, "loss": 0.5357, "step": 1016 }, { "epoch": 0.08, "grad_norm": 1.0966699932094541, "learning_rate": 1.985530413812937e-05, "loss": 0.6467, "step": 1017 }, { "epoch": 0.08, "grad_norm": 0.9847762667527534, "learning_rate": 1.9854857596035476e-05, "loss": 0.636, "step": 1018 }, { "epoch": 0.08, "grad_norm": 1.1229985693999278, "learning_rate": 1.9854410371008693e-05, "loss": 0.6408, "step": 1019 }, { "epoch": 0.08, "grad_norm": 1.0914436618280507, "learning_rate": 1.9853962463080013e-05, "loss": 0.6649, "step": 1020 }, { "epoch": 0.08, "grad_norm": 1.063621268903449, "learning_rate": 1.9853513872280476e-05, "loss": 0.6554, "step": 1021 }, { "epoch": 0.08, "grad_norm": 1.0312095056964494, "learning_rate": 1.985306459864117e-05, "loss": 0.6262, "step": 1022 }, { "epoch": 0.08, "grad_norm": 1.117013859147517, "learning_rate": 1.985261464219322e-05, "loss": 0.6764, "step": 1023 }, { "epoch": 0.08, "grad_norm": 1.036906745742879, "learning_rate": 1.9852164002967818e-05, "loss": 0.6228, "step": 1024 }, { "epoch": 0.08, "grad_norm": 1.0208147244129753, "learning_rate": 1.9851712680996188e-05, "loss": 0.6193, "step": 1025 }, { "epoch": 0.08, "grad_norm": 0.9589392180387819, "learning_rate": 1.985126067630961e-05, "loss": 0.6018, "step": 1026 }, { "epoch": 0.08, "grad_norm": 1.1675221592473077, "learning_rate": 1.9850807988939405e-05, "loss": 0.6075, "step": 1027 }, { "epoch": 0.08, "grad_norm": 1.0544466663271506, "learning_rate": 1.9850354618916942e-05, "loss": 0.6635, "step": 1028 }, { "epoch": 0.08, "grad_norm": 1.0481367773090586, "learning_rate": 1.9849900566273642e-05, "loss": 0.5868, "step": 1029 }, { "epoch": 0.08, "grad_norm": 0.969942783915277, "learning_rate": 1.984944583104097e-05, "loss": 0.5484, "step": 1030 }, { "epoch": 0.08, "grad_norm": 0.9991758991063624, "learning_rate": 1.9848990413250436e-05, "loss": 0.5921, "step": 1031 }, { "epoch": 0.08, "grad_norm": 0.9426679105129601, "learning_rate": 1.9848534312933606e-05, "loss": 0.5454, "step": 1032 }, { "epoch": 0.08, "grad_norm": 1.108668062134207, "learning_rate": 1.9848077530122083e-05, "loss": 0.6393, "step": 1033 }, { "epoch": 0.08, "grad_norm": 1.163748991275181, "learning_rate": 1.9847620064847522e-05, "loss": 0.6919, "step": 1034 }, { "epoch": 0.08, "grad_norm": 1.1011142742465692, "learning_rate": 1.9847161917141626e-05, "loss": 0.6098, "step": 1035 }, { "epoch": 0.08, "grad_norm": 1.1603642028495122, "learning_rate": 1.984670308703614e-05, "loss": 0.6514, "step": 1036 }, { "epoch": 0.08, "grad_norm": 0.9787388127791858, "learning_rate": 1.9846243574562866e-05, "loss": 0.5918, "step": 1037 }, { "epoch": 0.08, "grad_norm": 1.1162496201649623, "learning_rate": 1.9845783379753648e-05, "loss": 0.627, "step": 1038 }, { "epoch": 0.08, "grad_norm": 1.104476587435251, "learning_rate": 1.9845322502640374e-05, "loss": 0.5546, "step": 1039 }, { "epoch": 0.08, "grad_norm": 1.1535168566665752, "learning_rate": 1.9844860943254983e-05, "loss": 0.6147, "step": 1040 }, { "epoch": 0.08, "grad_norm": 1.1166245608533367, "learning_rate": 1.984439870162946e-05, "loss": 0.6482, "step": 1041 }, { "epoch": 0.08, "grad_norm": 1.0427963654809782, "learning_rate": 1.984393577779584e-05, "loss": 0.5811, "step": 1042 }, { "epoch": 0.08, "grad_norm": 1.0767589535184312, "learning_rate": 1.9843472171786204e-05, "loss": 0.7003, "step": 1043 }, { "epoch": 0.08, "grad_norm": 0.9479657664193446, "learning_rate": 1.9843007883632674e-05, "loss": 0.5707, "step": 1044 }, { "epoch": 0.08, "grad_norm": 1.0463700626631678, "learning_rate": 1.984254291336743e-05, "loss": 0.6024, "step": 1045 }, { "epoch": 0.09, "grad_norm": 1.0773084022527417, "learning_rate": 1.984207726102269e-05, "loss": 0.6155, "step": 1046 }, { "epoch": 0.09, "grad_norm": 1.0902382782675282, "learning_rate": 1.984161092663073e-05, "loss": 0.5474, "step": 1047 }, { "epoch": 0.09, "grad_norm": 0.9644531785730229, "learning_rate": 1.984114391022386e-05, "loss": 0.529, "step": 1048 }, { "epoch": 0.09, "grad_norm": 1.127131720367706, "learning_rate": 1.984067621183445e-05, "loss": 0.5964, "step": 1049 }, { "epoch": 0.09, "grad_norm": 1.094880066095062, "learning_rate": 1.9840207831494903e-05, "loss": 0.6238, "step": 1050 }, { "epoch": 0.09, "grad_norm": 1.1393198512420912, "learning_rate": 1.983973876923768e-05, "loss": 0.7522, "step": 1051 }, { "epoch": 0.09, "grad_norm": 1.0059937311188745, "learning_rate": 1.9839269025095293e-05, "loss": 0.5853, "step": 1052 }, { "epoch": 0.09, "grad_norm": 1.4109933801849883, "learning_rate": 1.9838798599100286e-05, "loss": 0.6001, "step": 1053 }, { "epoch": 0.09, "grad_norm": 1.1132444057463866, "learning_rate": 1.9838327491285266e-05, "loss": 0.5767, "step": 1054 }, { "epoch": 0.09, "grad_norm": 0.9701633973894861, "learning_rate": 1.9837855701682875e-05, "loss": 0.5808, "step": 1055 }, { "epoch": 0.09, "grad_norm": 1.1247541865700246, "learning_rate": 1.983738323032581e-05, "loss": 0.6039, "step": 1056 }, { "epoch": 0.09, "grad_norm": 1.0092591983705888, "learning_rate": 1.9836910077246813e-05, "loss": 0.6334, "step": 1057 }, { "epoch": 0.09, "grad_norm": 0.9705654534952596, "learning_rate": 1.9836436242478676e-05, "loss": 0.5847, "step": 1058 }, { "epoch": 0.09, "grad_norm": 0.9904458846833634, "learning_rate": 1.9835961726054228e-05, "loss": 0.6517, "step": 1059 }, { "epoch": 0.09, "grad_norm": 1.1197036023727012, "learning_rate": 1.983548652800636e-05, "loss": 0.6023, "step": 1060 }, { "epoch": 0.09, "grad_norm": 1.1771133490975412, "learning_rate": 1.9835010648368e-05, "loss": 0.6963, "step": 1061 }, { "epoch": 0.09, "grad_norm": 1.0699175937730134, "learning_rate": 1.9834534087172126e-05, "loss": 0.6147, "step": 1062 }, { "epoch": 0.09, "grad_norm": 1.1551718492888678, "learning_rate": 1.983405684445176e-05, "loss": 0.6549, "step": 1063 }, { "epoch": 0.09, "grad_norm": 1.0603070951538438, "learning_rate": 1.983357892023998e-05, "loss": 0.6094, "step": 1064 }, { "epoch": 0.09, "grad_norm": 1.0402062222116446, "learning_rate": 1.98331003145699e-05, "loss": 0.6219, "step": 1065 }, { "epoch": 0.09, "grad_norm": 0.9939060911914949, "learning_rate": 1.983262102747469e-05, "loss": 0.6104, "step": 1066 }, { "epoch": 0.09, "grad_norm": 0.9774031117289627, "learning_rate": 1.983214105898757e-05, "loss": 0.5856, "step": 1067 }, { "epoch": 0.09, "grad_norm": 1.0914284577741291, "learning_rate": 1.983166040914179e-05, "loss": 0.5925, "step": 1068 }, { "epoch": 0.09, "grad_norm": 1.0680432076381006, "learning_rate": 1.983117907797067e-05, "loss": 0.682, "step": 1069 }, { "epoch": 0.09, "grad_norm": 1.0942550327326452, "learning_rate": 1.9830697065507554e-05, "loss": 0.5849, "step": 1070 }, { "epoch": 0.09, "grad_norm": 1.0321035757944577, "learning_rate": 1.9830214371785858e-05, "loss": 0.5567, "step": 1071 }, { "epoch": 0.09, "grad_norm": 1.0419076031734429, "learning_rate": 1.982973099683902e-05, "loss": 0.6708, "step": 1072 }, { "epoch": 0.09, "grad_norm": 1.023472160735429, "learning_rate": 1.9829246940700543e-05, "loss": 0.6217, "step": 1073 }, { "epoch": 0.09, "grad_norm": 0.9986553016654867, "learning_rate": 1.9828762203403973e-05, "loss": 0.5932, "step": 1074 }, { "epoch": 0.09, "grad_norm": 1.078259151860152, "learning_rate": 1.98282767849829e-05, "loss": 0.6501, "step": 1075 }, { "epoch": 0.09, "grad_norm": 0.9435008967700403, "learning_rate": 1.9827790685470963e-05, "loss": 0.5278, "step": 1076 }, { "epoch": 0.09, "grad_norm": 1.058301399489676, "learning_rate": 1.9827303904901853e-05, "loss": 0.6602, "step": 1077 }, { "epoch": 0.09, "grad_norm": 1.1175064502389007, "learning_rate": 1.9826816443309294e-05, "loss": 0.6301, "step": 1078 }, { "epoch": 0.09, "grad_norm": 0.99965556015855, "learning_rate": 1.9826328300727074e-05, "loss": 0.6088, "step": 1079 }, { "epoch": 0.09, "grad_norm": 1.1386141728126398, "learning_rate": 1.9825839477189017e-05, "loss": 0.676, "step": 1080 }, { "epoch": 0.09, "grad_norm": 1.082303668746878, "learning_rate": 1.9825349972729003e-05, "loss": 0.6496, "step": 1081 }, { "epoch": 0.09, "grad_norm": 0.9824070847379709, "learning_rate": 1.982485978738095e-05, "loss": 0.6093, "step": 1082 }, { "epoch": 0.09, "grad_norm": 1.0204342899951846, "learning_rate": 1.9824368921178825e-05, "loss": 0.6787, "step": 1083 }, { "epoch": 0.09, "grad_norm": 1.004722031213302, "learning_rate": 1.9823877374156647e-05, "loss": 0.6069, "step": 1084 }, { "epoch": 0.09, "grad_norm": 1.036218483815217, "learning_rate": 1.9823385146348485e-05, "loss": 0.6353, "step": 1085 }, { "epoch": 0.09, "grad_norm": 1.0611860755386595, "learning_rate": 1.9822892237788448e-05, "loss": 0.5991, "step": 1086 }, { "epoch": 0.09, "grad_norm": 1.1588271079524333, "learning_rate": 1.9822398648510684e-05, "loss": 0.7502, "step": 1087 }, { "epoch": 0.09, "grad_norm": 1.03100217892194, "learning_rate": 1.9821904378549414e-05, "loss": 0.5934, "step": 1088 }, { "epoch": 0.09, "grad_norm": 1.052827521858649, "learning_rate": 1.9821409427938878e-05, "loss": 0.6325, "step": 1089 }, { "epoch": 0.09, "grad_norm": 0.9408025909515155, "learning_rate": 1.982091379671338e-05, "loss": 0.5921, "step": 1090 }, { "epoch": 0.09, "grad_norm": 1.1543920923976936, "learning_rate": 1.982041748490727e-05, "loss": 0.6921, "step": 1091 }, { "epoch": 0.09, "grad_norm": 1.0745995961987254, "learning_rate": 1.9819920492554935e-05, "loss": 0.651, "step": 1092 }, { "epoch": 0.09, "grad_norm": 1.101414274214897, "learning_rate": 1.9819422819690824e-05, "loss": 0.6983, "step": 1093 }, { "epoch": 0.09, "grad_norm": 1.0657813614519533, "learning_rate": 1.9818924466349422e-05, "loss": 0.6255, "step": 1094 }, { "epoch": 0.09, "grad_norm": 1.0039151439925784, "learning_rate": 1.981842543256526e-05, "loss": 0.613, "step": 1095 }, { "epoch": 0.09, "grad_norm": 0.9727132720114751, "learning_rate": 1.981792571837293e-05, "loss": 0.6061, "step": 1096 }, { "epoch": 0.09, "grad_norm": 1.089952183620595, "learning_rate": 1.981742532380705e-05, "loss": 0.6576, "step": 1097 }, { "epoch": 0.09, "grad_norm": 1.1378152254765532, "learning_rate": 1.9816924248902304e-05, "loss": 0.6077, "step": 1098 }, { "epoch": 0.09, "grad_norm": 1.0000243008860976, "learning_rate": 1.9816422493693417e-05, "loss": 0.6, "step": 1099 }, { "epoch": 0.09, "grad_norm": 1.0262601717430377, "learning_rate": 1.9815920058215157e-05, "loss": 0.5675, "step": 1100 }, { "epoch": 0.09, "grad_norm": 1.0424400127184634, "learning_rate": 1.9815416942502346e-05, "loss": 0.6893, "step": 1101 }, { "epoch": 0.09, "grad_norm": 1.1959462863993415, "learning_rate": 1.9814913146589847e-05, "loss": 0.621, "step": 1102 }, { "epoch": 0.09, "grad_norm": 1.0532552468562892, "learning_rate": 1.9814408670512572e-05, "loss": 0.6032, "step": 1103 }, { "epoch": 0.09, "grad_norm": 1.0106259099736372, "learning_rate": 1.981390351430548e-05, "loss": 0.5914, "step": 1104 }, { "epoch": 0.09, "grad_norm": 1.1403825524665405, "learning_rate": 1.981339767800358e-05, "loss": 0.6111, "step": 1105 }, { "epoch": 0.09, "grad_norm": 1.043511667204388, "learning_rate": 1.9812891161641927e-05, "loss": 0.6132, "step": 1106 }, { "epoch": 0.09, "grad_norm": 1.1476441506456099, "learning_rate": 1.981238396525562e-05, "loss": 0.6988, "step": 1107 }, { "epoch": 0.09, "grad_norm": 1.0601807753560835, "learning_rate": 1.9811876088879808e-05, "loss": 0.6176, "step": 1108 }, { "epoch": 0.09, "grad_norm": 1.1079517298243802, "learning_rate": 1.9811367532549686e-05, "loss": 0.5924, "step": 1109 }, { "epoch": 0.09, "grad_norm": 1.0216589393619098, "learning_rate": 1.9810858296300496e-05, "loss": 0.5809, "step": 1110 }, { "epoch": 0.09, "grad_norm": 1.1727244067131004, "learning_rate": 1.9810348380167527e-05, "loss": 0.666, "step": 1111 }, { "epoch": 0.09, "grad_norm": 1.0590507767162933, "learning_rate": 1.9809837784186117e-05, "loss": 0.5926, "step": 1112 }, { "epoch": 0.09, "grad_norm": 1.0800020752477553, "learning_rate": 1.9809326508391653e-05, "loss": 0.6388, "step": 1113 }, { "epoch": 0.09, "grad_norm": 1.113372749895697, "learning_rate": 1.980881455281956e-05, "loss": 0.6648, "step": 1114 }, { "epoch": 0.09, "grad_norm": 1.1485772029405572, "learning_rate": 1.980830191750532e-05, "loss": 0.6499, "step": 1115 }, { "epoch": 0.09, "grad_norm": 0.8997588895538492, "learning_rate": 1.980778860248446e-05, "loss": 0.533, "step": 1116 }, { "epoch": 0.09, "grad_norm": 1.027167278167877, "learning_rate": 1.9807274607792545e-05, "loss": 0.6515, "step": 1117 }, { "epoch": 0.09, "grad_norm": 1.0088579416146564, "learning_rate": 1.98067599334652e-05, "loss": 0.5844, "step": 1118 }, { "epoch": 0.09, "grad_norm": 1.1311769529954938, "learning_rate": 1.980624457953809e-05, "loss": 0.6711, "step": 1119 }, { "epoch": 0.09, "grad_norm": 1.0829367163761348, "learning_rate": 1.980572854604693e-05, "loss": 0.5857, "step": 1120 }, { "epoch": 0.09, "grad_norm": 1.1391803149878377, "learning_rate": 1.980521183302748e-05, "loss": 0.6258, "step": 1121 }, { "epoch": 0.09, "grad_norm": 1.125268403164557, "learning_rate": 1.980469444051554e-05, "loss": 0.6142, "step": 1122 }, { "epoch": 0.09, "grad_norm": 1.0778526344981947, "learning_rate": 1.980417636854698e-05, "loss": 0.5555, "step": 1123 }, { "epoch": 0.09, "grad_norm": 1.082392494479735, "learning_rate": 1.9803657617157693e-05, "loss": 0.603, "step": 1124 }, { "epoch": 0.09, "grad_norm": 1.0569189959865126, "learning_rate": 1.9803138186383628e-05, "loss": 0.6449, "step": 1125 }, { "epoch": 0.09, "grad_norm": 1.0852474738625402, "learning_rate": 1.9802618076260784e-05, "loss": 0.6242, "step": 1126 }, { "epoch": 0.09, "grad_norm": 1.1175750495002248, "learning_rate": 1.9802097286825197e-05, "loss": 0.6315, "step": 1127 }, { "epoch": 0.09, "grad_norm": 1.0773717623972658, "learning_rate": 1.9801575818112964e-05, "loss": 0.664, "step": 1128 }, { "epoch": 0.09, "grad_norm": 1.162007196070372, "learning_rate": 1.980105367016022e-05, "loss": 0.6763, "step": 1129 }, { "epoch": 0.09, "grad_norm": 1.136601052174248, "learning_rate": 1.9800530843003157e-05, "loss": 0.6117, "step": 1130 }, { "epoch": 0.09, "grad_norm": 1.143985763323429, "learning_rate": 1.9800007336677994e-05, "loss": 0.6176, "step": 1131 }, { "epoch": 0.09, "grad_norm": 1.0404379302337705, "learning_rate": 1.979948315122102e-05, "loss": 0.6334, "step": 1132 }, { "epoch": 0.09, "grad_norm": 0.9958232338449807, "learning_rate": 1.979895828666855e-05, "loss": 0.6101, "step": 1133 }, { "epoch": 0.09, "grad_norm": 1.0200571531476168, "learning_rate": 1.9798432743056964e-05, "loss": 0.5884, "step": 1134 }, { "epoch": 0.09, "grad_norm": 1.0324428907512637, "learning_rate": 1.979790652042268e-05, "loss": 0.6089, "step": 1135 }, { "epoch": 0.09, "grad_norm": 1.1115915371843572, "learning_rate": 1.9797379618802163e-05, "loss": 0.6658, "step": 1136 }, { "epoch": 0.09, "grad_norm": 1.1328477500696639, "learning_rate": 1.9796852038231932e-05, "loss": 0.6374, "step": 1137 }, { "epoch": 0.09, "grad_norm": 1.2353642392222939, "learning_rate": 1.9796323778748544e-05, "loss": 0.6538, "step": 1138 }, { "epoch": 0.09, "grad_norm": 1.053963539963818, "learning_rate": 1.9795794840388605e-05, "loss": 0.568, "step": 1139 }, { "epoch": 0.09, "grad_norm": 1.1496538850517588, "learning_rate": 1.9795265223188775e-05, "loss": 0.5619, "step": 1140 }, { "epoch": 0.09, "grad_norm": 1.1157661180851675, "learning_rate": 1.9794734927185756e-05, "loss": 0.6437, "step": 1141 }, { "epoch": 0.09, "grad_norm": 1.1599396320995672, "learning_rate": 1.979420395241629e-05, "loss": 0.6507, "step": 1142 }, { "epoch": 0.09, "grad_norm": 1.112262206229225, "learning_rate": 1.9793672298917178e-05, "loss": 0.6211, "step": 1143 }, { "epoch": 0.09, "grad_norm": 1.0729221065115775, "learning_rate": 1.9793139966725264e-05, "loss": 0.5873, "step": 1144 }, { "epoch": 0.09, "grad_norm": 1.1470954186434454, "learning_rate": 1.9792606955877437e-05, "loss": 0.6533, "step": 1145 }, { "epoch": 0.09, "grad_norm": 1.0966433183148414, "learning_rate": 1.979207326641063e-05, "loss": 0.6198, "step": 1146 }, { "epoch": 0.09, "grad_norm": 1.0887800674211725, "learning_rate": 1.979153889836184e-05, "loss": 0.6142, "step": 1147 }, { "epoch": 0.09, "grad_norm": 1.0505608118632632, "learning_rate": 1.979100385176808e-05, "loss": 0.6201, "step": 1148 }, { "epoch": 0.09, "grad_norm": 1.0637276276013699, "learning_rate": 1.979046812666644e-05, "loss": 0.6334, "step": 1149 }, { "epoch": 0.09, "grad_norm": 1.0959557081058233, "learning_rate": 1.9789931723094046e-05, "loss": 0.5997, "step": 1150 }, { "epoch": 0.09, "grad_norm": 1.1145558485853704, "learning_rate": 1.9789394641088068e-05, "loss": 0.6324, "step": 1151 }, { "epoch": 0.09, "grad_norm": 1.1470672684736019, "learning_rate": 1.978885688068572e-05, "loss": 0.6474, "step": 1152 }, { "epoch": 0.09, "grad_norm": 1.059988951594, "learning_rate": 1.9788318441924276e-05, "loss": 0.6878, "step": 1153 }, { "epoch": 0.09, "grad_norm": 1.0449670410814962, "learning_rate": 1.9787779324841045e-05, "loss": 0.6136, "step": 1154 }, { "epoch": 0.09, "grad_norm": 1.1036223223053379, "learning_rate": 1.978723952947339e-05, "loss": 0.4832, "step": 1155 }, { "epoch": 0.09, "grad_norm": 0.9439899179136536, "learning_rate": 1.9786699055858715e-05, "loss": 0.5875, "step": 1156 }, { "epoch": 0.09, "grad_norm": 1.1271381575003854, "learning_rate": 1.9786157904034476e-05, "loss": 0.6729, "step": 1157 }, { "epoch": 0.09, "grad_norm": 1.1559246313644351, "learning_rate": 1.9785616074038177e-05, "loss": 0.6624, "step": 1158 }, { "epoch": 0.09, "grad_norm": 1.1229234851699839, "learning_rate": 1.978507356590736e-05, "loss": 0.5696, "step": 1159 }, { "epoch": 0.09, "grad_norm": 1.1417411033525822, "learning_rate": 1.978453037967963e-05, "loss": 0.6108, "step": 1160 }, { "epoch": 0.09, "grad_norm": 0.9829968484078301, "learning_rate": 1.978398651539262e-05, "loss": 0.6475, "step": 1161 }, { "epoch": 0.09, "grad_norm": 1.0605076321999238, "learning_rate": 1.9783441973084023e-05, "loss": 0.61, "step": 1162 }, { "epoch": 0.09, "grad_norm": 0.9719439884646509, "learning_rate": 1.9782896752791576e-05, "loss": 0.574, "step": 1163 }, { "epoch": 0.09, "grad_norm": 1.0185093398635798, "learning_rate": 1.978235085455306e-05, "loss": 0.6139, "step": 1164 }, { "epoch": 0.09, "grad_norm": 0.9513968903641321, "learning_rate": 1.9781804278406308e-05, "loss": 0.6326, "step": 1165 }, { "epoch": 0.09, "grad_norm": 1.0634305191214797, "learning_rate": 1.9781257024389194e-05, "loss": 0.6322, "step": 1166 }, { "epoch": 0.09, "grad_norm": 0.9513957696631159, "learning_rate": 1.9780709092539647e-05, "loss": 0.6513, "step": 1167 }, { "epoch": 0.09, "grad_norm": 0.899895870947211, "learning_rate": 1.9780160482895633e-05, "loss": 0.5656, "step": 1168 }, { "epoch": 0.1, "grad_norm": 1.0053864478482981, "learning_rate": 1.9779611195495177e-05, "loss": 0.6136, "step": 1169 }, { "epoch": 0.1, "grad_norm": 1.0556166213683182, "learning_rate": 1.9779061230376334e-05, "loss": 0.5876, "step": 1170 }, { "epoch": 0.1, "grad_norm": 1.0620148974297947, "learning_rate": 1.9778510587577226e-05, "loss": 0.6286, "step": 1171 }, { "epoch": 0.1, "grad_norm": 1.0944898098419875, "learning_rate": 1.9777959267136005e-05, "loss": 0.6408, "step": 1172 }, { "epoch": 0.1, "grad_norm": 1.1262428474198718, "learning_rate": 1.977740726909088e-05, "loss": 0.5868, "step": 1173 }, { "epoch": 0.1, "grad_norm": 1.1348189568565432, "learning_rate": 1.9776854593480107e-05, "loss": 0.6325, "step": 1174 }, { "epoch": 0.1, "grad_norm": 1.0265007969572906, "learning_rate": 1.977630124034198e-05, "loss": 0.6002, "step": 1175 }, { "epoch": 0.1, "grad_norm": 1.0045863640942072, "learning_rate": 1.9775747209714847e-05, "loss": 0.4731, "step": 1176 }, { "epoch": 0.1, "grad_norm": 1.1767076857181433, "learning_rate": 1.9775192501637104e-05, "loss": 0.5284, "step": 1177 }, { "epoch": 0.1, "grad_norm": 1.0981349566632057, "learning_rate": 1.9774637116147194e-05, "loss": 0.6136, "step": 1178 }, { "epoch": 0.1, "grad_norm": 1.1161235671651721, "learning_rate": 1.97740810532836e-05, "loss": 0.6547, "step": 1179 }, { "epoch": 0.1, "grad_norm": 1.0884324364235982, "learning_rate": 1.9773524313084857e-05, "loss": 0.6237, "step": 1180 }, { "epoch": 0.1, "grad_norm": 1.1129008869770969, "learning_rate": 1.977296689558955e-05, "loss": 0.6, "step": 1181 }, { "epoch": 0.1, "grad_norm": 1.096193804673158, "learning_rate": 1.9772408800836308e-05, "loss": 0.6041, "step": 1182 }, { "epoch": 0.1, "grad_norm": 0.9666265897405518, "learning_rate": 1.9771850028863802e-05, "loss": 0.5926, "step": 1183 }, { "epoch": 0.1, "grad_norm": 1.2019818907270623, "learning_rate": 1.977129057971076e-05, "loss": 0.6472, "step": 1184 }, { "epoch": 0.1, "grad_norm": 1.0893335099998267, "learning_rate": 1.977073045341594e-05, "loss": 0.6338, "step": 1185 }, { "epoch": 0.1, "grad_norm": 1.0960683097540216, "learning_rate": 1.977016965001817e-05, "loss": 0.6508, "step": 1186 }, { "epoch": 0.1, "grad_norm": 1.2142205404788615, "learning_rate": 1.9769608169556314e-05, "loss": 0.6288, "step": 1187 }, { "epoch": 0.1, "grad_norm": 1.0610027884391453, "learning_rate": 1.9769046012069273e-05, "loss": 0.611, "step": 1188 }, { "epoch": 0.1, "grad_norm": 1.0705797801046106, "learning_rate": 1.9768483177596008e-05, "loss": 0.6007, "step": 1189 }, { "epoch": 0.1, "grad_norm": 0.9870195777939947, "learning_rate": 1.9767919666175526e-05, "loss": 0.579, "step": 1190 }, { "epoch": 0.1, "grad_norm": 1.0850603051142014, "learning_rate": 1.976735547784687e-05, "loss": 0.5486, "step": 1191 }, { "epoch": 0.1, "grad_norm": 0.976803281526567, "learning_rate": 1.976679061264915e-05, "loss": 0.5985, "step": 1192 }, { "epoch": 0.1, "grad_norm": 1.0568799468567787, "learning_rate": 1.97662250706215e-05, "loss": 0.5601, "step": 1193 }, { "epoch": 0.1, "grad_norm": 1.0791769089869665, "learning_rate": 1.9765658851803116e-05, "loss": 0.593, "step": 1194 }, { "epoch": 0.1, "grad_norm": 1.0301803244650218, "learning_rate": 1.9765091956233235e-05, "loss": 0.5781, "step": 1195 }, { "epoch": 0.1, "grad_norm": 1.1232153943591479, "learning_rate": 1.9764524383951147e-05, "loss": 0.681, "step": 1196 }, { "epoch": 0.1, "grad_norm": 0.978549446797339, "learning_rate": 1.9763956134996176e-05, "loss": 0.6041, "step": 1197 }, { "epoch": 0.1, "grad_norm": 1.0988170899210346, "learning_rate": 1.9763387209407706e-05, "loss": 0.6253, "step": 1198 }, { "epoch": 0.1, "grad_norm": 0.9992464023597344, "learning_rate": 1.9762817607225163e-05, "loss": 0.5952, "step": 1199 }, { "epoch": 0.1, "grad_norm": 1.0648084957537778, "learning_rate": 1.976224732848802e-05, "loss": 0.6382, "step": 1200 }, { "epoch": 0.1, "grad_norm": 1.0094155962654219, "learning_rate": 1.9761676373235797e-05, "loss": 0.5395, "step": 1201 }, { "epoch": 0.1, "grad_norm": 0.9813734130095451, "learning_rate": 1.976110474150806e-05, "loss": 0.6242, "step": 1202 }, { "epoch": 0.1, "grad_norm": 0.9800418269518998, "learning_rate": 1.976053243334442e-05, "loss": 0.5816, "step": 1203 }, { "epoch": 0.1, "grad_norm": 1.0461807540210366, "learning_rate": 1.975995944878454e-05, "loss": 0.6228, "step": 1204 }, { "epoch": 0.1, "grad_norm": 1.1634591956582694, "learning_rate": 1.9759385787868128e-05, "loss": 0.642, "step": 1205 }, { "epoch": 0.1, "grad_norm": 1.0139084190327168, "learning_rate": 1.9758811450634936e-05, "loss": 0.5179, "step": 1206 }, { "epoch": 0.1, "grad_norm": 1.1264320315929353, "learning_rate": 1.9758236437124768e-05, "loss": 0.6243, "step": 1207 }, { "epoch": 0.1, "grad_norm": 1.0325778222003517, "learning_rate": 1.975766074737747e-05, "loss": 0.5712, "step": 1208 }, { "epoch": 0.1, "grad_norm": 1.0593457101808865, "learning_rate": 1.975708438143294e-05, "loss": 0.6367, "step": 1209 }, { "epoch": 0.1, "grad_norm": 1.1923816644294367, "learning_rate": 1.9756507339331115e-05, "loss": 0.6276, "step": 1210 }, { "epoch": 0.1, "grad_norm": 0.9729967459849977, "learning_rate": 1.9755929621111985e-05, "loss": 0.6313, "step": 1211 }, { "epoch": 0.1, "grad_norm": 1.0460689918701482, "learning_rate": 1.9755351226815586e-05, "loss": 0.6645, "step": 1212 }, { "epoch": 0.1, "grad_norm": 0.9693482523850563, "learning_rate": 1.9754772156482e-05, "loss": 0.5295, "step": 1213 }, { "epoch": 0.1, "grad_norm": 1.124750196318656, "learning_rate": 1.9754192410151357e-05, "loss": 0.5686, "step": 1214 }, { "epoch": 0.1, "grad_norm": 1.073152082986142, "learning_rate": 1.975361198786383e-05, "loss": 0.6052, "step": 1215 }, { "epoch": 0.1, "grad_norm": 1.023364933183271, "learning_rate": 1.9753030889659644e-05, "loss": 0.5647, "step": 1216 }, { "epoch": 0.1, "grad_norm": 1.081775326478639, "learning_rate": 1.975244911557907e-05, "loss": 0.632, "step": 1217 }, { "epoch": 0.1, "grad_norm": 1.1735100931930877, "learning_rate": 1.9751866665662424e-05, "loss": 0.5785, "step": 1218 }, { "epoch": 0.1, "grad_norm": 1.0741437321863894, "learning_rate": 1.9751283539950065e-05, "loss": 0.6068, "step": 1219 }, { "epoch": 0.1, "grad_norm": 1.0562432053843551, "learning_rate": 1.9750699738482403e-05, "loss": 0.636, "step": 1220 }, { "epoch": 0.1, "grad_norm": 1.0989013126993985, "learning_rate": 1.9750115261299903e-05, "loss": 0.6054, "step": 1221 }, { "epoch": 0.1, "grad_norm": 1.0119053434260734, "learning_rate": 1.9749530108443063e-05, "loss": 0.6065, "step": 1222 }, { "epoch": 0.1, "grad_norm": 1.079234448458603, "learning_rate": 1.9748944279952433e-05, "loss": 0.6835, "step": 1223 }, { "epoch": 0.1, "grad_norm": 1.0915946403546992, "learning_rate": 1.9748357775868615e-05, "loss": 0.6293, "step": 1224 }, { "epoch": 0.1, "grad_norm": 1.0920504111740896, "learning_rate": 1.9747770596232247e-05, "loss": 0.7179, "step": 1225 }, { "epoch": 0.1, "grad_norm": 1.1510367657359035, "learning_rate": 1.974718274108402e-05, "loss": 0.6247, "step": 1226 }, { "epoch": 0.1, "grad_norm": 0.9528179845476008, "learning_rate": 1.974659421046468e-05, "loss": 0.5738, "step": 1227 }, { "epoch": 0.1, "grad_norm": 1.0980600804946157, "learning_rate": 1.9746005004415004e-05, "loss": 0.603, "step": 1228 }, { "epoch": 0.1, "grad_norm": 1.112778681258694, "learning_rate": 1.9745415122975825e-05, "loss": 0.6356, "step": 1229 }, { "epoch": 0.1, "grad_norm": 1.0704814670074079, "learning_rate": 1.9744824566188027e-05, "loss": 0.6157, "step": 1230 }, { "epoch": 0.1, "grad_norm": 1.0033594940133337, "learning_rate": 1.9744233334092525e-05, "loss": 0.6019, "step": 1231 }, { "epoch": 0.1, "grad_norm": 1.2024696465537366, "learning_rate": 1.9743641426730297e-05, "loss": 0.6621, "step": 1232 }, { "epoch": 0.1, "grad_norm": 0.9820245364504157, "learning_rate": 1.9743048844142364e-05, "loss": 0.6052, "step": 1233 }, { "epoch": 0.1, "grad_norm": 1.143158745985614, "learning_rate": 1.9742455586369786e-05, "loss": 0.6646, "step": 1234 }, { "epoch": 0.1, "grad_norm": 1.2942234573798506, "learning_rate": 1.9741861653453672e-05, "loss": 0.6221, "step": 1235 }, { "epoch": 0.1, "grad_norm": 1.2151223441956862, "learning_rate": 1.9741267045435193e-05, "loss": 0.6063, "step": 1236 }, { "epoch": 0.1, "grad_norm": 1.0487514310045238, "learning_rate": 1.9740671762355548e-05, "loss": 0.5818, "step": 1237 }, { "epoch": 0.1, "grad_norm": 1.0123848172684526, "learning_rate": 1.9740075804255987e-05, "loss": 0.6388, "step": 1238 }, { "epoch": 0.1, "grad_norm": 0.9570590630526808, "learning_rate": 1.9739479171177816e-05, "loss": 0.6237, "step": 1239 }, { "epoch": 0.1, "grad_norm": 1.156696361880616, "learning_rate": 1.9738881863162372e-05, "loss": 0.7091, "step": 1240 }, { "epoch": 0.1, "grad_norm": 1.0897508472632145, "learning_rate": 1.973828388025106e-05, "loss": 0.6423, "step": 1241 }, { "epoch": 0.1, "grad_norm": 1.1737063919986686, "learning_rate": 1.9737685222485307e-05, "loss": 0.6774, "step": 1242 }, { "epoch": 0.1, "grad_norm": 1.0938566735390922, "learning_rate": 1.9737085889906608e-05, "loss": 0.6219, "step": 1243 }, { "epoch": 0.1, "grad_norm": 1.1437393726188527, "learning_rate": 1.9736485882556495e-05, "loss": 0.6295, "step": 1244 }, { "epoch": 0.1, "grad_norm": 1.0791457714353598, "learning_rate": 1.9735885200476545e-05, "loss": 0.6326, "step": 1245 }, { "epoch": 0.1, "grad_norm": 1.105337478068499, "learning_rate": 1.9735283843708384e-05, "loss": 0.5601, "step": 1246 }, { "epoch": 0.1, "grad_norm": 0.9900255732554479, "learning_rate": 1.973468181229369e-05, "loss": 0.612, "step": 1247 }, { "epoch": 0.1, "grad_norm": 0.9638114049500947, "learning_rate": 1.9734079106274185e-05, "loss": 0.5639, "step": 1248 }, { "epoch": 0.1, "grad_norm": 1.155607623256461, "learning_rate": 1.9733475725691627e-05, "loss": 0.656, "step": 1249 }, { "epoch": 0.1, "grad_norm": 0.8552961484865126, "learning_rate": 1.9732871670587835e-05, "loss": 0.5105, "step": 1250 }, { "epoch": 0.1, "grad_norm": 1.010264435160565, "learning_rate": 1.973226694100467e-05, "loss": 0.6195, "step": 1251 }, { "epoch": 0.1, "grad_norm": 1.7056228909411109, "learning_rate": 1.9731661536984038e-05, "loss": 0.4986, "step": 1252 }, { "epoch": 0.1, "grad_norm": 0.9955221005221241, "learning_rate": 1.9731055458567895e-05, "loss": 0.6386, "step": 1253 }, { "epoch": 0.1, "grad_norm": 1.3420947012150832, "learning_rate": 1.973044870579824e-05, "loss": 0.5083, "step": 1254 }, { "epoch": 0.1, "grad_norm": 1.0090464596512863, "learning_rate": 1.972984127871712e-05, "loss": 0.6675, "step": 1255 }, { "epoch": 0.1, "grad_norm": 1.0806608477579371, "learning_rate": 1.972923317736663e-05, "loss": 0.6017, "step": 1256 }, { "epoch": 0.1, "grad_norm": 1.12514539614613, "learning_rate": 1.9728624401788908e-05, "loss": 0.625, "step": 1257 }, { "epoch": 0.1, "grad_norm": 1.0903568183651684, "learning_rate": 1.972801495202615e-05, "loss": 0.6665, "step": 1258 }, { "epoch": 0.1, "grad_norm": 0.9965450584629888, "learning_rate": 1.972740482812058e-05, "loss": 0.589, "step": 1259 }, { "epoch": 0.1, "grad_norm": 1.0261512414679927, "learning_rate": 1.9726794030114484e-05, "loss": 0.4879, "step": 1260 }, { "epoch": 0.1, "grad_norm": 1.2427966939144413, "learning_rate": 1.972618255805019e-05, "loss": 0.6667, "step": 1261 }, { "epoch": 0.1, "grad_norm": 1.101568478078574, "learning_rate": 1.9725570411970074e-05, "loss": 0.6222, "step": 1262 }, { "epoch": 0.1, "grad_norm": 1.0828180407176138, "learning_rate": 1.972495759191655e-05, "loss": 0.6233, "step": 1263 }, { "epoch": 0.1, "grad_norm": 1.0487641243984773, "learning_rate": 1.9724344097932097e-05, "loss": 0.6602, "step": 1264 }, { "epoch": 0.1, "grad_norm": 1.1526684403152978, "learning_rate": 1.972372993005922e-05, "loss": 0.6448, "step": 1265 }, { "epoch": 0.1, "grad_norm": 1.029942884679848, "learning_rate": 1.9723115088340483e-05, "loss": 0.6205, "step": 1266 }, { "epoch": 0.1, "grad_norm": 1.0887794032526703, "learning_rate": 1.9722499572818496e-05, "loss": 0.5804, "step": 1267 }, { "epoch": 0.1, "grad_norm": 1.315334282162586, "learning_rate": 1.972188338353591e-05, "loss": 0.6214, "step": 1268 }, { "epoch": 0.1, "grad_norm": 1.1271196813059783, "learning_rate": 1.9721266520535435e-05, "loss": 0.6682, "step": 1269 }, { "epoch": 0.1, "grad_norm": 1.163495523822124, "learning_rate": 1.972064898385981e-05, "loss": 0.6164, "step": 1270 }, { "epoch": 0.1, "grad_norm": 1.0811227308269724, "learning_rate": 1.972003077355183e-05, "loss": 0.6423, "step": 1271 }, { "epoch": 0.1, "grad_norm": 1.0236819263499997, "learning_rate": 1.971941188965434e-05, "loss": 0.6196, "step": 1272 }, { "epoch": 0.1, "grad_norm": 1.0352641260022146, "learning_rate": 1.971879233221023e-05, "loss": 0.6194, "step": 1273 }, { "epoch": 0.1, "grad_norm": 1.0848684317413644, "learning_rate": 1.971817210126243e-05, "loss": 0.6133, "step": 1274 }, { "epoch": 0.1, "grad_norm": 1.0566291958722285, "learning_rate": 1.9717551196853925e-05, "loss": 0.5973, "step": 1275 }, { "epoch": 0.1, "grad_norm": 1.0355223511760434, "learning_rate": 1.9716929619027734e-05, "loss": 0.5948, "step": 1276 }, { "epoch": 0.1, "grad_norm": 1.1732139139098121, "learning_rate": 1.971630736782695e-05, "loss": 0.7294, "step": 1277 }, { "epoch": 0.1, "grad_norm": 0.9992577370321353, "learning_rate": 1.9715684443294677e-05, "loss": 0.6432, "step": 1278 }, { "epoch": 0.1, "grad_norm": 1.0221808293116248, "learning_rate": 1.971506084547409e-05, "loss": 0.5805, "step": 1279 }, { "epoch": 0.1, "grad_norm": 1.0768858195353592, "learning_rate": 1.9714436574408408e-05, "loss": 0.6353, "step": 1280 }, { "epoch": 0.1, "grad_norm": 0.9226845697587781, "learning_rate": 1.9713811630140885e-05, "loss": 0.592, "step": 1281 }, { "epoch": 0.1, "grad_norm": 1.0268897107440507, "learning_rate": 1.971318601271483e-05, "loss": 0.6324, "step": 1282 }, { "epoch": 0.1, "grad_norm": 1.0428076161671564, "learning_rate": 1.9712559722173602e-05, "loss": 0.6377, "step": 1283 }, { "epoch": 0.1, "grad_norm": 1.0859639624879796, "learning_rate": 1.9711932758560604e-05, "loss": 0.6022, "step": 1284 }, { "epoch": 0.1, "grad_norm": 1.0210349956207485, "learning_rate": 1.971130512191928e-05, "loss": 0.6474, "step": 1285 }, { "epoch": 0.1, "grad_norm": 1.0599033695812297, "learning_rate": 1.971067681229312e-05, "loss": 0.6067, "step": 1286 }, { "epoch": 0.1, "grad_norm": 1.120979412441857, "learning_rate": 1.971004782972567e-05, "loss": 0.6269, "step": 1287 }, { "epoch": 0.1, "grad_norm": 1.2802519696010277, "learning_rate": 1.9709418174260523e-05, "loss": 0.547, "step": 1288 }, { "epoch": 0.1, "grad_norm": 1.0022250775277506, "learning_rate": 1.9708787845941306e-05, "loss": 0.6117, "step": 1289 }, { "epoch": 0.1, "grad_norm": 1.0787715179894646, "learning_rate": 1.97081568448117e-05, "loss": 0.6176, "step": 1290 }, { "epoch": 0.1, "grad_norm": 1.0037129554011988, "learning_rate": 1.970752517091544e-05, "loss": 0.6323, "step": 1291 }, { "epoch": 0.11, "grad_norm": 1.0334591262363808, "learning_rate": 1.9706892824296297e-05, "loss": 0.6304, "step": 1292 }, { "epoch": 0.11, "grad_norm": 1.0374628218351345, "learning_rate": 1.9706259804998093e-05, "loss": 0.4893, "step": 1293 }, { "epoch": 0.11, "grad_norm": 0.9865355112514083, "learning_rate": 1.970562611306469e-05, "loss": 0.5467, "step": 1294 }, { "epoch": 0.11, "grad_norm": 1.116948900462893, "learning_rate": 1.9704991748540004e-05, "loss": 0.611, "step": 1295 }, { "epoch": 0.11, "grad_norm": 1.084751087051627, "learning_rate": 1.9704356711468e-05, "loss": 0.644, "step": 1296 }, { "epoch": 0.11, "grad_norm": 1.1208899298642612, "learning_rate": 1.9703721001892685e-05, "loss": 0.6399, "step": 1297 }, { "epoch": 0.11, "grad_norm": 1.0538062893584283, "learning_rate": 1.9703084619858112e-05, "loss": 0.6364, "step": 1298 }, { "epoch": 0.11, "grad_norm": 0.9944067068990796, "learning_rate": 1.9702447565408382e-05, "loss": 0.6172, "step": 1299 }, { "epoch": 0.11, "grad_norm": 1.022208238295205, "learning_rate": 1.970180983858764e-05, "loss": 0.6118, "step": 1300 }, { "epoch": 0.11, "grad_norm": 1.0199055126077872, "learning_rate": 1.970117143944008e-05, "loss": 0.6156, "step": 1301 }, { "epoch": 0.11, "grad_norm": 0.982700829054324, "learning_rate": 1.9700532368009947e-05, "loss": 0.5647, "step": 1302 }, { "epoch": 0.11, "grad_norm": 1.0121386373965509, "learning_rate": 1.9699892624341527e-05, "loss": 0.604, "step": 1303 }, { "epoch": 0.11, "grad_norm": 0.9843522129415369, "learning_rate": 1.9699252208479147e-05, "loss": 0.4893, "step": 1304 }, { "epoch": 0.11, "grad_norm": 1.0805374392292688, "learning_rate": 1.9698611120467196e-05, "loss": 0.6397, "step": 1305 }, { "epoch": 0.11, "grad_norm": 1.0497480677687236, "learning_rate": 1.9697969360350098e-05, "loss": 0.5588, "step": 1306 }, { "epoch": 0.11, "grad_norm": 1.0958726599081376, "learning_rate": 1.9697326928172323e-05, "loss": 0.6535, "step": 1307 }, { "epoch": 0.11, "grad_norm": 0.9666448228023902, "learning_rate": 1.9696683823978392e-05, "loss": 0.576, "step": 1308 }, { "epoch": 0.11, "grad_norm": 0.9946083522677421, "learning_rate": 1.9696040047812874e-05, "loss": 0.6022, "step": 1309 }, { "epoch": 0.11, "grad_norm": 1.0237237273450226, "learning_rate": 1.9695395599720385e-05, "loss": 0.6396, "step": 1310 }, { "epoch": 0.11, "grad_norm": 1.0853962147117462, "learning_rate": 1.9694750479745573e-05, "loss": 0.6019, "step": 1311 }, { "epoch": 0.11, "grad_norm": 1.002745208425993, "learning_rate": 1.969410468793316e-05, "loss": 0.576, "step": 1312 }, { "epoch": 0.11, "grad_norm": 1.033603915650126, "learning_rate": 1.9693458224327886e-05, "loss": 0.5707, "step": 1313 }, { "epoch": 0.11, "grad_norm": 1.0369646792701839, "learning_rate": 1.9692811088974556e-05, "loss": 0.5698, "step": 1314 }, { "epoch": 0.11, "grad_norm": 1.0388865698456857, "learning_rate": 1.9692163281918016e-05, "loss": 0.623, "step": 1315 }, { "epoch": 0.11, "grad_norm": 0.9886888943099889, "learning_rate": 1.9691514803203157e-05, "loss": 0.6026, "step": 1316 }, { "epoch": 0.11, "grad_norm": 1.0760978377547694, "learning_rate": 1.969086565287492e-05, "loss": 0.6156, "step": 1317 }, { "epoch": 0.11, "grad_norm": 1.0024953962331788, "learning_rate": 1.9690215830978286e-05, "loss": 0.6297, "step": 1318 }, { "epoch": 0.11, "grad_norm": 1.092907645473543, "learning_rate": 1.968956533755829e-05, "loss": 0.6497, "step": 1319 }, { "epoch": 0.11, "grad_norm": 1.0441774948342657, "learning_rate": 1.968891417266001e-05, "loss": 0.6091, "step": 1320 }, { "epoch": 0.11, "grad_norm": 0.9898586379948117, "learning_rate": 1.9688262336328576e-05, "loss": 0.5755, "step": 1321 }, { "epoch": 0.11, "grad_norm": 0.9397873413080308, "learning_rate": 1.9687609828609156e-05, "loss": 0.6226, "step": 1322 }, { "epoch": 0.11, "grad_norm": 1.0412928068520495, "learning_rate": 1.9686956649546964e-05, "loss": 0.5881, "step": 1323 }, { "epoch": 0.11, "grad_norm": 0.9954215004050639, "learning_rate": 1.9686302799187272e-05, "loss": 0.6102, "step": 1324 }, { "epoch": 0.11, "grad_norm": 1.1638641323105938, "learning_rate": 1.9685648277575385e-05, "loss": 0.6323, "step": 1325 }, { "epoch": 0.11, "grad_norm": 0.9463279359139796, "learning_rate": 1.9684993084756664e-05, "loss": 0.5582, "step": 1326 }, { "epoch": 0.11, "grad_norm": 1.071442865518808, "learning_rate": 1.9684337220776514e-05, "loss": 0.5199, "step": 1327 }, { "epoch": 0.11, "grad_norm": 1.0170811281607908, "learning_rate": 1.9683680685680382e-05, "loss": 0.5709, "step": 1328 }, { "epoch": 0.11, "grad_norm": 0.9828891043357619, "learning_rate": 1.9683023479513768e-05, "loss": 0.6285, "step": 1329 }, { "epoch": 0.11, "grad_norm": 0.9806341501252827, "learning_rate": 1.968236560232222e-05, "loss": 0.6161, "step": 1330 }, { "epoch": 0.11, "grad_norm": 1.0613457807345021, "learning_rate": 1.968170705415132e-05, "loss": 0.6188, "step": 1331 }, { "epoch": 0.11, "grad_norm": 1.0685738980115753, "learning_rate": 1.9681047835046708e-05, "loss": 0.6087, "step": 1332 }, { "epoch": 0.11, "grad_norm": 0.9840438544734612, "learning_rate": 1.9680387945054073e-05, "loss": 0.5443, "step": 1333 }, { "epoch": 0.11, "grad_norm": 1.140764435462584, "learning_rate": 1.9679727384219137e-05, "loss": 0.6209, "step": 1334 }, { "epoch": 0.11, "grad_norm": 1.0182327607866783, "learning_rate": 1.967906615258768e-05, "loss": 0.6131, "step": 1335 }, { "epoch": 0.11, "grad_norm": 1.0021196334036369, "learning_rate": 1.9678404250205522e-05, "loss": 0.6491, "step": 1336 }, { "epoch": 0.11, "grad_norm": 0.9975411361548076, "learning_rate": 1.9677741677118536e-05, "loss": 0.5964, "step": 1337 }, { "epoch": 0.11, "grad_norm": 0.924058375002181, "learning_rate": 1.9677078433372635e-05, "loss": 0.6155, "step": 1338 }, { "epoch": 0.11, "grad_norm": 1.1148488713326163, "learning_rate": 1.9676414519013782e-05, "loss": 0.6719, "step": 1339 }, { "epoch": 0.11, "grad_norm": 1.0508767522012055, "learning_rate": 1.9675749934087988e-05, "loss": 0.6367, "step": 1340 }, { "epoch": 0.11, "grad_norm": 1.0723392595965906, "learning_rate": 1.9675084678641303e-05, "loss": 0.615, "step": 1341 }, { "epoch": 0.11, "grad_norm": 1.0466812905500391, "learning_rate": 1.9674418752719835e-05, "loss": 0.6187, "step": 1342 }, { "epoch": 0.11, "grad_norm": 1.0192894391011744, "learning_rate": 1.9673752156369726e-05, "loss": 0.6268, "step": 1343 }, { "epoch": 0.11, "grad_norm": 0.9594859291872142, "learning_rate": 1.9673084889637172e-05, "loss": 0.5834, "step": 1344 }, { "epoch": 0.11, "grad_norm": 1.0881934969983387, "learning_rate": 1.9672416952568416e-05, "loss": 0.6063, "step": 1345 }, { "epoch": 0.11, "grad_norm": 1.1647734316504663, "learning_rate": 1.9671748345209746e-05, "loss": 0.6656, "step": 1346 }, { "epoch": 0.11, "grad_norm": 1.1321357566390124, "learning_rate": 1.9671079067607495e-05, "loss": 0.683, "step": 1347 }, { "epoch": 0.11, "grad_norm": 1.0163862126726073, "learning_rate": 1.9670409119808042e-05, "loss": 0.6353, "step": 1348 }, { "epoch": 0.11, "grad_norm": 1.148257709769176, "learning_rate": 1.9669738501857812e-05, "loss": 0.6746, "step": 1349 }, { "epoch": 0.11, "grad_norm": 1.0982853234518442, "learning_rate": 1.9669067213803287e-05, "loss": 0.6424, "step": 1350 }, { "epoch": 0.11, "grad_norm": 1.126143133493401, "learning_rate": 1.9668395255690975e-05, "loss": 0.686, "step": 1351 }, { "epoch": 0.11, "grad_norm": 0.9792936415990227, "learning_rate": 1.966772262756745e-05, "loss": 0.6292, "step": 1352 }, { "epoch": 0.11, "grad_norm": 1.1548354716465838, "learning_rate": 1.966704932947932e-05, "loss": 0.6186, "step": 1353 }, { "epoch": 0.11, "grad_norm": 1.000836433337724, "learning_rate": 1.966637536147325e-05, "loss": 0.5942, "step": 1354 }, { "epoch": 0.11, "grad_norm": 1.0117949869053557, "learning_rate": 1.966570072359594e-05, "loss": 0.5791, "step": 1355 }, { "epoch": 0.11, "grad_norm": 1.054083198075337, "learning_rate": 1.966502541589414e-05, "loss": 0.5827, "step": 1356 }, { "epoch": 0.11, "grad_norm": 1.1281975366177772, "learning_rate": 1.9664349438414656e-05, "loss": 0.6375, "step": 1357 }, { "epoch": 0.11, "grad_norm": 1.0217925796235703, "learning_rate": 1.9663672791204328e-05, "loss": 0.6147, "step": 1358 }, { "epoch": 0.11, "grad_norm": 1.1560400317141977, "learning_rate": 1.9662995474310042e-05, "loss": 0.6497, "step": 1359 }, { "epoch": 0.11, "grad_norm": 1.0824588534286652, "learning_rate": 1.9662317487778745e-05, "loss": 0.6342, "step": 1360 }, { "epoch": 0.11, "grad_norm": 1.066511214990864, "learning_rate": 1.9661638831657414e-05, "loss": 0.6056, "step": 1361 }, { "epoch": 0.11, "grad_norm": 1.1489912524465191, "learning_rate": 1.9660959505993086e-05, "loss": 0.6158, "step": 1362 }, { "epoch": 0.11, "grad_norm": 1.0074515766780978, "learning_rate": 1.966027951083283e-05, "loss": 0.6348, "step": 1363 }, { "epoch": 0.11, "grad_norm": 1.0830137168473368, "learning_rate": 1.9659598846223775e-05, "loss": 0.6652, "step": 1364 }, { "epoch": 0.11, "grad_norm": 0.9800063722365412, "learning_rate": 1.9658917512213084e-05, "loss": 0.5851, "step": 1365 }, { "epoch": 0.11, "grad_norm": 0.9938348549484941, "learning_rate": 1.9658235508847982e-05, "loss": 0.6333, "step": 1366 }, { "epoch": 0.11, "grad_norm": 0.9375863809230456, "learning_rate": 1.9657552836175725e-05, "loss": 0.6282, "step": 1367 }, { "epoch": 0.11, "grad_norm": 1.2414875465032795, "learning_rate": 1.965686949424362e-05, "loss": 0.6691, "step": 1368 }, { "epoch": 0.11, "grad_norm": 0.9846915989441325, "learning_rate": 1.9656185483099027e-05, "loss": 0.6228, "step": 1369 }, { "epoch": 0.11, "grad_norm": 0.9855716559708518, "learning_rate": 1.9655500802789342e-05, "loss": 0.5688, "step": 1370 }, { "epoch": 0.11, "grad_norm": 0.9690623160434885, "learning_rate": 1.9654815453362016e-05, "loss": 0.5783, "step": 1371 }, { "epoch": 0.11, "grad_norm": 1.0343685845795267, "learning_rate": 1.9654129434864545e-05, "loss": 0.6056, "step": 1372 }, { "epoch": 0.11, "grad_norm": 1.0140780390900874, "learning_rate": 1.965344274734447e-05, "loss": 0.5836, "step": 1373 }, { "epoch": 0.11, "grad_norm": 1.0009332862499594, "learning_rate": 1.965275539084937e-05, "loss": 0.6123, "step": 1374 }, { "epoch": 0.11, "grad_norm": 1.016702561351821, "learning_rate": 1.9652067365426887e-05, "loss": 0.5728, "step": 1375 }, { "epoch": 0.11, "grad_norm": 1.0162934957404974, "learning_rate": 1.96513786711247e-05, "loss": 0.6103, "step": 1376 }, { "epoch": 0.11, "grad_norm": 1.0463167905694821, "learning_rate": 1.9650689307990522e-05, "loss": 0.5792, "step": 1377 }, { "epoch": 0.11, "grad_norm": 1.0308461139411522, "learning_rate": 1.964999927607214e-05, "loss": 0.5638, "step": 1378 }, { "epoch": 0.11, "grad_norm": 1.0748914011352861, "learning_rate": 1.9649308575417372e-05, "loss": 0.6312, "step": 1379 }, { "epoch": 0.11, "grad_norm": 1.0594179627576494, "learning_rate": 1.9648617206074073e-05, "loss": 0.624, "step": 1380 }, { "epoch": 0.11, "grad_norm": 0.9715447357154672, "learning_rate": 1.9647925168090162e-05, "loss": 0.5731, "step": 1381 }, { "epoch": 0.11, "grad_norm": 1.0014903173207736, "learning_rate": 1.9647232461513597e-05, "loss": 0.6465, "step": 1382 }, { "epoch": 0.11, "grad_norm": 1.0461118034809385, "learning_rate": 1.9646539086392376e-05, "loss": 0.6042, "step": 1383 }, { "epoch": 0.11, "grad_norm": 1.1278315412347864, "learning_rate": 1.9645845042774555e-05, "loss": 0.6311, "step": 1384 }, { "epoch": 0.11, "grad_norm": 0.9916310533107556, "learning_rate": 1.9645150330708225e-05, "loss": 0.5808, "step": 1385 }, { "epoch": 0.11, "grad_norm": 1.1730449228013495, "learning_rate": 1.9644454950241532e-05, "loss": 0.6499, "step": 1386 }, { "epoch": 0.11, "grad_norm": 1.0680519469964425, "learning_rate": 1.9643758901422673e-05, "loss": 0.6125, "step": 1387 }, { "epoch": 0.11, "grad_norm": 1.0495618373555042, "learning_rate": 1.964306218429987e-05, "loss": 0.6534, "step": 1388 }, { "epoch": 0.11, "grad_norm": 0.991210791391256, "learning_rate": 1.964236479892141e-05, "loss": 0.5843, "step": 1389 }, { "epoch": 0.11, "grad_norm": 1.1017210183429609, "learning_rate": 1.9641666745335626e-05, "loss": 0.6282, "step": 1390 }, { "epoch": 0.11, "grad_norm": 1.1241495120151075, "learning_rate": 1.9640968023590887e-05, "loss": 0.6856, "step": 1391 }, { "epoch": 0.11, "grad_norm": 1.02748779670214, "learning_rate": 1.9640268633735616e-05, "loss": 0.587, "step": 1392 }, { "epoch": 0.11, "grad_norm": 1.1048813470548953, "learning_rate": 1.963956857581828e-05, "loss": 0.6463, "step": 1393 }, { "epoch": 0.11, "grad_norm": 0.9678613319611744, "learning_rate": 1.963886784988739e-05, "loss": 0.6009, "step": 1394 }, { "epoch": 0.11, "grad_norm": 0.9820811076689967, "learning_rate": 1.9638166455991508e-05, "loss": 0.6384, "step": 1395 }, { "epoch": 0.11, "grad_norm": 1.0235020554571306, "learning_rate": 1.963746439417924e-05, "loss": 0.6096, "step": 1396 }, { "epoch": 0.11, "grad_norm": 1.0644057126732112, "learning_rate": 1.963676166449924e-05, "loss": 0.6501, "step": 1397 }, { "epoch": 0.11, "grad_norm": 1.0743452283499784, "learning_rate": 1.9636058267000203e-05, "loss": 0.6937, "step": 1398 }, { "epoch": 0.11, "grad_norm": 1.0666403475309791, "learning_rate": 1.9635354201730874e-05, "loss": 0.6369, "step": 1399 }, { "epoch": 0.11, "grad_norm": 1.0795702053351024, "learning_rate": 1.9634649468740048e-05, "loss": 0.5609, "step": 1400 }, { "epoch": 0.11, "grad_norm": 1.0842891736824818, "learning_rate": 1.963394406807656e-05, "loss": 0.6054, "step": 1401 }, { "epoch": 0.11, "grad_norm": 1.232357196608936, "learning_rate": 1.963323799978929e-05, "loss": 0.7096, "step": 1402 }, { "epoch": 0.11, "grad_norm": 1.0627038461122897, "learning_rate": 1.9632531263927173e-05, "loss": 0.5423, "step": 1403 }, { "epoch": 0.11, "grad_norm": 1.158864262721317, "learning_rate": 1.963182386053918e-05, "loss": 0.6784, "step": 1404 }, { "epoch": 0.11, "grad_norm": 1.1047999928694838, "learning_rate": 1.9631115789674343e-05, "loss": 0.5742, "step": 1405 }, { "epoch": 0.11, "grad_norm": 1.0348252416708916, "learning_rate": 1.963040705138172e-05, "loss": 0.6467, "step": 1406 }, { "epoch": 0.11, "grad_norm": 1.100756898992012, "learning_rate": 1.9629697645710432e-05, "loss": 0.6564, "step": 1407 }, { "epoch": 0.11, "grad_norm": 1.0134805396263211, "learning_rate": 1.962898757270964e-05, "loss": 0.6013, "step": 1408 }, { "epoch": 0.11, "grad_norm": 1.1551860789638277, "learning_rate": 1.9628276832428548e-05, "loss": 0.6122, "step": 1409 }, { "epoch": 0.11, "grad_norm": 1.0523592965335977, "learning_rate": 1.962756542491641e-05, "loss": 0.6456, "step": 1410 }, { "epoch": 0.11, "grad_norm": 1.0072420388751542, "learning_rate": 1.9626853350222535e-05, "loss": 0.6206, "step": 1411 }, { "epoch": 0.11, "grad_norm": 0.9389508294024854, "learning_rate": 1.962614060839626e-05, "loss": 0.5318, "step": 1412 }, { "epoch": 0.11, "grad_norm": 0.9785971976060436, "learning_rate": 1.9625427199486973e-05, "loss": 0.5988, "step": 1413 }, { "epoch": 0.11, "grad_norm": 1.0565134094275008, "learning_rate": 1.962471312354412e-05, "loss": 0.6017, "step": 1414 }, { "epoch": 0.12, "grad_norm": 1.0769950453386896, "learning_rate": 1.9623998380617187e-05, "loss": 0.5855, "step": 1415 }, { "epoch": 0.12, "grad_norm": 1.0424660062990199, "learning_rate": 1.9623282970755702e-05, "loss": 0.6113, "step": 1416 }, { "epoch": 0.12, "grad_norm": 0.9809188594461099, "learning_rate": 1.9622566894009247e-05, "loss": 0.6357, "step": 1417 }, { "epoch": 0.12, "grad_norm": 1.0366035114434953, "learning_rate": 1.962185015042744e-05, "loss": 0.5987, "step": 1418 }, { "epoch": 0.12, "grad_norm": 1.0288494452139783, "learning_rate": 1.962113274005995e-05, "loss": 0.5874, "step": 1419 }, { "epoch": 0.12, "grad_norm": 1.0099375316753343, "learning_rate": 1.9620414662956494e-05, "loss": 0.5621, "step": 1420 }, { "epoch": 0.12, "grad_norm": 0.9666975626620397, "learning_rate": 1.9619695919166836e-05, "loss": 0.5965, "step": 1421 }, { "epoch": 0.12, "grad_norm": 0.9862166627261453, "learning_rate": 1.9618976508740782e-05, "loss": 0.5448, "step": 1422 }, { "epoch": 0.12, "grad_norm": 0.8706924255202901, "learning_rate": 1.961825643172819e-05, "loss": 0.4272, "step": 1423 }, { "epoch": 0.12, "grad_norm": 0.9861330858786174, "learning_rate": 1.961753568817896e-05, "loss": 0.6266, "step": 1424 }, { "epoch": 0.12, "grad_norm": 0.9746193528706869, "learning_rate": 1.9616814278143038e-05, "loss": 0.6087, "step": 1425 }, { "epoch": 0.12, "grad_norm": 1.0273042877502343, "learning_rate": 1.9616092201670415e-05, "loss": 0.5207, "step": 1426 }, { "epoch": 0.12, "grad_norm": 0.9975075185558374, "learning_rate": 1.961536945881113e-05, "loss": 0.5777, "step": 1427 }, { "epoch": 0.12, "grad_norm": 1.074244733642685, "learning_rate": 1.9614646049615273e-05, "loss": 0.6868, "step": 1428 }, { "epoch": 0.12, "grad_norm": 1.019900271457737, "learning_rate": 1.961392197413297e-05, "loss": 0.5854, "step": 1429 }, { "epoch": 0.12, "grad_norm": 1.0169010906420888, "learning_rate": 1.9613197232414405e-05, "loss": 0.6737, "step": 1430 }, { "epoch": 0.12, "grad_norm": 1.0616525197387021, "learning_rate": 1.96124718245098e-05, "loss": 0.6442, "step": 1431 }, { "epoch": 0.12, "grad_norm": 1.0010044272938432, "learning_rate": 1.961174575046942e-05, "loss": 0.5957, "step": 1432 }, { "epoch": 0.12, "grad_norm": 1.0575345130965779, "learning_rate": 1.9611019010343585e-05, "loss": 0.5818, "step": 1433 }, { "epoch": 0.12, "grad_norm": 1.1147214676236437, "learning_rate": 1.9610291604182658e-05, "loss": 0.6263, "step": 1434 }, { "epoch": 0.12, "grad_norm": 1.1387158998687177, "learning_rate": 1.960956353203705e-05, "loss": 0.6204, "step": 1435 }, { "epoch": 0.12, "grad_norm": 1.2318473099005043, "learning_rate": 1.960883479395721e-05, "loss": 0.5864, "step": 1436 }, { "epoch": 0.12, "grad_norm": 1.0377594896122029, "learning_rate": 1.9608105389993644e-05, "loss": 0.5611, "step": 1437 }, { "epoch": 0.12, "grad_norm": 0.9970888793455268, "learning_rate": 1.9607375320196892e-05, "loss": 0.6519, "step": 1438 }, { "epoch": 0.12, "grad_norm": 1.0175219908144566, "learning_rate": 1.960664458461756e-05, "loss": 0.6332, "step": 1439 }, { "epoch": 0.12, "grad_norm": 1.0990453693654425, "learning_rate": 1.9605913183306272e-05, "loss": 0.4677, "step": 1440 }, { "epoch": 0.12, "grad_norm": 1.1027567360059916, "learning_rate": 1.9605181116313725e-05, "loss": 0.641, "step": 1441 }, { "epoch": 0.12, "grad_norm": 0.9380109470025033, "learning_rate": 1.9604448383690644e-05, "loss": 0.6192, "step": 1442 }, { "epoch": 0.12, "grad_norm": 0.9869363044160382, "learning_rate": 1.9603714985487813e-05, "loss": 0.6524, "step": 1443 }, { "epoch": 0.12, "grad_norm": 1.013485737719734, "learning_rate": 1.9602980921756046e-05, "loss": 0.5573, "step": 1444 }, { "epoch": 0.12, "grad_norm": 0.9702032466898539, "learning_rate": 1.9602246192546224e-05, "loss": 0.6087, "step": 1445 }, { "epoch": 0.12, "grad_norm": 1.1239818572944578, "learning_rate": 1.9601510797909257e-05, "loss": 0.6198, "step": 1446 }, { "epoch": 0.12, "grad_norm": 1.0349287078717646, "learning_rate": 1.9600774737896106e-05, "loss": 0.6169, "step": 1447 }, { "epoch": 0.12, "grad_norm": 1.0023446170801873, "learning_rate": 1.960003801255778e-05, "loss": 0.5426, "step": 1448 }, { "epoch": 0.12, "grad_norm": 1.0838020795025674, "learning_rate": 1.959930062194534e-05, "loss": 0.6661, "step": 1449 }, { "epoch": 0.12, "grad_norm": 1.1445880774155552, "learning_rate": 1.959856256610988e-05, "loss": 0.6241, "step": 1450 }, { "epoch": 0.12, "grad_norm": 1.0756990406078983, "learning_rate": 1.959782384510255e-05, "loss": 0.596, "step": 1451 }, { "epoch": 0.12, "grad_norm": 1.0238726201838086, "learning_rate": 1.959708445897454e-05, "loss": 0.6427, "step": 1452 }, { "epoch": 0.12, "grad_norm": 1.1307731133424648, "learning_rate": 1.9596344407777085e-05, "loss": 0.643, "step": 1453 }, { "epoch": 0.12, "grad_norm": 1.0285910561385603, "learning_rate": 1.9595603691561477e-05, "loss": 0.6756, "step": 1454 }, { "epoch": 0.12, "grad_norm": 0.9949420844408791, "learning_rate": 1.9594862310379046e-05, "loss": 0.6079, "step": 1455 }, { "epoch": 0.12, "grad_norm": 1.040586161086886, "learning_rate": 1.959412026428117e-05, "loss": 0.5854, "step": 1456 }, { "epoch": 0.12, "grad_norm": 0.9126335616675043, "learning_rate": 1.959337755331926e-05, "loss": 0.5551, "step": 1457 }, { "epoch": 0.12, "grad_norm": 1.0176160660649483, "learning_rate": 1.9592634177544803e-05, "loss": 0.5906, "step": 1458 }, { "epoch": 0.12, "grad_norm": 1.086794686195696, "learning_rate": 1.9591890137009308e-05, "loss": 0.6345, "step": 1459 }, { "epoch": 0.12, "grad_norm": 1.0365211797145388, "learning_rate": 1.9591145431764327e-05, "loss": 0.6051, "step": 1460 }, { "epoch": 0.12, "grad_norm": 1.1849633403345523, "learning_rate": 1.959040006186148e-05, "loss": 0.6648, "step": 1461 }, { "epoch": 0.12, "grad_norm": 1.0349945536963618, "learning_rate": 1.9589654027352412e-05, "loss": 0.6387, "step": 1462 }, { "epoch": 0.12, "grad_norm": 0.9929411170861682, "learning_rate": 1.958890732828883e-05, "loss": 0.5947, "step": 1463 }, { "epoch": 0.12, "grad_norm": 0.9923671261831932, "learning_rate": 1.9588159964722474e-05, "loss": 0.5977, "step": 1464 }, { "epoch": 0.12, "grad_norm": 1.0723403570973749, "learning_rate": 1.9587411936705135e-05, "loss": 0.6061, "step": 1465 }, { "epoch": 0.12, "grad_norm": 1.1309137191054779, "learning_rate": 1.9586663244288655e-05, "loss": 0.6172, "step": 1466 }, { "epoch": 0.12, "grad_norm": 1.0228119546392276, "learning_rate": 1.9585913887524914e-05, "loss": 0.5978, "step": 1467 }, { "epoch": 0.12, "grad_norm": 1.055575387496972, "learning_rate": 1.9585163866465847e-05, "loss": 0.6854, "step": 1468 }, { "epoch": 0.12, "grad_norm": 1.0321171852377182, "learning_rate": 1.958441318116342e-05, "loss": 0.6108, "step": 1469 }, { "epoch": 0.12, "grad_norm": 0.9265607906492737, "learning_rate": 1.9583661831669664e-05, "loss": 0.5967, "step": 1470 }, { "epoch": 0.12, "grad_norm": 1.1081882645274406, "learning_rate": 1.9582909818036648e-05, "loss": 0.5702, "step": 1471 }, { "epoch": 0.12, "grad_norm": 1.03719923918571, "learning_rate": 1.9582157140316472e-05, "loss": 0.5178, "step": 1472 }, { "epoch": 0.12, "grad_norm": 1.119059131475791, "learning_rate": 1.9581403798561314e-05, "loss": 0.5767, "step": 1473 }, { "epoch": 0.12, "grad_norm": 1.0311596675673023, "learning_rate": 1.9580649792823368e-05, "loss": 0.61, "step": 1474 }, { "epoch": 0.12, "grad_norm": 1.0583962918596255, "learning_rate": 1.957989512315489e-05, "loss": 0.579, "step": 1475 }, { "epoch": 0.12, "grad_norm": 1.084334762026287, "learning_rate": 1.957913978960818e-05, "loss": 0.6214, "step": 1476 }, { "epoch": 0.12, "grad_norm": 1.0887905371592597, "learning_rate": 1.9578383792235573e-05, "loss": 0.5538, "step": 1477 }, { "epoch": 0.12, "grad_norm": 1.057229505151521, "learning_rate": 1.957762713108947e-05, "loss": 0.5807, "step": 1478 }, { "epoch": 0.12, "grad_norm": 1.081309748615656, "learning_rate": 1.95768698062223e-05, "loss": 0.7256, "step": 1479 }, { "epoch": 0.12, "grad_norm": 1.0835482753095111, "learning_rate": 1.957611181768655e-05, "loss": 0.6008, "step": 1480 }, { "epoch": 0.12, "grad_norm": 0.9472949645396779, "learning_rate": 1.957535316553474e-05, "loss": 0.6263, "step": 1481 }, { "epoch": 0.12, "grad_norm": 1.0463220262272768, "learning_rate": 1.9574593849819453e-05, "loss": 0.5526, "step": 1482 }, { "epoch": 0.12, "grad_norm": 1.0668226136034622, "learning_rate": 1.9573833870593307e-05, "loss": 0.6201, "step": 1483 }, { "epoch": 0.12, "grad_norm": 1.2624562734003215, "learning_rate": 1.957307322790896e-05, "loss": 0.6532, "step": 1484 }, { "epoch": 0.12, "grad_norm": 1.0860664742619095, "learning_rate": 1.9572311921819135e-05, "loss": 0.5966, "step": 1485 }, { "epoch": 0.12, "grad_norm": 1.0627301848870327, "learning_rate": 1.957154995237658e-05, "loss": 0.656, "step": 1486 }, { "epoch": 0.12, "grad_norm": 1.0151482279119024, "learning_rate": 1.9570787319634107e-05, "loss": 0.6252, "step": 1487 }, { "epoch": 0.12, "grad_norm": 1.0381758566758736, "learning_rate": 1.957002402364456e-05, "loss": 0.5607, "step": 1488 }, { "epoch": 0.12, "grad_norm": 1.318043710299476, "learning_rate": 1.9569260064460837e-05, "loss": 0.7168, "step": 1489 }, { "epoch": 0.12, "grad_norm": 1.0484950881712423, "learning_rate": 1.9568495442135878e-05, "loss": 0.5741, "step": 1490 }, { "epoch": 0.12, "grad_norm": 1.257735140002447, "learning_rate": 1.9567730156722672e-05, "loss": 0.6263, "step": 1491 }, { "epoch": 0.12, "grad_norm": 1.1015010819265607, "learning_rate": 1.9566964208274254e-05, "loss": 0.5963, "step": 1492 }, { "epoch": 0.12, "grad_norm": 0.9913609664622401, "learning_rate": 1.9566197596843702e-05, "loss": 0.5649, "step": 1493 }, { "epoch": 0.12, "grad_norm": 1.0272315395873561, "learning_rate": 1.956543032248414e-05, "loss": 0.5699, "step": 1494 }, { "epoch": 0.12, "grad_norm": 1.0427619537229365, "learning_rate": 1.9564662385248743e-05, "loss": 0.5767, "step": 1495 }, { "epoch": 0.12, "grad_norm": 1.0631391429171742, "learning_rate": 1.9563893785190728e-05, "loss": 0.6465, "step": 1496 }, { "epoch": 0.12, "grad_norm": 0.9870702083748742, "learning_rate": 1.9563124522363357e-05, "loss": 0.6024, "step": 1497 }, { "epoch": 0.12, "grad_norm": 1.2315661829412479, "learning_rate": 1.9562354596819938e-05, "loss": 0.6791, "step": 1498 }, { "epoch": 0.12, "grad_norm": 1.1144355699428936, "learning_rate": 1.9561584008613826e-05, "loss": 0.6348, "step": 1499 }, { "epoch": 0.12, "grad_norm": 1.1130939812444343, "learning_rate": 1.9560812757798423e-05, "loss": 0.6291, "step": 1500 }, { "epoch": 0.12, "grad_norm": 1.2017247690800552, "learning_rate": 1.956004084442718e-05, "loss": 0.6411, "step": 1501 }, { "epoch": 0.12, "grad_norm": 1.126123159735174, "learning_rate": 1.955926826855358e-05, "loss": 0.6034, "step": 1502 }, { "epoch": 0.12, "grad_norm": 1.003296902988338, "learning_rate": 1.9558495030231174e-05, "loss": 0.6248, "step": 1503 }, { "epoch": 0.12, "grad_norm": 0.9657714590592112, "learning_rate": 1.9557721129513538e-05, "loss": 0.5877, "step": 1504 }, { "epoch": 0.12, "grad_norm": 0.9224512012672073, "learning_rate": 1.9556946566454308e-05, "loss": 0.5968, "step": 1505 }, { "epoch": 0.12, "grad_norm": 1.032329308432359, "learning_rate": 1.9556171341107152e-05, "loss": 0.5938, "step": 1506 }, { "epoch": 0.12, "grad_norm": 1.0343115532513394, "learning_rate": 1.9555395453525806e-05, "loss": 0.5759, "step": 1507 }, { "epoch": 0.12, "grad_norm": 1.1010501401367896, "learning_rate": 1.9554618903764026e-05, "loss": 0.6259, "step": 1508 }, { "epoch": 0.12, "grad_norm": 1.0039742973115253, "learning_rate": 1.9553841691875632e-05, "loss": 0.655, "step": 1509 }, { "epoch": 0.12, "grad_norm": 1.100021199901766, "learning_rate": 1.9553063817914482e-05, "loss": 0.4912, "step": 1510 }, { "epoch": 0.12, "grad_norm": 1.0913235913602368, "learning_rate": 1.9552285281934484e-05, "loss": 0.5687, "step": 1511 }, { "epoch": 0.12, "grad_norm": 1.1085906438760087, "learning_rate": 1.9551506083989592e-05, "loss": 0.6476, "step": 1512 }, { "epoch": 0.12, "grad_norm": 1.0239746332752369, "learning_rate": 1.9550726224133795e-05, "loss": 0.6609, "step": 1513 }, { "epoch": 0.12, "grad_norm": 1.0811194986543498, "learning_rate": 1.9549945702421144e-05, "loss": 0.5572, "step": 1514 }, { "epoch": 0.12, "grad_norm": 1.1327547585506779, "learning_rate": 1.9549164518905727e-05, "loss": 0.6541, "step": 1515 }, { "epoch": 0.12, "grad_norm": 1.1449440442891867, "learning_rate": 1.954838267364168e-05, "loss": 0.6387, "step": 1516 }, { "epoch": 0.12, "grad_norm": 1.0080649164608988, "learning_rate": 1.9547600166683184e-05, "loss": 0.6361, "step": 1517 }, { "epoch": 0.12, "grad_norm": 1.0634607963289726, "learning_rate": 1.954681699808446e-05, "loss": 0.58, "step": 1518 }, { "epoch": 0.12, "grad_norm": 1.0450288464842377, "learning_rate": 1.9546033167899788e-05, "loss": 0.6228, "step": 1519 }, { "epoch": 0.12, "grad_norm": 0.9794147398904083, "learning_rate": 1.9545248676183486e-05, "loss": 0.646, "step": 1520 }, { "epoch": 0.12, "grad_norm": 0.9439257970714194, "learning_rate": 1.9544463522989917e-05, "loss": 0.6007, "step": 1521 }, { "epoch": 0.12, "grad_norm": 0.9593080154837881, "learning_rate": 1.9543677708373496e-05, "loss": 0.5715, "step": 1522 }, { "epoch": 0.12, "grad_norm": 1.0550240084527236, "learning_rate": 1.954289123238867e-05, "loss": 0.698, "step": 1523 }, { "epoch": 0.12, "grad_norm": 1.010980510854376, "learning_rate": 1.9542104095089946e-05, "loss": 0.5256, "step": 1524 }, { "epoch": 0.12, "grad_norm": 1.0780155128709732, "learning_rate": 1.9541316296531875e-05, "loss": 0.6156, "step": 1525 }, { "epoch": 0.12, "grad_norm": 1.0783401810137738, "learning_rate": 1.9540527836769047e-05, "loss": 0.6011, "step": 1526 }, { "epoch": 0.12, "grad_norm": 1.0220845234322393, "learning_rate": 1.95397387158561e-05, "loss": 0.6436, "step": 1527 }, { "epoch": 0.12, "grad_norm": 1.006269151060652, "learning_rate": 1.9538948933847727e-05, "loss": 0.5765, "step": 1528 }, { "epoch": 0.12, "grad_norm": 1.0502520128791262, "learning_rate": 1.953815849079865e-05, "loss": 0.5935, "step": 1529 }, { "epoch": 0.12, "grad_norm": 1.0099151207552917, "learning_rate": 1.953736738676365e-05, "loss": 0.5783, "step": 1530 }, { "epoch": 0.12, "grad_norm": 1.0115633381309157, "learning_rate": 1.9536575621797546e-05, "loss": 0.7009, "step": 1531 }, { "epoch": 0.12, "grad_norm": 1.0314261395753932, "learning_rate": 1.9535783195955215e-05, "loss": 0.625, "step": 1532 }, { "epoch": 0.12, "grad_norm": 1.0305413122838198, "learning_rate": 1.9534990109291568e-05, "loss": 0.6119, "step": 1533 }, { "epoch": 0.12, "grad_norm": 1.0589889413813205, "learning_rate": 1.953419636186156e-05, "loss": 0.6151, "step": 1534 }, { "epoch": 0.12, "grad_norm": 1.0537572089897898, "learning_rate": 1.9533401953720204e-05, "loss": 0.732, "step": 1535 }, { "epoch": 0.12, "grad_norm": 1.1561489357870232, "learning_rate": 1.9532606884922547e-05, "loss": 0.6193, "step": 1536 }, { "epoch": 0.12, "grad_norm": 1.036129485903809, "learning_rate": 1.953181115552369e-05, "loss": 0.6357, "step": 1537 }, { "epoch": 0.12, "grad_norm": 0.9621713367722898, "learning_rate": 1.9531014765578774e-05, "loss": 0.5871, "step": 1538 }, { "epoch": 0.13, "grad_norm": 1.094823343969492, "learning_rate": 1.9530217715142987e-05, "loss": 0.6485, "step": 1539 }, { "epoch": 0.13, "grad_norm": 0.9785272655837435, "learning_rate": 1.9529420004271568e-05, "loss": 0.5039, "step": 1540 }, { "epoch": 0.13, "grad_norm": 1.083586772678874, "learning_rate": 1.9528621633019792e-05, "loss": 0.6447, "step": 1541 }, { "epoch": 0.13, "grad_norm": 1.0002925976491284, "learning_rate": 1.952782260144299e-05, "loss": 0.536, "step": 1542 }, { "epoch": 0.13, "grad_norm": 0.9262307923258066, "learning_rate": 1.9527022909596537e-05, "loss": 0.54, "step": 1543 }, { "epoch": 0.13, "grad_norm": 0.9800418846378972, "learning_rate": 1.9526222557535842e-05, "loss": 0.6442, "step": 1544 }, { "epoch": 0.13, "grad_norm": 0.9628400126389941, "learning_rate": 1.9525421545316378e-05, "loss": 0.5566, "step": 1545 }, { "epoch": 0.13, "grad_norm": 0.9428595296385236, "learning_rate": 1.9524619872993648e-05, "loss": 0.5485, "step": 1546 }, { "epoch": 0.13, "grad_norm": 1.1165341782891196, "learning_rate": 1.9523817540623208e-05, "loss": 0.5906, "step": 1547 }, { "epoch": 0.13, "grad_norm": 1.0112460723323362, "learning_rate": 1.9523014548260657e-05, "loss": 0.635, "step": 1548 }, { "epoch": 0.13, "grad_norm": 1.0185326086531783, "learning_rate": 1.9522210895961648e-05, "loss": 0.5875, "step": 1549 }, { "epoch": 0.13, "grad_norm": 1.00351955353462, "learning_rate": 1.9521406583781872e-05, "loss": 0.6074, "step": 1550 }, { "epoch": 0.13, "grad_norm": 1.0794945145826804, "learning_rate": 1.9520601611777065e-05, "loss": 0.6069, "step": 1551 }, { "epoch": 0.13, "grad_norm": 0.9679784995016987, "learning_rate": 1.9519795980003007e-05, "loss": 0.614, "step": 1552 }, { "epoch": 0.13, "grad_norm": 1.1043952426403454, "learning_rate": 1.9518989688515533e-05, "loss": 0.5914, "step": 1553 }, { "epoch": 0.13, "grad_norm": 1.1527097920161586, "learning_rate": 1.9518182737370515e-05, "loss": 0.7452, "step": 1554 }, { "epoch": 0.13, "grad_norm": 1.1059153106910997, "learning_rate": 1.9517375126623882e-05, "loss": 0.6206, "step": 1555 }, { "epoch": 0.13, "grad_norm": 0.9977261438193731, "learning_rate": 1.9516566856331593e-05, "loss": 0.6321, "step": 1556 }, { "epoch": 0.13, "grad_norm": 1.0201871612144144, "learning_rate": 1.951575792654966e-05, "loss": 0.5564, "step": 1557 }, { "epoch": 0.13, "grad_norm": 0.9601699304044047, "learning_rate": 1.9514948337334144e-05, "loss": 0.4204, "step": 1558 }, { "epoch": 0.13, "grad_norm": 0.9655527593749033, "learning_rate": 1.9514138088741146e-05, "loss": 0.5994, "step": 1559 }, { "epoch": 0.13, "grad_norm": 1.021938738185002, "learning_rate": 1.951332718082682e-05, "loss": 0.6092, "step": 1560 }, { "epoch": 0.13, "grad_norm": 1.1164618252129554, "learning_rate": 1.9512515613647358e-05, "loss": 0.6469, "step": 1561 }, { "epoch": 0.13, "grad_norm": 0.9661864554484725, "learning_rate": 1.9511703387259e-05, "loss": 0.543, "step": 1562 }, { "epoch": 0.13, "grad_norm": 0.9647944075014523, "learning_rate": 1.9510890501718037e-05, "loss": 0.5907, "step": 1563 }, { "epoch": 0.13, "grad_norm": 1.0424734503862871, "learning_rate": 1.95100769570808e-05, "loss": 0.6481, "step": 1564 }, { "epoch": 0.13, "grad_norm": 1.0588923703277024, "learning_rate": 1.9509262753403656e-05, "loss": 0.65, "step": 1565 }, { "epoch": 0.13, "grad_norm": 0.8810729326303413, "learning_rate": 1.950844789074305e-05, "loss": 0.5341, "step": 1566 }, { "epoch": 0.13, "grad_norm": 1.0265294325695518, "learning_rate": 1.950763236915543e-05, "loss": 0.6191, "step": 1567 }, { "epoch": 0.13, "grad_norm": 1.059898351367038, "learning_rate": 1.9506816188697322e-05, "loss": 0.6289, "step": 1568 }, { "epoch": 0.13, "grad_norm": 1.0009322743834865, "learning_rate": 1.950599934942529e-05, "loss": 0.5897, "step": 1569 }, { "epoch": 0.13, "grad_norm": 1.061526486833266, "learning_rate": 1.9505181851395928e-05, "loss": 0.6192, "step": 1570 }, { "epoch": 0.13, "grad_norm": 1.081852926219257, "learning_rate": 1.9504363694665897e-05, "loss": 0.6464, "step": 1571 }, { "epoch": 0.13, "grad_norm": 1.1260267561080988, "learning_rate": 1.9503544879291893e-05, "loss": 0.6535, "step": 1572 }, { "epoch": 0.13, "grad_norm": 1.1258767221569566, "learning_rate": 1.950272540533066e-05, "loss": 0.6928, "step": 1573 }, { "epoch": 0.13, "grad_norm": 1.0346108077510237, "learning_rate": 1.9501905272838983e-05, "loss": 0.5869, "step": 1574 }, { "epoch": 0.13, "grad_norm": 1.149899484431447, "learning_rate": 1.95010844818737e-05, "loss": 0.664, "step": 1575 }, { "epoch": 0.13, "grad_norm": 1.1020327223722928, "learning_rate": 1.9500263032491688e-05, "loss": 0.6623, "step": 1576 }, { "epoch": 0.13, "grad_norm": 1.0297737960298794, "learning_rate": 1.9499440924749878e-05, "loss": 0.6437, "step": 1577 }, { "epoch": 0.13, "grad_norm": 0.9343334803708131, "learning_rate": 1.9498618158705235e-05, "loss": 0.6093, "step": 1578 }, { "epoch": 0.13, "grad_norm": 1.0287446925236747, "learning_rate": 1.9497794734414782e-05, "loss": 0.5599, "step": 1579 }, { "epoch": 0.13, "grad_norm": 1.0489118928687258, "learning_rate": 1.9496970651935575e-05, "loss": 0.6262, "step": 1580 }, { "epoch": 0.13, "grad_norm": 1.1295782786398594, "learning_rate": 1.9496145911324724e-05, "loss": 0.6088, "step": 1581 }, { "epoch": 0.13, "grad_norm": 1.042843824083153, "learning_rate": 1.949532051263939e-05, "loss": 0.5938, "step": 1582 }, { "epoch": 0.13, "grad_norm": 1.1030489445427343, "learning_rate": 1.9494494455936763e-05, "loss": 0.6502, "step": 1583 }, { "epoch": 0.13, "grad_norm": 1.0155020298359885, "learning_rate": 1.9493667741274093e-05, "loss": 0.5839, "step": 1584 }, { "epoch": 0.13, "grad_norm": 1.065572552200458, "learning_rate": 1.9492840368708668e-05, "loss": 0.5917, "step": 1585 }, { "epoch": 0.13, "grad_norm": 1.311197032426637, "learning_rate": 1.949201233829783e-05, "loss": 0.5324, "step": 1586 }, { "epoch": 0.13, "grad_norm": 3.3655083579169545, "learning_rate": 1.9491183650098953e-05, "loss": 0.5474, "step": 1587 }, { "epoch": 0.13, "grad_norm": 1.179113938400328, "learning_rate": 1.9490354304169467e-05, "loss": 0.6661, "step": 1588 }, { "epoch": 0.13, "grad_norm": 1.0957183409700058, "learning_rate": 1.9489524300566845e-05, "loss": 0.6467, "step": 1589 }, { "epoch": 0.13, "grad_norm": 1.051786424467458, "learning_rate": 1.948869363934861e-05, "loss": 0.6013, "step": 1590 }, { "epoch": 0.13, "grad_norm": 1.1924075929286637, "learning_rate": 1.948786232057232e-05, "loss": 0.6942, "step": 1591 }, { "epoch": 0.13, "grad_norm": 1.0584652636189806, "learning_rate": 1.9487030344295586e-05, "loss": 0.6294, "step": 1592 }, { "epoch": 0.13, "grad_norm": 0.9447168266155662, "learning_rate": 1.9486197710576063e-05, "loss": 0.5499, "step": 1593 }, { "epoch": 0.13, "grad_norm": 0.972937373205438, "learning_rate": 1.9485364419471454e-05, "loss": 0.664, "step": 1594 }, { "epoch": 0.13, "grad_norm": 0.9792901817040632, "learning_rate": 1.948453047103951e-05, "loss": 0.5629, "step": 1595 }, { "epoch": 0.13, "grad_norm": 1.0196538962335693, "learning_rate": 1.948369586533801e-05, "loss": 0.61, "step": 1596 }, { "epoch": 0.13, "grad_norm": 1.0479317330475613, "learning_rate": 1.94828606024248e-05, "loss": 0.6615, "step": 1597 }, { "epoch": 0.13, "grad_norm": 1.0404865485674981, "learning_rate": 1.948202468235776e-05, "loss": 0.6868, "step": 1598 }, { "epoch": 0.13, "grad_norm": 0.9845855698699403, "learning_rate": 1.9481188105194827e-05, "loss": 0.6699, "step": 1599 }, { "epoch": 0.13, "grad_norm": 1.0453875361089522, "learning_rate": 1.948035087099396e-05, "loss": 0.6004, "step": 1600 }, { "epoch": 0.13, "grad_norm": 0.9423906166078881, "learning_rate": 1.9479512979813193e-05, "loss": 0.5634, "step": 1601 }, { "epoch": 0.13, "grad_norm": 1.0283084189763398, "learning_rate": 1.947867443171058e-05, "loss": 0.5786, "step": 1602 }, { "epoch": 0.13, "grad_norm": 1.141933735094667, "learning_rate": 1.9477835226744243e-05, "loss": 0.5785, "step": 1603 }, { "epoch": 0.13, "grad_norm": 1.1207636909607657, "learning_rate": 1.9476995364972327e-05, "loss": 0.6612, "step": 1604 }, { "epoch": 0.13, "grad_norm": 1.1185052250128316, "learning_rate": 1.9476154846453037e-05, "loss": 0.5691, "step": 1605 }, { "epoch": 0.13, "grad_norm": 1.0395024380287932, "learning_rate": 1.9475313671244624e-05, "loss": 0.6653, "step": 1606 }, { "epoch": 0.13, "grad_norm": 1.1227322427576973, "learning_rate": 1.9474471839405377e-05, "loss": 0.6205, "step": 1607 }, { "epoch": 0.13, "grad_norm": 1.0231221460248499, "learning_rate": 1.9473629350993633e-05, "loss": 0.5498, "step": 1608 }, { "epoch": 0.13, "grad_norm": 0.9523212008119725, "learning_rate": 1.947278620606778e-05, "loss": 0.5849, "step": 1609 }, { "epoch": 0.13, "grad_norm": 1.094776684086471, "learning_rate": 1.9471942404686247e-05, "loss": 0.6354, "step": 1610 }, { "epoch": 0.13, "grad_norm": 1.0211375159615892, "learning_rate": 1.9471097946907506e-05, "loss": 0.6031, "step": 1611 }, { "epoch": 0.13, "grad_norm": 1.2412258044393085, "learning_rate": 1.947025283279008e-05, "loss": 0.6913, "step": 1612 }, { "epoch": 0.13, "grad_norm": 0.9912415664046444, "learning_rate": 1.9469407062392528e-05, "loss": 0.5713, "step": 1613 }, { "epoch": 0.13, "grad_norm": 0.9588341015952317, "learning_rate": 1.946856063577347e-05, "loss": 0.5535, "step": 1614 }, { "epoch": 0.13, "grad_norm": 1.0301367696445638, "learning_rate": 1.9467713552991557e-05, "loss": 0.6127, "step": 1615 }, { "epoch": 0.13, "grad_norm": 0.9800192103314544, "learning_rate": 1.9466865814105493e-05, "loss": 0.6247, "step": 1616 }, { "epoch": 0.13, "grad_norm": 0.9889924807240417, "learning_rate": 1.9466017419174027e-05, "loss": 0.5599, "step": 1617 }, { "epoch": 0.13, "grad_norm": 1.0075065416062952, "learning_rate": 1.9465168368255946e-05, "loss": 0.6166, "step": 1618 }, { "epoch": 0.13, "grad_norm": 1.0671090763975322, "learning_rate": 1.9464318661410097e-05, "loss": 0.6176, "step": 1619 }, { "epoch": 0.13, "grad_norm": 1.053639753510594, "learning_rate": 1.9463468298695357e-05, "loss": 0.6008, "step": 1620 }, { "epoch": 0.13, "grad_norm": 1.043584000165112, "learning_rate": 1.9462617280170657e-05, "loss": 0.5603, "step": 1621 }, { "epoch": 0.13, "grad_norm": 0.9855988604094579, "learning_rate": 1.9461765605894974e-05, "loss": 0.6136, "step": 1622 }, { "epoch": 0.13, "grad_norm": 0.9785494268307853, "learning_rate": 1.9460913275927326e-05, "loss": 0.5665, "step": 1623 }, { "epoch": 0.13, "grad_norm": 0.9185300086052195, "learning_rate": 1.9460060290326784e-05, "loss": 0.526, "step": 1624 }, { "epoch": 0.13, "grad_norm": 0.9419639343149278, "learning_rate": 1.9459206649152452e-05, "loss": 0.5603, "step": 1625 }, { "epoch": 0.13, "grad_norm": 1.0604969546819052, "learning_rate": 1.945835235246349e-05, "loss": 0.6027, "step": 1626 }, { "epoch": 0.13, "grad_norm": 0.9976023956722005, "learning_rate": 1.9457497400319097e-05, "loss": 0.6019, "step": 1627 }, { "epoch": 0.13, "grad_norm": 1.0103924418233958, "learning_rate": 1.9456641792778527e-05, "loss": 0.565, "step": 1628 }, { "epoch": 0.13, "grad_norm": 1.0289771525148863, "learning_rate": 1.9455785529901064e-05, "loss": 0.6336, "step": 1629 }, { "epoch": 0.13, "grad_norm": 0.958196944777394, "learning_rate": 1.945492861174606e-05, "loss": 0.4424, "step": 1630 }, { "epoch": 0.13, "grad_norm": 1.036359306845461, "learning_rate": 1.945407103837288e-05, "loss": 0.6501, "step": 1631 }, { "epoch": 0.13, "grad_norm": 1.0229971933809134, "learning_rate": 1.9453212809840965e-05, "loss": 0.6873, "step": 1632 }, { "epoch": 0.13, "grad_norm": 0.997114764638409, "learning_rate": 1.945235392620979e-05, "loss": 0.4905, "step": 1633 }, { "epoch": 0.13, "grad_norm": 0.9633455772418739, "learning_rate": 1.9451494387538873e-05, "loss": 0.5667, "step": 1634 }, { "epoch": 0.13, "grad_norm": 1.0761532151031492, "learning_rate": 1.9450634193887776e-05, "loss": 0.6529, "step": 1635 }, { "epoch": 0.13, "grad_norm": 1.1922896878104507, "learning_rate": 1.9449773345316113e-05, "loss": 0.6218, "step": 1636 }, { "epoch": 0.13, "grad_norm": 1.000698930279899, "learning_rate": 1.944891184188354e-05, "loss": 0.5953, "step": 1637 }, { "epoch": 0.13, "grad_norm": 1.0527551660365246, "learning_rate": 1.9448049683649753e-05, "loss": 0.6841, "step": 1638 }, { "epoch": 0.13, "grad_norm": 1.1807838377158926, "learning_rate": 1.9447186870674505e-05, "loss": 0.6784, "step": 1639 }, { "epoch": 0.13, "grad_norm": 1.2843482015093421, "learning_rate": 1.944632340301759e-05, "loss": 0.5106, "step": 1640 }, { "epoch": 0.13, "grad_norm": 0.9922564540621734, "learning_rate": 1.9445459280738838e-05, "loss": 0.5037, "step": 1641 }, { "epoch": 0.13, "grad_norm": 0.952673106460145, "learning_rate": 1.944459450389814e-05, "loss": 0.5759, "step": 1642 }, { "epoch": 0.13, "grad_norm": 1.0789735287269921, "learning_rate": 1.9443729072555417e-05, "loss": 0.5833, "step": 1643 }, { "epoch": 0.13, "grad_norm": 0.9929128562914025, "learning_rate": 1.9442862986770645e-05, "loss": 0.6195, "step": 1644 }, { "epoch": 0.13, "grad_norm": 0.9939666202490641, "learning_rate": 1.9441996246603848e-05, "loss": 0.6348, "step": 1645 }, { "epoch": 0.13, "grad_norm": 0.9146036701079484, "learning_rate": 1.9441128852115083e-05, "loss": 0.611, "step": 1646 }, { "epoch": 0.13, "grad_norm": 0.9358084165527047, "learning_rate": 1.9440260803364463e-05, "loss": 0.603, "step": 1647 }, { "epoch": 0.13, "grad_norm": 0.9996578346711003, "learning_rate": 1.9439392100412145e-05, "loss": 0.5301, "step": 1648 }, { "epoch": 0.13, "grad_norm": 1.1106725835561553, "learning_rate": 1.9438522743318327e-05, "loss": 0.581, "step": 1649 }, { "epoch": 0.13, "grad_norm": 1.0223341153440686, "learning_rate": 1.9437652732143252e-05, "loss": 0.594, "step": 1650 }, { "epoch": 0.13, "grad_norm": 1.061253275018964, "learning_rate": 1.9436782066947215e-05, "loss": 0.5873, "step": 1651 }, { "epoch": 0.13, "grad_norm": 1.0961476565306238, "learning_rate": 1.943591074779055e-05, "loss": 0.6504, "step": 1652 }, { "epoch": 0.13, "grad_norm": 1.1131513809579108, "learning_rate": 1.9435038774733644e-05, "loss": 0.6113, "step": 1653 }, { "epoch": 0.13, "grad_norm": 0.9959591458602137, "learning_rate": 1.9434166147836917e-05, "loss": 0.6193, "step": 1654 }, { "epoch": 0.13, "grad_norm": 1.0898356458340683, "learning_rate": 1.9433292867160843e-05, "loss": 0.6632, "step": 1655 }, { "epoch": 0.13, "grad_norm": 1.012741168027966, "learning_rate": 1.9432418932765942e-05, "loss": 0.6281, "step": 1656 }, { "epoch": 0.13, "grad_norm": 0.9008564402084792, "learning_rate": 1.9431544344712776e-05, "loss": 0.5148, "step": 1657 }, { "epoch": 0.13, "grad_norm": 0.9579588034766762, "learning_rate": 1.9430669103061953e-05, "loss": 0.5577, "step": 1658 }, { "epoch": 0.13, "grad_norm": 0.9758486715067173, "learning_rate": 1.9429793207874126e-05, "loss": 0.5811, "step": 1659 }, { "epoch": 0.13, "grad_norm": 0.9899522905676923, "learning_rate": 1.9428916659209995e-05, "loss": 0.6214, "step": 1660 }, { "epoch": 0.13, "grad_norm": 0.9120782629931777, "learning_rate": 1.94280394571303e-05, "loss": 0.5678, "step": 1661 }, { "epoch": 0.14, "grad_norm": 0.9752034280998241, "learning_rate": 1.9427161601695833e-05, "loss": 0.6131, "step": 1662 }, { "epoch": 0.14, "grad_norm": 1.0109589990611225, "learning_rate": 1.942628309296743e-05, "loss": 0.5384, "step": 1663 }, { "epoch": 0.14, "grad_norm": 1.1014070080522274, "learning_rate": 1.9425403931005968e-05, "loss": 0.677, "step": 1664 }, { "epoch": 0.14, "grad_norm": 1.009005362126033, "learning_rate": 1.9424524115872375e-05, "loss": 0.6068, "step": 1665 }, { "epoch": 0.14, "grad_norm": 1.008399993710228, "learning_rate": 1.9423643647627625e-05, "loss": 0.6021, "step": 1666 }, { "epoch": 0.14, "grad_norm": 0.9525007427813187, "learning_rate": 1.9422762526332723e-05, "loss": 0.5691, "step": 1667 }, { "epoch": 0.14, "grad_norm": 1.0926859628696797, "learning_rate": 1.942188075204874e-05, "loss": 0.6686, "step": 1668 }, { "epoch": 0.14, "grad_norm": 1.0722394254713838, "learning_rate": 1.9420998324836777e-05, "loss": 0.5931, "step": 1669 }, { "epoch": 0.14, "grad_norm": 0.9650163359668696, "learning_rate": 1.9420115244757985e-05, "loss": 0.6437, "step": 1670 }, { "epoch": 0.14, "grad_norm": 1.017397111933101, "learning_rate": 1.941923151187356e-05, "loss": 0.5942, "step": 1671 }, { "epoch": 0.14, "grad_norm": 1.0942653548901369, "learning_rate": 1.9418347126244754e-05, "loss": 0.6195, "step": 1672 }, { "epoch": 0.14, "grad_norm": 1.0256751446156467, "learning_rate": 1.941746208793284e-05, "loss": 0.5689, "step": 1673 }, { "epoch": 0.14, "grad_norm": 1.0420200004454279, "learning_rate": 1.9416576396999156e-05, "loss": 0.5454, "step": 1674 }, { "epoch": 0.14, "grad_norm": 1.1282627502666678, "learning_rate": 1.941569005350508e-05, "loss": 0.6112, "step": 1675 }, { "epoch": 0.14, "grad_norm": 1.1223142256728877, "learning_rate": 1.941480305751204e-05, "loss": 0.5911, "step": 1676 }, { "epoch": 0.14, "grad_norm": 0.9574998557547988, "learning_rate": 1.9413915409081496e-05, "loss": 0.5814, "step": 1677 }, { "epoch": 0.14, "grad_norm": 0.9789856639171853, "learning_rate": 1.9413027108274964e-05, "loss": 0.6239, "step": 1678 }, { "epoch": 0.14, "grad_norm": 0.9079658859319869, "learning_rate": 1.9412138155154e-05, "loss": 0.5424, "step": 1679 }, { "epoch": 0.14, "grad_norm": 0.997675552438307, "learning_rate": 1.941124854978022e-05, "loss": 0.5783, "step": 1680 }, { "epoch": 0.14, "grad_norm": 1.0240956770351866, "learning_rate": 1.9410358292215252e-05, "loss": 0.68, "step": 1681 }, { "epoch": 0.14, "grad_norm": 1.0531747697737566, "learning_rate": 1.9409467382520805e-05, "loss": 0.5563, "step": 1682 }, { "epoch": 0.14, "grad_norm": 1.0106078332022375, "learning_rate": 1.9408575820758616e-05, "loss": 0.6239, "step": 1683 }, { "epoch": 0.14, "grad_norm": 1.1336686212019484, "learning_rate": 1.940768360699047e-05, "loss": 0.5812, "step": 1684 }, { "epoch": 0.14, "grad_norm": 0.9427888254007594, "learning_rate": 1.9406790741278188e-05, "loss": 0.6002, "step": 1685 }, { "epoch": 0.14, "grad_norm": 1.0708900224334923, "learning_rate": 1.940589722368366e-05, "loss": 0.6759, "step": 1686 }, { "epoch": 0.14, "grad_norm": 1.0544731535821026, "learning_rate": 1.940500305426879e-05, "loss": 0.5946, "step": 1687 }, { "epoch": 0.14, "grad_norm": 1.000572268213837, "learning_rate": 1.9404108233095557e-05, "loss": 0.5565, "step": 1688 }, { "epoch": 0.14, "grad_norm": 0.9469861618458745, "learning_rate": 1.940321276022596e-05, "loss": 0.6116, "step": 1689 }, { "epoch": 0.14, "grad_norm": 0.984815886481496, "learning_rate": 1.9402316635722062e-05, "loss": 0.6322, "step": 1690 }, { "epoch": 0.14, "grad_norm": 1.0360745957580155, "learning_rate": 1.9401419859645958e-05, "loss": 0.5955, "step": 1691 }, { "epoch": 0.14, "grad_norm": 1.0390331354965414, "learning_rate": 1.9400522432059802e-05, "loss": 0.4928, "step": 1692 }, { "epoch": 0.14, "grad_norm": 1.095133833358097, "learning_rate": 1.9399624353025774e-05, "loss": 0.6053, "step": 1693 }, { "epoch": 0.14, "grad_norm": 1.0481745310455743, "learning_rate": 1.939872562260612e-05, "loss": 0.5901, "step": 1694 }, { "epoch": 0.14, "grad_norm": 0.9635325259605018, "learning_rate": 1.9397826240863113e-05, "loss": 0.5477, "step": 1695 }, { "epoch": 0.14, "grad_norm": 1.056255123418143, "learning_rate": 1.9396926207859085e-05, "loss": 0.5865, "step": 1696 }, { "epoch": 0.14, "grad_norm": 0.8758001421262852, "learning_rate": 1.939602552365641e-05, "loss": 0.5967, "step": 1697 }, { "epoch": 0.14, "grad_norm": 1.0623347770055493, "learning_rate": 1.9395124188317493e-05, "loss": 0.5864, "step": 1698 }, { "epoch": 0.14, "grad_norm": 1.046069421727186, "learning_rate": 1.9394222201904806e-05, "loss": 0.6322, "step": 1699 }, { "epoch": 0.14, "grad_norm": 1.0635388569825572, "learning_rate": 1.9393319564480854e-05, "loss": 0.5859, "step": 1700 }, { "epoch": 0.14, "grad_norm": 1.0396994085777351, "learning_rate": 1.9392416276108192e-05, "loss": 0.6369, "step": 1701 }, { "epoch": 0.14, "grad_norm": 1.069197810429597, "learning_rate": 1.9391512336849406e-05, "loss": 0.6187, "step": 1702 }, { "epoch": 0.14, "grad_norm": 0.9948691983607679, "learning_rate": 1.939060774676715e-05, "loss": 0.6283, "step": 1703 }, { "epoch": 0.14, "grad_norm": 0.9369154927339403, "learning_rate": 1.9389702505924106e-05, "loss": 0.5582, "step": 1704 }, { "epoch": 0.14, "grad_norm": 0.9110705801015708, "learning_rate": 1.9388796614383008e-05, "loss": 0.566, "step": 1705 }, { "epoch": 0.14, "grad_norm": 0.9312148329941633, "learning_rate": 1.938789007220663e-05, "loss": 0.5327, "step": 1706 }, { "epoch": 0.14, "grad_norm": 0.9391005657013416, "learning_rate": 1.9386982879457795e-05, "loss": 0.5459, "step": 1707 }, { "epoch": 0.14, "grad_norm": 1.0106676478778127, "learning_rate": 1.9386075036199378e-05, "loss": 0.5732, "step": 1708 }, { "epoch": 0.14, "grad_norm": 1.1020962008149404, "learning_rate": 1.938516654249428e-05, "loss": 0.6255, "step": 1709 }, { "epoch": 0.14, "grad_norm": 1.161045643502798, "learning_rate": 1.9384257398405473e-05, "loss": 0.6387, "step": 1710 }, { "epoch": 0.14, "grad_norm": 1.0899649970282215, "learning_rate": 1.938334760399595e-05, "loss": 0.6095, "step": 1711 }, { "epoch": 0.14, "grad_norm": 0.9554662845585481, "learning_rate": 1.9382437159328758e-05, "loss": 0.5472, "step": 1712 }, { "epoch": 0.14, "grad_norm": 1.0773572457764136, "learning_rate": 1.9381526064466995e-05, "loss": 0.4818, "step": 1713 }, { "epoch": 0.14, "grad_norm": 0.9980357980012017, "learning_rate": 1.9380614319473798e-05, "loss": 0.5929, "step": 1714 }, { "epoch": 0.14, "grad_norm": 1.0965656273453313, "learning_rate": 1.9379701924412344e-05, "loss": 0.6584, "step": 1715 }, { "epoch": 0.14, "grad_norm": 0.9565851250803735, "learning_rate": 1.937878887934587e-05, "loss": 0.6129, "step": 1716 }, { "epoch": 0.14, "grad_norm": 0.9284045836171315, "learning_rate": 1.9377875184337647e-05, "loss": 0.5907, "step": 1717 }, { "epoch": 0.14, "grad_norm": 1.1134948767026893, "learning_rate": 1.9376960839450988e-05, "loss": 0.5701, "step": 1718 }, { "epoch": 0.14, "grad_norm": 0.9792430599995569, "learning_rate": 1.9376045844749267e-05, "loss": 0.6148, "step": 1719 }, { "epoch": 0.14, "grad_norm": 0.9237201545998925, "learning_rate": 1.937513020029588e-05, "loss": 0.5524, "step": 1720 }, { "epoch": 0.14, "grad_norm": 0.9695000739742969, "learning_rate": 1.9374213906154286e-05, "loss": 0.6064, "step": 1721 }, { "epoch": 0.14, "grad_norm": 1.0509066073915099, "learning_rate": 1.9373296962387988e-05, "loss": 0.5842, "step": 1722 }, { "epoch": 0.14, "grad_norm": 1.0137941184215924, "learning_rate": 1.937237936906052e-05, "loss": 0.5337, "step": 1723 }, { "epoch": 0.14, "grad_norm": 1.0478769449551844, "learning_rate": 1.9371461126235474e-05, "loss": 0.6209, "step": 1724 }, { "epoch": 0.14, "grad_norm": 1.0231670900137617, "learning_rate": 1.937054223397649e-05, "loss": 0.6478, "step": 1725 }, { "epoch": 0.14, "grad_norm": 1.0087171579611938, "learning_rate": 1.9369622692347233e-05, "loss": 0.5918, "step": 1726 }, { "epoch": 0.14, "grad_norm": 1.0541550223771612, "learning_rate": 1.936870250141144e-05, "loss": 0.7013, "step": 1727 }, { "epoch": 0.14, "grad_norm": 1.0331248553066827, "learning_rate": 1.936778166123287e-05, "loss": 0.5751, "step": 1728 }, { "epoch": 0.14, "grad_norm": 1.0075349881213924, "learning_rate": 1.9366860171875345e-05, "loss": 0.5509, "step": 1729 }, { "epoch": 0.14, "grad_norm": 0.975520975539256, "learning_rate": 1.9365938033402715e-05, "loss": 0.5705, "step": 1730 }, { "epoch": 0.14, "grad_norm": 1.0499924966908039, "learning_rate": 1.9365015245878892e-05, "loss": 0.5684, "step": 1731 }, { "epoch": 0.14, "grad_norm": 0.9711685551889372, "learning_rate": 1.936409180936781e-05, "loss": 0.6005, "step": 1732 }, { "epoch": 0.14, "grad_norm": 1.052822654931391, "learning_rate": 1.9363167723933477e-05, "loss": 0.6444, "step": 1733 }, { "epoch": 0.14, "grad_norm": 0.9489608663159473, "learning_rate": 1.9362242989639926e-05, "loss": 0.518, "step": 1734 }, { "epoch": 0.14, "grad_norm": 1.0523021470731204, "learning_rate": 1.936131760655124e-05, "loss": 0.506, "step": 1735 }, { "epoch": 0.14, "grad_norm": 0.9665533177577489, "learning_rate": 1.9360391574731547e-05, "loss": 0.5292, "step": 1736 }, { "epoch": 0.14, "grad_norm": 1.005596907178615, "learning_rate": 1.935946489424502e-05, "loss": 0.6312, "step": 1737 }, { "epoch": 0.14, "grad_norm": 1.0262942511052289, "learning_rate": 1.935853756515588e-05, "loss": 0.6343, "step": 1738 }, { "epoch": 0.14, "grad_norm": 1.0438665597916685, "learning_rate": 1.9357609587528385e-05, "loss": 0.6709, "step": 1739 }, { "epoch": 0.14, "grad_norm": 1.0675839441468322, "learning_rate": 1.9356680961426847e-05, "loss": 0.6619, "step": 1740 }, { "epoch": 0.14, "grad_norm": 1.0499414830717126, "learning_rate": 1.9355751686915617e-05, "loss": 0.6456, "step": 1741 }, { "epoch": 0.14, "grad_norm": 0.9769748381304898, "learning_rate": 1.9354821764059094e-05, "loss": 0.6537, "step": 1742 }, { "epoch": 0.14, "grad_norm": 0.9619271758171696, "learning_rate": 1.935389119292172e-05, "loss": 0.6137, "step": 1743 }, { "epoch": 0.14, "grad_norm": 1.0273752317519276, "learning_rate": 1.9352959973567984e-05, "loss": 0.6247, "step": 1744 }, { "epoch": 0.14, "grad_norm": 1.0540513963093687, "learning_rate": 1.9352028106062417e-05, "loss": 0.6241, "step": 1745 }, { "epoch": 0.14, "grad_norm": 1.019786109985117, "learning_rate": 1.9351095590469596e-05, "loss": 0.5784, "step": 1746 }, { "epoch": 0.14, "grad_norm": 1.1593224796693433, "learning_rate": 1.9350162426854152e-05, "loss": 0.6446, "step": 1747 }, { "epoch": 0.14, "grad_norm": 1.003402218113443, "learning_rate": 1.9349228615280736e-05, "loss": 0.6559, "step": 1748 }, { "epoch": 0.14, "grad_norm": 0.9323779466330664, "learning_rate": 1.9348294155814078e-05, "loss": 0.5848, "step": 1749 }, { "epoch": 0.14, "grad_norm": 0.9649266600056559, "learning_rate": 1.934735904851892e-05, "loss": 0.5913, "step": 1750 }, { "epoch": 0.14, "grad_norm": 1.0189695057359012, "learning_rate": 1.9346423293460078e-05, "loss": 0.6156, "step": 1751 }, { "epoch": 0.14, "grad_norm": 0.9508957159234627, "learning_rate": 1.9345486890702386e-05, "loss": 0.5786, "step": 1752 }, { "epoch": 0.14, "grad_norm": 0.8771419924453374, "learning_rate": 1.9344549840310743e-05, "loss": 0.5926, "step": 1753 }, { "epoch": 0.14, "grad_norm": 1.0763350268212386, "learning_rate": 1.9343612142350085e-05, "loss": 0.5966, "step": 1754 }, { "epoch": 0.14, "grad_norm": 0.9946198344432027, "learning_rate": 1.9342673796885395e-05, "loss": 0.6017, "step": 1755 }, { "epoch": 0.14, "grad_norm": 0.9539611035425803, "learning_rate": 1.93417348039817e-05, "loss": 0.5984, "step": 1756 }, { "epoch": 0.14, "grad_norm": 0.9344252040983774, "learning_rate": 1.934079516370406e-05, "loss": 0.6351, "step": 1757 }, { "epoch": 0.14, "grad_norm": 1.0915301423903436, "learning_rate": 1.933985487611761e-05, "loss": 0.6586, "step": 1758 }, { "epoch": 0.14, "grad_norm": 0.8947065911393781, "learning_rate": 1.93389139412875e-05, "loss": 0.5607, "step": 1759 }, { "epoch": 0.14, "grad_norm": 1.1073120365984666, "learning_rate": 1.9337972359278935e-05, "loss": 0.5979, "step": 1760 }, { "epoch": 0.14, "grad_norm": 0.9693167721107211, "learning_rate": 1.9337030130157166e-05, "loss": 0.628, "step": 1761 }, { "epoch": 0.14, "grad_norm": 0.9625722731315666, "learning_rate": 1.9336087253987495e-05, "loss": 0.5942, "step": 1762 }, { "epoch": 0.14, "grad_norm": 0.9396578385873692, "learning_rate": 1.9335143730835258e-05, "loss": 0.5917, "step": 1763 }, { "epoch": 0.14, "grad_norm": 0.9613382312549996, "learning_rate": 1.933419956076584e-05, "loss": 0.5517, "step": 1764 }, { "epoch": 0.14, "grad_norm": 0.9495156501733618, "learning_rate": 1.933325474384467e-05, "loss": 0.5303, "step": 1765 }, { "epoch": 0.14, "grad_norm": 1.0290084286898358, "learning_rate": 1.9332309280137227e-05, "loss": 0.6439, "step": 1766 }, { "epoch": 0.14, "grad_norm": 0.9716484435289793, "learning_rate": 1.933136316970903e-05, "loss": 0.5967, "step": 1767 }, { "epoch": 0.14, "grad_norm": 0.968535746654822, "learning_rate": 1.933041641262564e-05, "loss": 0.603, "step": 1768 }, { "epoch": 0.14, "grad_norm": 1.045616747126607, "learning_rate": 1.9329469008952668e-05, "loss": 0.6284, "step": 1769 }, { "epoch": 0.14, "grad_norm": 1.069043035466113, "learning_rate": 1.932852095875577e-05, "loss": 0.6672, "step": 1770 }, { "epoch": 0.14, "grad_norm": 0.9707470847011743, "learning_rate": 1.9327572262100642e-05, "loss": 0.5747, "step": 1771 }, { "epoch": 0.14, "grad_norm": 0.8752792859825459, "learning_rate": 1.9326622919053034e-05, "loss": 0.5589, "step": 1772 }, { "epoch": 0.14, "grad_norm": 0.9614197721357719, "learning_rate": 1.9325672929678728e-05, "loss": 0.5549, "step": 1773 }, { "epoch": 0.14, "grad_norm": 1.0074353729006507, "learning_rate": 1.932472229404356e-05, "loss": 0.5593, "step": 1774 }, { "epoch": 0.14, "grad_norm": 0.9120956947478422, "learning_rate": 1.932377101221341e-05, "loss": 0.5694, "step": 1775 }, { "epoch": 0.14, "grad_norm": 0.9156991949273016, "learning_rate": 1.9322819084254197e-05, "loss": 0.5573, "step": 1776 }, { "epoch": 0.14, "grad_norm": 0.9199254380244064, "learning_rate": 1.9321866510231887e-05, "loss": 0.6179, "step": 1777 }, { "epoch": 0.14, "grad_norm": 1.0861265552584947, "learning_rate": 1.93209132902125e-05, "loss": 0.6715, "step": 1778 }, { "epoch": 0.14, "grad_norm": 1.1236405448947937, "learning_rate": 1.9319959424262092e-05, "loss": 0.6493, "step": 1779 }, { "epoch": 0.14, "grad_norm": 1.0467816961232732, "learning_rate": 1.931900491244676e-05, "loss": 0.621, "step": 1780 }, { "epoch": 0.14, "grad_norm": 1.0688593278210219, "learning_rate": 1.9318049754832656e-05, "loss": 0.6196, "step": 1781 }, { "epoch": 0.14, "grad_norm": 0.9995772369930963, "learning_rate": 1.9317093951485963e-05, "loss": 0.5881, "step": 1782 }, { "epoch": 0.14, "grad_norm": 1.0178102088596666, "learning_rate": 1.931613750247293e-05, "loss": 0.6829, "step": 1783 }, { "epoch": 0.14, "grad_norm": 1.0075909234904905, "learning_rate": 1.9315180407859828e-05, "loss": 0.6581, "step": 1784 }, { "epoch": 0.15, "grad_norm": 1.0256750596460658, "learning_rate": 1.9314222667712988e-05, "loss": 0.6293, "step": 1785 }, { "epoch": 0.15, "grad_norm": 1.0177433162472909, "learning_rate": 1.931326428209878e-05, "loss": 0.6093, "step": 1786 }, { "epoch": 0.15, "grad_norm": 1.0862864909273302, "learning_rate": 1.9312305251083613e-05, "loss": 0.5152, "step": 1787 }, { "epoch": 0.15, "grad_norm": 0.9256285006173005, "learning_rate": 1.9311345574733958e-05, "loss": 0.5556, "step": 1788 }, { "epoch": 0.15, "grad_norm": 1.0230687624511863, "learning_rate": 1.9310385253116307e-05, "loss": 0.6125, "step": 1789 }, { "epoch": 0.15, "grad_norm": 1.020000318712854, "learning_rate": 1.930942428629722e-05, "loss": 0.593, "step": 1790 }, { "epoch": 0.15, "grad_norm": 1.060391790298629, "learning_rate": 1.9308462674343288e-05, "loss": 0.6621, "step": 1791 }, { "epoch": 0.15, "grad_norm": 0.96873478565776, "learning_rate": 1.9307500417321154e-05, "loss": 0.6247, "step": 1792 }, { "epoch": 0.15, "grad_norm": 0.9259799452753706, "learning_rate": 1.930653751529749e-05, "loss": 0.6099, "step": 1793 }, { "epoch": 0.15, "grad_norm": 1.1069947124278503, "learning_rate": 1.9305573968339032e-05, "loss": 0.6069, "step": 1794 }, { "epoch": 0.15, "grad_norm": 1.058441133773948, "learning_rate": 1.930460977651255e-05, "loss": 0.6325, "step": 1795 }, { "epoch": 0.15, "grad_norm": 0.977173212643966, "learning_rate": 1.930364493988487e-05, "loss": 0.6179, "step": 1796 }, { "epoch": 0.15, "grad_norm": 1.0266606354391452, "learning_rate": 1.9302679458522844e-05, "loss": 0.5753, "step": 1797 }, { "epoch": 0.15, "grad_norm": 0.9743030871356761, "learning_rate": 1.9301713332493386e-05, "loss": 0.5387, "step": 1798 }, { "epoch": 0.15, "grad_norm": 0.9153132297294244, "learning_rate": 1.930074656186344e-05, "loss": 0.5802, "step": 1799 }, { "epoch": 0.15, "grad_norm": 1.0851112564975114, "learning_rate": 1.929977914670001e-05, "loss": 0.6124, "step": 1800 }, { "epoch": 0.15, "grad_norm": 1.0126714996986839, "learning_rate": 1.9298811087070134e-05, "loss": 0.6734, "step": 1801 }, { "epoch": 0.15, "grad_norm": 1.1123818538124828, "learning_rate": 1.9297842383040898e-05, "loss": 0.7079, "step": 1802 }, { "epoch": 0.15, "grad_norm": 1.0228876548907846, "learning_rate": 1.9296873034679427e-05, "loss": 0.6336, "step": 1803 }, { "epoch": 0.15, "grad_norm": 0.9516639256057758, "learning_rate": 1.9295903042052907e-05, "loss": 0.5531, "step": 1804 }, { "epoch": 0.15, "grad_norm": 0.9704534136451547, "learning_rate": 1.929493240522855e-05, "loss": 0.6174, "step": 1805 }, { "epoch": 0.15, "grad_norm": 0.9801162091417275, "learning_rate": 1.9293961124273623e-05, "loss": 0.6326, "step": 1806 }, { "epoch": 0.15, "grad_norm": 1.0425771118361655, "learning_rate": 1.929298919925543e-05, "loss": 0.6636, "step": 1807 }, { "epoch": 0.15, "grad_norm": 0.9599469190171349, "learning_rate": 1.9292016630241334e-05, "loss": 0.6003, "step": 1808 }, { "epoch": 0.15, "grad_norm": 0.8707841858381885, "learning_rate": 1.9291043417298723e-05, "loss": 0.5516, "step": 1809 }, { "epoch": 0.15, "grad_norm": 1.0020013467161477, "learning_rate": 1.9290069560495042e-05, "loss": 0.5347, "step": 1810 }, { "epoch": 0.15, "grad_norm": 1.095080749599255, "learning_rate": 1.9289095059897787e-05, "loss": 0.6277, "step": 1811 }, { "epoch": 0.15, "grad_norm": 1.090773935898295, "learning_rate": 1.9288119915574485e-05, "loss": 0.6111, "step": 1812 }, { "epoch": 0.15, "grad_norm": 0.978967808007286, "learning_rate": 1.9287144127592704e-05, "loss": 0.5623, "step": 1813 }, { "epoch": 0.15, "grad_norm": 0.9302789832381554, "learning_rate": 1.9286167696020076e-05, "loss": 0.549, "step": 1814 }, { "epoch": 0.15, "grad_norm": 0.974690334183085, "learning_rate": 1.9285190620924267e-05, "loss": 0.6169, "step": 1815 }, { "epoch": 0.15, "grad_norm": 0.9767003754849144, "learning_rate": 1.9284212902372978e-05, "loss": 0.6241, "step": 1816 }, { "epoch": 0.15, "grad_norm": 0.9907255241813573, "learning_rate": 1.928323454043397e-05, "loss": 0.6229, "step": 1817 }, { "epoch": 0.15, "grad_norm": 1.1142070623511469, "learning_rate": 1.9282255535175047e-05, "loss": 0.6103, "step": 1818 }, { "epoch": 0.15, "grad_norm": 1.0248511092891297, "learning_rate": 1.928127588666405e-05, "loss": 0.5623, "step": 1819 }, { "epoch": 0.15, "grad_norm": 0.994991335298417, "learning_rate": 1.9280295594968863e-05, "loss": 0.5865, "step": 1820 }, { "epoch": 0.15, "grad_norm": 0.9812727318761492, "learning_rate": 1.9279314660157423e-05, "loss": 0.6418, "step": 1821 }, { "epoch": 0.15, "grad_norm": 0.9630743577652199, "learning_rate": 1.927833308229771e-05, "loss": 0.5103, "step": 1822 }, { "epoch": 0.15, "grad_norm": 0.9996298075093891, "learning_rate": 1.927735086145774e-05, "loss": 0.6354, "step": 1823 }, { "epoch": 0.15, "grad_norm": 0.9805994014100998, "learning_rate": 1.9276367997705584e-05, "loss": 0.5739, "step": 1824 }, { "epoch": 0.15, "grad_norm": 0.9010497876657586, "learning_rate": 1.927538449110936e-05, "loss": 0.5258, "step": 1825 }, { "epoch": 0.15, "grad_norm": 0.9976777723502999, "learning_rate": 1.9274400341737214e-05, "loss": 0.5761, "step": 1826 }, { "epoch": 0.15, "grad_norm": 1.0130011150060951, "learning_rate": 1.927341554965735e-05, "loss": 0.5522, "step": 1827 }, { "epoch": 0.15, "grad_norm": 1.0349919103598368, "learning_rate": 1.9272430114938018e-05, "loss": 0.5707, "step": 1828 }, { "epoch": 0.15, "grad_norm": 1.0269075489815958, "learning_rate": 1.92714440376475e-05, "loss": 0.6424, "step": 1829 }, { "epoch": 0.15, "grad_norm": 0.9245815228929377, "learning_rate": 1.9270457317854135e-05, "loss": 0.5445, "step": 1830 }, { "epoch": 0.15, "grad_norm": 1.042268266511188, "learning_rate": 1.92694699556263e-05, "loss": 0.5685, "step": 1831 }, { "epoch": 0.15, "grad_norm": 1.0755570703561435, "learning_rate": 1.926848195103242e-05, "loss": 0.7127, "step": 1832 }, { "epoch": 0.15, "grad_norm": 0.8802799775002621, "learning_rate": 1.926749330414096e-05, "loss": 0.5497, "step": 1833 }, { "epoch": 0.15, "grad_norm": 0.9749207683721478, "learning_rate": 1.926650401502044e-05, "loss": 0.6509, "step": 1834 }, { "epoch": 0.15, "grad_norm": 0.9936837660861811, "learning_rate": 1.9265514083739404e-05, "loss": 0.6173, "step": 1835 }, { "epoch": 0.15, "grad_norm": 0.8941460948798987, "learning_rate": 1.9264523510366463e-05, "loss": 0.5846, "step": 1836 }, { "epoch": 0.15, "grad_norm": 0.9556447858034413, "learning_rate": 1.9263532294970263e-05, "loss": 0.5685, "step": 1837 }, { "epoch": 0.15, "grad_norm": 0.9581469049694387, "learning_rate": 1.9262540437619488e-05, "loss": 0.608, "step": 1838 }, { "epoch": 0.15, "grad_norm": 1.0159830950902855, "learning_rate": 1.926154793838288e-05, "loss": 0.6803, "step": 1839 }, { "epoch": 0.15, "grad_norm": 1.006614746978762, "learning_rate": 1.926055479732921e-05, "loss": 0.5681, "step": 1840 }, { "epoch": 0.15, "grad_norm": 0.9604027364713731, "learning_rate": 1.925956101452731e-05, "loss": 0.606, "step": 1841 }, { "epoch": 0.15, "grad_norm": 1.0826894078754479, "learning_rate": 1.9258566590046047e-05, "loss": 0.5971, "step": 1842 }, { "epoch": 0.15, "grad_norm": 1.0223462945685502, "learning_rate": 1.9257571523954328e-05, "loss": 0.5968, "step": 1843 }, { "epoch": 0.15, "grad_norm": 0.9614839255093992, "learning_rate": 1.9256575816321114e-05, "loss": 0.5565, "step": 1844 }, { "epoch": 0.15, "grad_norm": 0.924425557439055, "learning_rate": 1.925557946721541e-05, "loss": 0.5685, "step": 1845 }, { "epoch": 0.15, "grad_norm": 1.056866996923417, "learning_rate": 1.9254582476706254e-05, "loss": 0.6002, "step": 1846 }, { "epoch": 0.15, "grad_norm": 0.9334277386485584, "learning_rate": 1.9253584844862745e-05, "loss": 0.6184, "step": 1847 }, { "epoch": 0.15, "grad_norm": 0.8767870072335794, "learning_rate": 1.9252586571754013e-05, "loss": 0.5253, "step": 1848 }, { "epoch": 0.15, "grad_norm": 0.9113482795540835, "learning_rate": 1.925158765744924e-05, "loss": 0.5904, "step": 1849 }, { "epoch": 0.15, "grad_norm": 0.964622683320886, "learning_rate": 1.9250588102017643e-05, "loss": 0.6002, "step": 1850 }, { "epoch": 0.15, "grad_norm": 0.9209021189317579, "learning_rate": 1.92495879055285e-05, "loss": 0.5529, "step": 1851 }, { "epoch": 0.15, "grad_norm": 1.0517115998700501, "learning_rate": 1.924858706805112e-05, "loss": 0.6084, "step": 1852 }, { "epoch": 0.15, "grad_norm": 1.0121355327781139, "learning_rate": 1.924758558965486e-05, "loss": 0.5884, "step": 1853 }, { "epoch": 0.15, "grad_norm": 1.035522236552528, "learning_rate": 1.924658347040912e-05, "loss": 0.6331, "step": 1854 }, { "epoch": 0.15, "grad_norm": 0.9098667167283655, "learning_rate": 1.9245580710383344e-05, "loss": 0.5066, "step": 1855 }, { "epoch": 0.15, "grad_norm": 0.9882838782668055, "learning_rate": 1.924457730964703e-05, "loss": 0.6573, "step": 1856 }, { "epoch": 0.15, "grad_norm": 0.8170470779204368, "learning_rate": 1.9243573268269706e-05, "loss": 0.5192, "step": 1857 }, { "epoch": 0.15, "grad_norm": 1.1153812484779042, "learning_rate": 1.9242568586320956e-05, "loss": 0.6242, "step": 1858 }, { "epoch": 0.15, "grad_norm": 1.034925508619141, "learning_rate": 1.92415632638704e-05, "loss": 0.6048, "step": 1859 }, { "epoch": 0.15, "grad_norm": 0.943648489456906, "learning_rate": 1.9240557300987705e-05, "loss": 0.5746, "step": 1860 }, { "epoch": 0.15, "grad_norm": 1.0863408480572274, "learning_rate": 1.923955069774259e-05, "loss": 0.7041, "step": 1861 }, { "epoch": 0.15, "grad_norm": 1.0642094533973365, "learning_rate": 1.9238543454204802e-05, "loss": 0.6411, "step": 1862 }, { "epoch": 0.15, "grad_norm": 1.0069358328656293, "learning_rate": 1.923753557044415e-05, "loss": 0.6549, "step": 1863 }, { "epoch": 0.15, "grad_norm": 0.9637795959365351, "learning_rate": 1.9236527046530476e-05, "loss": 0.6205, "step": 1864 }, { "epoch": 0.15, "grad_norm": 0.9083232656443532, "learning_rate": 1.923551788253367e-05, "loss": 0.5804, "step": 1865 }, { "epoch": 0.15, "grad_norm": 1.0283659794931066, "learning_rate": 1.923450807852367e-05, "loss": 0.581, "step": 1866 }, { "epoch": 0.15, "grad_norm": 1.0104856417456372, "learning_rate": 1.9233497634570446e-05, "loss": 0.6503, "step": 1867 }, { "epoch": 0.15, "grad_norm": 1.105383745788613, "learning_rate": 1.923248655074403e-05, "loss": 0.7004, "step": 1868 }, { "epoch": 0.15, "grad_norm": 1.0774461890083162, "learning_rate": 1.923147482711448e-05, "loss": 0.6321, "step": 1869 }, { "epoch": 0.15, "grad_norm": 1.0318851189457057, "learning_rate": 1.923046246375192e-05, "loss": 0.6791, "step": 1870 }, { "epoch": 0.15, "grad_norm": 0.9327788418837317, "learning_rate": 1.9229449460726495e-05, "loss": 0.5738, "step": 1871 }, { "epoch": 0.15, "grad_norm": 0.9803706876908969, "learning_rate": 1.9228435818108408e-05, "loss": 0.6639, "step": 1872 }, { "epoch": 0.15, "grad_norm": 0.9657285086391221, "learning_rate": 1.9227421535967906e-05, "loss": 0.6011, "step": 1873 }, { "epoch": 0.15, "grad_norm": 0.9644259081561086, "learning_rate": 1.9226406614375276e-05, "loss": 0.4687, "step": 1874 }, { "epoch": 0.15, "grad_norm": 1.0937221730243076, "learning_rate": 1.922539105340085e-05, "loss": 0.5926, "step": 1875 }, { "epoch": 0.15, "grad_norm": 1.0355689113799338, "learning_rate": 1.922437485311501e-05, "loss": 0.6252, "step": 1876 }, { "epoch": 0.15, "grad_norm": 0.9493040784838319, "learning_rate": 1.9223358013588172e-05, "loss": 0.5499, "step": 1877 }, { "epoch": 0.15, "grad_norm": 0.9474996681844716, "learning_rate": 1.9222340534890803e-05, "loss": 0.5851, "step": 1878 }, { "epoch": 0.15, "grad_norm": 1.0338117204194743, "learning_rate": 1.922132241709342e-05, "loss": 0.6586, "step": 1879 }, { "epoch": 0.15, "grad_norm": 0.9455740270706581, "learning_rate": 1.9220303660266568e-05, "loss": 0.5658, "step": 1880 }, { "epoch": 0.15, "grad_norm": 0.9766084713327681, "learning_rate": 1.9219284264480854e-05, "loss": 0.5583, "step": 1881 }, { "epoch": 0.15, "grad_norm": 1.1407050220765134, "learning_rate": 1.9218264229806917e-05, "loss": 0.6909, "step": 1882 }, { "epoch": 0.15, "grad_norm": 1.0189913748914254, "learning_rate": 1.9217243556315445e-05, "loss": 0.5941, "step": 1883 }, { "epoch": 0.15, "grad_norm": 1.005943329132577, "learning_rate": 1.9216222244077173e-05, "loss": 0.6116, "step": 1884 }, { "epoch": 0.15, "grad_norm": 1.0085992680953428, "learning_rate": 1.921520029316287e-05, "loss": 0.5959, "step": 1885 }, { "epoch": 0.15, "grad_norm": 0.9530355673098095, "learning_rate": 1.9214177703643365e-05, "loss": 0.5689, "step": 1886 }, { "epoch": 0.15, "grad_norm": 0.9927108368990538, "learning_rate": 1.9213154475589513e-05, "loss": 0.4917, "step": 1887 }, { "epoch": 0.15, "grad_norm": 1.145381087906433, "learning_rate": 1.921213060907223e-05, "loss": 0.6237, "step": 1888 }, { "epoch": 0.15, "grad_norm": 0.9237290032126951, "learning_rate": 1.921110610416247e-05, "loss": 0.5401, "step": 1889 }, { "epoch": 0.15, "grad_norm": 1.1816105930140846, "learning_rate": 1.9210080960931224e-05, "loss": 0.6396, "step": 1890 }, { "epoch": 0.15, "grad_norm": 1.045015765819876, "learning_rate": 1.920905517944954e-05, "loss": 0.5987, "step": 1891 }, { "epoch": 0.15, "grad_norm": 1.0438281050104827, "learning_rate": 1.9208028759788496e-05, "loss": 0.6267, "step": 1892 }, { "epoch": 0.15, "grad_norm": 1.1258432834265213, "learning_rate": 1.920700170201923e-05, "loss": 0.6134, "step": 1893 }, { "epoch": 0.15, "grad_norm": 1.0806923013990861, "learning_rate": 1.920597400621291e-05, "loss": 0.7003, "step": 1894 }, { "epoch": 0.15, "grad_norm": 1.0215362002067059, "learning_rate": 1.9204945672440757e-05, "loss": 0.5316, "step": 1895 }, { "epoch": 0.15, "grad_norm": 1.1174940694287256, "learning_rate": 1.9203916700774035e-05, "loss": 0.688, "step": 1896 }, { "epoch": 0.15, "grad_norm": 1.0585002502030911, "learning_rate": 1.920288709128405e-05, "loss": 0.5828, "step": 1897 }, { "epoch": 0.15, "grad_norm": 1.10077227527449, "learning_rate": 1.920185684404215e-05, "loss": 0.6515, "step": 1898 }, { "epoch": 0.15, "grad_norm": 1.047385362560591, "learning_rate": 1.9200825959119736e-05, "loss": 0.6398, "step": 1899 }, { "epoch": 0.15, "grad_norm": 1.0580131941502218, "learning_rate": 1.9199794436588244e-05, "loss": 0.6211, "step": 1900 }, { "epoch": 0.15, "grad_norm": 1.0182865825702039, "learning_rate": 1.9198762276519156e-05, "loss": 0.6557, "step": 1901 }, { "epoch": 0.15, "grad_norm": 0.9618402593349183, "learning_rate": 1.9197729478984003e-05, "loss": 0.5659, "step": 1902 }, { "epoch": 0.15, "grad_norm": 0.9881111937134388, "learning_rate": 1.9196696044054354e-05, "loss": 0.6162, "step": 1903 }, { "epoch": 0.15, "grad_norm": 0.9798061834622409, "learning_rate": 1.9195661971801825e-05, "loss": 0.5748, "step": 1904 }, { "epoch": 0.15, "grad_norm": 1.0083357679462275, "learning_rate": 1.9194627262298082e-05, "loss": 0.5276, "step": 1905 }, { "epoch": 0.15, "grad_norm": 0.979514797051948, "learning_rate": 1.9193591915614824e-05, "loss": 0.5788, "step": 1906 }, { "epoch": 0.15, "grad_norm": 1.086160667609808, "learning_rate": 1.9192555931823798e-05, "loss": 0.6366, "step": 1907 }, { "epoch": 0.16, "grad_norm": 0.9793396668250293, "learning_rate": 1.9191519310996806e-05, "loss": 0.5931, "step": 1908 }, { "epoch": 0.16, "grad_norm": 0.9361137077940025, "learning_rate": 1.9190482053205673e-05, "loss": 0.5084, "step": 1909 }, { "epoch": 0.16, "grad_norm": 0.9618308068511757, "learning_rate": 1.9189444158522287e-05, "loss": 0.6516, "step": 1910 }, { "epoch": 0.16, "grad_norm": 0.9732343315606669, "learning_rate": 1.9188405627018573e-05, "loss": 0.5654, "step": 1911 }, { "epoch": 0.16, "grad_norm": 1.132773160557045, "learning_rate": 1.9187366458766497e-05, "loss": 0.6753, "step": 1912 }, { "epoch": 0.16, "grad_norm": 1.0402651050839569, "learning_rate": 1.9186326653838075e-05, "loss": 0.6445, "step": 1913 }, { "epoch": 0.16, "grad_norm": 0.996823757390506, "learning_rate": 1.918528621230537e-05, "loss": 0.6372, "step": 1914 }, { "epoch": 0.16, "grad_norm": 1.073328738789437, "learning_rate": 1.918424513424047e-05, "loss": 0.6432, "step": 1915 }, { "epoch": 0.16, "grad_norm": 0.9053958985827631, "learning_rate": 1.918320341971553e-05, "loss": 0.5817, "step": 1916 }, { "epoch": 0.16, "grad_norm": 0.9971311966062738, "learning_rate": 1.9182161068802742e-05, "loss": 0.548, "step": 1917 }, { "epoch": 0.16, "grad_norm": 0.9791716632717283, "learning_rate": 1.9181118081574336e-05, "loss": 0.5843, "step": 1918 }, { "epoch": 0.16, "grad_norm": 0.9691858920466895, "learning_rate": 1.918007445810259e-05, "loss": 0.5861, "step": 1919 }, { "epoch": 0.16, "grad_norm": 1.0086233222980516, "learning_rate": 1.9179030198459822e-05, "loss": 0.6408, "step": 1920 }, { "epoch": 0.16, "grad_norm": 0.9223756495219673, "learning_rate": 1.917798530271841e-05, "loss": 0.5596, "step": 1921 }, { "epoch": 0.16, "grad_norm": 1.0383476005597099, "learning_rate": 1.9176939770950753e-05, "loss": 0.5413, "step": 1922 }, { "epoch": 0.16, "grad_norm": 0.9672186642469593, "learning_rate": 1.917589360322931e-05, "loss": 0.5994, "step": 1923 }, { "epoch": 0.16, "grad_norm": 1.0489479306416298, "learning_rate": 1.9174846799626584e-05, "loss": 0.5887, "step": 1924 }, { "epoch": 0.16, "grad_norm": 1.0890574446451857, "learning_rate": 1.9173799360215106e-05, "loss": 0.5113, "step": 1925 }, { "epoch": 0.16, "grad_norm": 0.8828386718344657, "learning_rate": 1.917275128506747e-05, "loss": 0.5213, "step": 1926 }, { "epoch": 0.16, "grad_norm": 1.050030722223755, "learning_rate": 1.9171702574256314e-05, "loss": 0.6337, "step": 1927 }, { "epoch": 0.16, "grad_norm": 0.9529536751310449, "learning_rate": 1.91706532278543e-05, "loss": 0.5547, "step": 1928 }, { "epoch": 0.16, "grad_norm": 1.1090583641257683, "learning_rate": 1.916960324593415e-05, "loss": 0.697, "step": 1929 }, { "epoch": 0.16, "grad_norm": 0.8848474899547881, "learning_rate": 1.9168552628568632e-05, "loss": 0.5998, "step": 1930 }, { "epoch": 0.16, "grad_norm": 0.9502043834849424, "learning_rate": 1.9167501375830543e-05, "loss": 0.5589, "step": 1931 }, { "epoch": 0.16, "grad_norm": 1.0050861666142, "learning_rate": 1.9166449487792746e-05, "loss": 0.584, "step": 1932 }, { "epoch": 0.16, "grad_norm": 1.0436702439254533, "learning_rate": 1.916539696452813e-05, "loss": 0.634, "step": 1933 }, { "epoch": 0.16, "grad_norm": 0.9459373406054806, "learning_rate": 1.916434380610963e-05, "loss": 0.5895, "step": 1934 }, { "epoch": 0.16, "grad_norm": 1.054908650188775, "learning_rate": 1.916329001261024e-05, "loss": 0.568, "step": 1935 }, { "epoch": 0.16, "grad_norm": 0.9330365256475838, "learning_rate": 1.9162235584102973e-05, "loss": 0.5557, "step": 1936 }, { "epoch": 0.16, "grad_norm": 1.0055011485384573, "learning_rate": 1.916118052066091e-05, "loss": 0.6304, "step": 1937 }, { "epoch": 0.16, "grad_norm": 1.02211232342174, "learning_rate": 1.9160124822357162e-05, "loss": 0.6059, "step": 1938 }, { "epoch": 0.16, "grad_norm": 0.9944380434012641, "learning_rate": 1.915906848926489e-05, "loss": 0.6317, "step": 1939 }, { "epoch": 0.16, "grad_norm": 0.9493365884245534, "learning_rate": 1.9158011521457296e-05, "loss": 0.6037, "step": 1940 }, { "epoch": 0.16, "grad_norm": 1.0219440943167644, "learning_rate": 1.9156953919007625e-05, "loss": 0.6231, "step": 1941 }, { "epoch": 0.16, "grad_norm": 1.135401366313444, "learning_rate": 1.915589568198917e-05, "loss": 0.5845, "step": 1942 }, { "epoch": 0.16, "grad_norm": 0.9750445054134775, "learning_rate": 1.9154836810475266e-05, "loss": 0.5453, "step": 1943 }, { "epoch": 0.16, "grad_norm": 0.9264585897691986, "learning_rate": 1.9153777304539295e-05, "loss": 0.5591, "step": 1944 }, { "epoch": 0.16, "grad_norm": 1.0085873685393283, "learning_rate": 1.9152717164254668e-05, "loss": 0.664, "step": 1945 }, { "epoch": 0.16, "grad_norm": 1.0278958093912496, "learning_rate": 1.915165638969487e-05, "loss": 0.5891, "step": 1946 }, { "epoch": 0.16, "grad_norm": 1.064403603956213, "learning_rate": 1.9150594980933392e-05, "loss": 0.5634, "step": 1947 }, { "epoch": 0.16, "grad_norm": 0.9931787664117413, "learning_rate": 1.9149532938043803e-05, "loss": 0.6159, "step": 1948 }, { "epoch": 0.16, "grad_norm": 1.0329314277757113, "learning_rate": 1.9148470261099698e-05, "loss": 0.6072, "step": 1949 }, { "epoch": 0.16, "grad_norm": 1.1039277621455925, "learning_rate": 1.9147406950174715e-05, "loss": 0.5811, "step": 1950 }, { "epoch": 0.16, "grad_norm": 1.125424655598283, "learning_rate": 1.9146343005342546e-05, "loss": 0.6605, "step": 1951 }, { "epoch": 0.16, "grad_norm": 1.0717462179634367, "learning_rate": 1.914527842667692e-05, "loss": 0.6044, "step": 1952 }, { "epoch": 0.16, "grad_norm": 1.0645384893136742, "learning_rate": 1.914421321425161e-05, "loss": 0.6595, "step": 1953 }, { "epoch": 0.16, "grad_norm": 0.9880743315660508, "learning_rate": 1.914314736814044e-05, "loss": 0.516, "step": 1954 }, { "epoch": 0.16, "grad_norm": 0.990695340025184, "learning_rate": 1.914208088841726e-05, "loss": 0.5579, "step": 1955 }, { "epoch": 0.16, "grad_norm": 1.0861237703124709, "learning_rate": 1.914101377515599e-05, "loss": 0.5526, "step": 1956 }, { "epoch": 0.16, "grad_norm": 0.9343367407195712, "learning_rate": 1.9139946028430568e-05, "loss": 0.5441, "step": 1957 }, { "epoch": 0.16, "grad_norm": 1.0557213241662768, "learning_rate": 1.9138877648314994e-05, "loss": 0.6169, "step": 1958 }, { "epoch": 0.16, "grad_norm": 1.1145568689622156, "learning_rate": 1.9137808634883307e-05, "loss": 0.5841, "step": 1959 }, { "epoch": 0.16, "grad_norm": 0.9547072242413849, "learning_rate": 1.9136738988209585e-05, "loss": 0.6044, "step": 1960 }, { "epoch": 0.16, "grad_norm": 1.0031154617029014, "learning_rate": 1.913566870836796e-05, "loss": 0.6126, "step": 1961 }, { "epoch": 0.16, "grad_norm": 0.9713050505949087, "learning_rate": 1.913459779543259e-05, "loss": 0.6042, "step": 1962 }, { "epoch": 0.16, "grad_norm": 1.0593675597978944, "learning_rate": 1.91335262494777e-05, "loss": 0.579, "step": 1963 }, { "epoch": 0.16, "grad_norm": 1.1283732724343774, "learning_rate": 1.913245407057754e-05, "loss": 0.6801, "step": 1964 }, { "epoch": 0.16, "grad_norm": 0.8975735142461613, "learning_rate": 1.9131381258806417e-05, "loss": 0.5387, "step": 1965 }, { "epoch": 0.16, "grad_norm": 0.9882741415747127, "learning_rate": 1.9130307814238672e-05, "loss": 0.5941, "step": 1966 }, { "epoch": 0.16, "grad_norm": 0.9637366097998488, "learning_rate": 1.912923373694869e-05, "loss": 0.6374, "step": 1967 }, { "epoch": 0.16, "grad_norm": 0.9766823600750104, "learning_rate": 1.912815902701091e-05, "loss": 0.5561, "step": 1968 }, { "epoch": 0.16, "grad_norm": 0.9260778389900388, "learning_rate": 1.9127083684499805e-05, "loss": 0.5718, "step": 1969 }, { "epoch": 0.16, "grad_norm": 1.091377532249139, "learning_rate": 1.9126007709489896e-05, "loss": 0.5721, "step": 1970 }, { "epoch": 0.16, "grad_norm": 1.0418857534347066, "learning_rate": 1.912493110205575e-05, "loss": 0.6359, "step": 1971 }, { "epoch": 0.16, "grad_norm": 0.9657653994400671, "learning_rate": 1.912385386227197e-05, "loss": 0.5676, "step": 1972 }, { "epoch": 0.16, "grad_norm": 0.9697092686869903, "learning_rate": 1.9122775990213212e-05, "loss": 0.6102, "step": 1973 }, { "epoch": 0.16, "grad_norm": 1.0133425995852425, "learning_rate": 1.9121697485954168e-05, "loss": 0.6055, "step": 1974 }, { "epoch": 0.16, "grad_norm": 0.9679797921794574, "learning_rate": 1.912061834956958e-05, "loss": 0.581, "step": 1975 }, { "epoch": 0.16, "grad_norm": 0.9154155156284548, "learning_rate": 1.911953858113423e-05, "loss": 0.546, "step": 1976 }, { "epoch": 0.16, "grad_norm": 0.933799306983249, "learning_rate": 1.9118458180722945e-05, "loss": 0.6079, "step": 1977 }, { "epoch": 0.16, "grad_norm": 1.053758507399398, "learning_rate": 1.91173771484106e-05, "loss": 0.5769, "step": 1978 }, { "epoch": 0.16, "grad_norm": 1.138191845494953, "learning_rate": 1.9116295484272102e-05, "loss": 0.6154, "step": 1979 }, { "epoch": 0.16, "grad_norm": 0.9134720259284926, "learning_rate": 1.9115213188382413e-05, "loss": 0.5892, "step": 1980 }, { "epoch": 0.16, "grad_norm": 1.059907173398205, "learning_rate": 1.9114130260816534e-05, "loss": 0.6303, "step": 1981 }, { "epoch": 0.16, "grad_norm": 0.9855166008746266, "learning_rate": 1.9113046701649517e-05, "loss": 0.5789, "step": 1982 }, { "epoch": 0.16, "grad_norm": 1.0597633161782598, "learning_rate": 1.9111962510956442e-05, "loss": 0.5726, "step": 1983 }, { "epoch": 0.16, "grad_norm": 0.8120227538838763, "learning_rate": 1.9110877688812452e-05, "loss": 0.5493, "step": 1984 }, { "epoch": 0.16, "grad_norm": 1.022270679868322, "learning_rate": 1.9109792235292715e-05, "loss": 0.6067, "step": 1985 }, { "epoch": 0.16, "grad_norm": 1.0319294759236322, "learning_rate": 1.9108706150472457e-05, "loss": 0.5586, "step": 1986 }, { "epoch": 0.16, "grad_norm": 0.9043951748653178, "learning_rate": 1.9107619434426944e-05, "loss": 0.6206, "step": 1987 }, { "epoch": 0.16, "grad_norm": 0.8981673775030506, "learning_rate": 1.9106532087231483e-05, "loss": 0.5786, "step": 1988 }, { "epoch": 0.16, "grad_norm": 0.9107278405121401, "learning_rate": 1.9105444108961423e-05, "loss": 0.6036, "step": 1989 }, { "epoch": 0.16, "grad_norm": 1.044523349649377, "learning_rate": 1.9104355499692166e-05, "loss": 0.633, "step": 1990 }, { "epoch": 0.16, "grad_norm": 1.0117957050741717, "learning_rate": 1.9103266259499146e-05, "loss": 0.5685, "step": 1991 }, { "epoch": 0.16, "grad_norm": 0.965600636959254, "learning_rate": 1.910217638845785e-05, "loss": 0.6139, "step": 1992 }, { "epoch": 0.16, "grad_norm": 1.0093507104367392, "learning_rate": 1.9101085886643804e-05, "loss": 0.6291, "step": 1993 }, { "epoch": 0.16, "grad_norm": 1.0634999781981085, "learning_rate": 1.909999475413258e-05, "loss": 0.6767, "step": 1994 }, { "epoch": 0.16, "grad_norm": 0.9359622179269197, "learning_rate": 1.909890299099979e-05, "loss": 0.5753, "step": 1995 }, { "epoch": 0.16, "grad_norm": 0.9781846611171376, "learning_rate": 1.9097810597321095e-05, "loss": 0.5734, "step": 1996 }, { "epoch": 0.16, "grad_norm": 0.9658528853661609, "learning_rate": 1.9096717573172192e-05, "loss": 0.6279, "step": 1997 }, { "epoch": 0.16, "grad_norm": 1.0353063421004602, "learning_rate": 1.909562391862883e-05, "loss": 0.6234, "step": 1998 }, { "epoch": 0.16, "grad_norm": 0.8673949782339934, "learning_rate": 1.90945296337668e-05, "loss": 0.5474, "step": 1999 }, { "epoch": 0.16, "grad_norm": 1.0514599651032068, "learning_rate": 1.909343471866193e-05, "loss": 0.6215, "step": 2000 }, { "epoch": 0.16, "grad_norm": 1.0759403952207536, "learning_rate": 1.9092339173390108e-05, "loss": 0.6361, "step": 2001 }, { "epoch": 0.16, "grad_norm": 1.1525565183994508, "learning_rate": 1.909124299802724e-05, "loss": 0.6463, "step": 2002 }, { "epoch": 0.16, "grad_norm": 1.0991145737955368, "learning_rate": 1.9090146192649293e-05, "loss": 0.6291, "step": 2003 }, { "epoch": 0.16, "grad_norm": 0.9114300270012013, "learning_rate": 1.9089048757332285e-05, "loss": 0.5504, "step": 2004 }, { "epoch": 0.16, "grad_norm": 1.1271425098911323, "learning_rate": 1.908795069215226e-05, "loss": 0.6001, "step": 2005 }, { "epoch": 0.16, "grad_norm": 0.9186969798037613, "learning_rate": 1.9086851997185307e-05, "loss": 0.5815, "step": 2006 }, { "epoch": 0.16, "grad_norm": 0.9598657717472785, "learning_rate": 1.908575267250757e-05, "loss": 0.6459, "step": 2007 }, { "epoch": 0.16, "grad_norm": 1.0136639599893646, "learning_rate": 1.9084652718195237e-05, "loss": 0.5951, "step": 2008 }, { "epoch": 0.16, "grad_norm": 0.9345431131491013, "learning_rate": 1.908355213432453e-05, "loss": 0.5771, "step": 2009 }, { "epoch": 0.16, "grad_norm": 0.8956818361321761, "learning_rate": 1.9082450920971712e-05, "loss": 0.593, "step": 2010 }, { "epoch": 0.16, "grad_norm": 0.9994855629040246, "learning_rate": 1.9081349078213105e-05, "loss": 0.6248, "step": 2011 }, { "epoch": 0.16, "grad_norm": 1.2337518096291753, "learning_rate": 1.908024660612506e-05, "loss": 0.5699, "step": 2012 }, { "epoch": 0.16, "grad_norm": 0.938035174519471, "learning_rate": 1.9079143504783982e-05, "loss": 0.6318, "step": 2013 }, { "epoch": 0.16, "grad_norm": 0.9892323617279899, "learning_rate": 1.9078039774266308e-05, "loss": 0.5445, "step": 2014 }, { "epoch": 0.16, "grad_norm": 0.922117987267186, "learning_rate": 1.9076935414648533e-05, "loss": 0.5127, "step": 2015 }, { "epoch": 0.16, "grad_norm": 0.9604587542539131, "learning_rate": 1.9075830426007184e-05, "loss": 0.5714, "step": 2016 }, { "epoch": 0.16, "grad_norm": 0.9977282997590355, "learning_rate": 1.9074724808418837e-05, "loss": 0.537, "step": 2017 }, { "epoch": 0.16, "grad_norm": 1.0716207428072122, "learning_rate": 1.907361856196011e-05, "loss": 0.6096, "step": 2018 }, { "epoch": 0.16, "grad_norm": 1.0384083043576222, "learning_rate": 1.9072511686707663e-05, "loss": 0.5372, "step": 2019 }, { "epoch": 0.16, "grad_norm": 0.9594529053049637, "learning_rate": 1.9071404182738206e-05, "loss": 0.5936, "step": 2020 }, { "epoch": 0.16, "grad_norm": 0.9420925394401806, "learning_rate": 1.9070296050128486e-05, "loss": 0.532, "step": 2021 }, { "epoch": 0.16, "grad_norm": 1.0147759141041532, "learning_rate": 1.9069187288955296e-05, "loss": 0.5981, "step": 2022 }, { "epoch": 0.16, "grad_norm": 0.9816266033078141, "learning_rate": 1.9068077899295468e-05, "loss": 0.6499, "step": 2023 }, { "epoch": 0.16, "grad_norm": 0.9879841579914928, "learning_rate": 1.9066967881225887e-05, "loss": 0.5568, "step": 2024 }, { "epoch": 0.16, "grad_norm": 1.058523530099737, "learning_rate": 1.906585723482347e-05, "loss": 0.6412, "step": 2025 }, { "epoch": 0.16, "grad_norm": 0.9878797709934289, "learning_rate": 1.9064745960165196e-05, "loss": 0.6247, "step": 2026 }, { "epoch": 0.16, "grad_norm": 0.8936292649973512, "learning_rate": 1.906363405732806e-05, "loss": 0.5601, "step": 2027 }, { "epoch": 0.16, "grad_norm": 1.011701229053685, "learning_rate": 1.9062521526389126e-05, "loss": 0.5808, "step": 2028 }, { "epoch": 0.16, "grad_norm": 1.1187731519116415, "learning_rate": 1.906140836742549e-05, "loss": 0.6699, "step": 2029 }, { "epoch": 0.16, "grad_norm": 1.0718544915843096, "learning_rate": 1.9060294580514293e-05, "loss": 0.6348, "step": 2030 }, { "epoch": 0.17, "grad_norm": 0.9462997682760822, "learning_rate": 1.905918016573271e-05, "loss": 0.6116, "step": 2031 }, { "epoch": 0.17, "grad_norm": 0.9663357125634172, "learning_rate": 1.9058065123157985e-05, "loss": 0.6202, "step": 2032 }, { "epoch": 0.17, "grad_norm": 0.9246530545030645, "learning_rate": 1.905694945286738e-05, "loss": 0.5894, "step": 2033 }, { "epoch": 0.17, "grad_norm": 0.9493007285658821, "learning_rate": 1.9055833154938208e-05, "loss": 0.5899, "step": 2034 }, { "epoch": 0.17, "grad_norm": 0.9030284864206434, "learning_rate": 1.9054716229447835e-05, "loss": 0.5221, "step": 2035 }, { "epoch": 0.17, "grad_norm": 0.9601465057030015, "learning_rate": 1.9053598676473656e-05, "loss": 0.6482, "step": 2036 }, { "epoch": 0.17, "grad_norm": 1.0732783069225416, "learning_rate": 1.905248049609312e-05, "loss": 0.6566, "step": 2037 }, { "epoch": 0.17, "grad_norm": 1.1640059200373662, "learning_rate": 1.9051361688383715e-05, "loss": 0.6609, "step": 2038 }, { "epoch": 0.17, "grad_norm": 0.922087630723464, "learning_rate": 1.9050242253422975e-05, "loss": 0.5481, "step": 2039 }, { "epoch": 0.17, "grad_norm": 1.0115892884144417, "learning_rate": 1.9049122191288473e-05, "loss": 0.5152, "step": 2040 }, { "epoch": 0.17, "grad_norm": 0.9810015503795328, "learning_rate": 1.9048001502057828e-05, "loss": 0.5688, "step": 2041 }, { "epoch": 0.17, "grad_norm": 0.9424808502813058, "learning_rate": 1.9046880185808706e-05, "loss": 0.629, "step": 2042 }, { "epoch": 0.17, "grad_norm": 1.0318164669425995, "learning_rate": 1.9045758242618813e-05, "loss": 0.6051, "step": 2043 }, { "epoch": 0.17, "grad_norm": 1.0273118739479803, "learning_rate": 1.9044635672565898e-05, "loss": 0.5613, "step": 2044 }, { "epoch": 0.17, "grad_norm": 0.8830868072185081, "learning_rate": 1.904351247572775e-05, "loss": 0.5698, "step": 2045 }, { "epoch": 0.17, "grad_norm": 1.1183889457477896, "learning_rate": 1.904238865218221e-05, "loss": 0.4665, "step": 2046 }, { "epoch": 0.17, "grad_norm": 1.0225856305965528, "learning_rate": 1.9041264202007158e-05, "loss": 0.5748, "step": 2047 }, { "epoch": 0.17, "grad_norm": 1.0694762111194591, "learning_rate": 1.9040139125280517e-05, "loss": 0.5748, "step": 2048 }, { "epoch": 0.17, "grad_norm": 1.0199752555319277, "learning_rate": 1.9039013422080255e-05, "loss": 0.6081, "step": 2049 }, { "epoch": 0.17, "grad_norm": 0.9842016088499462, "learning_rate": 1.9037887092484377e-05, "loss": 0.5637, "step": 2050 }, { "epoch": 0.17, "grad_norm": 1.023273970692155, "learning_rate": 1.903676013657094e-05, "loss": 0.6317, "step": 2051 }, { "epoch": 0.17, "grad_norm": 1.030403375603567, "learning_rate": 1.9035632554418045e-05, "loss": 0.6595, "step": 2052 }, { "epoch": 0.17, "grad_norm": 0.9432884806250941, "learning_rate": 1.9034504346103825e-05, "loss": 0.6023, "step": 2053 }, { "epoch": 0.17, "grad_norm": 1.0073480752386745, "learning_rate": 1.9033375511706466e-05, "loss": 0.6112, "step": 2054 }, { "epoch": 0.17, "grad_norm": 1.068931655724002, "learning_rate": 1.90322460513042e-05, "loss": 0.6017, "step": 2055 }, { "epoch": 0.17, "grad_norm": 1.0386163177149474, "learning_rate": 1.9031115964975295e-05, "loss": 0.6221, "step": 2056 }, { "epoch": 0.17, "grad_norm": 1.0051235548300301, "learning_rate": 1.9029985252798062e-05, "loss": 0.6493, "step": 2057 }, { "epoch": 0.17, "grad_norm": 1.0869761990452371, "learning_rate": 1.902885391485086e-05, "loss": 0.5659, "step": 2058 }, { "epoch": 0.17, "grad_norm": 0.9535848607413672, "learning_rate": 1.9027721951212092e-05, "loss": 0.6128, "step": 2059 }, { "epoch": 0.17, "grad_norm": 1.0412077321045292, "learning_rate": 1.90265893619602e-05, "loss": 0.6478, "step": 2060 }, { "epoch": 0.17, "grad_norm": 1.0429919366838214, "learning_rate": 1.9025456147173668e-05, "loss": 0.6557, "step": 2061 }, { "epoch": 0.17, "grad_norm": 0.9722372011141724, "learning_rate": 1.9024322306931035e-05, "loss": 0.585, "step": 2062 }, { "epoch": 0.17, "grad_norm": 1.026797061134865, "learning_rate": 1.902318784131087e-05, "loss": 0.661, "step": 2063 }, { "epoch": 0.17, "grad_norm": 0.9701379700065667, "learning_rate": 1.902205275039179e-05, "loss": 0.6493, "step": 2064 }, { "epoch": 0.17, "grad_norm": 0.9532383610104006, "learning_rate": 1.902091703425246e-05, "loss": 0.5722, "step": 2065 }, { "epoch": 0.17, "grad_norm": 1.0321123189440147, "learning_rate": 1.901978069297158e-05, "loss": 0.5949, "step": 2066 }, { "epoch": 0.17, "grad_norm": 0.9594333455983534, "learning_rate": 1.9018643726627894e-05, "loss": 0.4708, "step": 2067 }, { "epoch": 0.17, "grad_norm": 0.9797937635347074, "learning_rate": 1.90175061353002e-05, "loss": 0.4845, "step": 2068 }, { "epoch": 0.17, "grad_norm": 1.029429760101533, "learning_rate": 1.9016367919067332e-05, "loss": 0.6088, "step": 2069 }, { "epoch": 0.17, "grad_norm": 1.0400521070299615, "learning_rate": 1.9015229078008163e-05, "loss": 0.6772, "step": 2070 }, { "epoch": 0.17, "grad_norm": 0.984440854452079, "learning_rate": 1.9014089612201612e-05, "loss": 0.5406, "step": 2071 }, { "epoch": 0.17, "grad_norm": 1.0205451452855927, "learning_rate": 1.901294952172665e-05, "loss": 0.5767, "step": 2072 }, { "epoch": 0.17, "grad_norm": 0.9840334858354977, "learning_rate": 1.901180880666228e-05, "loss": 0.6672, "step": 2073 }, { "epoch": 0.17, "grad_norm": 0.9427674229758277, "learning_rate": 1.9010667467087554e-05, "loss": 0.5223, "step": 2074 }, { "epoch": 0.17, "grad_norm": 1.1013136774216972, "learning_rate": 1.9009525503081565e-05, "loss": 0.5849, "step": 2075 }, { "epoch": 0.17, "grad_norm": 1.0761601500099016, "learning_rate": 1.900838291472345e-05, "loss": 0.5946, "step": 2076 }, { "epoch": 0.17, "grad_norm": 1.0176861590592503, "learning_rate": 1.900723970209239e-05, "loss": 0.5184, "step": 2077 }, { "epoch": 0.17, "grad_norm": 1.0460873298295472, "learning_rate": 1.9006095865267605e-05, "loss": 0.5795, "step": 2078 }, { "epoch": 0.17, "grad_norm": 0.9584819948779129, "learning_rate": 1.9004951404328363e-05, "loss": 0.6414, "step": 2079 }, { "epoch": 0.17, "grad_norm": 1.087426505655538, "learning_rate": 1.9003806319353985e-05, "loss": 0.5963, "step": 2080 }, { "epoch": 0.17, "grad_norm": 0.9346911893859755, "learning_rate": 1.9002660610423808e-05, "loss": 0.5711, "step": 2081 }, { "epoch": 0.17, "grad_norm": 1.0277683323783546, "learning_rate": 1.9001514277617236e-05, "loss": 0.6062, "step": 2082 }, { "epoch": 0.17, "grad_norm": 0.9992807356109709, "learning_rate": 1.900036732101371e-05, "loss": 0.6467, "step": 2083 }, { "epoch": 0.17, "grad_norm": 0.8751132896048749, "learning_rate": 1.8999219740692716e-05, "loss": 0.5005, "step": 2084 }, { "epoch": 0.17, "grad_norm": 0.9918946212928599, "learning_rate": 1.899807153673377e-05, "loss": 0.6396, "step": 2085 }, { "epoch": 0.17, "grad_norm": 0.8375400677513595, "learning_rate": 1.8996922709216456e-05, "loss": 0.5189, "step": 2086 }, { "epoch": 0.17, "grad_norm": 1.0006365110478812, "learning_rate": 1.8995773258220374e-05, "loss": 0.6274, "step": 2087 }, { "epoch": 0.17, "grad_norm": 0.9937520191210955, "learning_rate": 1.8994623183825183e-05, "loss": 0.6189, "step": 2088 }, { "epoch": 0.17, "grad_norm": 1.1116281752553856, "learning_rate": 1.8993472486110586e-05, "loss": 0.694, "step": 2089 }, { "epoch": 0.17, "grad_norm": 1.0213234557429571, "learning_rate": 1.899232116515632e-05, "loss": 0.6026, "step": 2090 }, { "epoch": 0.17, "grad_norm": 1.0023366265849354, "learning_rate": 1.8991169221042173e-05, "loss": 0.5995, "step": 2091 }, { "epoch": 0.17, "grad_norm": 0.9910945945231302, "learning_rate": 1.8990016653847978e-05, "loss": 0.6022, "step": 2092 }, { "epoch": 0.17, "grad_norm": 1.1200184181031805, "learning_rate": 1.8988863463653603e-05, "loss": 0.6562, "step": 2093 }, { "epoch": 0.17, "grad_norm": 1.0155184591151794, "learning_rate": 1.8987709650538958e-05, "loss": 0.6389, "step": 2094 }, { "epoch": 0.17, "grad_norm": 0.9650011456012725, "learning_rate": 1.898655521458401e-05, "loss": 0.5576, "step": 2095 }, { "epoch": 0.17, "grad_norm": 1.0579642872444046, "learning_rate": 1.8985400155868756e-05, "loss": 0.5898, "step": 2096 }, { "epoch": 0.17, "grad_norm": 0.968661935774855, "learning_rate": 1.898424447447324e-05, "loss": 0.5597, "step": 2097 }, { "epoch": 0.17, "grad_norm": 1.0804058104288323, "learning_rate": 1.8983088170477556e-05, "loss": 0.6229, "step": 2098 }, { "epoch": 0.17, "grad_norm": 1.1630361930612874, "learning_rate": 1.8981931243961823e-05, "loss": 0.6395, "step": 2099 }, { "epoch": 0.17, "grad_norm": 0.9472132177706004, "learning_rate": 1.8980773695006226e-05, "loss": 0.5772, "step": 2100 }, { "epoch": 0.17, "grad_norm": 0.9232106027800606, "learning_rate": 1.897961552369098e-05, "loss": 0.5748, "step": 2101 }, { "epoch": 0.17, "grad_norm": 0.9375393267389712, "learning_rate": 1.8978456730096336e-05, "loss": 0.5782, "step": 2102 }, { "epoch": 0.17, "grad_norm": 1.0526881739286673, "learning_rate": 1.897729731430261e-05, "loss": 0.5435, "step": 2103 }, { "epoch": 0.17, "grad_norm": 1.1440929482692097, "learning_rate": 1.8976137276390145e-05, "loss": 0.6177, "step": 2104 }, { "epoch": 0.17, "grad_norm": 0.9239252211596448, "learning_rate": 1.897497661643932e-05, "loss": 0.6119, "step": 2105 }, { "epoch": 0.17, "grad_norm": 0.9664453721766127, "learning_rate": 1.8973815334530583e-05, "loss": 0.523, "step": 2106 }, { "epoch": 0.17, "grad_norm": 0.9504949286899973, "learning_rate": 1.8972653430744403e-05, "loss": 0.6081, "step": 2107 }, { "epoch": 0.17, "grad_norm": 1.0457199660560634, "learning_rate": 1.8971490905161297e-05, "loss": 0.6772, "step": 2108 }, { "epoch": 0.17, "grad_norm": 1.0378069524788978, "learning_rate": 1.897032775786183e-05, "loss": 0.626, "step": 2109 }, { "epoch": 0.17, "grad_norm": 0.9729500857706217, "learning_rate": 1.8969163988926606e-05, "loss": 0.5945, "step": 2110 }, { "epoch": 0.17, "grad_norm": 1.533917801839161, "learning_rate": 1.896799959843627e-05, "loss": 0.6287, "step": 2111 }, { "epoch": 0.17, "grad_norm": 1.0979853305051799, "learning_rate": 1.8966834586471517e-05, "loss": 0.656, "step": 2112 }, { "epoch": 0.17, "grad_norm": 0.9436270666351112, "learning_rate": 1.8965668953113083e-05, "loss": 0.5879, "step": 2113 }, { "epoch": 0.17, "grad_norm": 0.9454702686057739, "learning_rate": 1.8964502698441745e-05, "loss": 0.5991, "step": 2114 }, { "epoch": 0.17, "grad_norm": 0.8873844998178761, "learning_rate": 1.8963335822538317e-05, "loss": 0.5835, "step": 2115 }, { "epoch": 0.17, "grad_norm": 1.147471428553273, "learning_rate": 1.896216832548367e-05, "loss": 0.6603, "step": 2116 }, { "epoch": 0.17, "grad_norm": 1.1057922519103787, "learning_rate": 1.8961000207358707e-05, "loss": 0.6675, "step": 2117 }, { "epoch": 0.17, "grad_norm": 0.9412420554305279, "learning_rate": 1.895983146824438e-05, "loss": 0.6009, "step": 2118 }, { "epoch": 0.17, "grad_norm": 0.9299496677178599, "learning_rate": 1.8958662108221677e-05, "loss": 0.5487, "step": 2119 }, { "epoch": 0.17, "grad_norm": 1.0024697235170035, "learning_rate": 1.8957492127371635e-05, "loss": 0.644, "step": 2120 }, { "epoch": 0.17, "grad_norm": 0.9525929588641228, "learning_rate": 1.8956321525775337e-05, "loss": 0.628, "step": 2121 }, { "epoch": 0.17, "grad_norm": 1.0418909039513666, "learning_rate": 1.8955150303513902e-05, "loss": 0.5646, "step": 2122 }, { "epoch": 0.17, "grad_norm": 0.9514053674277251, "learning_rate": 1.895397846066849e-05, "loss": 0.5671, "step": 2123 }, { "epoch": 0.17, "grad_norm": 1.040858447434405, "learning_rate": 1.8952805997320315e-05, "loss": 0.6042, "step": 2124 }, { "epoch": 0.17, "grad_norm": 1.0380436753980238, "learning_rate": 1.8951632913550625e-05, "loss": 0.6072, "step": 2125 }, { "epoch": 0.17, "grad_norm": 1.0234302731992189, "learning_rate": 1.8950459209440716e-05, "loss": 0.6109, "step": 2126 }, { "epoch": 0.17, "grad_norm": 1.0945225898261712, "learning_rate": 1.8949284885071917e-05, "loss": 0.6402, "step": 2127 }, { "epoch": 0.17, "grad_norm": 0.9642259329102109, "learning_rate": 1.8948109940525622e-05, "loss": 0.5854, "step": 2128 }, { "epoch": 0.17, "grad_norm": 0.9262640462813323, "learning_rate": 1.894693437588324e-05, "loss": 0.5699, "step": 2129 }, { "epoch": 0.17, "grad_norm": 1.0546036035380781, "learning_rate": 1.8945758191226242e-05, "loss": 0.6399, "step": 2130 }, { "epoch": 0.17, "grad_norm": 0.892279726784369, "learning_rate": 1.8944581386636137e-05, "loss": 0.5991, "step": 2131 }, { "epoch": 0.17, "grad_norm": 0.9601886962094532, "learning_rate": 1.8943403962194477e-05, "loss": 0.5391, "step": 2132 }, { "epoch": 0.17, "grad_norm": 0.9820769851632366, "learning_rate": 1.8942225917982854e-05, "loss": 0.6186, "step": 2133 }, { "epoch": 0.17, "grad_norm": 1.0907957831446005, "learning_rate": 1.8941047254082903e-05, "loss": 0.5005, "step": 2134 }, { "epoch": 0.17, "grad_norm": 0.8872934358107167, "learning_rate": 1.8939867970576315e-05, "loss": 0.5791, "step": 2135 }, { "epoch": 0.17, "grad_norm": 1.012616664948051, "learning_rate": 1.8938688067544802e-05, "loss": 0.5794, "step": 2136 }, { "epoch": 0.17, "grad_norm": 1.023609395455567, "learning_rate": 1.893750754507014e-05, "loss": 0.5418, "step": 2137 }, { "epoch": 0.17, "grad_norm": 1.0245431610313795, "learning_rate": 1.8936326403234125e-05, "loss": 0.5804, "step": 2138 }, { "epoch": 0.17, "grad_norm": 1.0381393955766731, "learning_rate": 1.893514464211862e-05, "loss": 0.617, "step": 2139 }, { "epoch": 0.17, "grad_norm": 0.9191246443163674, "learning_rate": 1.8933962261805515e-05, "loss": 0.6222, "step": 2140 }, { "epoch": 0.17, "grad_norm": 1.0053856875035811, "learning_rate": 1.893277926237675e-05, "loss": 0.5887, "step": 2141 }, { "epoch": 0.17, "grad_norm": 0.9937571856876615, "learning_rate": 1.8931595643914307e-05, "loss": 0.6551, "step": 2142 }, { "epoch": 0.17, "grad_norm": 1.0103025400896615, "learning_rate": 1.893041140650021e-05, "loss": 0.4889, "step": 2143 }, { "epoch": 0.17, "grad_norm": 1.1326283733191278, "learning_rate": 1.8929226550216522e-05, "loss": 0.6579, "step": 2144 }, { "epoch": 0.17, "grad_norm": 1.0345882359058862, "learning_rate": 1.8928041075145352e-05, "loss": 0.6248, "step": 2145 }, { "epoch": 0.17, "grad_norm": 0.9638961572425462, "learning_rate": 1.892685498136886e-05, "loss": 0.5776, "step": 2146 }, { "epoch": 0.17, "grad_norm": 0.9503035496039794, "learning_rate": 1.892566826896923e-05, "loss": 0.5758, "step": 2147 }, { "epoch": 0.17, "grad_norm": 1.0265755834645953, "learning_rate": 1.8924480938028708e-05, "loss": 0.5906, "step": 2148 }, { "epoch": 0.17, "grad_norm": 0.9772350507987435, "learning_rate": 1.8923292988629575e-05, "loss": 0.5762, "step": 2149 }, { "epoch": 0.17, "grad_norm": 0.9678168956550066, "learning_rate": 1.892210442085415e-05, "loss": 0.5854, "step": 2150 }, { "epoch": 0.17, "grad_norm": 0.9550725284032997, "learning_rate": 1.8920915234784805e-05, "loss": 0.6434, "step": 2151 }, { "epoch": 0.17, "grad_norm": 1.0418780540372863, "learning_rate": 1.8919725430503946e-05, "loss": 0.6381, "step": 2152 }, { "epoch": 0.17, "grad_norm": 0.951117776669499, "learning_rate": 1.8918535008094028e-05, "loss": 0.5835, "step": 2153 }, { "epoch": 0.18, "grad_norm": 0.8789893915475889, "learning_rate": 1.891734396763754e-05, "loss": 0.5071, "step": 2154 }, { "epoch": 0.18, "grad_norm": 0.9637330021078095, "learning_rate": 1.891615230921703e-05, "loss": 0.602, "step": 2155 }, { "epoch": 0.18, "grad_norm": 0.9730380922707319, "learning_rate": 1.8914960032915072e-05, "loss": 0.5574, "step": 2156 }, { "epoch": 0.18, "grad_norm": 0.9745286624861224, "learning_rate": 1.891376713881429e-05, "loss": 0.6078, "step": 2157 }, { "epoch": 0.18, "grad_norm": 0.9153110919261715, "learning_rate": 1.8912573626997354e-05, "loss": 0.5904, "step": 2158 }, { "epoch": 0.18, "grad_norm": 1.011667820668252, "learning_rate": 1.891137949754697e-05, "loss": 0.6148, "step": 2159 }, { "epoch": 0.18, "grad_norm": 1.0902155476345923, "learning_rate": 1.891018475054589e-05, "loss": 0.6056, "step": 2160 }, { "epoch": 0.18, "grad_norm": 0.9472518475231407, "learning_rate": 1.890898938607691e-05, "loss": 0.544, "step": 2161 }, { "epoch": 0.18, "grad_norm": 1.0552234696767666, "learning_rate": 1.890779340422287e-05, "loss": 0.6252, "step": 2162 }, { "epoch": 0.18, "grad_norm": 1.0569263154227653, "learning_rate": 1.8906596805066648e-05, "loss": 0.5732, "step": 2163 }, { "epoch": 0.18, "grad_norm": 1.0492075165471888, "learning_rate": 1.8905399588691165e-05, "loss": 0.621, "step": 2164 }, { "epoch": 0.18, "grad_norm": 1.0089167595328195, "learning_rate": 1.890420175517939e-05, "loss": 0.6021, "step": 2165 }, { "epoch": 0.18, "grad_norm": 1.0353057215864783, "learning_rate": 1.8903003304614332e-05, "loss": 0.5981, "step": 2166 }, { "epoch": 0.18, "grad_norm": 0.9646737678180385, "learning_rate": 1.8901804237079043e-05, "loss": 0.6035, "step": 2167 }, { "epoch": 0.18, "grad_norm": 1.0961600407946666, "learning_rate": 1.8900604552656615e-05, "loss": 0.6611, "step": 2168 }, { "epoch": 0.18, "grad_norm": 0.9352215688772914, "learning_rate": 1.889940425143019e-05, "loss": 0.5734, "step": 2169 }, { "epoch": 0.18, "grad_norm": 0.9609782533708203, "learning_rate": 1.889820333348294e-05, "loss": 0.569, "step": 2170 }, { "epoch": 0.18, "grad_norm": 0.8452540176519757, "learning_rate": 1.8897001798898093e-05, "loss": 0.5334, "step": 2171 }, { "epoch": 0.18, "grad_norm": 0.9851803544469124, "learning_rate": 1.8895799647758912e-05, "loss": 0.6187, "step": 2172 }, { "epoch": 0.18, "grad_norm": 1.0293889942861165, "learning_rate": 1.889459688014871e-05, "loss": 0.6058, "step": 2173 }, { "epoch": 0.18, "grad_norm": 1.0018336586296221, "learning_rate": 1.8893393496150828e-05, "loss": 0.6061, "step": 2174 }, { "epoch": 0.18, "grad_norm": 1.008102851968431, "learning_rate": 1.889218949584867e-05, "loss": 0.6237, "step": 2175 }, { "epoch": 0.18, "grad_norm": 1.0072450005393212, "learning_rate": 1.8890984879325664e-05, "loss": 0.6138, "step": 2176 }, { "epoch": 0.18, "grad_norm": 1.187526031541943, "learning_rate": 1.888977964666529e-05, "loss": 0.6545, "step": 2177 }, { "epoch": 0.18, "grad_norm": 0.9867953653591311, "learning_rate": 1.8888573797951078e-05, "loss": 0.6127, "step": 2178 }, { "epoch": 0.18, "grad_norm": 0.9387087958810846, "learning_rate": 1.888736733326658e-05, "loss": 0.6117, "step": 2179 }, { "epoch": 0.18, "grad_norm": 1.0722541801802747, "learning_rate": 1.8886160252695413e-05, "loss": 0.6302, "step": 2180 }, { "epoch": 0.18, "grad_norm": 0.9779850206491939, "learning_rate": 1.8884952556321223e-05, "loss": 0.6469, "step": 2181 }, { "epoch": 0.18, "grad_norm": 1.0439762284857736, "learning_rate": 1.8883744244227697e-05, "loss": 0.6531, "step": 2182 }, { "epoch": 0.18, "grad_norm": 1.0267706789612425, "learning_rate": 1.8882535316498577e-05, "loss": 0.6373, "step": 2183 }, { "epoch": 0.18, "grad_norm": 0.9898580400329446, "learning_rate": 1.888132577321764e-05, "loss": 0.6467, "step": 2184 }, { "epoch": 0.18, "grad_norm": 0.8775000630268309, "learning_rate": 1.8880115614468705e-05, "loss": 0.5477, "step": 2185 }, { "epoch": 0.18, "grad_norm": 0.9595056001380599, "learning_rate": 1.8878904840335635e-05, "loss": 0.669, "step": 2186 }, { "epoch": 0.18, "grad_norm": 0.9219339134739535, "learning_rate": 1.887769345090233e-05, "loss": 0.5872, "step": 2187 }, { "epoch": 0.18, "grad_norm": 1.034499138231239, "learning_rate": 1.887648144625275e-05, "loss": 0.6174, "step": 2188 }, { "epoch": 0.18, "grad_norm": 0.9426548133498441, "learning_rate": 1.8875268826470875e-05, "loss": 0.5727, "step": 2189 }, { "epoch": 0.18, "grad_norm": 1.0290765921044192, "learning_rate": 1.8874055591640746e-05, "loss": 0.5592, "step": 2190 }, { "epoch": 0.18, "grad_norm": 0.8568541726447252, "learning_rate": 1.887284174184643e-05, "loss": 0.499, "step": 2191 }, { "epoch": 0.18, "grad_norm": 1.009808118159287, "learning_rate": 1.8871627277172058e-05, "loss": 0.5931, "step": 2192 }, { "epoch": 0.18, "grad_norm": 1.0151562984941591, "learning_rate": 1.887041219770178e-05, "loss": 0.6362, "step": 2193 }, { "epoch": 0.18, "grad_norm": 0.9196707775669238, "learning_rate": 1.8869196503519807e-05, "loss": 0.5952, "step": 2194 }, { "epoch": 0.18, "grad_norm": 0.9065759621006254, "learning_rate": 1.8867980194710382e-05, "loss": 0.6028, "step": 2195 }, { "epoch": 0.18, "grad_norm": 1.027461981521237, "learning_rate": 1.88667632713578e-05, "loss": 0.5995, "step": 2196 }, { "epoch": 0.18, "grad_norm": 0.9316606805491132, "learning_rate": 1.886554573354638e-05, "loss": 0.561, "step": 2197 }, { "epoch": 0.18, "grad_norm": 1.0300501829703554, "learning_rate": 1.886432758136051e-05, "loss": 0.5255, "step": 2198 }, { "epoch": 0.18, "grad_norm": 1.0239573905462445, "learning_rate": 1.8863108814884602e-05, "loss": 0.6205, "step": 2199 }, { "epoch": 0.18, "grad_norm": 1.0534000129161385, "learning_rate": 1.8861889434203112e-05, "loss": 0.599, "step": 2200 }, { "epoch": 0.18, "grad_norm": 1.0750868919378282, "learning_rate": 1.8860669439400543e-05, "loss": 0.5538, "step": 2201 }, { "epoch": 0.18, "grad_norm": 0.9883194803662289, "learning_rate": 1.8859448830561445e-05, "loss": 0.6598, "step": 2202 }, { "epoch": 0.18, "grad_norm": 0.9895234269568443, "learning_rate": 1.8858227607770398e-05, "loss": 0.668, "step": 2203 }, { "epoch": 0.18, "grad_norm": 0.9954174791349467, "learning_rate": 1.885700577111204e-05, "loss": 0.632, "step": 2204 }, { "epoch": 0.18, "grad_norm": 1.1474192952722595, "learning_rate": 1.8855783320671034e-05, "loss": 0.5958, "step": 2205 }, { "epoch": 0.18, "grad_norm": 0.9192615979698529, "learning_rate": 1.8854560256532098e-05, "loss": 0.5911, "step": 2206 }, { "epoch": 0.18, "grad_norm": 1.013275376886477, "learning_rate": 1.8853336578779994e-05, "loss": 0.5722, "step": 2207 }, { "epoch": 0.18, "grad_norm": 0.9358020044247859, "learning_rate": 1.8852112287499518e-05, "loss": 0.5594, "step": 2208 }, { "epoch": 0.18, "grad_norm": 0.983445447661659, "learning_rate": 1.8850887382775507e-05, "loss": 0.5761, "step": 2209 }, { "epoch": 0.18, "grad_norm": 1.026955046322064, "learning_rate": 1.884966186469286e-05, "loss": 0.5661, "step": 2210 }, { "epoch": 0.18, "grad_norm": 1.159699459442207, "learning_rate": 1.8848435733336487e-05, "loss": 0.6511, "step": 2211 }, { "epoch": 0.18, "grad_norm": 1.001777768746846, "learning_rate": 1.884720898879137e-05, "loss": 0.5746, "step": 2212 }, { "epoch": 0.18, "grad_norm": 0.8785765469744956, "learning_rate": 1.8845981631142518e-05, "loss": 0.5533, "step": 2213 }, { "epoch": 0.18, "grad_norm": 1.0559320675134651, "learning_rate": 1.8844753660474985e-05, "loss": 0.6484, "step": 2214 }, { "epoch": 0.18, "grad_norm": 1.060208265156859, "learning_rate": 1.8843525076873866e-05, "loss": 0.6731, "step": 2215 }, { "epoch": 0.18, "grad_norm": 0.9241440849844474, "learning_rate": 1.8842295880424305e-05, "loss": 0.5179, "step": 2216 }, { "epoch": 0.18, "grad_norm": 0.960927482449649, "learning_rate": 1.8841066071211485e-05, "loss": 0.5848, "step": 2217 }, { "epoch": 0.18, "grad_norm": 1.0117277009292722, "learning_rate": 1.8839835649320622e-05, "loss": 0.5871, "step": 2218 }, { "epoch": 0.18, "grad_norm": 1.1051299188661905, "learning_rate": 1.8838604614837e-05, "loss": 0.6599, "step": 2219 }, { "epoch": 0.18, "grad_norm": 2.1126022281978067, "learning_rate": 1.8837372967845907e-05, "loss": 0.6209, "step": 2220 }, { "epoch": 0.18, "grad_norm": 1.0437512390828085, "learning_rate": 1.883614070843271e-05, "loss": 0.6129, "step": 2221 }, { "epoch": 0.18, "grad_norm": 0.9347343174247541, "learning_rate": 1.88349078366828e-05, "loss": 0.5732, "step": 2222 }, { "epoch": 0.18, "grad_norm": 0.978072948910801, "learning_rate": 1.8833674352681613e-05, "loss": 0.5862, "step": 2223 }, { "epoch": 0.18, "grad_norm": 0.9035635392386544, "learning_rate": 1.8832440256514633e-05, "loss": 0.5817, "step": 2224 }, { "epoch": 0.18, "grad_norm": 0.946716318845138, "learning_rate": 1.8831205548267375e-05, "loss": 0.6018, "step": 2225 }, { "epoch": 0.18, "grad_norm": 0.9114345496424868, "learning_rate": 1.8829970228025405e-05, "loss": 0.5422, "step": 2226 }, { "epoch": 0.18, "grad_norm": 0.947695890004665, "learning_rate": 1.882873429587433e-05, "loss": 0.6219, "step": 2227 }, { "epoch": 0.18, "grad_norm": 0.9607336606853346, "learning_rate": 1.8827497751899798e-05, "loss": 0.5899, "step": 2228 }, { "epoch": 0.18, "grad_norm": 1.056301862698836, "learning_rate": 1.8826260596187505e-05, "loss": 0.6012, "step": 2229 }, { "epoch": 0.18, "grad_norm": 0.9839236383385103, "learning_rate": 1.882502282882318e-05, "loss": 0.5476, "step": 2230 }, { "epoch": 0.18, "grad_norm": 0.9453761983415658, "learning_rate": 1.88237844498926e-05, "loss": 0.5613, "step": 2231 }, { "epoch": 0.18, "grad_norm": 1.0769594253515593, "learning_rate": 1.8822545459481585e-05, "loss": 0.6289, "step": 2232 }, { "epoch": 0.18, "grad_norm": 0.9504981321207882, "learning_rate": 1.8821305857675997e-05, "loss": 0.5588, "step": 2233 }, { "epoch": 0.18, "grad_norm": 0.9703487793124386, "learning_rate": 1.8820065644561736e-05, "loss": 0.5812, "step": 2234 }, { "epoch": 0.18, "grad_norm": 0.9863394180870134, "learning_rate": 1.8818824820224747e-05, "loss": 0.6304, "step": 2235 }, { "epoch": 0.18, "grad_norm": 0.9529494000210472, "learning_rate": 1.8817583384751023e-05, "loss": 0.5819, "step": 2236 }, { "epoch": 0.18, "grad_norm": 0.9677249493395993, "learning_rate": 1.881634133822659e-05, "loss": 0.6066, "step": 2237 }, { "epoch": 0.18, "grad_norm": 0.9420220803818873, "learning_rate": 1.8815098680737523e-05, "loss": 0.5745, "step": 2238 }, { "epoch": 0.18, "grad_norm": 1.016139132803753, "learning_rate": 1.881385541236994e-05, "loss": 0.6346, "step": 2239 }, { "epoch": 0.18, "grad_norm": 1.1089714899910492, "learning_rate": 1.881261153320999e-05, "loss": 0.6067, "step": 2240 }, { "epoch": 0.18, "grad_norm": 0.9853924873312063, "learning_rate": 1.881136704334388e-05, "loss": 0.6338, "step": 2241 }, { "epoch": 0.18, "grad_norm": 1.8155046887597646, "learning_rate": 1.8810121942857848e-05, "loss": 0.6043, "step": 2242 }, { "epoch": 0.18, "grad_norm": 0.9104861985868933, "learning_rate": 1.880887623183818e-05, "loss": 0.5181, "step": 2243 }, { "epoch": 0.18, "grad_norm": 0.9228901883726971, "learning_rate": 1.8807629910371203e-05, "loss": 0.5156, "step": 2244 }, { "epoch": 0.18, "grad_norm": 0.9801612127255177, "learning_rate": 1.8806382978543283e-05, "loss": 0.6033, "step": 2245 }, { "epoch": 0.18, "grad_norm": 1.056989468750248, "learning_rate": 1.8805135436440837e-05, "loss": 0.6078, "step": 2246 }, { "epoch": 0.18, "grad_norm": 1.1457163395568721, "learning_rate": 1.8803887284150317e-05, "loss": 0.6381, "step": 2247 }, { "epoch": 0.18, "grad_norm": 1.0805771612792152, "learning_rate": 1.8802638521758214e-05, "loss": 0.6548, "step": 2248 }, { "epoch": 0.18, "grad_norm": 1.3202820723326176, "learning_rate": 1.880138914935107e-05, "loss": 0.636, "step": 2249 }, { "epoch": 0.18, "grad_norm": 0.9148686090902376, "learning_rate": 1.8800139167015466e-05, "loss": 0.5745, "step": 2250 }, { "epoch": 0.18, "grad_norm": 1.0389049536285395, "learning_rate": 1.8798888574838023e-05, "loss": 0.6133, "step": 2251 }, { "epoch": 0.18, "grad_norm": 1.030661980018216, "learning_rate": 1.8797637372905407e-05, "loss": 0.548, "step": 2252 }, { "epoch": 0.18, "grad_norm": 1.0276577285974946, "learning_rate": 1.8796385561304323e-05, "loss": 0.6184, "step": 2253 }, { "epoch": 0.18, "grad_norm": 1.0196070836335733, "learning_rate": 1.8795133140121522e-05, "loss": 0.5818, "step": 2254 }, { "epoch": 0.18, "grad_norm": 1.0889379693225758, "learning_rate": 1.8793880109443797e-05, "loss": 0.6733, "step": 2255 }, { "epoch": 0.18, "grad_norm": 0.9353604600412142, "learning_rate": 1.8792626469357983e-05, "loss": 0.5649, "step": 2256 }, { "epoch": 0.18, "grad_norm": 0.9000891999282137, "learning_rate": 1.879137221995095e-05, "loss": 0.558, "step": 2257 }, { "epoch": 0.18, "grad_norm": 0.9242703330932535, "learning_rate": 1.879011736130962e-05, "loss": 0.6322, "step": 2258 }, { "epoch": 0.18, "grad_norm": 1.1666243279578408, "learning_rate": 1.8788861893520954e-05, "loss": 0.6027, "step": 2259 }, { "epoch": 0.18, "grad_norm": 0.9173273086450375, "learning_rate": 1.8787605816671956e-05, "loss": 0.582, "step": 2260 }, { "epoch": 0.18, "grad_norm": 0.9437307573005449, "learning_rate": 1.8786349130849667e-05, "loss": 0.5623, "step": 2261 }, { "epoch": 0.18, "grad_norm": 0.8570905229082192, "learning_rate": 1.8785091836141177e-05, "loss": 0.522, "step": 2262 }, { "epoch": 0.18, "grad_norm": 1.5564688988373234, "learning_rate": 1.8783833932633617e-05, "loss": 0.6621, "step": 2263 }, { "epoch": 0.18, "grad_norm": 1.0150904173397395, "learning_rate": 1.8782575420414155e-05, "loss": 0.652, "step": 2264 }, { "epoch": 0.18, "grad_norm": 0.9376520417247656, "learning_rate": 1.8781316299570007e-05, "loss": 0.5854, "step": 2265 }, { "epoch": 0.18, "grad_norm": 0.9330936778529615, "learning_rate": 1.878005657018843e-05, "loss": 0.5916, "step": 2266 }, { "epoch": 0.18, "grad_norm": 1.0036469337468876, "learning_rate": 1.877879623235672e-05, "loss": 0.6498, "step": 2267 }, { "epoch": 0.18, "grad_norm": 1.0443264323120567, "learning_rate": 1.8777535286162217e-05, "loss": 0.678, "step": 2268 }, { "epoch": 0.18, "grad_norm": 0.9581630196481067, "learning_rate": 1.8776273731692306e-05, "loss": 0.6016, "step": 2269 }, { "epoch": 0.18, "grad_norm": 1.083876675418555, "learning_rate": 1.8775011569034405e-05, "loss": 0.6118, "step": 2270 }, { "epoch": 0.18, "grad_norm": 0.9873027524445195, "learning_rate": 1.877374879827599e-05, "loss": 0.629, "step": 2271 }, { "epoch": 0.18, "grad_norm": 0.9486312587682523, "learning_rate": 1.8772485419504566e-05, "loss": 0.5635, "step": 2272 }, { "epoch": 0.18, "grad_norm": 1.011383681981482, "learning_rate": 1.877122143280768e-05, "loss": 0.642, "step": 2273 }, { "epoch": 0.18, "grad_norm": 1.054871890426257, "learning_rate": 1.8769956838272937e-05, "loss": 0.624, "step": 2274 }, { "epoch": 0.18, "grad_norm": 0.9243443195958089, "learning_rate": 1.8768691635987957e-05, "loss": 0.6012, "step": 2275 }, { "epoch": 0.18, "grad_norm": 0.9137583522941478, "learning_rate": 1.8767425826040426e-05, "loss": 0.6101, "step": 2276 }, { "epoch": 0.19, "grad_norm": 0.8732029787589334, "learning_rate": 1.8766159408518062e-05, "loss": 0.5436, "step": 2277 }, { "epoch": 0.19, "grad_norm": 1.0490382062184178, "learning_rate": 1.8764892383508626e-05, "loss": 0.5305, "step": 2278 }, { "epoch": 0.19, "grad_norm": 1.0350121153065304, "learning_rate": 1.8763624751099924e-05, "loss": 0.6107, "step": 2279 }, { "epoch": 0.19, "grad_norm": 0.8036004247191376, "learning_rate": 1.8762356511379796e-05, "loss": 0.5169, "step": 2280 }, { "epoch": 0.19, "grad_norm": 0.9817024833816537, "learning_rate": 1.8761087664436137e-05, "loss": 0.5483, "step": 2281 }, { "epoch": 0.19, "grad_norm": 1.0869713946968007, "learning_rate": 1.8759818210356874e-05, "loss": 0.6059, "step": 2282 }, { "epoch": 0.19, "grad_norm": 0.9333008852297026, "learning_rate": 1.8758548149229978e-05, "loss": 0.5559, "step": 2283 }, { "epoch": 0.19, "grad_norm": 0.9797631089608451, "learning_rate": 1.8757277481143467e-05, "loss": 0.5846, "step": 2284 }, { "epoch": 0.19, "grad_norm": 0.8580503736403456, "learning_rate": 1.8756006206185388e-05, "loss": 0.5484, "step": 2285 }, { "epoch": 0.19, "grad_norm": 0.9459834566720147, "learning_rate": 1.8754734324443853e-05, "loss": 0.5814, "step": 2286 }, { "epoch": 0.19, "grad_norm": 1.129380323888552, "learning_rate": 1.875346183600699e-05, "loss": 0.6183, "step": 2287 }, { "epoch": 0.19, "grad_norm": 0.9513144228125862, "learning_rate": 1.8752188740962986e-05, "loss": 0.607, "step": 2288 }, { "epoch": 0.19, "grad_norm": 1.0304282864628564, "learning_rate": 1.8750915039400068e-05, "loss": 0.6071, "step": 2289 }, { "epoch": 0.19, "grad_norm": 0.9340505697364657, "learning_rate": 1.87496407314065e-05, "loss": 0.5504, "step": 2290 }, { "epoch": 0.19, "grad_norm": 1.023533370754278, "learning_rate": 1.8748365817070586e-05, "loss": 0.6182, "step": 2291 }, { "epoch": 0.19, "grad_norm": 0.9882283676462045, "learning_rate": 1.8747090296480683e-05, "loss": 0.5844, "step": 2292 }, { "epoch": 0.19, "grad_norm": 0.9320546167398589, "learning_rate": 1.8745814169725183e-05, "loss": 0.5435, "step": 2293 }, { "epoch": 0.19, "grad_norm": 0.9867593700841457, "learning_rate": 1.8744537436892517e-05, "loss": 0.5975, "step": 2294 }, { "epoch": 0.19, "grad_norm": 1.0258212231425314, "learning_rate": 1.8743260098071163e-05, "loss": 0.5049, "step": 2295 }, { "epoch": 0.19, "grad_norm": 1.0907723321403282, "learning_rate": 1.8741982153349642e-05, "loss": 0.6586, "step": 2296 }, { "epoch": 0.19, "grad_norm": 0.950702288691101, "learning_rate": 1.8740703602816506e-05, "loss": 0.6177, "step": 2297 }, { "epoch": 0.19, "grad_norm": 1.0254255423493217, "learning_rate": 1.8739424446560365e-05, "loss": 0.6089, "step": 2298 }, { "epoch": 0.19, "grad_norm": 1.0918084848642433, "learning_rate": 1.8738144684669867e-05, "loss": 0.6807, "step": 2299 }, { "epoch": 0.19, "grad_norm": 1.0255381404566133, "learning_rate": 1.8736864317233688e-05, "loss": 0.6707, "step": 2300 }, { "epoch": 0.19, "grad_norm": 0.9287783578796915, "learning_rate": 1.873558334434056e-05, "loss": 0.6023, "step": 2301 }, { "epoch": 0.19, "grad_norm": 0.9522040066938621, "learning_rate": 1.873430176607926e-05, "loss": 0.5756, "step": 2302 }, { "epoch": 0.19, "grad_norm": 0.9664634365215445, "learning_rate": 1.8733019582538595e-05, "loss": 0.5544, "step": 2303 }, { "epoch": 0.19, "grad_norm": 0.9438264204012479, "learning_rate": 1.8731736793807417e-05, "loss": 0.5802, "step": 2304 }, { "epoch": 0.19, "grad_norm": 0.9450277499788748, "learning_rate": 1.873045339997462e-05, "loss": 0.5775, "step": 2305 }, { "epoch": 0.19, "grad_norm": 0.9226491997621605, "learning_rate": 1.872916940112915e-05, "loss": 0.5655, "step": 2306 }, { "epoch": 0.19, "grad_norm": 0.9316094898328231, "learning_rate": 1.8727884797359984e-05, "loss": 0.571, "step": 2307 }, { "epoch": 0.19, "grad_norm": 0.9161774303898466, "learning_rate": 1.8726599588756144e-05, "loss": 0.5742, "step": 2308 }, { "epoch": 0.19, "grad_norm": 1.0212729130725422, "learning_rate": 1.8725313775406693e-05, "loss": 0.6725, "step": 2309 }, { "epoch": 0.19, "grad_norm": 1.0371994533029285, "learning_rate": 1.8724027357400737e-05, "loss": 0.6275, "step": 2310 }, { "epoch": 0.19, "grad_norm": 0.9387275440330368, "learning_rate": 1.872274033482742e-05, "loss": 0.609, "step": 2311 }, { "epoch": 0.19, "grad_norm": 1.018429708491077, "learning_rate": 1.8721452707775935e-05, "loss": 0.5579, "step": 2312 }, { "epoch": 0.19, "grad_norm": 1.0025377413107226, "learning_rate": 1.8720164476335516e-05, "loss": 0.6228, "step": 2313 }, { "epoch": 0.19, "grad_norm": 1.1300305737441967, "learning_rate": 1.8718875640595432e-05, "loss": 0.661, "step": 2314 }, { "epoch": 0.19, "grad_norm": 1.0204356367555911, "learning_rate": 1.8717586200645002e-05, "loss": 0.5863, "step": 2315 }, { "epoch": 0.19, "grad_norm": 0.9527375433742457, "learning_rate": 1.8716296156573578e-05, "loss": 0.6602, "step": 2316 }, { "epoch": 0.19, "grad_norm": 0.9901798497245836, "learning_rate": 1.8715005508470565e-05, "loss": 0.5693, "step": 2317 }, { "epoch": 0.19, "grad_norm": 1.053822223301016, "learning_rate": 1.8713714256425396e-05, "loss": 0.5913, "step": 2318 }, { "epoch": 0.19, "grad_norm": 0.8833294760007623, "learning_rate": 1.8712422400527556e-05, "loss": 0.5931, "step": 2319 }, { "epoch": 0.19, "grad_norm": 0.9639514016390054, "learning_rate": 1.8711129940866577e-05, "loss": 0.5981, "step": 2320 }, { "epoch": 0.19, "grad_norm": 1.0823666757152655, "learning_rate": 1.870983687753202e-05, "loss": 0.6608, "step": 2321 }, { "epoch": 0.19, "grad_norm": 0.8931959289690588, "learning_rate": 1.8708543210613492e-05, "loss": 0.5971, "step": 2322 }, { "epoch": 0.19, "grad_norm": 0.9858619839716677, "learning_rate": 1.8707248940200643e-05, "loss": 0.5847, "step": 2323 }, { "epoch": 0.19, "grad_norm": 0.931992339599603, "learning_rate": 1.8705954066383166e-05, "loss": 0.5913, "step": 2324 }, { "epoch": 0.19, "grad_norm": 0.9600650161274669, "learning_rate": 1.8704658589250795e-05, "loss": 0.5694, "step": 2325 }, { "epoch": 0.19, "grad_norm": 0.9139115608171103, "learning_rate": 1.8703362508893302e-05, "loss": 0.599, "step": 2326 }, { "epoch": 0.19, "grad_norm": 0.9079993112354389, "learning_rate": 1.870206582540051e-05, "loss": 0.6328, "step": 2327 }, { "epoch": 0.19, "grad_norm": 0.930373994652327, "learning_rate": 1.8700768538862274e-05, "loss": 0.6067, "step": 2328 }, { "epoch": 0.19, "grad_norm": 0.935081675704654, "learning_rate": 1.8699470649368496e-05, "loss": 0.5887, "step": 2329 }, { "epoch": 0.19, "grad_norm": 0.974106870385751, "learning_rate": 1.8698172157009124e-05, "loss": 0.5886, "step": 2330 }, { "epoch": 0.19, "grad_norm": 0.9335074917711323, "learning_rate": 1.8696873061874127e-05, "loss": 0.556, "step": 2331 }, { "epoch": 0.19, "grad_norm": 0.9289390886527041, "learning_rate": 1.8695573364053548e-05, "loss": 0.6101, "step": 2332 }, { "epoch": 0.19, "grad_norm": 0.9494119915399611, "learning_rate": 1.8694273063637444e-05, "loss": 0.6225, "step": 2333 }, { "epoch": 0.19, "grad_norm": 0.868203726273747, "learning_rate": 1.869297216071593e-05, "loss": 0.5634, "step": 2334 }, { "epoch": 0.19, "grad_norm": 0.9645235066076571, "learning_rate": 1.8691670655379157e-05, "loss": 0.5783, "step": 2335 }, { "epoch": 0.19, "grad_norm": 0.9291663000652597, "learning_rate": 1.8690368547717313e-05, "loss": 0.5383, "step": 2336 }, { "epoch": 0.19, "grad_norm": 1.030978162010167, "learning_rate": 1.8689065837820642e-05, "loss": 0.6168, "step": 2337 }, { "epoch": 0.19, "grad_norm": 1.01006563723956, "learning_rate": 1.8687762525779412e-05, "loss": 0.5955, "step": 2338 }, { "epoch": 0.19, "grad_norm": 0.9572350447649162, "learning_rate": 1.8686458611683948e-05, "loss": 0.617, "step": 2339 }, { "epoch": 0.19, "grad_norm": 1.0413589226973223, "learning_rate": 1.8685154095624605e-05, "loss": 0.7026, "step": 2340 }, { "epoch": 0.19, "grad_norm": 1.006611036261011, "learning_rate": 1.8683848977691784e-05, "loss": 0.6215, "step": 2341 }, { "epoch": 0.19, "grad_norm": 0.902856036151998, "learning_rate": 1.868254325797594e-05, "loss": 0.5864, "step": 2342 }, { "epoch": 0.19, "grad_norm": 1.0432428298350762, "learning_rate": 1.868123693656754e-05, "loss": 0.5814, "step": 2343 }, { "epoch": 0.19, "grad_norm": 1.0310284547691086, "learning_rate": 1.8679930013557127e-05, "loss": 0.5941, "step": 2344 }, { "epoch": 0.19, "grad_norm": 0.9342025335462472, "learning_rate": 1.867862248903526e-05, "loss": 0.6058, "step": 2345 }, { "epoch": 0.19, "grad_norm": 1.0046093700308836, "learning_rate": 1.8677314363092555e-05, "loss": 0.5298, "step": 2346 }, { "epoch": 0.19, "grad_norm": 0.9711270887222404, "learning_rate": 1.867600563581966e-05, "loss": 0.5702, "step": 2347 }, { "epoch": 0.19, "grad_norm": 1.077095212889763, "learning_rate": 1.867469630730727e-05, "loss": 0.5995, "step": 2348 }, { "epoch": 0.19, "grad_norm": 0.8931131934883211, "learning_rate": 1.867338637764612e-05, "loss": 0.4927, "step": 2349 }, { "epoch": 0.19, "grad_norm": 1.0369626372647212, "learning_rate": 1.867207584692699e-05, "loss": 0.577, "step": 2350 }, { "epoch": 0.19, "grad_norm": 0.9567874811250195, "learning_rate": 1.867076471524069e-05, "loss": 0.4779, "step": 2351 }, { "epoch": 0.19, "grad_norm": 0.9766997211054103, "learning_rate": 1.866945298267809e-05, "loss": 0.6276, "step": 2352 }, { "epoch": 0.19, "grad_norm": 1.0546535855796144, "learning_rate": 1.866814064933009e-05, "loss": 0.6283, "step": 2353 }, { "epoch": 0.19, "grad_norm": 1.0842469473548701, "learning_rate": 1.8666827715287627e-05, "loss": 0.66, "step": 2354 }, { "epoch": 0.19, "grad_norm": 0.9133052767488332, "learning_rate": 1.8665514180641697e-05, "loss": 0.5781, "step": 2355 }, { "epoch": 0.19, "grad_norm": 0.9991322639797208, "learning_rate": 1.8664200045483314e-05, "loss": 0.5336, "step": 2356 }, { "epoch": 0.19, "grad_norm": 0.9699785492721534, "learning_rate": 1.8662885309903558e-05, "loss": 0.5715, "step": 2357 }, { "epoch": 0.19, "grad_norm": 1.0024836346982164, "learning_rate": 1.8661569973993533e-05, "loss": 0.6319, "step": 2358 }, { "epoch": 0.19, "grad_norm": 1.0298945721796273, "learning_rate": 1.866025403784439e-05, "loss": 0.6255, "step": 2359 }, { "epoch": 0.19, "grad_norm": 0.8974998871243925, "learning_rate": 1.865893750154732e-05, "loss": 0.5178, "step": 2360 }, { "epoch": 0.19, "grad_norm": 1.057894104135965, "learning_rate": 1.8657620365193566e-05, "loss": 0.6258, "step": 2361 }, { "epoch": 0.19, "grad_norm": 0.9877141542623917, "learning_rate": 1.8656302628874402e-05, "loss": 0.6648, "step": 2362 }, { "epoch": 0.19, "grad_norm": 0.9542372663196041, "learning_rate": 1.8654984292681142e-05, "loss": 0.5872, "step": 2363 }, { "epoch": 0.19, "grad_norm": 0.8546229947901854, "learning_rate": 1.8653665356705146e-05, "loss": 0.5864, "step": 2364 }, { "epoch": 0.19, "grad_norm": 0.9260957967141475, "learning_rate": 1.865234582103782e-05, "loss": 0.5882, "step": 2365 }, { "epoch": 0.19, "grad_norm": 1.038963914135987, "learning_rate": 1.86510256857706e-05, "loss": 0.6273, "step": 2366 }, { "epoch": 0.19, "grad_norm": 0.9630029696755222, "learning_rate": 1.8649704950994976e-05, "loss": 0.6044, "step": 2367 }, { "epoch": 0.19, "grad_norm": 1.0408025228163054, "learning_rate": 1.864838361680247e-05, "loss": 0.5721, "step": 2368 }, { "epoch": 0.19, "grad_norm": 0.9901450030781432, "learning_rate": 1.864706168328465e-05, "loss": 0.6231, "step": 2369 }, { "epoch": 0.19, "grad_norm": 1.0128039127196022, "learning_rate": 1.8645739150533123e-05, "loss": 0.6232, "step": 2370 }, { "epoch": 0.19, "grad_norm": 1.035782290703731, "learning_rate": 1.8644416018639547e-05, "loss": 0.6019, "step": 2371 }, { "epoch": 0.19, "grad_norm": 1.0569343629525514, "learning_rate": 1.8643092287695604e-05, "loss": 0.6118, "step": 2372 }, { "epoch": 0.19, "grad_norm": 0.9739875131987521, "learning_rate": 1.8641767957793037e-05, "loss": 0.6424, "step": 2373 }, { "epoch": 0.19, "grad_norm": 0.9480212105192773, "learning_rate": 1.864044302902361e-05, "loss": 0.5794, "step": 2374 }, { "epoch": 0.19, "grad_norm": 0.9633841127891988, "learning_rate": 1.8639117501479143e-05, "loss": 0.5918, "step": 2375 }, { "epoch": 0.19, "grad_norm": 0.8875048063352914, "learning_rate": 1.8637791375251505e-05, "loss": 0.5661, "step": 2376 }, { "epoch": 0.19, "grad_norm": 0.9553926429206897, "learning_rate": 1.863646465043258e-05, "loss": 0.6066, "step": 2377 }, { "epoch": 0.19, "grad_norm": 0.9406156212474246, "learning_rate": 1.8635137327114317e-05, "loss": 0.5694, "step": 2378 }, { "epoch": 0.19, "grad_norm": 1.0082468868263996, "learning_rate": 1.8633809405388697e-05, "loss": 0.584, "step": 2379 }, { "epoch": 0.19, "grad_norm": 0.9697465631492217, "learning_rate": 1.8632480885347744e-05, "loss": 0.636, "step": 2380 }, { "epoch": 0.19, "grad_norm": 0.9103226982286848, "learning_rate": 1.863115176708352e-05, "loss": 0.5912, "step": 2381 }, { "epoch": 0.19, "grad_norm": 0.903505883017083, "learning_rate": 1.8629822050688138e-05, "loss": 0.5725, "step": 2382 }, { "epoch": 0.19, "grad_norm": 0.9399914412587055, "learning_rate": 1.862849173625374e-05, "loss": 0.5703, "step": 2383 }, { "epoch": 0.19, "grad_norm": 0.865161610202597, "learning_rate": 1.862716082387252e-05, "loss": 0.5792, "step": 2384 }, { "epoch": 0.19, "grad_norm": 0.9859677288051738, "learning_rate": 1.8625829313636707e-05, "loss": 0.6196, "step": 2385 }, { "epoch": 0.19, "grad_norm": 0.994656223718024, "learning_rate": 1.862449720563857e-05, "loss": 0.6377, "step": 2386 }, { "epoch": 0.19, "grad_norm": 1.0886207567360782, "learning_rate": 1.862316449997043e-05, "loss": 0.5995, "step": 2387 }, { "epoch": 0.19, "grad_norm": 0.9986334346192072, "learning_rate": 1.862183119672464e-05, "loss": 0.6196, "step": 2388 }, { "epoch": 0.19, "grad_norm": 1.0365462981424516, "learning_rate": 1.862049729599359e-05, "loss": 0.5983, "step": 2389 }, { "epoch": 0.19, "grad_norm": 1.0433892804940201, "learning_rate": 1.8619162797869728e-05, "loss": 0.5875, "step": 2390 }, { "epoch": 0.19, "grad_norm": 0.956003796235037, "learning_rate": 1.861782770244553e-05, "loss": 0.5334, "step": 2391 }, { "epoch": 0.19, "grad_norm": 0.8948968349280684, "learning_rate": 1.8616492009813516e-05, "loss": 0.5317, "step": 2392 }, { "epoch": 0.19, "grad_norm": 0.967022027051535, "learning_rate": 1.8615155720066247e-05, "loss": 0.6288, "step": 2393 }, { "epoch": 0.19, "grad_norm": 0.9790421801306229, "learning_rate": 1.861381883329633e-05, "loss": 0.6143, "step": 2394 }, { "epoch": 0.19, "grad_norm": 1.1236596585523422, "learning_rate": 1.8612481349596406e-05, "loss": 0.6214, "step": 2395 }, { "epoch": 0.19, "grad_norm": 0.946020311289503, "learning_rate": 1.8611143269059165e-05, "loss": 0.5774, "step": 2396 }, { "epoch": 0.19, "grad_norm": 1.0809689030226681, "learning_rate": 1.8609804591777333e-05, "loss": 0.5975, "step": 2397 }, { "epoch": 0.19, "grad_norm": 1.0135643705288075, "learning_rate": 1.860846531784368e-05, "loss": 0.6491, "step": 2398 }, { "epoch": 0.19, "grad_norm": 0.9544748502585023, "learning_rate": 1.8607125447351017e-05, "loss": 0.6125, "step": 2399 }, { "epoch": 0.2, "grad_norm": 0.9163164402222126, "learning_rate": 1.8605784980392193e-05, "loss": 0.5258, "step": 2400 }, { "epoch": 0.2, "grad_norm": 1.14714289345333, "learning_rate": 1.86044439170601e-05, "loss": 0.5343, "step": 2401 }, { "epoch": 0.2, "grad_norm": 0.9996593655533791, "learning_rate": 1.8603102257447686e-05, "loss": 0.6477, "step": 2402 }, { "epoch": 0.2, "grad_norm": 1.0396410331618158, "learning_rate": 1.860176000164791e-05, "loss": 0.5779, "step": 2403 }, { "epoch": 0.2, "grad_norm": 0.9694426587894854, "learning_rate": 1.8600417149753794e-05, "loss": 0.5503, "step": 2404 }, { "epoch": 0.2, "grad_norm": 0.9399594621363393, "learning_rate": 1.85990737018584e-05, "loss": 0.5529, "step": 2405 }, { "epoch": 0.2, "grad_norm": 0.9611673318625494, "learning_rate": 1.8597729658054827e-05, "loss": 0.577, "step": 2406 }, { "epoch": 0.2, "grad_norm": 1.063842825267057, "learning_rate": 1.8596385018436214e-05, "loss": 0.6513, "step": 2407 }, { "epoch": 0.2, "grad_norm": 0.9440600590318581, "learning_rate": 1.8595039783095747e-05, "loss": 0.522, "step": 2408 }, { "epoch": 0.2, "grad_norm": 0.9333040800805044, "learning_rate": 1.859369395212664e-05, "loss": 0.6214, "step": 2409 }, { "epoch": 0.2, "grad_norm": 1.0208532063205993, "learning_rate": 1.859234752562217e-05, "loss": 0.6271, "step": 2410 }, { "epoch": 0.2, "grad_norm": 1.081775506940939, "learning_rate": 1.8591000503675635e-05, "loss": 0.5961, "step": 2411 }, { "epoch": 0.2, "grad_norm": 0.9756559941848068, "learning_rate": 1.8589652886380387e-05, "loss": 0.6023, "step": 2412 }, { "epoch": 0.2, "grad_norm": 1.061469168824822, "learning_rate": 1.8588304673829814e-05, "loss": 0.64, "step": 2413 }, { "epoch": 0.2, "grad_norm": 0.9643883392364556, "learning_rate": 1.8586955866117345e-05, "loss": 0.6009, "step": 2414 }, { "epoch": 0.2, "grad_norm": 0.9622948386380665, "learning_rate": 1.8585606463336448e-05, "loss": 0.5491, "step": 2415 }, { "epoch": 0.2, "grad_norm": 0.9804391313646905, "learning_rate": 1.8584256465580642e-05, "loss": 0.6679, "step": 2416 }, { "epoch": 0.2, "grad_norm": 1.1901230134706908, "learning_rate": 1.8582905872943477e-05, "loss": 0.5767, "step": 2417 }, { "epoch": 0.2, "grad_norm": 1.0298855448133197, "learning_rate": 1.8581554685518543e-05, "loss": 0.5731, "step": 2418 }, { "epoch": 0.2, "grad_norm": 0.9403946452528907, "learning_rate": 1.8580202903399484e-05, "loss": 0.5958, "step": 2419 }, { "epoch": 0.2, "grad_norm": 1.0789858376109014, "learning_rate": 1.8578850526679976e-05, "loss": 0.6047, "step": 2420 }, { "epoch": 0.2, "grad_norm": 0.9846049002431896, "learning_rate": 1.8577497555453735e-05, "loss": 0.5542, "step": 2421 }, { "epoch": 0.2, "grad_norm": 0.9920273561796581, "learning_rate": 1.8576143989814524e-05, "loss": 0.5957, "step": 2422 }, { "epoch": 0.2, "grad_norm": 1.012926254158362, "learning_rate": 1.857478982985614e-05, "loss": 0.5816, "step": 2423 }, { "epoch": 0.2, "grad_norm": 1.1060199873713032, "learning_rate": 1.8573435075672422e-05, "loss": 0.6219, "step": 2424 }, { "epoch": 0.2, "grad_norm": 0.9665465330730769, "learning_rate": 1.8572079727357265e-05, "loss": 0.5141, "step": 2425 }, { "epoch": 0.2, "grad_norm": 1.010619313409472, "learning_rate": 1.8570723785004583e-05, "loss": 0.6223, "step": 2426 }, { "epoch": 0.2, "grad_norm": 1.0701981612275158, "learning_rate": 1.8569367248708343e-05, "loss": 0.6207, "step": 2427 }, { "epoch": 0.2, "grad_norm": 0.968653341581941, "learning_rate": 1.8568010118562556e-05, "loss": 0.6153, "step": 2428 }, { "epoch": 0.2, "grad_norm": 1.0285147099330336, "learning_rate": 1.8566652394661268e-05, "loss": 0.5845, "step": 2429 }, { "epoch": 0.2, "grad_norm": 1.0401009522193112, "learning_rate": 1.856529407709857e-05, "loss": 0.5614, "step": 2430 }, { "epoch": 0.2, "grad_norm": 1.0312486719248375, "learning_rate": 1.8563935165968584e-05, "loss": 0.5535, "step": 2431 }, { "epoch": 0.2, "grad_norm": 0.8939351482825008, "learning_rate": 1.8562575661365493e-05, "loss": 0.5404, "step": 2432 }, { "epoch": 0.2, "grad_norm": 1.0252632002781987, "learning_rate": 1.8561215563383496e-05, "loss": 0.5902, "step": 2433 }, { "epoch": 0.2, "grad_norm": 0.9425198617957646, "learning_rate": 1.855985487211686e-05, "loss": 0.579, "step": 2434 }, { "epoch": 0.2, "grad_norm": 0.9890648746831118, "learning_rate": 1.8558493587659874e-05, "loss": 0.621, "step": 2435 }, { "epoch": 0.2, "grad_norm": 0.9366563149047062, "learning_rate": 1.8557131710106873e-05, "loss": 0.5563, "step": 2436 }, { "epoch": 0.2, "grad_norm": 0.9984953205723499, "learning_rate": 1.8555769239552232e-05, "loss": 0.6493, "step": 2437 }, { "epoch": 0.2, "grad_norm": 0.980140534084269, "learning_rate": 1.8554406176090377e-05, "loss": 0.6212, "step": 2438 }, { "epoch": 0.2, "grad_norm": 1.00668465629207, "learning_rate": 1.8553042519815756e-05, "loss": 0.6299, "step": 2439 }, { "epoch": 0.2, "grad_norm": 0.9874709621095749, "learning_rate": 1.8551678270822878e-05, "loss": 0.6188, "step": 2440 }, { "epoch": 0.2, "grad_norm": 0.9247042108253141, "learning_rate": 1.8550313429206282e-05, "loss": 0.557, "step": 2441 }, { "epoch": 0.2, "grad_norm": 0.9952811181588312, "learning_rate": 1.8548947995060547e-05, "loss": 0.6042, "step": 2442 }, { "epoch": 0.2, "grad_norm": 1.0020526733733572, "learning_rate": 1.85475819684803e-05, "loss": 0.5833, "step": 2443 }, { "epoch": 0.2, "grad_norm": 0.955455293951105, "learning_rate": 1.8546215349560204e-05, "loss": 0.6151, "step": 2444 }, { "epoch": 0.2, "grad_norm": 1.0341207506844388, "learning_rate": 1.8544848138394965e-05, "loss": 0.5803, "step": 2445 }, { "epoch": 0.2, "grad_norm": 1.0264964629842621, "learning_rate": 1.854348033507933e-05, "loss": 0.6286, "step": 2446 }, { "epoch": 0.2, "grad_norm": 0.9302590580265883, "learning_rate": 1.8542111939708086e-05, "loss": 0.5781, "step": 2447 }, { "epoch": 0.2, "grad_norm": 0.8796676362843857, "learning_rate": 1.854074295237606e-05, "loss": 0.5421, "step": 2448 }, { "epoch": 0.2, "grad_norm": 1.004870683626179, "learning_rate": 1.8539373373178126e-05, "loss": 0.5445, "step": 2449 }, { "epoch": 0.2, "grad_norm": 1.0665094713061753, "learning_rate": 1.8538003202209186e-05, "loss": 0.6193, "step": 2450 }, { "epoch": 0.2, "grad_norm": 1.0457962229818203, "learning_rate": 1.8536632439564203e-05, "loss": 0.5028, "step": 2451 }, { "epoch": 0.2, "grad_norm": 0.9640419849222112, "learning_rate": 1.853526108533816e-05, "loss": 0.6291, "step": 2452 }, { "epoch": 0.2, "grad_norm": 1.0268636636017952, "learning_rate": 1.8533889139626096e-05, "loss": 0.5408, "step": 2453 }, { "epoch": 0.2, "grad_norm": 1.0358135887820035, "learning_rate": 1.8532516602523087e-05, "loss": 0.5789, "step": 2454 }, { "epoch": 0.2, "grad_norm": 1.046182089719115, "learning_rate": 1.853114347412424e-05, "loss": 0.5674, "step": 2455 }, { "epoch": 0.2, "grad_norm": 1.126374433619317, "learning_rate": 1.8529769754524724e-05, "loss": 0.6586, "step": 2456 }, { "epoch": 0.2, "grad_norm": 0.9045955142000208, "learning_rate": 1.8528395443819725e-05, "loss": 0.5948, "step": 2457 }, { "epoch": 0.2, "grad_norm": 0.8992249493330345, "learning_rate": 1.8527020542104487e-05, "loss": 0.5861, "step": 2458 }, { "epoch": 0.2, "grad_norm": 1.0028727195091176, "learning_rate": 1.852564504947429e-05, "loss": 0.6158, "step": 2459 }, { "epoch": 0.2, "grad_norm": 1.1317843702334225, "learning_rate": 1.852426896602445e-05, "loss": 0.632, "step": 2460 }, { "epoch": 0.2, "grad_norm": 1.0718645139741845, "learning_rate": 1.8522892291850335e-05, "loss": 0.6324, "step": 2461 }, { "epoch": 0.2, "grad_norm": 1.1507465013420966, "learning_rate": 1.8521515027047344e-05, "loss": 0.5917, "step": 2462 }, { "epoch": 0.2, "grad_norm": 1.0390501368556815, "learning_rate": 1.8520137171710923e-05, "loss": 0.623, "step": 2463 }, { "epoch": 0.2, "grad_norm": 1.0199145522160606, "learning_rate": 1.851875872593655e-05, "loss": 0.6655, "step": 2464 }, { "epoch": 0.2, "grad_norm": 0.9765738987075874, "learning_rate": 1.8517379689819752e-05, "loss": 0.5973, "step": 2465 }, { "epoch": 0.2, "grad_norm": 1.0897129702395667, "learning_rate": 1.85160000634561e-05, "loss": 0.5654, "step": 2466 }, { "epoch": 0.2, "grad_norm": 0.9737301064012777, "learning_rate": 1.8514619846941192e-05, "loss": 0.589, "step": 2467 }, { "epoch": 0.2, "grad_norm": 0.9419077307422697, "learning_rate": 1.851323904037069e-05, "loss": 0.5888, "step": 2468 }, { "epoch": 0.2, "grad_norm": 1.0853730530182204, "learning_rate": 1.8511857643840264e-05, "loss": 0.6198, "step": 2469 }, { "epoch": 0.2, "grad_norm": 0.992302096237278, "learning_rate": 1.8510475657445656e-05, "loss": 0.5962, "step": 2470 }, { "epoch": 0.2, "grad_norm": 1.0140166287761607, "learning_rate": 1.8509093081282636e-05, "loss": 0.6112, "step": 2471 }, { "epoch": 0.2, "grad_norm": 0.8910914912445789, "learning_rate": 1.8507709915447013e-05, "loss": 0.5705, "step": 2472 }, { "epoch": 0.2, "grad_norm": 0.9660251973192866, "learning_rate": 1.8506326160034638e-05, "loss": 0.556, "step": 2473 }, { "epoch": 0.2, "grad_norm": 0.9516083077316758, "learning_rate": 1.8504941815141406e-05, "loss": 0.622, "step": 2474 }, { "epoch": 0.2, "grad_norm": 1.0276900810420124, "learning_rate": 1.850355688086325e-05, "loss": 0.6428, "step": 2475 }, { "epoch": 0.2, "grad_norm": 1.0234009192262177, "learning_rate": 1.8502171357296144e-05, "loss": 0.521, "step": 2476 }, { "epoch": 0.2, "grad_norm": 0.9300947641179861, "learning_rate": 1.8500785244536104e-05, "loss": 0.5487, "step": 2477 }, { "epoch": 0.2, "grad_norm": 0.9803225475297762, "learning_rate": 1.849939854267919e-05, "loss": 0.5837, "step": 2478 }, { "epoch": 0.2, "grad_norm": 0.9718620665209154, "learning_rate": 1.849801125182149e-05, "loss": 0.5954, "step": 2479 }, { "epoch": 0.2, "grad_norm": 0.8568227724968096, "learning_rate": 1.8496623372059152e-05, "loss": 0.5528, "step": 2480 }, { "epoch": 0.2, "grad_norm": 0.9975366955008645, "learning_rate": 1.849523490348835e-05, "loss": 0.6434, "step": 2481 }, { "epoch": 0.2, "grad_norm": 0.9704820831780121, "learning_rate": 1.8493845846205303e-05, "loss": 0.6076, "step": 2482 }, { "epoch": 0.2, "grad_norm": 1.0470017044262758, "learning_rate": 1.8492456200306276e-05, "loss": 0.6127, "step": 2483 }, { "epoch": 0.2, "grad_norm": 1.0002178090901197, "learning_rate": 1.8491065965887568e-05, "loss": 0.5508, "step": 2484 }, { "epoch": 0.2, "grad_norm": 0.9119469336380572, "learning_rate": 1.8489675143045516e-05, "loss": 0.5624, "step": 2485 }, { "epoch": 0.2, "grad_norm": 1.0505339179736148, "learning_rate": 1.8488283731876508e-05, "loss": 0.6058, "step": 2486 }, { "epoch": 0.2, "grad_norm": 0.8875674907841911, "learning_rate": 1.848689173247697e-05, "loss": 0.5268, "step": 2487 }, { "epoch": 0.2, "grad_norm": 0.9865949243859709, "learning_rate": 1.8485499144943358e-05, "loss": 0.5906, "step": 2488 }, { "epoch": 0.2, "grad_norm": 0.9625178791840348, "learning_rate": 1.8484105969372184e-05, "loss": 0.5549, "step": 2489 }, { "epoch": 0.2, "grad_norm": 0.8948390139961174, "learning_rate": 1.8482712205859992e-05, "loss": 0.6188, "step": 2490 }, { "epoch": 0.2, "grad_norm": 1.047076142564619, "learning_rate": 1.848131785450337e-05, "loss": 0.6543, "step": 2491 }, { "epoch": 0.2, "grad_norm": 0.9444702882438062, "learning_rate": 1.8479922915398937e-05, "loss": 0.579, "step": 2492 }, { "epoch": 0.2, "grad_norm": 1.0852998525665014, "learning_rate": 1.8478527388643375e-05, "loss": 0.6392, "step": 2493 }, { "epoch": 0.2, "grad_norm": 0.951577608183831, "learning_rate": 1.8477131274333383e-05, "loss": 0.5678, "step": 2494 }, { "epoch": 0.2, "grad_norm": 0.9536234809843884, "learning_rate": 1.847573457256571e-05, "loss": 0.5663, "step": 2495 }, { "epoch": 0.2, "grad_norm": 1.0085519410123744, "learning_rate": 1.8474337283437155e-05, "loss": 0.6148, "step": 2496 }, { "epoch": 0.2, "grad_norm": 0.9733488355019365, "learning_rate": 1.8472939407044536e-05, "loss": 0.6036, "step": 2497 }, { "epoch": 0.2, "grad_norm": 0.9837751916587119, "learning_rate": 1.847154094348474e-05, "loss": 0.5707, "step": 2498 }, { "epoch": 0.2, "grad_norm": 1.0040076270633222, "learning_rate": 1.847014189285466e-05, "loss": 0.5996, "step": 2499 }, { "epoch": 0.2, "grad_norm": 0.8408621469655635, "learning_rate": 1.8468742255251268e-05, "loss": 0.5432, "step": 2500 }, { "epoch": 0.2, "grad_norm": 1.0111470886873506, "learning_rate": 1.846734203077155e-05, "loss": 0.6538, "step": 2501 }, { "epoch": 0.2, "grad_norm": 0.965137606513294, "learning_rate": 1.8465941219512533e-05, "loss": 0.581, "step": 2502 }, { "epoch": 0.2, "grad_norm": 0.8809664551569435, "learning_rate": 1.8464539821571302e-05, "loss": 0.5743, "step": 2503 }, { "epoch": 0.2, "grad_norm": 1.0831610501744091, "learning_rate": 1.8463137837044973e-05, "loss": 0.5693, "step": 2504 }, { "epoch": 0.2, "grad_norm": 0.9377551417262783, "learning_rate": 1.8461735266030696e-05, "loss": 0.5784, "step": 2505 }, { "epoch": 0.2, "grad_norm": 1.0527879433579985, "learning_rate": 1.8460332108625668e-05, "loss": 0.629, "step": 2506 }, { "epoch": 0.2, "grad_norm": 0.992612826274164, "learning_rate": 1.8458928364927137e-05, "loss": 0.5982, "step": 2507 }, { "epoch": 0.2, "grad_norm": 0.9554499534353595, "learning_rate": 1.8457524035032364e-05, "loss": 0.5667, "step": 2508 }, { "epoch": 0.2, "grad_norm": 1.0322061914645393, "learning_rate": 1.8456119119038683e-05, "loss": 0.6673, "step": 2509 }, { "epoch": 0.2, "grad_norm": 1.012298631254966, "learning_rate": 1.8454713617043448e-05, "loss": 0.6267, "step": 2510 }, { "epoch": 0.2, "grad_norm": 0.9751534686903088, "learning_rate": 1.8453307529144055e-05, "loss": 0.641, "step": 2511 }, { "epoch": 0.2, "grad_norm": 1.0246962915110827, "learning_rate": 1.845190085543795e-05, "loss": 0.6305, "step": 2512 }, { "epoch": 0.2, "grad_norm": 0.9089309094696517, "learning_rate": 1.845049359602261e-05, "loss": 0.5929, "step": 2513 }, { "epoch": 0.2, "grad_norm": 0.8998356265099821, "learning_rate": 1.8449085750995564e-05, "loss": 0.5446, "step": 2514 }, { "epoch": 0.2, "grad_norm": 1.008345723946353, "learning_rate": 1.8447677320454367e-05, "loss": 0.596, "step": 2515 }, { "epoch": 0.2, "grad_norm": 0.9908774036336532, "learning_rate": 1.8446268304496624e-05, "loss": 0.6383, "step": 2516 }, { "epoch": 0.2, "grad_norm": 1.0072737095100066, "learning_rate": 1.8444858703219982e-05, "loss": 0.6442, "step": 2517 }, { "epoch": 0.2, "grad_norm": 0.8652406980423714, "learning_rate": 1.844344851672212e-05, "loss": 0.5321, "step": 2518 }, { "epoch": 0.2, "grad_norm": 0.9981622954590514, "learning_rate": 1.844203774510077e-05, "loss": 0.6602, "step": 2519 }, { "epoch": 0.2, "grad_norm": 0.8713331836320304, "learning_rate": 1.8440626388453686e-05, "loss": 0.5823, "step": 2520 }, { "epoch": 0.2, "grad_norm": 0.9236692999869184, "learning_rate": 1.8439214446878685e-05, "loss": 0.7023, "step": 2521 }, { "epoch": 0.2, "grad_norm": 0.9117951641284442, "learning_rate": 1.8437801920473605e-05, "loss": 0.5547, "step": 2522 }, { "epoch": 0.21, "grad_norm": 0.9483077352466959, "learning_rate": 1.8436388809336338e-05, "loss": 0.561, "step": 2523 }, { "epoch": 0.21, "grad_norm": 0.9665947626085446, "learning_rate": 1.8434975113564804e-05, "loss": 0.6055, "step": 2524 }, { "epoch": 0.21, "grad_norm": 0.9385491506378937, "learning_rate": 1.8433560833256986e-05, "loss": 0.6063, "step": 2525 }, { "epoch": 0.21, "grad_norm": 0.9527023510611712, "learning_rate": 1.8432145968510878e-05, "loss": 0.5277, "step": 2526 }, { "epoch": 0.21, "grad_norm": 0.9662826327001135, "learning_rate": 1.8430730519424532e-05, "loss": 0.5898, "step": 2527 }, { "epoch": 0.21, "grad_norm": 1.034736667191442, "learning_rate": 1.8429314486096042e-05, "loss": 0.5866, "step": 2528 }, { "epoch": 0.21, "grad_norm": 1.0689261718103882, "learning_rate": 1.8427897868623535e-05, "loss": 0.6371, "step": 2529 }, { "epoch": 0.21, "grad_norm": 0.8653291692283432, "learning_rate": 1.8426480667105178e-05, "loss": 0.568, "step": 2530 }, { "epoch": 0.21, "grad_norm": 0.9098599831792978, "learning_rate": 1.842506288163919e-05, "loss": 0.5838, "step": 2531 }, { "epoch": 0.21, "grad_norm": 0.9191263895725162, "learning_rate": 1.8423644512323814e-05, "loss": 0.6342, "step": 2532 }, { "epoch": 0.21, "grad_norm": 0.9569768590266543, "learning_rate": 1.8422225559257345e-05, "loss": 0.6281, "step": 2533 }, { "epoch": 0.21, "grad_norm": 0.8285310527806168, "learning_rate": 1.8420806022538115e-05, "loss": 0.5172, "step": 2534 }, { "epoch": 0.21, "grad_norm": 0.8930960260286901, "learning_rate": 1.8419385902264497e-05, "loss": 0.6172, "step": 2535 }, { "epoch": 0.21, "grad_norm": 0.9269553677227064, "learning_rate": 1.8417965198534907e-05, "loss": 0.6097, "step": 2536 }, { "epoch": 0.21, "grad_norm": 1.0740954878523123, "learning_rate": 1.841654391144779e-05, "loss": 0.6397, "step": 2537 }, { "epoch": 0.21, "grad_norm": 0.9446598393823056, "learning_rate": 1.841512204110165e-05, "loss": 0.5599, "step": 2538 }, { "epoch": 0.21, "grad_norm": 0.9103665763797646, "learning_rate": 1.8413699587595016e-05, "loss": 0.5865, "step": 2539 }, { "epoch": 0.21, "grad_norm": 0.9054825790388551, "learning_rate": 1.841227655102646e-05, "loss": 0.5674, "step": 2540 }, { "epoch": 0.21, "grad_norm": 0.8263852032716875, "learning_rate": 1.8410852931494606e-05, "loss": 0.6027, "step": 2541 }, { "epoch": 0.21, "grad_norm": 1.0279982369799479, "learning_rate": 1.84094287290981e-05, "loss": 0.6128, "step": 2542 }, { "epoch": 0.21, "grad_norm": 0.8627449323576147, "learning_rate": 1.8408003943935643e-05, "loss": 0.5885, "step": 2543 }, { "epoch": 0.21, "grad_norm": 0.9312463352872257, "learning_rate": 1.8406578576105973e-05, "loss": 0.5843, "step": 2544 }, { "epoch": 0.21, "grad_norm": 0.8876688206289071, "learning_rate": 1.8405152625707863e-05, "loss": 0.5971, "step": 2545 }, { "epoch": 0.21, "grad_norm": 0.8334420024858418, "learning_rate": 1.840372609284013e-05, "loss": 0.5071, "step": 2546 }, { "epoch": 0.21, "grad_norm": 1.0547438702977643, "learning_rate": 1.8402298977601636e-05, "loss": 0.6659, "step": 2547 }, { "epoch": 0.21, "grad_norm": 0.9384283089727256, "learning_rate": 1.8400871280091274e-05, "loss": 0.62, "step": 2548 }, { "epoch": 0.21, "grad_norm": 0.8741990456683056, "learning_rate": 1.839944300040798e-05, "loss": 0.5506, "step": 2549 }, { "epoch": 0.21, "grad_norm": 0.9156548395431, "learning_rate": 1.8398014138650742e-05, "loss": 0.575, "step": 2550 }, { "epoch": 0.21, "grad_norm": 0.9491831775479801, "learning_rate": 1.839658469491857e-05, "loss": 0.5716, "step": 2551 }, { "epoch": 0.21, "grad_norm": 0.970076589441962, "learning_rate": 1.839515466931053e-05, "loss": 0.6099, "step": 2552 }, { "epoch": 0.21, "grad_norm": 1.004362696843191, "learning_rate": 1.8393724061925714e-05, "loss": 0.6277, "step": 2553 }, { "epoch": 0.21, "grad_norm": 0.9440776440870434, "learning_rate": 1.839229287286327e-05, "loss": 0.5533, "step": 2554 }, { "epoch": 0.21, "grad_norm": 0.9078265991640199, "learning_rate": 1.839086110222237e-05, "loss": 0.5973, "step": 2555 }, { "epoch": 0.21, "grad_norm": 0.9495101497226277, "learning_rate": 1.8389428750102238e-05, "loss": 0.5827, "step": 2556 }, { "epoch": 0.21, "grad_norm": 0.9223923326893704, "learning_rate": 1.8387995816602137e-05, "loss": 0.5098, "step": 2557 }, { "epoch": 0.21, "grad_norm": 0.8775065736224732, "learning_rate": 1.8386562301821363e-05, "loss": 0.5311, "step": 2558 }, { "epoch": 0.21, "grad_norm": 0.9730830661108568, "learning_rate": 1.8385128205859267e-05, "loss": 0.5869, "step": 2559 }, { "epoch": 0.21, "grad_norm": 0.9741033272488008, "learning_rate": 1.8383693528815218e-05, "loss": 0.5783, "step": 2560 }, { "epoch": 0.21, "grad_norm": 0.9586616755962616, "learning_rate": 1.8382258270788648e-05, "loss": 0.5866, "step": 2561 }, { "epoch": 0.21, "grad_norm": 0.9620923146243326, "learning_rate": 1.8380822431879012e-05, "loss": 0.5727, "step": 2562 }, { "epoch": 0.21, "grad_norm": 0.9470891414477499, "learning_rate": 1.8379386012185813e-05, "loss": 0.5911, "step": 2563 }, { "epoch": 0.21, "grad_norm": 1.1488547373729767, "learning_rate": 1.83779490118086e-05, "loss": 0.626, "step": 2564 }, { "epoch": 0.21, "grad_norm": 1.0020985253821808, "learning_rate": 1.837651143084695e-05, "loss": 0.5889, "step": 2565 }, { "epoch": 0.21, "grad_norm": 0.9519069969151185, "learning_rate": 1.8375073269400488e-05, "loss": 0.6057, "step": 2566 }, { "epoch": 0.21, "grad_norm": 1.040029345268677, "learning_rate": 1.8373634527568877e-05, "loss": 0.4678, "step": 2567 }, { "epoch": 0.21, "grad_norm": 1.0185322739046456, "learning_rate": 1.8372195205451822e-05, "loss": 0.6452, "step": 2568 }, { "epoch": 0.21, "grad_norm": 0.9846593231833829, "learning_rate": 1.8370755303149064e-05, "loss": 0.5689, "step": 2569 }, { "epoch": 0.21, "grad_norm": 1.0532067696810197, "learning_rate": 1.8369314820760386e-05, "loss": 0.6109, "step": 2570 }, { "epoch": 0.21, "grad_norm": 1.062830205061241, "learning_rate": 1.836787375838562e-05, "loss": 0.6192, "step": 2571 }, { "epoch": 0.21, "grad_norm": 0.8932031597560607, "learning_rate": 1.836643211612462e-05, "loss": 0.5726, "step": 2572 }, { "epoch": 0.21, "grad_norm": 0.938840490483992, "learning_rate": 1.8364989894077297e-05, "loss": 0.6245, "step": 2573 }, { "epoch": 0.21, "grad_norm": 0.9301251286808755, "learning_rate": 1.8363547092343593e-05, "loss": 0.5844, "step": 2574 }, { "epoch": 0.21, "grad_norm": 0.9830204831340076, "learning_rate": 1.8362103711023498e-05, "loss": 0.5838, "step": 2575 }, { "epoch": 0.21, "grad_norm": 0.8233563546163213, "learning_rate": 1.836065975021703e-05, "loss": 0.4613, "step": 2576 }, { "epoch": 0.21, "grad_norm": 0.8804645354736946, "learning_rate": 1.835921521002426e-05, "loss": 0.6636, "step": 2577 }, { "epoch": 0.21, "grad_norm": 0.9507404367699335, "learning_rate": 1.8357770090545285e-05, "loss": 0.5366, "step": 2578 }, { "epoch": 0.21, "grad_norm": 3.932122035312336, "learning_rate": 1.835632439188026e-05, "loss": 0.6617, "step": 2579 }, { "epoch": 0.21, "grad_norm": 0.9172867112989296, "learning_rate": 1.8354878114129368e-05, "loss": 0.5361, "step": 2580 }, { "epoch": 0.21, "grad_norm": 0.9350228745493447, "learning_rate": 1.835343125739283e-05, "loss": 0.5781, "step": 2581 }, { "epoch": 0.21, "grad_norm": 1.0337044836734066, "learning_rate": 1.8351983821770915e-05, "loss": 0.5823, "step": 2582 }, { "epoch": 0.21, "grad_norm": 0.9051354658937525, "learning_rate": 1.835053580736393e-05, "loss": 0.5522, "step": 2583 }, { "epoch": 0.21, "grad_norm": 1.081713649725828, "learning_rate": 1.8349087214272222e-05, "loss": 0.6442, "step": 2584 }, { "epoch": 0.21, "grad_norm": 0.986286911076109, "learning_rate": 1.8347638042596177e-05, "loss": 0.5924, "step": 2585 }, { "epoch": 0.21, "grad_norm": 0.9145313347195219, "learning_rate": 1.834618829243622e-05, "loss": 0.5414, "step": 2586 }, { "epoch": 0.21, "grad_norm": 1.033848476051229, "learning_rate": 1.8344737963892813e-05, "loss": 0.6362, "step": 2587 }, { "epoch": 0.21, "grad_norm": 0.9634793666586047, "learning_rate": 1.834328705706647e-05, "loss": 0.5892, "step": 2588 }, { "epoch": 0.21, "grad_norm": 0.8135673361799275, "learning_rate": 1.8341835572057735e-05, "loss": 0.5019, "step": 2589 }, { "epoch": 0.21, "grad_norm": 0.9544914367776864, "learning_rate": 1.834038350896719e-05, "loss": 0.5692, "step": 2590 }, { "epoch": 0.21, "grad_norm": 0.9695356063078714, "learning_rate": 1.833893086789547e-05, "loss": 0.6074, "step": 2591 }, { "epoch": 0.21, "grad_norm": 0.8833866555407887, "learning_rate": 1.8337477648943236e-05, "loss": 0.5801, "step": 2592 }, { "epoch": 0.21, "grad_norm": 1.0225031754148959, "learning_rate": 1.8336023852211197e-05, "loss": 0.5934, "step": 2593 }, { "epoch": 0.21, "grad_norm": 0.9512387406439328, "learning_rate": 1.83345694778001e-05, "loss": 0.6627, "step": 2594 }, { "epoch": 0.21, "grad_norm": 0.9865220706641923, "learning_rate": 1.8333114525810726e-05, "loss": 0.5865, "step": 2595 }, { "epoch": 0.21, "grad_norm": 1.0745395462324694, "learning_rate": 1.833165899634391e-05, "loss": 0.6437, "step": 2596 }, { "epoch": 0.21, "grad_norm": 0.9406556691737249, "learning_rate": 1.8330202889500518e-05, "loss": 0.6085, "step": 2597 }, { "epoch": 0.21, "grad_norm": 0.9666420944891609, "learning_rate": 1.8328746205381453e-05, "loss": 0.5894, "step": 2598 }, { "epoch": 0.21, "grad_norm": 0.9022430872716137, "learning_rate": 1.8327288944087663e-05, "loss": 0.5847, "step": 2599 }, { "epoch": 0.21, "grad_norm": 0.9809080782353568, "learning_rate": 1.8325831105720135e-05, "loss": 0.6222, "step": 2600 }, { "epoch": 0.21, "grad_norm": 0.9673317294393174, "learning_rate": 1.8324372690379896e-05, "loss": 0.5582, "step": 2601 }, { "epoch": 0.21, "grad_norm": 0.9761900184362405, "learning_rate": 1.8322913698168014e-05, "loss": 0.6216, "step": 2602 }, { "epoch": 0.21, "grad_norm": 0.9661851831216987, "learning_rate": 1.8321454129185597e-05, "loss": 0.5772, "step": 2603 }, { "epoch": 0.21, "grad_norm": 0.9906529066084958, "learning_rate": 1.831999398353379e-05, "loss": 0.5657, "step": 2604 }, { "epoch": 0.21, "grad_norm": 1.0410741717992296, "learning_rate": 1.831853326131378e-05, "loss": 0.5796, "step": 2605 }, { "epoch": 0.21, "grad_norm": 1.105172965112674, "learning_rate": 1.831707196262679e-05, "loss": 0.5994, "step": 2606 }, { "epoch": 0.21, "grad_norm": 1.0392581992735035, "learning_rate": 1.8315610087574088e-05, "loss": 0.6008, "step": 2607 }, { "epoch": 0.21, "grad_norm": 1.053043125787598, "learning_rate": 1.831414763625699e-05, "loss": 0.6217, "step": 2608 }, { "epoch": 0.21, "grad_norm": 1.085606036923765, "learning_rate": 1.831268460877683e-05, "loss": 0.5928, "step": 2609 }, { "epoch": 0.21, "grad_norm": 0.9618254873284892, "learning_rate": 1.8311221005235e-05, "loss": 0.566, "step": 2610 }, { "epoch": 0.21, "grad_norm": 0.9879925417829869, "learning_rate": 1.830975682573293e-05, "loss": 0.6077, "step": 2611 }, { "epoch": 0.21, "grad_norm": 0.9013566267896668, "learning_rate": 1.8308292070372084e-05, "loss": 0.6134, "step": 2612 }, { "epoch": 0.21, "grad_norm": 0.9471939550715492, "learning_rate": 1.8306826739253965e-05, "loss": 0.554, "step": 2613 }, { "epoch": 0.21, "grad_norm": 1.0117778301036309, "learning_rate": 1.8305360832480118e-05, "loss": 0.601, "step": 2614 }, { "epoch": 0.21, "grad_norm": 0.8994234802143227, "learning_rate": 1.8303894350152138e-05, "loss": 0.4759, "step": 2615 }, { "epoch": 0.21, "grad_norm": 1.009902132319506, "learning_rate": 1.830242729237164e-05, "loss": 0.6027, "step": 2616 }, { "epoch": 0.21, "grad_norm": 0.9314631626290966, "learning_rate": 1.8300959659240292e-05, "loss": 0.5654, "step": 2617 }, { "epoch": 0.21, "grad_norm": 0.9663152894675163, "learning_rate": 1.829949145085981e-05, "loss": 0.6001, "step": 2618 }, { "epoch": 0.21, "grad_norm": 1.0527759807116104, "learning_rate": 1.829802266733193e-05, "loss": 0.554, "step": 2619 }, { "epoch": 0.21, "grad_norm": 0.9441814027278527, "learning_rate": 1.829655330875844e-05, "loss": 0.5764, "step": 2620 }, { "epoch": 0.21, "grad_norm": 0.9669943416679583, "learning_rate": 1.829508337524116e-05, "loss": 0.5185, "step": 2621 }, { "epoch": 0.21, "grad_norm": 0.9532324404339418, "learning_rate": 1.8293612866881965e-05, "loss": 0.6193, "step": 2622 }, { "epoch": 0.21, "grad_norm": 0.9122909261664486, "learning_rate": 1.8292141783782754e-05, "loss": 0.5344, "step": 2623 }, { "epoch": 0.21, "grad_norm": 0.9184525412283985, "learning_rate": 1.829067012604547e-05, "loss": 0.6079, "step": 2624 }, { "epoch": 0.21, "grad_norm": 1.0400039581612621, "learning_rate": 1.8289197893772103e-05, "loss": 0.6262, "step": 2625 }, { "epoch": 0.21, "grad_norm": 1.0703975227084082, "learning_rate": 1.8287725087064673e-05, "loss": 0.5594, "step": 2626 }, { "epoch": 0.21, "grad_norm": 0.9508531842550776, "learning_rate": 1.8286251706025245e-05, "loss": 0.5872, "step": 2627 }, { "epoch": 0.21, "grad_norm": 1.0260098313678139, "learning_rate": 1.828477775075592e-05, "loss": 0.5755, "step": 2628 }, { "epoch": 0.21, "grad_norm": 0.9910777135295307, "learning_rate": 1.8283303221358854e-05, "loss": 0.6457, "step": 2629 }, { "epoch": 0.21, "grad_norm": 0.9249482186538088, "learning_rate": 1.8281828117936217e-05, "loss": 0.5589, "step": 2630 }, { "epoch": 0.21, "grad_norm": 0.9602553560831182, "learning_rate": 1.8280352440590236e-05, "loss": 0.6608, "step": 2631 }, { "epoch": 0.21, "grad_norm": 1.0260206878932465, "learning_rate": 1.827887618942318e-05, "loss": 0.5475, "step": 2632 }, { "epoch": 0.21, "grad_norm": 1.0142523857260723, "learning_rate": 1.8277399364537345e-05, "loss": 0.6027, "step": 2633 }, { "epoch": 0.21, "grad_norm": 0.9068347442951924, "learning_rate": 1.8275921966035076e-05, "loss": 0.5952, "step": 2634 }, { "epoch": 0.21, "grad_norm": 0.9662920231000809, "learning_rate": 1.8274443994018754e-05, "loss": 0.608, "step": 2635 }, { "epoch": 0.21, "grad_norm": 0.9831243383495013, "learning_rate": 1.8272965448590807e-05, "loss": 0.5982, "step": 2636 }, { "epoch": 0.21, "grad_norm": 1.0072628424109062, "learning_rate": 1.827148632985369e-05, "loss": 0.5552, "step": 2637 }, { "epoch": 0.21, "grad_norm": 0.9561275626706207, "learning_rate": 1.8270006637909907e-05, "loss": 0.5363, "step": 2638 }, { "epoch": 0.21, "grad_norm": 0.9382299009178503, "learning_rate": 1.8268526372862e-05, "loss": 0.61, "step": 2639 }, { "epoch": 0.21, "grad_norm": 1.036330966633798, "learning_rate": 1.8267045534812547e-05, "loss": 0.5729, "step": 2640 }, { "epoch": 0.21, "grad_norm": 1.0591982746515018, "learning_rate": 1.8265564123864174e-05, "loss": 0.5942, "step": 2641 }, { "epoch": 0.21, "grad_norm": 1.0280607493711271, "learning_rate": 1.826408214011954e-05, "loss": 0.6189, "step": 2642 }, { "epoch": 0.21, "grad_norm": 0.9694580927695198, "learning_rate": 1.826259958368134e-05, "loss": 0.5646, "step": 2643 }, { "epoch": 0.21, "grad_norm": 0.9148632149342607, "learning_rate": 1.826111645465232e-05, "loss": 0.5231, "step": 2644 }, { "epoch": 0.21, "grad_norm": 1.0528796357090346, "learning_rate": 1.8259632753135257e-05, "loss": 0.6494, "step": 2645 }, { "epoch": 0.22, "grad_norm": 0.8927493697457675, "learning_rate": 1.825814847923297e-05, "loss": 0.605, "step": 2646 }, { "epoch": 0.22, "grad_norm": 0.9746220924690185, "learning_rate": 1.825666363304832e-05, "loss": 0.5596, "step": 2647 }, { "epoch": 0.22, "grad_norm": 1.104502002989997, "learning_rate": 1.82551782146842e-05, "loss": 0.5217, "step": 2648 }, { "epoch": 0.22, "grad_norm": 0.9522011695494644, "learning_rate": 1.825369222424356e-05, "loss": 0.5457, "step": 2649 }, { "epoch": 0.22, "grad_norm": 0.9974276810353772, "learning_rate": 1.8252205661829364e-05, "loss": 0.6035, "step": 2650 }, { "epoch": 0.22, "grad_norm": 0.8536988050512456, "learning_rate": 1.8250718527544636e-05, "loss": 0.561, "step": 2651 }, { "epoch": 0.22, "grad_norm": 1.014452009587213, "learning_rate": 1.824923082149243e-05, "loss": 0.5939, "step": 2652 }, { "epoch": 0.22, "grad_norm": 1.0286657701497812, "learning_rate": 1.824774254377585e-05, "loss": 0.6005, "step": 2653 }, { "epoch": 0.22, "grad_norm": 0.8738102658423137, "learning_rate": 1.8246253694498024e-05, "loss": 0.5788, "step": 2654 }, { "epoch": 0.22, "grad_norm": 0.9880182661920288, "learning_rate": 1.8244764273762133e-05, "loss": 0.632, "step": 2655 }, { "epoch": 0.22, "grad_norm": 0.8950089128352056, "learning_rate": 1.8243274281671392e-05, "loss": 0.5739, "step": 2656 }, { "epoch": 0.22, "grad_norm": 0.904422421895005, "learning_rate": 1.824178371832905e-05, "loss": 0.603, "step": 2657 }, { "epoch": 0.22, "grad_norm": 0.9873253816907256, "learning_rate": 1.824029258383841e-05, "loss": 0.5945, "step": 2658 }, { "epoch": 0.22, "grad_norm": 1.1132475701850797, "learning_rate": 1.8238800878302804e-05, "loss": 0.6235, "step": 2659 }, { "epoch": 0.22, "grad_norm": 1.1216405221969532, "learning_rate": 1.8237308601825604e-05, "loss": 0.5969, "step": 2660 }, { "epoch": 0.22, "grad_norm": 1.0266161431706164, "learning_rate": 1.8235815754510227e-05, "loss": 0.519, "step": 2661 }, { "epoch": 0.22, "grad_norm": 0.9705185677800771, "learning_rate": 1.823432233646012e-05, "loss": 0.5804, "step": 2662 }, { "epoch": 0.22, "grad_norm": 0.9579417158184198, "learning_rate": 1.8232828347778778e-05, "loss": 0.675, "step": 2663 }, { "epoch": 0.22, "grad_norm": 0.999957902123899, "learning_rate": 1.8231333788569737e-05, "loss": 0.5755, "step": 2664 }, { "epoch": 0.22, "grad_norm": 0.996708421437918, "learning_rate": 1.8229838658936566e-05, "loss": 0.6197, "step": 2665 }, { "epoch": 0.22, "grad_norm": 0.8644195494317504, "learning_rate": 1.8228342958982874e-05, "loss": 0.5294, "step": 2666 }, { "epoch": 0.22, "grad_norm": 0.9366598979644085, "learning_rate": 1.8226846688812314e-05, "loss": 0.6038, "step": 2667 }, { "epoch": 0.22, "grad_norm": 0.8966307476883493, "learning_rate": 1.8225349848528574e-05, "loss": 0.5667, "step": 2668 }, { "epoch": 0.22, "grad_norm": 1.0839760777914489, "learning_rate": 1.822385243823539e-05, "loss": 0.6408, "step": 2669 }, { "epoch": 0.22, "grad_norm": 0.8627457075836641, "learning_rate": 1.8222354458036523e-05, "loss": 0.5858, "step": 2670 }, { "epoch": 0.22, "grad_norm": 0.9506321931490626, "learning_rate": 1.8220855908035783e-05, "loss": 0.5744, "step": 2671 }, { "epoch": 0.22, "grad_norm": 0.8980323219691284, "learning_rate": 1.8219356788337027e-05, "loss": 0.588, "step": 2672 }, { "epoch": 0.22, "grad_norm": 0.9614101469490036, "learning_rate": 1.8217857099044128e-05, "loss": 0.5708, "step": 2673 }, { "epoch": 0.22, "grad_norm": 0.9732045139725454, "learning_rate": 1.8216356840261028e-05, "loss": 0.6515, "step": 2674 }, { "epoch": 0.22, "grad_norm": 0.9802795442791357, "learning_rate": 1.8214856012091684e-05, "loss": 0.634, "step": 2675 }, { "epoch": 0.22, "grad_norm": 0.9608737963408859, "learning_rate": 1.8213354614640105e-05, "loss": 0.6326, "step": 2676 }, { "epoch": 0.22, "grad_norm": 0.9140455271956307, "learning_rate": 1.8211852648010338e-05, "loss": 0.6295, "step": 2677 }, { "epoch": 0.22, "grad_norm": 0.9696423375138519, "learning_rate": 1.8210350112306466e-05, "loss": 0.5804, "step": 2678 }, { "epoch": 0.22, "grad_norm": 0.8442936328053501, "learning_rate": 1.8208847007632613e-05, "loss": 0.5968, "step": 2679 }, { "epoch": 0.22, "grad_norm": 0.9691958315893788, "learning_rate": 1.8207343334092944e-05, "loss": 0.6059, "step": 2680 }, { "epoch": 0.22, "grad_norm": 0.882560444021095, "learning_rate": 1.820583909179166e-05, "loss": 0.5787, "step": 2681 }, { "epoch": 0.22, "grad_norm": 1.036455415023574, "learning_rate": 1.8204334280833005e-05, "loss": 0.5966, "step": 2682 }, { "epoch": 0.22, "grad_norm": 1.0057190615511393, "learning_rate": 1.8202828901321265e-05, "loss": 0.6027, "step": 2683 }, { "epoch": 0.22, "grad_norm": 1.020603493240722, "learning_rate": 1.8201322953360758e-05, "loss": 0.5888, "step": 2684 }, { "epoch": 0.22, "grad_norm": 0.9371289052205584, "learning_rate": 1.8199816437055843e-05, "loss": 0.5487, "step": 2685 }, { "epoch": 0.22, "grad_norm": 1.0491755776905296, "learning_rate": 1.8198309352510924e-05, "loss": 0.6072, "step": 2686 }, { "epoch": 0.22, "grad_norm": 0.8660714216318303, "learning_rate": 1.8196801699830437e-05, "loss": 0.6076, "step": 2687 }, { "epoch": 0.22, "grad_norm": 0.9862596290015903, "learning_rate": 1.8195293479118863e-05, "loss": 0.6277, "step": 2688 }, { "epoch": 0.22, "grad_norm": 1.0437377771063885, "learning_rate": 1.819378469048072e-05, "loss": 0.5915, "step": 2689 }, { "epoch": 0.22, "grad_norm": 0.9439824865798379, "learning_rate": 1.8192275334020565e-05, "loss": 0.5828, "step": 2690 }, { "epoch": 0.22, "grad_norm": 1.0225402759808864, "learning_rate": 1.8190765409842997e-05, "loss": 0.6008, "step": 2691 }, { "epoch": 0.22, "grad_norm": 0.9958776066891274, "learning_rate": 1.818925491805265e-05, "loss": 0.5392, "step": 2692 }, { "epoch": 0.22, "grad_norm": 0.9329456271958139, "learning_rate": 1.8187743858754206e-05, "loss": 0.6347, "step": 2693 }, { "epoch": 0.22, "grad_norm": 0.9906642944077385, "learning_rate": 1.818623223205237e-05, "loss": 0.6293, "step": 2694 }, { "epoch": 0.22, "grad_norm": 0.940956884211602, "learning_rate": 1.8184720038051905e-05, "loss": 0.5864, "step": 2695 }, { "epoch": 0.22, "grad_norm": 0.9430103458543017, "learning_rate": 1.8183207276857596e-05, "loss": 0.5725, "step": 2696 }, { "epoch": 0.22, "grad_norm": 0.9750056603913353, "learning_rate": 1.8181693948574285e-05, "loss": 0.6003, "step": 2697 }, { "epoch": 0.22, "grad_norm": 0.936158816403639, "learning_rate": 1.818018005330684e-05, "loss": 0.5746, "step": 2698 }, { "epoch": 0.22, "grad_norm": 1.0049390990235507, "learning_rate": 1.817866559116017e-05, "loss": 0.6166, "step": 2699 }, { "epoch": 0.22, "grad_norm": 0.9979185376293706, "learning_rate": 1.8177150562239236e-05, "loss": 0.5793, "step": 2700 }, { "epoch": 0.22, "grad_norm": 1.0203902925197001, "learning_rate": 1.8175634966649015e-05, "loss": 0.5755, "step": 2701 }, { "epoch": 0.22, "grad_norm": 0.9142012279899165, "learning_rate": 1.8174118804494548e-05, "loss": 0.5524, "step": 2702 }, { "epoch": 0.22, "grad_norm": 1.0338022613929347, "learning_rate": 1.8172602075880893e-05, "loss": 0.6003, "step": 2703 }, { "epoch": 0.22, "grad_norm": 1.0650332972018386, "learning_rate": 1.8171084780913165e-05, "loss": 0.5795, "step": 2704 }, { "epoch": 0.22, "grad_norm": 0.9413835902741449, "learning_rate": 1.8169566919696512e-05, "loss": 0.6581, "step": 2705 }, { "epoch": 0.22, "grad_norm": 0.9301794167686281, "learning_rate": 1.8168048492336116e-05, "loss": 0.4732, "step": 2706 }, { "epoch": 0.22, "grad_norm": 0.9971512309322477, "learning_rate": 1.81665294989372e-05, "loss": 0.5338, "step": 2707 }, { "epoch": 0.22, "grad_norm": 0.9626054977861396, "learning_rate": 1.8165009939605037e-05, "loss": 0.575, "step": 2708 }, { "epoch": 0.22, "grad_norm": 0.9686254044597287, "learning_rate": 1.816348981444493e-05, "loss": 0.6684, "step": 2709 }, { "epoch": 0.22, "grad_norm": 0.817821965462608, "learning_rate": 1.816196912356222e-05, "loss": 0.5142, "step": 2710 }, { "epoch": 0.22, "grad_norm": 0.9906203998916217, "learning_rate": 1.8160447867062286e-05, "loss": 0.6254, "step": 2711 }, { "epoch": 0.22, "grad_norm": 1.0145536544226452, "learning_rate": 1.8158926045050553e-05, "loss": 0.6246, "step": 2712 }, { "epoch": 0.22, "grad_norm": 0.8877817517453693, "learning_rate": 1.8157403657632485e-05, "loss": 0.5587, "step": 2713 }, { "epoch": 0.22, "grad_norm": 0.9169924356992728, "learning_rate": 1.8155880704913577e-05, "loss": 0.5368, "step": 2714 }, { "epoch": 0.22, "grad_norm": 0.7941288099049386, "learning_rate": 1.8154357186999368e-05, "loss": 0.5503, "step": 2715 }, { "epoch": 0.22, "grad_norm": 0.9409239063898954, "learning_rate": 1.8152833103995443e-05, "loss": 0.6428, "step": 2716 }, { "epoch": 0.22, "grad_norm": 0.9536368923356101, "learning_rate": 1.8151308456007416e-05, "loss": 0.6105, "step": 2717 }, { "epoch": 0.22, "grad_norm": 0.9242821682058803, "learning_rate": 1.814978324314094e-05, "loss": 0.5262, "step": 2718 }, { "epoch": 0.22, "grad_norm": 0.8923447734732718, "learning_rate": 1.8148257465501718e-05, "loss": 0.5968, "step": 2719 }, { "epoch": 0.22, "grad_norm": 1.0346232613906077, "learning_rate": 1.814673112319548e-05, "loss": 0.6651, "step": 2720 }, { "epoch": 0.22, "grad_norm": 0.8967208895801116, "learning_rate": 1.8145204216327998e-05, "loss": 0.5616, "step": 2721 }, { "epoch": 0.22, "grad_norm": 0.9789527728091927, "learning_rate": 1.8143676745005093e-05, "loss": 0.5564, "step": 2722 }, { "epoch": 0.22, "grad_norm": 0.9301175229831795, "learning_rate": 1.814214870933261e-05, "loss": 0.5414, "step": 2723 }, { "epoch": 0.22, "grad_norm": 0.9279945540168463, "learning_rate": 1.8140620109416445e-05, "loss": 0.5891, "step": 2724 }, { "epoch": 0.22, "grad_norm": 0.9583822050583957, "learning_rate": 1.8139090945362525e-05, "loss": 0.6199, "step": 2725 }, { "epoch": 0.22, "grad_norm": 0.963837056971241, "learning_rate": 1.8137561217276823e-05, "loss": 0.5775, "step": 2726 }, { "epoch": 0.22, "grad_norm": 0.9193008965088793, "learning_rate": 1.8136030925265347e-05, "loss": 0.6201, "step": 2727 }, { "epoch": 0.22, "grad_norm": 0.9595001172487877, "learning_rate": 1.8134500069434144e-05, "loss": 0.5929, "step": 2728 }, { "epoch": 0.22, "grad_norm": 0.8662619104759794, "learning_rate": 1.81329686498893e-05, "loss": 0.5501, "step": 2729 }, { "epoch": 0.22, "grad_norm": 0.972240880760536, "learning_rate": 1.8131436666736945e-05, "loss": 0.5691, "step": 2730 }, { "epoch": 0.22, "grad_norm": 0.9615177720700632, "learning_rate": 1.8129904120083243e-05, "loss": 0.5681, "step": 2731 }, { "epoch": 0.22, "grad_norm": 0.9229624236657873, "learning_rate": 1.8128371010034394e-05, "loss": 0.555, "step": 2732 }, { "epoch": 0.22, "grad_norm": 0.8890895179572698, "learning_rate": 1.8126837336696645e-05, "loss": 0.5756, "step": 2733 }, { "epoch": 0.22, "grad_norm": 0.9067723189735549, "learning_rate": 1.8125303100176275e-05, "loss": 0.5928, "step": 2734 }, { "epoch": 0.22, "grad_norm": 0.9007052662584437, "learning_rate": 1.812376830057961e-05, "loss": 0.6127, "step": 2735 }, { "epoch": 0.22, "grad_norm": 0.9437405590348689, "learning_rate": 1.812223293801301e-05, "loss": 0.6169, "step": 2736 }, { "epoch": 0.22, "grad_norm": 0.9646217361783188, "learning_rate": 1.8120697012582863e-05, "loss": 0.5928, "step": 2737 }, { "epoch": 0.22, "grad_norm": 0.9682755714496285, "learning_rate": 1.8119160524395622e-05, "loss": 0.6008, "step": 2738 }, { "epoch": 0.22, "grad_norm": 0.999055226351776, "learning_rate": 1.8117623473557758e-05, "loss": 0.6105, "step": 2739 }, { "epoch": 0.22, "grad_norm": 0.9724026619999333, "learning_rate": 1.8116085860175788e-05, "loss": 0.5347, "step": 2740 }, { "epoch": 0.22, "grad_norm": 1.0081974793755035, "learning_rate": 1.8114547684356264e-05, "loss": 0.5553, "step": 2741 }, { "epoch": 0.22, "grad_norm": 1.0293986423368018, "learning_rate": 1.8113008946205787e-05, "loss": 0.6537, "step": 2742 }, { "epoch": 0.22, "grad_norm": 0.9711812270768454, "learning_rate": 1.8111469645830983e-05, "loss": 0.5677, "step": 2743 }, { "epoch": 0.22, "grad_norm": 0.8855189900145181, "learning_rate": 1.810992978333853e-05, "loss": 0.5616, "step": 2744 }, { "epoch": 0.22, "grad_norm": 0.9973233513608902, "learning_rate": 1.8108389358835135e-05, "loss": 0.5849, "step": 2745 }, { "epoch": 0.22, "grad_norm": 0.919178257973744, "learning_rate": 1.810684837242755e-05, "loss": 0.6009, "step": 2746 }, { "epoch": 0.22, "grad_norm": 1.2479235413643202, "learning_rate": 1.810530682422256e-05, "loss": 0.612, "step": 2747 }, { "epoch": 0.22, "grad_norm": 1.0580936700356294, "learning_rate": 1.8103764714327004e-05, "loss": 0.5988, "step": 2748 }, { "epoch": 0.22, "grad_norm": 1.016274277429732, "learning_rate": 1.8102222042847735e-05, "loss": 0.5768, "step": 2749 }, { "epoch": 0.22, "grad_norm": 1.0392968760654928, "learning_rate": 1.8100678809891668e-05, "loss": 0.6248, "step": 2750 }, { "epoch": 0.22, "grad_norm": 0.9281711927515615, "learning_rate": 1.8099135015565745e-05, "loss": 0.58, "step": 2751 }, { "epoch": 0.22, "grad_norm": 0.893762521635862, "learning_rate": 1.8097590659976946e-05, "loss": 0.5739, "step": 2752 }, { "epoch": 0.22, "grad_norm": 0.8673575882784395, "learning_rate": 1.8096045743232303e-05, "loss": 0.5668, "step": 2753 }, { "epoch": 0.22, "grad_norm": 0.9189763319220341, "learning_rate": 1.8094500265438866e-05, "loss": 0.5817, "step": 2754 }, { "epoch": 0.22, "grad_norm": 0.8740099428626087, "learning_rate": 1.8092954226703742e-05, "loss": 0.5745, "step": 2755 }, { "epoch": 0.22, "grad_norm": 1.0177735915260575, "learning_rate": 1.8091407627134067e-05, "loss": 0.6324, "step": 2756 }, { "epoch": 0.22, "grad_norm": 0.8755765204054051, "learning_rate": 1.8089860466837023e-05, "loss": 0.591, "step": 2757 }, { "epoch": 0.22, "grad_norm": 0.9844508074811542, "learning_rate": 1.8088312745919823e-05, "loss": 0.6863, "step": 2758 }, { "epoch": 0.22, "grad_norm": 0.9178781550779036, "learning_rate": 1.8086764464489723e-05, "loss": 0.5362, "step": 2759 }, { "epoch": 0.22, "grad_norm": 0.8438837000342646, "learning_rate": 1.8085215622654023e-05, "loss": 0.5502, "step": 2760 }, { "epoch": 0.22, "grad_norm": 1.027995036155377, "learning_rate": 1.8083666220520045e-05, "loss": 0.6827, "step": 2761 }, { "epoch": 0.22, "grad_norm": 1.0072627378618417, "learning_rate": 1.8082116258195173e-05, "loss": 0.609, "step": 2762 }, { "epoch": 0.22, "grad_norm": 1.0404432889174489, "learning_rate": 1.8080565735786813e-05, "loss": 0.5405, "step": 2763 }, { "epoch": 0.22, "grad_norm": 0.960224909807464, "learning_rate": 1.8079014653402414e-05, "loss": 0.5984, "step": 2764 }, { "epoch": 0.22, "grad_norm": 0.8957444706981957, "learning_rate": 1.8077463011149464e-05, "loss": 0.5948, "step": 2765 }, { "epoch": 0.22, "grad_norm": 0.8909499775341096, "learning_rate": 1.80759108091355e-05, "loss": 0.5498, "step": 2766 }, { "epoch": 0.22, "grad_norm": 0.9552071296438046, "learning_rate": 1.807435804746807e-05, "loss": 0.5756, "step": 2767 }, { "epoch": 0.22, "grad_norm": 1.0301741647540563, "learning_rate": 1.8072804726254792e-05, "loss": 0.6114, "step": 2768 }, { "epoch": 0.23, "grad_norm": 0.9687543183786282, "learning_rate": 1.807125084560331e-05, "loss": 0.5714, "step": 2769 }, { "epoch": 0.23, "grad_norm": 0.8673956813094961, "learning_rate": 1.80696964056213e-05, "loss": 0.572, "step": 2770 }, { "epoch": 0.23, "grad_norm": 0.8849403306028029, "learning_rate": 1.8068141406416487e-05, "loss": 0.5341, "step": 2771 }, { "epoch": 0.23, "grad_norm": 0.9106492006599914, "learning_rate": 1.8066585848096637e-05, "loss": 0.6088, "step": 2772 }, { "epoch": 0.23, "grad_norm": 0.9742053568515937, "learning_rate": 1.8065029730769534e-05, "loss": 0.6063, "step": 2773 }, { "epoch": 0.23, "grad_norm": 0.9903274449807491, "learning_rate": 1.806347305454303e-05, "loss": 0.5429, "step": 2774 }, { "epoch": 0.23, "grad_norm": 0.9181460398525245, "learning_rate": 1.8061915819524995e-05, "loss": 0.5536, "step": 2775 }, { "epoch": 0.23, "grad_norm": 1.0098508357388716, "learning_rate": 1.8060358025823344e-05, "loss": 0.6372, "step": 2776 }, { "epoch": 0.23, "grad_norm": 0.9213271798021149, "learning_rate": 1.8058799673546032e-05, "loss": 0.5686, "step": 2777 }, { "epoch": 0.23, "grad_norm": 0.9557490562689361, "learning_rate": 1.805724076280105e-05, "loss": 0.5554, "step": 2778 }, { "epoch": 0.23, "grad_norm": 1.0084940234973645, "learning_rate": 1.805568129369643e-05, "loss": 0.665, "step": 2779 }, { "epoch": 0.23, "grad_norm": 0.9126637077556393, "learning_rate": 1.805412126634024e-05, "loss": 0.5613, "step": 2780 }, { "epoch": 0.23, "grad_norm": 0.9577382335710649, "learning_rate": 1.8052560680840595e-05, "loss": 0.6643, "step": 2781 }, { "epoch": 0.23, "grad_norm": 1.0449965708611912, "learning_rate": 1.8050999537305634e-05, "loss": 0.5253, "step": 2782 }, { "epoch": 0.23, "grad_norm": 0.9938981486681303, "learning_rate": 1.8049437835843545e-05, "loss": 0.5189, "step": 2783 }, { "epoch": 0.23, "grad_norm": 0.9753113641821504, "learning_rate": 1.8047875576562556e-05, "loss": 0.6217, "step": 2784 }, { "epoch": 0.23, "grad_norm": 0.9553770251097057, "learning_rate": 1.8046312759570924e-05, "loss": 0.5544, "step": 2785 }, { "epoch": 0.23, "grad_norm": 0.9919726546590415, "learning_rate": 1.804474938497696e-05, "loss": 0.6089, "step": 2786 }, { "epoch": 0.23, "grad_norm": 0.9808570279080948, "learning_rate": 1.8043185452888997e-05, "loss": 0.5294, "step": 2787 }, { "epoch": 0.23, "grad_norm": 1.0518621525037852, "learning_rate": 1.8041620963415418e-05, "loss": 0.5776, "step": 2788 }, { "epoch": 0.23, "grad_norm": 1.1022635177347593, "learning_rate": 1.804005591666464e-05, "loss": 0.6217, "step": 2789 }, { "epoch": 0.23, "grad_norm": 0.9429626058610409, "learning_rate": 1.8038490312745116e-05, "loss": 0.6275, "step": 2790 }, { "epoch": 0.23, "grad_norm": 0.930398967716542, "learning_rate": 1.8036924151765345e-05, "loss": 0.5554, "step": 2791 }, { "epoch": 0.23, "grad_norm": 0.887025266457902, "learning_rate": 1.803535743383386e-05, "loss": 0.5701, "step": 2792 }, { "epoch": 0.23, "grad_norm": 0.9402776215475784, "learning_rate": 1.8033790159059224e-05, "loss": 0.6054, "step": 2793 }, { "epoch": 0.23, "grad_norm": 1.0326325235514582, "learning_rate": 1.8032222327550063e-05, "loss": 0.6075, "step": 2794 }, { "epoch": 0.23, "grad_norm": 0.9775021711373921, "learning_rate": 1.803065393941502e-05, "loss": 0.5606, "step": 2795 }, { "epoch": 0.23, "grad_norm": 0.9157898288063075, "learning_rate": 1.802908499476278e-05, "loss": 0.5374, "step": 2796 }, { "epoch": 0.23, "grad_norm": 0.8959173642295047, "learning_rate": 1.8027515493702075e-05, "loss": 0.487, "step": 2797 }, { "epoch": 0.23, "grad_norm": 0.9516732846513793, "learning_rate": 1.8025945436341663e-05, "loss": 0.5696, "step": 2798 }, { "epoch": 0.23, "grad_norm": 0.9225055633764292, "learning_rate": 1.8024374822790355e-05, "loss": 0.5725, "step": 2799 }, { "epoch": 0.23, "grad_norm": 0.9139056227761806, "learning_rate": 1.8022803653156983e-05, "loss": 0.5483, "step": 2800 }, { "epoch": 0.23, "grad_norm": 0.9438666489422416, "learning_rate": 1.802123192755044e-05, "loss": 0.5711, "step": 2801 }, { "epoch": 0.23, "grad_norm": 0.994929728112408, "learning_rate": 1.8019659646079636e-05, "loss": 0.5899, "step": 2802 }, { "epoch": 0.23, "grad_norm": 0.9894663256980869, "learning_rate": 1.8018086808853535e-05, "loss": 0.6254, "step": 2803 }, { "epoch": 0.23, "grad_norm": 1.0052732190279563, "learning_rate": 1.8016513415981128e-05, "loss": 0.5144, "step": 2804 }, { "epoch": 0.23, "grad_norm": 1.0144845417320454, "learning_rate": 1.801493946757145e-05, "loss": 0.5835, "step": 2805 }, { "epoch": 0.23, "grad_norm": 1.0252420292101474, "learning_rate": 1.801336496373358e-05, "loss": 0.5536, "step": 2806 }, { "epoch": 0.23, "grad_norm": 0.8971116968992333, "learning_rate": 1.8011789904576624e-05, "loss": 0.5506, "step": 2807 }, { "epoch": 0.23, "grad_norm": 0.8970316281501216, "learning_rate": 1.8010214290209735e-05, "loss": 0.6684, "step": 2808 }, { "epoch": 0.23, "grad_norm": 0.9915460509347839, "learning_rate": 1.80086381207421e-05, "loss": 0.6473, "step": 2809 }, { "epoch": 0.23, "grad_norm": 0.9679950558017996, "learning_rate": 1.8007061396282944e-05, "loss": 0.5946, "step": 2810 }, { "epoch": 0.23, "grad_norm": 0.9297039333204747, "learning_rate": 1.800548411694154e-05, "loss": 0.5475, "step": 2811 }, { "epoch": 0.23, "grad_norm": 0.8258902225497208, "learning_rate": 1.8003906282827186e-05, "loss": 0.5406, "step": 2812 }, { "epoch": 0.23, "grad_norm": 0.7445951245744602, "learning_rate": 1.8002327894049225e-05, "loss": 0.5067, "step": 2813 }, { "epoch": 0.23, "grad_norm": 0.8985526515890513, "learning_rate": 1.800074895071704e-05, "loss": 0.579, "step": 2814 }, { "epoch": 0.23, "grad_norm": 1.0104156103033566, "learning_rate": 1.799916945294005e-05, "loss": 0.6262, "step": 2815 }, { "epoch": 0.23, "grad_norm": 1.0213484101551884, "learning_rate": 1.7997589400827712e-05, "loss": 0.6357, "step": 2816 }, { "epoch": 0.23, "grad_norm": 1.0685631612624997, "learning_rate": 1.799600879448952e-05, "loss": 0.5975, "step": 2817 }, { "epoch": 0.23, "grad_norm": 0.9967993589986436, "learning_rate": 1.7994427634035016e-05, "loss": 0.5412, "step": 2818 }, { "epoch": 0.23, "grad_norm": 0.9180677224628448, "learning_rate": 1.799284591957376e-05, "loss": 0.5347, "step": 2819 }, { "epoch": 0.23, "grad_norm": 0.9747429803370978, "learning_rate": 1.799126365121538e-05, "loss": 0.6033, "step": 2820 }, { "epoch": 0.23, "grad_norm": 0.9163259787114536, "learning_rate": 1.798968082906951e-05, "loss": 0.5792, "step": 2821 }, { "epoch": 0.23, "grad_norm": 0.9211695025727105, "learning_rate": 1.798809745324585e-05, "loss": 0.5119, "step": 2822 }, { "epoch": 0.23, "grad_norm": 0.8676048846449058, "learning_rate": 1.798651352385412e-05, "loss": 0.5284, "step": 2823 }, { "epoch": 0.23, "grad_norm": 0.9015393452040311, "learning_rate": 1.798492904100409e-05, "loss": 0.4627, "step": 2824 }, { "epoch": 0.23, "grad_norm": 0.9387971158929248, "learning_rate": 1.7983344004805555e-05, "loss": 0.6055, "step": 2825 }, { "epoch": 0.23, "grad_norm": 0.8471959162297319, "learning_rate": 1.7981758415368365e-05, "loss": 0.5055, "step": 2826 }, { "epoch": 0.23, "grad_norm": 0.988588879294854, "learning_rate": 1.7980172272802398e-05, "loss": 0.5528, "step": 2827 }, { "epoch": 0.23, "grad_norm": 0.9326776260735447, "learning_rate": 1.7978585577217568e-05, "loss": 0.5196, "step": 2828 }, { "epoch": 0.23, "grad_norm": 0.8966730811382917, "learning_rate": 1.7976998328723833e-05, "loss": 0.5468, "step": 2829 }, { "epoch": 0.23, "grad_norm": 0.8490311284377158, "learning_rate": 1.7975410527431195e-05, "loss": 0.5447, "step": 2830 }, { "epoch": 0.23, "grad_norm": 0.8813316008855758, "learning_rate": 1.797382217344968e-05, "loss": 0.4973, "step": 2831 }, { "epoch": 0.23, "grad_norm": 1.089434215108231, "learning_rate": 1.7972233266889356e-05, "loss": 0.6135, "step": 2832 }, { "epoch": 0.23, "grad_norm": 0.9659587366357253, "learning_rate": 1.797064380786034e-05, "loss": 0.5099, "step": 2833 }, { "epoch": 0.23, "grad_norm": 0.9218666717431676, "learning_rate": 1.7969053796472783e-05, "loss": 0.5723, "step": 2834 }, { "epoch": 0.23, "grad_norm": 0.9277864344934234, "learning_rate": 1.796746323283686e-05, "loss": 0.5605, "step": 2835 }, { "epoch": 0.23, "grad_norm": 0.9776799469352457, "learning_rate": 1.7965872117062806e-05, "loss": 0.5077, "step": 2836 }, { "epoch": 0.23, "grad_norm": 0.9318298969039761, "learning_rate": 1.796428044926088e-05, "loss": 0.6138, "step": 2837 }, { "epoch": 0.23, "grad_norm": 0.9075948043353205, "learning_rate": 1.7962688229541382e-05, "loss": 0.5704, "step": 2838 }, { "epoch": 0.23, "grad_norm": 0.9306145438415679, "learning_rate": 1.7961095458014655e-05, "loss": 0.6038, "step": 2839 }, { "epoch": 0.23, "grad_norm": 0.8884000710243785, "learning_rate": 1.795950213479107e-05, "loss": 0.5054, "step": 2840 }, { "epoch": 0.23, "grad_norm": 0.8569607622945014, "learning_rate": 1.795790825998105e-05, "loss": 0.5184, "step": 2841 }, { "epoch": 0.23, "grad_norm": 0.9791079837049262, "learning_rate": 1.7956313833695046e-05, "loss": 0.5739, "step": 2842 }, { "epoch": 0.23, "grad_norm": 0.9588531159827303, "learning_rate": 1.795471885604355e-05, "loss": 0.5694, "step": 2843 }, { "epoch": 0.23, "grad_norm": 1.0297830187508645, "learning_rate": 1.7953123327137093e-05, "loss": 0.6476, "step": 2844 }, { "epoch": 0.23, "grad_norm": 0.9288421640089412, "learning_rate": 1.7951527247086243e-05, "loss": 0.5351, "step": 2845 }, { "epoch": 0.23, "grad_norm": 0.9602300038067393, "learning_rate": 1.794993061600161e-05, "loss": 0.596, "step": 2846 }, { "epoch": 0.23, "grad_norm": 0.9075377212248501, "learning_rate": 1.7948333433993833e-05, "loss": 0.5389, "step": 2847 }, { "epoch": 0.23, "grad_norm": 0.995373021003706, "learning_rate": 1.7946735701173604e-05, "loss": 0.6393, "step": 2848 }, { "epoch": 0.23, "grad_norm": 0.9164049272640322, "learning_rate": 1.7945137417651638e-05, "loss": 0.6477, "step": 2849 }, { "epoch": 0.23, "grad_norm": 0.9477115032759619, "learning_rate": 1.7943538583538696e-05, "loss": 0.6198, "step": 2850 }, { "epoch": 0.23, "grad_norm": 0.90846432248897, "learning_rate": 1.7941939198945574e-05, "loss": 0.52, "step": 2851 }, { "epoch": 0.23, "grad_norm": 0.9174204792362805, "learning_rate": 1.7940339263983112e-05, "loss": 0.5913, "step": 2852 }, { "epoch": 0.23, "grad_norm": 0.9792924779362212, "learning_rate": 1.7938738778762182e-05, "loss": 0.5644, "step": 2853 }, { "epoch": 0.23, "grad_norm": 0.9005528079322241, "learning_rate": 1.7937137743393695e-05, "loss": 0.5532, "step": 2854 }, { "epoch": 0.23, "grad_norm": 0.9674891956317448, "learning_rate": 1.7935536157988605e-05, "loss": 0.5931, "step": 2855 }, { "epoch": 0.23, "grad_norm": 1.0392442539723934, "learning_rate": 1.79339340226579e-05, "loss": 0.5858, "step": 2856 }, { "epoch": 0.23, "grad_norm": 0.9010473425300569, "learning_rate": 1.79323313375126e-05, "loss": 0.5254, "step": 2857 }, { "epoch": 0.23, "grad_norm": 0.908624006675908, "learning_rate": 1.7930728102663775e-05, "loss": 0.5139, "step": 2858 }, { "epoch": 0.23, "grad_norm": 1.0041814177937274, "learning_rate": 1.792912431822253e-05, "loss": 0.591, "step": 2859 }, { "epoch": 0.23, "grad_norm": 0.8424943230616456, "learning_rate": 1.79275199843e-05, "loss": 0.5597, "step": 2860 }, { "epoch": 0.23, "grad_norm": 0.9827303241823689, "learning_rate": 1.7925915101007366e-05, "loss": 0.5951, "step": 2861 }, { "epoch": 0.23, "grad_norm": 0.9747069232699177, "learning_rate": 1.792430966845585e-05, "loss": 0.5873, "step": 2862 }, { "epoch": 0.23, "grad_norm": 1.0293522334126957, "learning_rate": 1.7922703686756697e-05, "loss": 0.6401, "step": 2863 }, { "epoch": 0.23, "grad_norm": 1.011927611023172, "learning_rate": 1.792109715602121e-05, "loss": 0.5973, "step": 2864 }, { "epoch": 0.23, "grad_norm": 0.9273786221962907, "learning_rate": 1.7919490076360714e-05, "loss": 0.6032, "step": 2865 }, { "epoch": 0.23, "grad_norm": 1.093940323247069, "learning_rate": 1.7917882447886585e-05, "loss": 0.6462, "step": 2866 }, { "epoch": 0.23, "grad_norm": 0.890019833154301, "learning_rate": 1.7916274270710218e-05, "loss": 0.6138, "step": 2867 }, { "epoch": 0.23, "grad_norm": 0.9769921244269482, "learning_rate": 1.7914665544943072e-05, "loss": 0.6104, "step": 2868 }, { "epoch": 0.23, "grad_norm": 0.9485632509377442, "learning_rate": 1.791305627069662e-05, "loss": 0.6246, "step": 2869 }, { "epoch": 0.23, "grad_norm": 1.0018934058623412, "learning_rate": 1.791144644808239e-05, "loss": 0.6317, "step": 2870 }, { "epoch": 0.23, "grad_norm": 0.9875398922579854, "learning_rate": 1.7909836077211936e-05, "loss": 0.6921, "step": 2871 }, { "epoch": 0.23, "grad_norm": 0.8942446600706555, "learning_rate": 1.790822515819686e-05, "loss": 0.5872, "step": 2872 }, { "epoch": 0.23, "grad_norm": 0.9604170179855209, "learning_rate": 1.7906613691148796e-05, "loss": 0.5829, "step": 2873 }, { "epoch": 0.23, "grad_norm": 0.8942090505558472, "learning_rate": 1.7905001676179414e-05, "loss": 0.5426, "step": 2874 }, { "epoch": 0.23, "grad_norm": 1.0487502187628917, "learning_rate": 1.7903389113400427e-05, "loss": 0.5667, "step": 2875 }, { "epoch": 0.23, "grad_norm": 0.959663943230379, "learning_rate": 1.790177600292359e-05, "loss": 0.572, "step": 2876 }, { "epoch": 0.23, "grad_norm": 0.9917149402947344, "learning_rate": 1.790016234486068e-05, "loss": 0.5906, "step": 2877 }, { "epoch": 0.23, "grad_norm": 0.937418302425181, "learning_rate": 1.789854813932353e-05, "loss": 0.5202, "step": 2878 }, { "epoch": 0.23, "grad_norm": 0.7861228868319374, "learning_rate": 1.7896933386423998e-05, "loss": 0.4862, "step": 2879 }, { "epoch": 0.23, "grad_norm": 1.0141607091776144, "learning_rate": 1.7895318086273986e-05, "loss": 0.5141, "step": 2880 }, { "epoch": 0.23, "grad_norm": 1.0273508303059105, "learning_rate": 1.7893702238985433e-05, "loss": 0.5389, "step": 2881 }, { "epoch": 0.23, "grad_norm": 0.9399823550683523, "learning_rate": 1.7892085844670318e-05, "loss": 0.5422, "step": 2882 }, { "epoch": 0.23, "grad_norm": 0.9049524508006479, "learning_rate": 1.7890468903440656e-05, "loss": 0.5719, "step": 2883 }, { "epoch": 0.23, "grad_norm": 0.9956943417222794, "learning_rate": 1.7888851415408495e-05, "loss": 0.5924, "step": 2884 }, { "epoch": 0.23, "grad_norm": 0.883186802287362, "learning_rate": 1.788723338068593e-05, "loss": 0.5544, "step": 2885 }, { "epoch": 0.23, "grad_norm": 0.9971492225911742, "learning_rate": 1.7885614799385086e-05, "loss": 0.6009, "step": 2886 }, { "epoch": 0.23, "grad_norm": 0.8355205506640969, "learning_rate": 1.7883995671618133e-05, "loss": 0.5485, "step": 2887 }, { "epoch": 0.23, "grad_norm": 0.9074735160025519, "learning_rate": 1.7882375997497273e-05, "loss": 0.574, "step": 2888 }, { "epoch": 0.23, "grad_norm": 1.0213868782570381, "learning_rate": 1.788075577713475e-05, "loss": 0.5807, "step": 2889 }, { "epoch": 0.23, "grad_norm": 0.944087464291218, "learning_rate": 1.7879135010642836e-05, "loss": 0.6015, "step": 2890 }, { "epoch": 0.23, "grad_norm": 0.9115513698895477, "learning_rate": 1.787751369813386e-05, "loss": 0.573, "step": 2891 }, { "epoch": 0.24, "grad_norm": 0.937984094940414, "learning_rate": 1.787589183972017e-05, "loss": 0.6324, "step": 2892 }, { "epoch": 0.24, "grad_norm": 1.1049632286846252, "learning_rate": 1.787426943551416e-05, "loss": 0.6556, "step": 2893 }, { "epoch": 0.24, "grad_norm": 0.8892815904572252, "learning_rate": 1.7872646485628266e-05, "loss": 0.6171, "step": 2894 }, { "epoch": 0.24, "grad_norm": 0.890347163387591, "learning_rate": 1.7871022990174948e-05, "loss": 0.5293, "step": 2895 }, { "epoch": 0.24, "grad_norm": 0.8997722667227449, "learning_rate": 1.7869398949266724e-05, "loss": 0.565, "step": 2896 }, { "epoch": 0.24, "grad_norm": 0.9329374331298593, "learning_rate": 1.786777436301613e-05, "loss": 0.5565, "step": 2897 }, { "epoch": 0.24, "grad_norm": 0.9746227868782539, "learning_rate": 1.7866149231535754e-05, "loss": 0.5568, "step": 2898 }, { "epoch": 0.24, "grad_norm": 0.8773883378129755, "learning_rate": 1.786452355493821e-05, "loss": 0.5434, "step": 2899 }, { "epoch": 0.24, "grad_norm": 0.9631742937919332, "learning_rate": 1.7862897333336162e-05, "loss": 0.6124, "step": 2900 }, { "epoch": 0.24, "grad_norm": 0.9779033162507079, "learning_rate": 1.78612705668423e-05, "loss": 0.6597, "step": 2901 }, { "epoch": 0.24, "grad_norm": 0.9886719845976905, "learning_rate": 1.7859643255569364e-05, "loss": 0.5886, "step": 2902 }, { "epoch": 0.24, "grad_norm": 0.9661761439948027, "learning_rate": 1.785801539963012e-05, "loss": 0.5725, "step": 2903 }, { "epoch": 0.24, "grad_norm": 0.9641311236560048, "learning_rate": 1.785638699913738e-05, "loss": 0.5953, "step": 2904 }, { "epoch": 0.24, "grad_norm": 0.9824821479543113, "learning_rate": 1.785475805420399e-05, "loss": 0.6451, "step": 2905 }, { "epoch": 0.24, "grad_norm": 0.9574330850350337, "learning_rate": 1.7853128564942834e-05, "loss": 0.591, "step": 2906 }, { "epoch": 0.24, "grad_norm": 1.002233316642695, "learning_rate": 1.7851498531466833e-05, "loss": 0.6056, "step": 2907 }, { "epoch": 0.24, "grad_norm": 1.0215148904505245, "learning_rate": 1.784986795388895e-05, "loss": 0.6765, "step": 2908 }, { "epoch": 0.24, "grad_norm": 0.976072703279664, "learning_rate": 1.7848236832322175e-05, "loss": 0.5366, "step": 2909 }, { "epoch": 0.24, "grad_norm": 1.0272588509160951, "learning_rate": 1.7846605166879555e-05, "loss": 0.667, "step": 2910 }, { "epoch": 0.24, "grad_norm": 0.9826121519395441, "learning_rate": 1.7844972957674156e-05, "loss": 0.5788, "step": 2911 }, { "epoch": 0.24, "grad_norm": 0.978849061408577, "learning_rate": 1.7843340204819087e-05, "loss": 0.6177, "step": 2912 }, { "epoch": 0.24, "grad_norm": 1.0479357254213726, "learning_rate": 1.78417069084275e-05, "loss": 0.5245, "step": 2913 }, { "epoch": 0.24, "grad_norm": 0.9221183355541194, "learning_rate": 1.784007306861258e-05, "loss": 0.623, "step": 2914 }, { "epoch": 0.24, "grad_norm": 0.9128945043954562, "learning_rate": 1.783843868548755e-05, "loss": 0.5235, "step": 2915 }, { "epoch": 0.24, "grad_norm": 0.8916286639889864, "learning_rate": 1.7836803759165673e-05, "loss": 0.5629, "step": 2916 }, { "epoch": 0.24, "grad_norm": 1.0138599166669626, "learning_rate": 1.7835168289760248e-05, "loss": 0.6256, "step": 2917 }, { "epoch": 0.24, "grad_norm": 0.9382854752966578, "learning_rate": 1.7833532277384607e-05, "loss": 0.561, "step": 2918 }, { "epoch": 0.24, "grad_norm": 0.8932203928499848, "learning_rate": 1.783189572215213e-05, "loss": 0.5669, "step": 2919 }, { "epoch": 0.24, "grad_norm": 0.9635695529223409, "learning_rate": 1.7830258624176224e-05, "loss": 0.6151, "step": 2920 }, { "epoch": 0.24, "grad_norm": 0.8869233883388015, "learning_rate": 1.782862098357034e-05, "loss": 0.5696, "step": 2921 }, { "epoch": 0.24, "grad_norm": 1.0316281872406312, "learning_rate": 1.782698280044797e-05, "loss": 0.6051, "step": 2922 }, { "epoch": 0.24, "grad_norm": 1.0455886074412064, "learning_rate": 1.7825344074922633e-05, "loss": 0.6081, "step": 2923 }, { "epoch": 0.24, "grad_norm": 0.9910576483300347, "learning_rate": 1.782370480710789e-05, "loss": 0.6417, "step": 2924 }, { "epoch": 0.24, "grad_norm": 0.9651470637576416, "learning_rate": 1.7822064997117348e-05, "loss": 0.5922, "step": 2925 }, { "epoch": 0.24, "grad_norm": 0.9345754499780848, "learning_rate": 1.7820424645064635e-05, "loss": 0.513, "step": 2926 }, { "epoch": 0.24, "grad_norm": 0.9903327113291599, "learning_rate": 1.7818783751063433e-05, "loss": 0.6042, "step": 2927 }, { "epoch": 0.24, "grad_norm": 0.9698448430291365, "learning_rate": 1.7817142315227452e-05, "loss": 0.6555, "step": 2928 }, { "epoch": 0.24, "grad_norm": 0.9449001506430976, "learning_rate": 1.7815500337670442e-05, "loss": 0.5328, "step": 2929 }, { "epoch": 0.24, "grad_norm": 0.9499642731886148, "learning_rate": 1.7813857818506194e-05, "loss": 0.5382, "step": 2930 }, { "epoch": 0.24, "grad_norm": 0.9114911296436924, "learning_rate": 1.7812214757848523e-05, "loss": 0.5718, "step": 2931 }, { "epoch": 0.24, "grad_norm": 1.0555112155422826, "learning_rate": 1.7810571155811307e-05, "loss": 0.6417, "step": 2932 }, { "epoch": 0.24, "grad_norm": 1.0012259749389651, "learning_rate": 1.7808927012508436e-05, "loss": 0.5963, "step": 2933 }, { "epoch": 0.24, "grad_norm": 0.8588757511049373, "learning_rate": 1.7807282328053847e-05, "loss": 0.5725, "step": 2934 }, { "epoch": 0.24, "grad_norm": 0.8962264917282546, "learning_rate": 1.7805637102561516e-05, "loss": 0.6398, "step": 2935 }, { "epoch": 0.24, "grad_norm": 1.0126953458804542, "learning_rate": 1.7803991336145462e-05, "loss": 0.5541, "step": 2936 }, { "epoch": 0.24, "grad_norm": 0.9485077698206339, "learning_rate": 1.7802345028919728e-05, "loss": 0.585, "step": 2937 }, { "epoch": 0.24, "grad_norm": 0.9528169390915887, "learning_rate": 1.7800698180998406e-05, "loss": 0.6129, "step": 2938 }, { "epoch": 0.24, "grad_norm": 0.9279286190560277, "learning_rate": 1.7799050792495617e-05, "loss": 0.5792, "step": 2939 }, { "epoch": 0.24, "grad_norm": 0.907201226645678, "learning_rate": 1.7797402863525528e-05, "loss": 0.5317, "step": 2940 }, { "epoch": 0.24, "grad_norm": 0.9794055147062374, "learning_rate": 1.7795754394202334e-05, "loss": 0.5912, "step": 2941 }, { "epoch": 0.24, "grad_norm": 1.0109889092334905, "learning_rate": 1.7794105384640277e-05, "loss": 0.6199, "step": 2942 }, { "epoch": 0.24, "grad_norm": 1.0500004928260396, "learning_rate": 1.779245583495363e-05, "loss": 0.5977, "step": 2943 }, { "epoch": 0.24, "grad_norm": 0.9722339821250309, "learning_rate": 1.7790805745256703e-05, "loss": 0.5688, "step": 2944 }, { "epoch": 0.24, "grad_norm": 0.9618036445187186, "learning_rate": 1.7789155115663853e-05, "loss": 0.5664, "step": 2945 }, { "epoch": 0.24, "grad_norm": 0.8426791654221744, "learning_rate": 1.778750394628946e-05, "loss": 0.5497, "step": 2946 }, { "epoch": 0.24, "grad_norm": 0.9612700706138941, "learning_rate": 1.7785852237247952e-05, "loss": 0.5915, "step": 2947 }, { "epoch": 0.24, "grad_norm": 0.9261565886562699, "learning_rate": 1.778419998865379e-05, "loss": 0.5658, "step": 2948 }, { "epoch": 0.24, "grad_norm": 0.9728962618141286, "learning_rate": 1.7782547200621475e-05, "loss": 0.5725, "step": 2949 }, { "epoch": 0.24, "grad_norm": 1.045860367370037, "learning_rate": 1.7780893873265536e-05, "loss": 0.6454, "step": 2950 }, { "epoch": 0.24, "grad_norm": 0.9788455225264904, "learning_rate": 1.777924000670056e-05, "loss": 0.6038, "step": 2951 }, { "epoch": 0.24, "grad_norm": 1.0945387612010085, "learning_rate": 1.777758560104115e-05, "loss": 0.6038, "step": 2952 }, { "epoch": 0.24, "grad_norm": 0.8957453512244588, "learning_rate": 1.777593065640195e-05, "loss": 0.5951, "step": 2953 }, { "epoch": 0.24, "grad_norm": 0.9979474133002743, "learning_rate": 1.777427517289766e-05, "loss": 0.589, "step": 2954 }, { "epoch": 0.24, "grad_norm": 0.9429662984163877, "learning_rate": 1.7772619150642996e-05, "loss": 0.5365, "step": 2955 }, { "epoch": 0.24, "grad_norm": 0.9358474973512778, "learning_rate": 1.777096258975272e-05, "loss": 0.6543, "step": 2956 }, { "epoch": 0.24, "grad_norm": 0.9096719871801495, "learning_rate": 1.7769305490341623e-05, "loss": 0.592, "step": 2957 }, { "epoch": 0.24, "grad_norm": 0.9537438553085594, "learning_rate": 1.776764785252455e-05, "loss": 0.5968, "step": 2958 }, { "epoch": 0.24, "grad_norm": 0.9563472444119449, "learning_rate": 1.7765989676416374e-05, "loss": 0.5488, "step": 2959 }, { "epoch": 0.24, "grad_norm": 0.9289889239231146, "learning_rate": 1.7764330962132e-05, "loss": 0.6065, "step": 2960 }, { "epoch": 0.24, "grad_norm": 0.9496563907877692, "learning_rate": 1.7762671709786375e-05, "loss": 0.5092, "step": 2961 }, { "epoch": 0.24, "grad_norm": 0.9243170842255678, "learning_rate": 1.776101191949449e-05, "loss": 0.591, "step": 2962 }, { "epoch": 0.24, "grad_norm": 1.0710754976090913, "learning_rate": 1.775935159137136e-05, "loss": 0.635, "step": 2963 }, { "epoch": 0.24, "grad_norm": 0.9780586967963613, "learning_rate": 1.7757690725532048e-05, "loss": 0.5785, "step": 2964 }, { "epoch": 0.24, "grad_norm": 0.9906049593399756, "learning_rate": 1.7756029322091647e-05, "loss": 0.5637, "step": 2965 }, { "epoch": 0.24, "grad_norm": 0.91575458677578, "learning_rate": 1.7754367381165298e-05, "loss": 0.5285, "step": 2966 }, { "epoch": 0.24, "grad_norm": 0.977131199479803, "learning_rate": 1.7752704902868164e-05, "loss": 0.6075, "step": 2967 }, { "epoch": 0.24, "grad_norm": 0.9857150899066561, "learning_rate": 1.775104188731546e-05, "loss": 0.6261, "step": 2968 }, { "epoch": 0.24, "grad_norm": 0.9576292313368313, "learning_rate": 1.774937833462243e-05, "loss": 0.5617, "step": 2969 }, { "epoch": 0.24, "grad_norm": 0.8445171511314254, "learning_rate": 1.7747714244904348e-05, "loss": 0.5282, "step": 2970 }, { "epoch": 0.24, "grad_norm": 0.9229718629347127, "learning_rate": 1.7746049618276545e-05, "loss": 0.5479, "step": 2971 }, { "epoch": 0.24, "grad_norm": 0.8690650163482897, "learning_rate": 1.7744384454854377e-05, "loss": 0.5541, "step": 2972 }, { "epoch": 0.24, "grad_norm": 0.9648710618900104, "learning_rate": 1.7742718754753232e-05, "loss": 0.6267, "step": 2973 }, { "epoch": 0.24, "grad_norm": 0.9128519455250741, "learning_rate": 1.774105251808855e-05, "loss": 0.5086, "step": 2974 }, { "epoch": 0.24, "grad_norm": 0.903480230508501, "learning_rate": 1.7739385744975788e-05, "loss": 0.5827, "step": 2975 }, { "epoch": 0.24, "grad_norm": 0.9501921693907465, "learning_rate": 1.773771843553046e-05, "loss": 0.5899, "step": 2976 }, { "epoch": 0.24, "grad_norm": 0.9375234127105827, "learning_rate": 1.773605058986811e-05, "loss": 0.5068, "step": 2977 }, { "epoch": 0.24, "grad_norm": 1.0022123910340575, "learning_rate": 1.7734382208104314e-05, "loss": 0.6204, "step": 2978 }, { "epoch": 0.24, "grad_norm": 0.9271276387355356, "learning_rate": 1.7732713290354694e-05, "loss": 0.5993, "step": 2979 }, { "epoch": 0.24, "grad_norm": 0.8412067356706011, "learning_rate": 1.77310438367349e-05, "loss": 0.5356, "step": 2980 }, { "epoch": 0.24, "grad_norm": 0.8963614258194879, "learning_rate": 1.772937384736063e-05, "loss": 0.5543, "step": 2981 }, { "epoch": 0.24, "grad_norm": 0.941153800669109, "learning_rate": 1.77277033223476e-05, "loss": 0.5908, "step": 2982 }, { "epoch": 0.24, "grad_norm": 0.8840202297399563, "learning_rate": 1.772603226181159e-05, "loss": 0.6028, "step": 2983 }, { "epoch": 0.24, "grad_norm": 0.9729775167227305, "learning_rate": 1.7724360665868395e-05, "loss": 0.6125, "step": 2984 }, { "epoch": 0.24, "grad_norm": 0.8596207995883371, "learning_rate": 1.772268853463386e-05, "loss": 0.5613, "step": 2985 }, { "epoch": 0.24, "grad_norm": 1.0696073709578997, "learning_rate": 1.7721015868223858e-05, "loss": 0.596, "step": 2986 }, { "epoch": 0.24, "grad_norm": 0.8680112938013765, "learning_rate": 1.7719342666754307e-05, "loss": 0.5516, "step": 2987 }, { "epoch": 0.24, "grad_norm": 0.9696281293778938, "learning_rate": 1.7717668930341152e-05, "loss": 0.6295, "step": 2988 }, { "epoch": 0.24, "grad_norm": 0.9755273934994584, "learning_rate": 1.771599465910039e-05, "loss": 0.6291, "step": 2989 }, { "epoch": 0.24, "grad_norm": 0.9345644481453044, "learning_rate": 1.771431985314804e-05, "loss": 0.5621, "step": 2990 }, { "epoch": 0.24, "grad_norm": 0.9037245373504185, "learning_rate": 1.7712644512600163e-05, "loss": 0.5962, "step": 2991 }, { "epoch": 0.24, "grad_norm": 1.0638475033986836, "learning_rate": 1.7710968637572866e-05, "loss": 0.6574, "step": 2992 }, { "epoch": 0.24, "grad_norm": 0.8344440667470702, "learning_rate": 1.770929222818228e-05, "loss": 0.4977, "step": 2993 }, { "epoch": 0.24, "grad_norm": 0.8894745605787729, "learning_rate": 1.7707615284544585e-05, "loss": 0.5996, "step": 2994 }, { "epoch": 0.24, "grad_norm": 0.9326281101476411, "learning_rate": 1.7705937806775986e-05, "loss": 0.5731, "step": 2995 }, { "epoch": 0.24, "grad_norm": 0.8695247751218795, "learning_rate": 1.7704259794992734e-05, "loss": 0.5895, "step": 2996 }, { "epoch": 0.24, "grad_norm": 1.0317406351652776, "learning_rate": 1.7702581249311107e-05, "loss": 0.6543, "step": 2997 }, { "epoch": 0.24, "grad_norm": 0.8077967565584185, "learning_rate": 1.7700902169847434e-05, "loss": 0.518, "step": 2998 }, { "epoch": 0.24, "grad_norm": 1.003810213369707, "learning_rate": 1.769922255671807e-05, "loss": 0.5289, "step": 2999 }, { "epoch": 0.24, "grad_norm": 0.9748601712071312, "learning_rate": 1.7697542410039413e-05, "loss": 0.5974, "step": 3000 }, { "epoch": 0.24, "grad_norm": 0.9220205485487152, "learning_rate": 1.7695861729927896e-05, "loss": 0.5275, "step": 3001 }, { "epoch": 0.24, "grad_norm": 0.9939522843169762, "learning_rate": 1.7694180516499986e-05, "loss": 0.6003, "step": 3002 }, { "epoch": 0.24, "grad_norm": 0.9431839294240236, "learning_rate": 1.769249876987219e-05, "loss": 0.529, "step": 3003 }, { "epoch": 0.24, "grad_norm": 0.9163038128226331, "learning_rate": 1.7690816490161054e-05, "loss": 0.5529, "step": 3004 }, { "epoch": 0.24, "grad_norm": 0.9612779979723355, "learning_rate": 1.768913367748316e-05, "loss": 0.5566, "step": 3005 }, { "epoch": 0.24, "grad_norm": 0.9193225717062538, "learning_rate": 1.7687450331955115e-05, "loss": 0.5653, "step": 3006 }, { "epoch": 0.24, "grad_norm": 0.8985009069505546, "learning_rate": 1.7685766453693584e-05, "loss": 0.5281, "step": 3007 }, { "epoch": 0.24, "grad_norm": 0.9995819562166177, "learning_rate": 1.7684082042815255e-05, "loss": 0.5755, "step": 3008 }, { "epoch": 0.24, "grad_norm": 1.0488660971736616, "learning_rate": 1.768239709943686e-05, "loss": 0.6405, "step": 3009 }, { "epoch": 0.24, "grad_norm": 1.0060682914657895, "learning_rate": 1.7680711623675155e-05, "loss": 0.6043, "step": 3010 }, { "epoch": 0.24, "grad_norm": 0.9685403613457841, "learning_rate": 1.767902561564695e-05, "loss": 0.5787, "step": 3011 }, { "epoch": 0.24, "grad_norm": 0.8615980271016578, "learning_rate": 1.767733907546908e-05, "loss": 0.5722, "step": 3012 }, { "epoch": 0.24, "grad_norm": 0.9853089580676793, "learning_rate": 1.7675652003258427e-05, "loss": 0.6401, "step": 3013 }, { "epoch": 0.24, "grad_norm": 1.0187436271337882, "learning_rate": 1.7673964399131895e-05, "loss": 0.5397, "step": 3014 }, { "epoch": 0.25, "grad_norm": 1.034056628820441, "learning_rate": 1.7672276263206433e-05, "loss": 0.5924, "step": 3015 }, { "epoch": 0.25, "grad_norm": 0.8746264193882949, "learning_rate": 1.7670587595599034e-05, "loss": 0.4671, "step": 3016 }, { "epoch": 0.25, "grad_norm": 1.0132891832260866, "learning_rate": 1.7668898396426717e-05, "loss": 0.5868, "step": 3017 }, { "epoch": 0.25, "grad_norm": 0.8973560235274429, "learning_rate": 1.766720866580655e-05, "loss": 0.5662, "step": 3018 }, { "epoch": 0.25, "grad_norm": 0.9830770810376366, "learning_rate": 1.7665518403855614e-05, "loss": 0.6166, "step": 3019 }, { "epoch": 0.25, "grad_norm": 0.9692899867659497, "learning_rate": 1.766382761069106e-05, "loss": 0.6223, "step": 3020 }, { "epoch": 0.25, "grad_norm": 0.9607598785885566, "learning_rate": 1.7662136286430046e-05, "loss": 0.6089, "step": 3021 }, { "epoch": 0.25, "grad_norm": 0.9226827698337999, "learning_rate": 1.766044443118978e-05, "loss": 0.5055, "step": 3022 }, { "epoch": 0.25, "grad_norm": 1.020928123282797, "learning_rate": 1.7658752045087516e-05, "loss": 0.6092, "step": 3023 }, { "epoch": 0.25, "grad_norm": 0.996536739973853, "learning_rate": 1.7657059128240526e-05, "loss": 0.5707, "step": 3024 }, { "epoch": 0.25, "grad_norm": 1.0197047633927752, "learning_rate": 1.765536568076613e-05, "loss": 0.6489, "step": 3025 }, { "epoch": 0.25, "grad_norm": 0.9591301041949271, "learning_rate": 1.7653671702781685e-05, "loss": 0.5411, "step": 3026 }, { "epoch": 0.25, "grad_norm": 0.8719742546833583, "learning_rate": 1.7651977194404578e-05, "loss": 0.5787, "step": 3027 }, { "epoch": 0.25, "grad_norm": 1.046589072837998, "learning_rate": 1.765028215575224e-05, "loss": 0.5943, "step": 3028 }, { "epoch": 0.25, "grad_norm": 0.93558659226997, "learning_rate": 1.7648586586942134e-05, "loss": 0.5325, "step": 3029 }, { "epoch": 0.25, "grad_norm": 1.0084710284795635, "learning_rate": 1.764689048809176e-05, "loss": 0.5347, "step": 3030 }, { "epoch": 0.25, "grad_norm": 0.9808260064257344, "learning_rate": 1.7645193859318658e-05, "loss": 0.614, "step": 3031 }, { "epoch": 0.25, "grad_norm": 0.9556969370434116, "learning_rate": 1.7643496700740407e-05, "loss": 0.6143, "step": 3032 }, { "epoch": 0.25, "grad_norm": 0.9066448474711181, "learning_rate": 1.7641799012474608e-05, "loss": 0.5614, "step": 3033 }, { "epoch": 0.25, "grad_norm": 0.936506703901719, "learning_rate": 1.764010079463892e-05, "loss": 0.5505, "step": 3034 }, { "epoch": 0.25, "grad_norm": 0.9555289651987778, "learning_rate": 1.7638402047351025e-05, "loss": 0.6526, "step": 3035 }, { "epoch": 0.25, "grad_norm": 0.9611056933445485, "learning_rate": 1.7636702770728637e-05, "loss": 0.5742, "step": 3036 }, { "epoch": 0.25, "grad_norm": 0.9059821946830916, "learning_rate": 1.7635002964889527e-05, "loss": 0.5577, "step": 3037 }, { "epoch": 0.25, "grad_norm": 0.9573450830444244, "learning_rate": 1.763330262995148e-05, "loss": 0.6275, "step": 3038 }, { "epoch": 0.25, "grad_norm": 0.9601163319268974, "learning_rate": 1.7631601766032337e-05, "loss": 0.5637, "step": 3039 }, { "epoch": 0.25, "grad_norm": 1.002866210234754, "learning_rate": 1.7629900373249956e-05, "loss": 0.5704, "step": 3040 }, { "epoch": 0.25, "grad_norm": 0.8760623142199797, "learning_rate": 1.7628198451722247e-05, "loss": 0.5525, "step": 3041 }, { "epoch": 0.25, "grad_norm": 0.9523505020216828, "learning_rate": 1.7626496001567154e-05, "loss": 0.5804, "step": 3042 }, { "epoch": 0.25, "grad_norm": 0.965206293258815, "learning_rate": 1.7624793022902648e-05, "loss": 0.58, "step": 3043 }, { "epoch": 0.25, "grad_norm": 0.9022195843672679, "learning_rate": 1.7623089515846752e-05, "loss": 0.6024, "step": 3044 }, { "epoch": 0.25, "grad_norm": 0.9534846451535837, "learning_rate": 1.7621385480517514e-05, "loss": 0.5893, "step": 3045 }, { "epoch": 0.25, "grad_norm": 0.9509893948267736, "learning_rate": 1.7619680917033023e-05, "loss": 0.5674, "step": 3046 }, { "epoch": 0.25, "grad_norm": 0.9667981591904946, "learning_rate": 1.7617975825511403e-05, "loss": 0.6137, "step": 3047 }, { "epoch": 0.25, "grad_norm": 0.9457533097952052, "learning_rate": 1.7616270206070814e-05, "loss": 0.5797, "step": 3048 }, { "epoch": 0.25, "grad_norm": 0.8800567240746717, "learning_rate": 1.7614564058829454e-05, "loss": 0.5534, "step": 3049 }, { "epoch": 0.25, "grad_norm": 0.9306818061440629, "learning_rate": 1.7612857383905565e-05, "loss": 0.5664, "step": 3050 }, { "epoch": 0.25, "grad_norm": 1.0394494750240004, "learning_rate": 1.7611150181417406e-05, "loss": 0.6443, "step": 3051 }, { "epoch": 0.25, "grad_norm": 0.9143008362220608, "learning_rate": 1.7609442451483292e-05, "loss": 0.5357, "step": 3052 }, { "epoch": 0.25, "grad_norm": 0.8459253561004924, "learning_rate": 1.7607734194221565e-05, "loss": 0.5381, "step": 3053 }, { "epoch": 0.25, "grad_norm": 1.8342798033382928, "learning_rate": 1.7606025409750608e-05, "loss": 0.5503, "step": 3054 }, { "epoch": 0.25, "grad_norm": 0.9497336635018657, "learning_rate": 1.760431609818884e-05, "loss": 0.5588, "step": 3055 }, { "epoch": 0.25, "grad_norm": 0.9574074351131134, "learning_rate": 1.7602606259654704e-05, "loss": 0.5993, "step": 3056 }, { "epoch": 0.25, "grad_norm": 1.2266619564048968, "learning_rate": 1.7600895894266702e-05, "loss": 0.5867, "step": 3057 }, { "epoch": 0.25, "grad_norm": 0.9543613597242573, "learning_rate": 1.7599185002143357e-05, "loss": 0.6301, "step": 3058 }, { "epoch": 0.25, "grad_norm": 0.9433524901855496, "learning_rate": 1.759747358340323e-05, "loss": 0.5699, "step": 3059 }, { "epoch": 0.25, "grad_norm": 0.9989762059557455, "learning_rate": 1.7595761638164924e-05, "loss": 0.5463, "step": 3060 }, { "epoch": 0.25, "grad_norm": 0.9912262872442057, "learning_rate": 1.7594049166547073e-05, "loss": 0.5611, "step": 3061 }, { "epoch": 0.25, "grad_norm": 0.955654711601771, "learning_rate": 1.7592336168668352e-05, "loss": 0.6187, "step": 3062 }, { "epoch": 0.25, "grad_norm": 0.9407381255668952, "learning_rate": 1.7590622644647466e-05, "loss": 0.571, "step": 3063 }, { "epoch": 0.25, "grad_norm": 0.8974550887945183, "learning_rate": 1.7588908594603165e-05, "loss": 0.4889, "step": 3064 }, { "epoch": 0.25, "grad_norm": 0.8719072751485014, "learning_rate": 1.758719401865423e-05, "loss": 0.5625, "step": 3065 }, { "epoch": 0.25, "grad_norm": 0.8891693890171455, "learning_rate": 1.758547891691948e-05, "loss": 0.5671, "step": 3066 }, { "epoch": 0.25, "grad_norm": 0.9430776803567622, "learning_rate": 1.7583763289517767e-05, "loss": 0.6137, "step": 3067 }, { "epoch": 0.25, "grad_norm": 0.9764665774810648, "learning_rate": 1.7582047136567987e-05, "loss": 0.6012, "step": 3068 }, { "epoch": 0.25, "grad_norm": 1.0107046063269252, "learning_rate": 1.7580330458189066e-05, "loss": 0.644, "step": 3069 }, { "epoch": 0.25, "grad_norm": 0.9512873214249711, "learning_rate": 1.757861325449997e-05, "loss": 0.6462, "step": 3070 }, { "epoch": 0.25, "grad_norm": 1.003399707071991, "learning_rate": 1.7576895525619693e-05, "loss": 0.6177, "step": 3071 }, { "epoch": 0.25, "grad_norm": 0.9317498927261336, "learning_rate": 1.757517727166728e-05, "loss": 0.5163, "step": 3072 }, { "epoch": 0.25, "grad_norm": 0.884093708449396, "learning_rate": 1.7573458492761802e-05, "loss": 0.473, "step": 3073 }, { "epoch": 0.25, "grad_norm": 0.9546080085115011, "learning_rate": 1.7571739189022365e-05, "loss": 0.5957, "step": 3074 }, { "epoch": 0.25, "grad_norm": 1.0330919558923133, "learning_rate": 1.7570019360568117e-05, "loss": 0.5716, "step": 3075 }, { "epoch": 0.25, "grad_norm": 0.9623272014338214, "learning_rate": 1.7568299007518247e-05, "loss": 0.5663, "step": 3076 }, { "epoch": 0.25, "grad_norm": 0.9888262495455644, "learning_rate": 1.7566578129991966e-05, "loss": 0.6225, "step": 3077 }, { "epoch": 0.25, "grad_norm": 0.892592052293103, "learning_rate": 1.756485672810853e-05, "loss": 0.5885, "step": 3078 }, { "epoch": 0.25, "grad_norm": 0.9551702736887312, "learning_rate": 1.7563134801987235e-05, "loss": 0.6147, "step": 3079 }, { "epoch": 0.25, "grad_norm": 0.9954287831755133, "learning_rate": 1.7561412351747406e-05, "loss": 0.5876, "step": 3080 }, { "epoch": 0.25, "grad_norm": 0.9158818809267135, "learning_rate": 1.7559689377508413e-05, "loss": 0.5339, "step": 3081 }, { "epoch": 0.25, "grad_norm": 1.0174523252448435, "learning_rate": 1.7557965879389644e-05, "loss": 0.5493, "step": 3082 }, { "epoch": 0.25, "grad_norm": 0.8947025622357997, "learning_rate": 1.7556241857510547e-05, "loss": 0.5544, "step": 3083 }, { "epoch": 0.25, "grad_norm": 1.2270341670509768, "learning_rate": 1.7554517311990592e-05, "loss": 0.6078, "step": 3084 }, { "epoch": 0.25, "grad_norm": 1.0170400035892802, "learning_rate": 1.7552792242949287e-05, "loss": 0.6123, "step": 3085 }, { "epoch": 0.25, "grad_norm": 0.9139996199480314, "learning_rate": 1.755106665050618e-05, "loss": 0.5805, "step": 3086 }, { "epoch": 0.25, "grad_norm": 0.9622769309264265, "learning_rate": 1.7549340534780852e-05, "loss": 0.6121, "step": 3087 }, { "epoch": 0.25, "grad_norm": 0.9809266179096972, "learning_rate": 1.754761389589292e-05, "loss": 0.6003, "step": 3088 }, { "epoch": 0.25, "grad_norm": 0.9361453357606391, "learning_rate": 1.7545886733962044e-05, "loss": 0.5877, "step": 3089 }, { "epoch": 0.25, "grad_norm": 0.9521538902460409, "learning_rate": 1.7544159049107902e-05, "loss": 0.5804, "step": 3090 }, { "epoch": 0.25, "grad_norm": 0.8638250061554852, "learning_rate": 1.7542430841450236e-05, "loss": 0.5138, "step": 3091 }, { "epoch": 0.25, "grad_norm": 0.8724382049228834, "learning_rate": 1.7540702111108803e-05, "loss": 0.5124, "step": 3092 }, { "epoch": 0.25, "grad_norm": 1.0254553888380893, "learning_rate": 1.7538972858203397e-05, "loss": 0.5292, "step": 3093 }, { "epoch": 0.25, "grad_norm": 0.9330508544120452, "learning_rate": 1.7537243082853866e-05, "loss": 0.5537, "step": 3094 }, { "epoch": 0.25, "grad_norm": 0.9432771599677009, "learning_rate": 1.753551278518007e-05, "loss": 0.579, "step": 3095 }, { "epoch": 0.25, "grad_norm": 0.8919899667127843, "learning_rate": 1.7533781965301924e-05, "loss": 0.4923, "step": 3096 }, { "epoch": 0.25, "grad_norm": 0.961077027530501, "learning_rate": 1.753205062333937e-05, "loss": 0.597, "step": 3097 }, { "epoch": 0.25, "grad_norm": 0.9069964785933305, "learning_rate": 1.753031875941239e-05, "loss": 0.5856, "step": 3098 }, { "epoch": 0.25, "grad_norm": 0.9659625727638037, "learning_rate": 1.7528586373640997e-05, "loss": 0.5876, "step": 3099 }, { "epoch": 0.25, "grad_norm": 1.0598241751866497, "learning_rate": 1.7526853466145248e-05, "loss": 0.632, "step": 3100 }, { "epoch": 0.25, "grad_norm": 1.0778620313469889, "learning_rate": 1.7525120037045227e-05, "loss": 0.5799, "step": 3101 }, { "epoch": 0.25, "grad_norm": 0.8954798615992657, "learning_rate": 1.7523386086461065e-05, "loss": 0.6049, "step": 3102 }, { "epoch": 0.25, "grad_norm": 0.935250732427432, "learning_rate": 1.7521651614512918e-05, "loss": 0.6179, "step": 3103 }, { "epoch": 0.25, "grad_norm": 1.0023476270439846, "learning_rate": 1.751991662132099e-05, "loss": 0.5978, "step": 3104 }, { "epoch": 0.25, "grad_norm": 0.9002821778685863, "learning_rate": 1.751818110700551e-05, "loss": 0.5656, "step": 3105 }, { "epoch": 0.25, "grad_norm": 0.8800925304899958, "learning_rate": 1.751644507168674e-05, "loss": 0.5569, "step": 3106 }, { "epoch": 0.25, "grad_norm": 0.9330859593957906, "learning_rate": 1.7514708515485002e-05, "loss": 0.6456, "step": 3107 }, { "epoch": 0.25, "grad_norm": 0.9532235450165991, "learning_rate": 1.7512971438520626e-05, "loss": 0.5808, "step": 3108 }, { "epoch": 0.25, "grad_norm": 0.8998905570101068, "learning_rate": 1.7511233840913994e-05, "loss": 0.5211, "step": 3109 }, { "epoch": 0.25, "grad_norm": 0.9128687245400994, "learning_rate": 1.7509495722785518e-05, "loss": 0.5159, "step": 3110 }, { "epoch": 0.25, "grad_norm": 1.075402357728703, "learning_rate": 1.7507757084255652e-05, "loss": 0.6144, "step": 3111 }, { "epoch": 0.25, "grad_norm": 0.9429480058993237, "learning_rate": 1.750601792544488e-05, "loss": 0.5949, "step": 3112 }, { "epoch": 0.25, "grad_norm": 0.9129715759556368, "learning_rate": 1.750427824647372e-05, "loss": 0.5553, "step": 3113 }, { "epoch": 0.25, "grad_norm": 0.9127366900187914, "learning_rate": 1.7502538047462737e-05, "loss": 0.5545, "step": 3114 }, { "epoch": 0.25, "grad_norm": 0.9397256695327718, "learning_rate": 1.750079732853252e-05, "loss": 0.5552, "step": 3115 }, { "epoch": 0.25, "grad_norm": 1.0024073480566253, "learning_rate": 1.74990560898037e-05, "loss": 0.6514, "step": 3116 }, { "epoch": 0.25, "grad_norm": 0.9620678706748186, "learning_rate": 1.7497314331396946e-05, "loss": 0.616, "step": 3117 }, { "epoch": 0.25, "grad_norm": 0.980921040306892, "learning_rate": 1.7495572053432962e-05, "loss": 0.6389, "step": 3118 }, { "epoch": 0.25, "grad_norm": 0.93016075302683, "learning_rate": 1.749382925603248e-05, "loss": 0.5954, "step": 3119 }, { "epoch": 0.25, "grad_norm": 0.9513139305328434, "learning_rate": 1.749208593931628e-05, "loss": 0.5899, "step": 3120 }, { "epoch": 0.25, "grad_norm": 0.9098228343772, "learning_rate": 1.7490342103405168e-05, "loss": 0.6347, "step": 3121 }, { "epoch": 0.25, "grad_norm": 0.9982505813028052, "learning_rate": 1.748859774841999e-05, "loss": 0.6127, "step": 3122 }, { "epoch": 0.25, "grad_norm": 0.9229136534442328, "learning_rate": 1.748685287448163e-05, "loss": 0.6267, "step": 3123 }, { "epoch": 0.25, "grad_norm": 0.8754740252477136, "learning_rate": 1.7485107481711014e-05, "loss": 0.6227, "step": 3124 }, { "epoch": 0.25, "grad_norm": 0.9122049883525993, "learning_rate": 1.748336157022908e-05, "loss": 0.5726, "step": 3125 }, { "epoch": 0.25, "grad_norm": 0.8813794257915903, "learning_rate": 1.7481615140156837e-05, "loss": 0.5837, "step": 3126 }, { "epoch": 0.25, "grad_norm": 1.0392588455372822, "learning_rate": 1.747986819161529e-05, "loss": 0.5378, "step": 3127 }, { "epoch": 0.25, "grad_norm": 1.063738607824746, "learning_rate": 1.747812072472552e-05, "loss": 0.6057, "step": 3128 }, { "epoch": 0.25, "grad_norm": 1.0799665881792668, "learning_rate": 1.7476372739608615e-05, "loss": 0.6201, "step": 3129 }, { "epoch": 0.25, "grad_norm": 0.922842839007906, "learning_rate": 1.7474624236385706e-05, "loss": 0.5879, "step": 3130 }, { "epoch": 0.25, "grad_norm": 0.9427134960414176, "learning_rate": 1.747287521517797e-05, "loss": 0.5887, "step": 3131 }, { "epoch": 0.25, "grad_norm": 0.9182269254202275, "learning_rate": 1.7471125676106613e-05, "loss": 0.6023, "step": 3132 }, { "epoch": 0.25, "grad_norm": 0.992985791336242, "learning_rate": 1.7469375619292873e-05, "loss": 0.5516, "step": 3133 }, { "epoch": 0.25, "grad_norm": 1.0000627224938892, "learning_rate": 1.7467625044858025e-05, "loss": 0.5806, "step": 3134 }, { "epoch": 0.25, "grad_norm": 1.0464342928375443, "learning_rate": 1.7465873952923386e-05, "loss": 0.6352, "step": 3135 }, { "epoch": 0.25, "grad_norm": 0.8489039252817476, "learning_rate": 1.7464122343610307e-05, "loss": 0.5423, "step": 3136 }, { "epoch": 0.25, "grad_norm": 0.8344056548884692, "learning_rate": 1.7462370217040167e-05, "loss": 0.6058, "step": 3137 }, { "epoch": 0.26, "grad_norm": 0.9413618313258731, "learning_rate": 1.7460617573334393e-05, "loss": 0.588, "step": 3138 }, { "epoch": 0.26, "grad_norm": 1.0008486902893026, "learning_rate": 1.7458864412614436e-05, "loss": 0.6089, "step": 3139 }, { "epoch": 0.26, "grad_norm": 0.9376106409264786, "learning_rate": 1.745711073500179e-05, "loss": 0.571, "step": 3140 }, { "epoch": 0.26, "grad_norm": 0.887158908415457, "learning_rate": 1.7455356540617988e-05, "loss": 0.5797, "step": 3141 }, { "epoch": 0.26, "grad_norm": 0.9221618609988214, "learning_rate": 1.745360182958459e-05, "loss": 0.5654, "step": 3142 }, { "epoch": 0.26, "grad_norm": 0.9051428050775634, "learning_rate": 1.7451846602023196e-05, "loss": 0.5332, "step": 3143 }, { "epoch": 0.26, "grad_norm": 0.972614082330035, "learning_rate": 1.745009085805544e-05, "loss": 0.5517, "step": 3144 }, { "epoch": 0.26, "grad_norm": 1.0267964133100653, "learning_rate": 1.7448334597803e-05, "loss": 0.5996, "step": 3145 }, { "epoch": 0.26, "grad_norm": 0.8669897880071976, "learning_rate": 1.7446577821387575e-05, "loss": 0.5437, "step": 3146 }, { "epoch": 0.26, "grad_norm": 0.9444554670732166, "learning_rate": 1.7444820528930914e-05, "loss": 0.5424, "step": 3147 }, { "epoch": 0.26, "grad_norm": 1.1496699362879719, "learning_rate": 1.7443062720554796e-05, "loss": 0.5784, "step": 3148 }, { "epoch": 0.26, "grad_norm": 1.0157973115543528, "learning_rate": 1.744130439638103e-05, "loss": 0.634, "step": 3149 }, { "epoch": 0.26, "grad_norm": 0.8366603399421715, "learning_rate": 1.7439545556531473e-05, "loss": 0.5062, "step": 3150 }, { "epoch": 0.26, "grad_norm": 0.97088446679413, "learning_rate": 1.7437786201128003e-05, "loss": 0.5379, "step": 3151 }, { "epoch": 0.26, "grad_norm": 0.9939309659717451, "learning_rate": 1.743602633029255e-05, "loss": 0.5846, "step": 3152 }, { "epoch": 0.26, "grad_norm": 0.8990135518929009, "learning_rate": 1.7434265944147068e-05, "loss": 0.5164, "step": 3153 }, { "epoch": 0.26, "grad_norm": 1.019817941557951, "learning_rate": 1.743250504281355e-05, "loss": 0.6309, "step": 3154 }, { "epoch": 0.26, "grad_norm": 0.8928122311880572, "learning_rate": 1.7430743626414024e-05, "loss": 0.4856, "step": 3155 }, { "epoch": 0.26, "grad_norm": 0.9782714945133913, "learning_rate": 1.7428981695070558e-05, "loss": 0.6257, "step": 3156 }, { "epoch": 0.26, "grad_norm": 0.8879693848442041, "learning_rate": 1.7427219248905246e-05, "loss": 0.5216, "step": 3157 }, { "epoch": 0.26, "grad_norm": 1.089049966696691, "learning_rate": 1.7425456288040236e-05, "loss": 0.6037, "step": 3158 }, { "epoch": 0.26, "grad_norm": 0.9056987893721266, "learning_rate": 1.7423692812597682e-05, "loss": 0.5546, "step": 3159 }, { "epoch": 0.26, "grad_norm": 1.0574500523825905, "learning_rate": 1.7421928822699805e-05, "loss": 0.5797, "step": 3160 }, { "epoch": 0.26, "grad_norm": 1.0050064235457727, "learning_rate": 1.7420164318468845e-05, "loss": 0.6173, "step": 3161 }, { "epoch": 0.26, "grad_norm": 0.9148162925439718, "learning_rate": 1.741839930002708e-05, "loss": 0.5662, "step": 3162 }, { "epoch": 0.26, "grad_norm": 0.8762180869341742, "learning_rate": 1.741663376749682e-05, "loss": 0.5885, "step": 3163 }, { "epoch": 0.26, "grad_norm": 0.8594642521189727, "learning_rate": 1.7414867721000423e-05, "loss": 0.5508, "step": 3164 }, { "epoch": 0.26, "grad_norm": 0.9794014670832364, "learning_rate": 1.7413101160660267e-05, "loss": 0.5757, "step": 3165 }, { "epoch": 0.26, "grad_norm": 0.9907022513665806, "learning_rate": 1.7411334086598775e-05, "loss": 0.5629, "step": 3166 }, { "epoch": 0.26, "grad_norm": 0.8991676755470959, "learning_rate": 1.7409566498938405e-05, "loss": 0.5953, "step": 3167 }, { "epoch": 0.26, "grad_norm": 1.091552960265873, "learning_rate": 1.740779839780165e-05, "loss": 0.5151, "step": 3168 }, { "epoch": 0.26, "grad_norm": 1.0550607969869947, "learning_rate": 1.7406029783311036e-05, "loss": 0.6001, "step": 3169 }, { "epoch": 0.26, "grad_norm": 1.023053375591623, "learning_rate": 1.7404260655589128e-05, "loss": 0.6253, "step": 3170 }, { "epoch": 0.26, "grad_norm": 1.0158274844468704, "learning_rate": 1.7402491014758526e-05, "loss": 0.565, "step": 3171 }, { "epoch": 0.26, "grad_norm": 0.8856276226898813, "learning_rate": 1.740072086094186e-05, "loss": 0.5784, "step": 3172 }, { "epoch": 0.26, "grad_norm": 0.9297964339077929, "learning_rate": 1.7398950194261803e-05, "loss": 0.4888, "step": 3173 }, { "epoch": 0.26, "grad_norm": 1.0394334239483127, "learning_rate": 1.739717901484106e-05, "loss": 0.6169, "step": 3174 }, { "epoch": 0.26, "grad_norm": 1.000153477231492, "learning_rate": 1.7395407322802374e-05, "loss": 0.581, "step": 3175 }, { "epoch": 0.26, "grad_norm": 0.9310772778163637, "learning_rate": 1.739363511826852e-05, "loss": 0.5245, "step": 3176 }, { "epoch": 0.26, "grad_norm": 0.9003978982215697, "learning_rate": 1.739186240136231e-05, "loss": 0.5652, "step": 3177 }, { "epoch": 0.26, "grad_norm": 1.0088358143669862, "learning_rate": 1.7390089172206594e-05, "loss": 0.6085, "step": 3178 }, { "epoch": 0.26, "grad_norm": 0.9211698202843335, "learning_rate": 1.7388315430924253e-05, "loss": 0.5584, "step": 3179 }, { "epoch": 0.26, "grad_norm": 0.8748172476650534, "learning_rate": 1.73865411776382e-05, "loss": 0.5751, "step": 3180 }, { "epoch": 0.26, "grad_norm": 0.8417660121346013, "learning_rate": 1.7384766412471405e-05, "loss": 0.5499, "step": 3181 }, { "epoch": 0.26, "grad_norm": 0.9408868417098869, "learning_rate": 1.7382991135546842e-05, "loss": 0.6425, "step": 3182 }, { "epoch": 0.26, "grad_norm": 0.940025872410451, "learning_rate": 1.7381215346987538e-05, "loss": 0.5604, "step": 3183 }, { "epoch": 0.26, "grad_norm": 0.8701094523091236, "learning_rate": 1.7379439046916564e-05, "loss": 0.5617, "step": 3184 }, { "epoch": 0.26, "grad_norm": 0.9589378666465185, "learning_rate": 1.737766223545701e-05, "loss": 0.5457, "step": 3185 }, { "epoch": 0.26, "grad_norm": 0.8717363773584366, "learning_rate": 1.7375884912732004e-05, "loss": 0.5277, "step": 3186 }, { "epoch": 0.26, "grad_norm": 0.9283899269806596, "learning_rate": 1.7374107078864716e-05, "loss": 0.6118, "step": 3187 }, { "epoch": 0.26, "grad_norm": 0.9544424219775307, "learning_rate": 1.7372328733978348e-05, "loss": 0.5704, "step": 3188 }, { "epoch": 0.26, "grad_norm": 0.9255212308304944, "learning_rate": 1.737054987819614e-05, "loss": 0.6063, "step": 3189 }, { "epoch": 0.26, "grad_norm": 0.9358163965712596, "learning_rate": 1.7368770511641365e-05, "loss": 0.5423, "step": 3190 }, { "epoch": 0.26, "grad_norm": 0.9010266984747565, "learning_rate": 1.7366990634437328e-05, "loss": 0.6014, "step": 3191 }, { "epoch": 0.26, "grad_norm": 0.9571620756129031, "learning_rate": 1.736521024670737e-05, "loss": 0.5905, "step": 3192 }, { "epoch": 0.26, "grad_norm": 0.9217502531680845, "learning_rate": 1.736342934857488e-05, "loss": 0.6337, "step": 3193 }, { "epoch": 0.26, "grad_norm": 0.8466450609798067, "learning_rate": 1.7361647940163266e-05, "loss": 0.5326, "step": 3194 }, { "epoch": 0.26, "grad_norm": 0.9985771457839538, "learning_rate": 1.735986602159598e-05, "loss": 0.6586, "step": 3195 }, { "epoch": 0.26, "grad_norm": 0.9246511819018729, "learning_rate": 1.7358083592996507e-05, "loss": 0.5962, "step": 3196 }, { "epoch": 0.26, "grad_norm": 0.9695534053036614, "learning_rate": 1.7356300654488367e-05, "loss": 0.6388, "step": 3197 }, { "epoch": 0.26, "grad_norm": 0.8667598153295203, "learning_rate": 1.7354517206195115e-05, "loss": 0.561, "step": 3198 }, { "epoch": 0.26, "grad_norm": 0.8842929810545559, "learning_rate": 1.7352733248240347e-05, "loss": 0.543, "step": 3199 }, { "epoch": 0.26, "grad_norm": 0.893497524895569, "learning_rate": 1.7350948780747684e-05, "loss": 0.5941, "step": 3200 }, { "epoch": 0.26, "grad_norm": 0.8583532551872464, "learning_rate": 1.734916380384079e-05, "loss": 0.512, "step": 3201 }, { "epoch": 0.26, "grad_norm": 1.0429301416865169, "learning_rate": 1.7347378317643368e-05, "loss": 0.6715, "step": 3202 }, { "epoch": 0.26, "grad_norm": 0.9002956701588745, "learning_rate": 1.7345592322279143e-05, "loss": 0.5906, "step": 3203 }, { "epoch": 0.26, "grad_norm": 1.0766543533538397, "learning_rate": 1.7343805817871885e-05, "loss": 0.614, "step": 3204 }, { "epoch": 0.26, "grad_norm": 0.9758413415717146, "learning_rate": 1.73420188045454e-05, "loss": 0.6033, "step": 3205 }, { "epoch": 0.26, "grad_norm": 1.0511316691987547, "learning_rate": 1.734023128242352e-05, "loss": 0.5785, "step": 3206 }, { "epoch": 0.26, "grad_norm": 0.8933442945857915, "learning_rate": 1.7338443251630125e-05, "loss": 0.5391, "step": 3207 }, { "epoch": 0.26, "grad_norm": 0.8938768216765625, "learning_rate": 1.7336654712289125e-05, "loss": 0.5729, "step": 3208 }, { "epoch": 0.26, "grad_norm": 0.9178495968780668, "learning_rate": 1.733486566452446e-05, "loss": 0.6117, "step": 3209 }, { "epoch": 0.26, "grad_norm": 1.0081580578635072, "learning_rate": 1.733307610846011e-05, "loss": 0.549, "step": 3210 }, { "epoch": 0.26, "grad_norm": 0.9144248732928212, "learning_rate": 1.7331286044220086e-05, "loss": 0.5454, "step": 3211 }, { "epoch": 0.26, "grad_norm": 1.0079526594157142, "learning_rate": 1.7329495471928446e-05, "loss": 0.5632, "step": 3212 }, { "epoch": 0.26, "grad_norm": 0.9673535210182386, "learning_rate": 1.732770439170927e-05, "loss": 0.5216, "step": 3213 }, { "epoch": 0.26, "grad_norm": 0.8366252919513592, "learning_rate": 1.732591280368668e-05, "loss": 0.5358, "step": 3214 }, { "epoch": 0.26, "grad_norm": 0.8997963514421738, "learning_rate": 1.732412070798483e-05, "loss": 0.544, "step": 3215 }, { "epoch": 0.26, "grad_norm": 1.0674135355568224, "learning_rate": 1.732232810472791e-05, "loss": 0.6194, "step": 3216 }, { "epoch": 0.26, "grad_norm": 0.9130503380839831, "learning_rate": 1.7320534994040148e-05, "loss": 0.5702, "step": 3217 }, { "epoch": 0.26, "grad_norm": 0.9125939393730079, "learning_rate": 1.7318741376045806e-05, "loss": 0.5232, "step": 3218 }, { "epoch": 0.26, "grad_norm": 0.9152888605297167, "learning_rate": 1.731694725086918e-05, "loss": 0.5589, "step": 3219 }, { "epoch": 0.26, "grad_norm": 0.918053811287748, "learning_rate": 1.7315152618634594e-05, "loss": 0.5515, "step": 3220 }, { "epoch": 0.26, "grad_norm": 0.9517138283797776, "learning_rate": 1.731335747946642e-05, "loss": 0.491, "step": 3221 }, { "epoch": 0.26, "grad_norm": 0.9695937631844701, "learning_rate": 1.7311561833489065e-05, "loss": 0.6387, "step": 3222 }, { "epoch": 0.26, "grad_norm": 0.9344378625169966, "learning_rate": 1.730976568082696e-05, "loss": 0.5961, "step": 3223 }, { "epoch": 0.26, "grad_norm": 0.9918672455564193, "learning_rate": 1.7307969021604574e-05, "loss": 0.5898, "step": 3224 }, { "epoch": 0.26, "grad_norm": 0.9510075197303961, "learning_rate": 1.730617185594642e-05, "loss": 0.6031, "step": 3225 }, { "epoch": 0.26, "grad_norm": 0.9366114096543621, "learning_rate": 1.7304374183977032e-05, "loss": 0.6217, "step": 3226 }, { "epoch": 0.26, "grad_norm": 1.0382255870102077, "learning_rate": 1.7302576005820997e-05, "loss": 0.6121, "step": 3227 }, { "epoch": 0.26, "grad_norm": 0.8940002944032726, "learning_rate": 1.730077732160292e-05, "loss": 0.5346, "step": 3228 }, { "epoch": 0.26, "grad_norm": 0.9389001006992637, "learning_rate": 1.729897813144745e-05, "loss": 0.6312, "step": 3229 }, { "epoch": 0.26, "grad_norm": 1.027925431969123, "learning_rate": 1.729717843547927e-05, "loss": 0.6487, "step": 3230 }, { "epoch": 0.26, "grad_norm": 0.973280328221155, "learning_rate": 1.7295378233823096e-05, "loss": 0.5505, "step": 3231 }, { "epoch": 0.26, "grad_norm": 0.8950161903045436, "learning_rate": 1.7293577526603684e-05, "loss": 0.5313, "step": 3232 }, { "epoch": 0.26, "grad_norm": 1.0185304119690635, "learning_rate": 1.7291776313945817e-05, "loss": 0.5965, "step": 3233 }, { "epoch": 0.26, "grad_norm": 0.9786576159121729, "learning_rate": 1.728997459597432e-05, "loss": 0.5804, "step": 3234 }, { "epoch": 0.26, "grad_norm": 1.0669721345581529, "learning_rate": 1.728817237281405e-05, "loss": 0.5765, "step": 3235 }, { "epoch": 0.26, "grad_norm": 0.9704256322697749, "learning_rate": 1.7286369644589897e-05, "loss": 0.631, "step": 3236 }, { "epoch": 0.26, "grad_norm": 0.9319412904056394, "learning_rate": 1.728456641142679e-05, "loss": 0.5408, "step": 3237 }, { "epoch": 0.26, "grad_norm": 1.0092120792907255, "learning_rate": 1.7282762673449695e-05, "loss": 0.6245, "step": 3238 }, { "epoch": 0.26, "grad_norm": 0.9394081643734624, "learning_rate": 1.7280958430783608e-05, "loss": 0.6109, "step": 3239 }, { "epoch": 0.26, "grad_norm": 0.9187040965972737, "learning_rate": 1.7279153683553556e-05, "loss": 0.574, "step": 3240 }, { "epoch": 0.26, "grad_norm": 0.890777039494145, "learning_rate": 1.7277348431884613e-05, "loss": 0.5282, "step": 3241 }, { "epoch": 0.26, "grad_norm": 0.9816871876859573, "learning_rate": 1.7275542675901876e-05, "loss": 0.5562, "step": 3242 }, { "epoch": 0.26, "grad_norm": 1.0160416140142166, "learning_rate": 1.7273736415730488e-05, "loss": 0.6288, "step": 3243 }, { "epoch": 0.26, "grad_norm": 0.976610433292306, "learning_rate": 1.7271929651495617e-05, "loss": 0.5985, "step": 3244 }, { "epoch": 0.26, "grad_norm": 0.9581355875399314, "learning_rate": 1.7270122383322473e-05, "loss": 0.6334, "step": 3245 }, { "epoch": 0.26, "grad_norm": 1.1556542224283102, "learning_rate": 1.7268314611336296e-05, "loss": 0.5319, "step": 3246 }, { "epoch": 0.26, "grad_norm": 1.0106194182617843, "learning_rate": 1.726650633566236e-05, "loss": 0.6098, "step": 3247 }, { "epoch": 0.26, "grad_norm": 0.9072489025762995, "learning_rate": 1.726469755642598e-05, "loss": 0.5464, "step": 3248 }, { "epoch": 0.26, "grad_norm": 0.8495528879265195, "learning_rate": 1.7262888273752505e-05, "loss": 0.5391, "step": 3249 }, { "epoch": 0.26, "grad_norm": 0.9946732239643433, "learning_rate": 1.7261078487767317e-05, "loss": 0.5863, "step": 3250 }, { "epoch": 0.26, "grad_norm": 0.9445316998047976, "learning_rate": 1.7259268198595828e-05, "loss": 0.5619, "step": 3251 }, { "epoch": 0.26, "grad_norm": 0.9169313514431681, "learning_rate": 1.7257457406363495e-05, "loss": 0.5631, "step": 3252 }, { "epoch": 0.26, "grad_norm": 0.9302146760899365, "learning_rate": 1.72556461111958e-05, "loss": 0.5708, "step": 3253 }, { "epoch": 0.26, "grad_norm": 0.9320385142628118, "learning_rate": 1.725383431321826e-05, "loss": 0.6121, "step": 3254 }, { "epoch": 0.26, "grad_norm": 0.9053121367203344, "learning_rate": 1.725202201255644e-05, "loss": 0.5386, "step": 3255 }, { "epoch": 0.26, "grad_norm": 0.9140031954486985, "learning_rate": 1.725020920933593e-05, "loss": 0.6397, "step": 3256 }, { "epoch": 0.26, "grad_norm": 1.0485591789305269, "learning_rate": 1.7248395903682347e-05, "loss": 0.6717, "step": 3257 }, { "epoch": 0.26, "grad_norm": 0.9622445974231173, "learning_rate": 1.724658209572136e-05, "loss": 0.5676, "step": 3258 }, { "epoch": 0.26, "grad_norm": 0.9008820906676879, "learning_rate": 1.724476778557866e-05, "loss": 0.5995, "step": 3259 }, { "epoch": 0.26, "grad_norm": 0.8493129216065038, "learning_rate": 1.7242952973379983e-05, "loss": 0.536, "step": 3260 }, { "epoch": 0.27, "grad_norm": 1.0225594519850092, "learning_rate": 1.7241137659251087e-05, "loss": 0.5215, "step": 3261 }, { "epoch": 0.27, "grad_norm": 0.9949755006324897, "learning_rate": 1.723932184331777e-05, "loss": 0.5143, "step": 3262 }, { "epoch": 0.27, "grad_norm": 1.0648617924766293, "learning_rate": 1.7237505525705875e-05, "loss": 0.6225, "step": 3263 }, { "epoch": 0.27, "grad_norm": 0.9205169832526819, "learning_rate": 1.7235688706541266e-05, "loss": 0.5073, "step": 3264 }, { "epoch": 0.27, "grad_norm": 1.0317464947048756, "learning_rate": 1.723387138594985e-05, "loss": 0.586, "step": 3265 }, { "epoch": 0.27, "grad_norm": 0.9378484870577685, "learning_rate": 1.723205356405756e-05, "loss": 0.5489, "step": 3266 }, { "epoch": 0.27, "grad_norm": 0.9899383610437812, "learning_rate": 1.7230235240990373e-05, "loss": 0.565, "step": 3267 }, { "epoch": 0.27, "grad_norm": 1.0498755184573376, "learning_rate": 1.72284164168743e-05, "loss": 0.5998, "step": 3268 }, { "epoch": 0.27, "grad_norm": 0.9073391433511635, "learning_rate": 1.7226597091835377e-05, "loss": 0.5493, "step": 3269 }, { "epoch": 0.27, "grad_norm": 0.8451787420231008, "learning_rate": 1.7224777265999688e-05, "loss": 0.5525, "step": 3270 }, { "epoch": 0.27, "grad_norm": 0.9488853496030325, "learning_rate": 1.722295693949334e-05, "loss": 0.5827, "step": 3271 }, { "epoch": 0.27, "grad_norm": 1.0495687319964833, "learning_rate": 1.7221136112442487e-05, "loss": 0.6238, "step": 3272 }, { "epoch": 0.27, "grad_norm": 0.94853336956771, "learning_rate": 1.7219314784973304e-05, "loss": 0.523, "step": 3273 }, { "epoch": 0.27, "grad_norm": 0.9628064262893254, "learning_rate": 1.721749295721201e-05, "loss": 0.5904, "step": 3274 }, { "epoch": 0.27, "grad_norm": 0.9345929252013101, "learning_rate": 1.7215670629284856e-05, "loss": 0.5494, "step": 3275 }, { "epoch": 0.27, "grad_norm": 0.8830597865479197, "learning_rate": 1.7213847801318128e-05, "loss": 0.5605, "step": 3276 }, { "epoch": 0.27, "grad_norm": 0.9572622089997017, "learning_rate": 1.7212024473438145e-05, "loss": 0.6316, "step": 3277 }, { "epoch": 0.27, "grad_norm": 0.9562194162392834, "learning_rate": 1.7210200645771268e-05, "loss": 0.5326, "step": 3278 }, { "epoch": 0.27, "grad_norm": 1.0553393174265637, "learning_rate": 1.7208376318443877e-05, "loss": 0.634, "step": 3279 }, { "epoch": 0.27, "grad_norm": 0.9970413860899692, "learning_rate": 1.72065514915824e-05, "loss": 0.6207, "step": 3280 }, { "epoch": 0.27, "grad_norm": 0.9318499998745525, "learning_rate": 1.72047261653133e-05, "loss": 0.5454, "step": 3281 }, { "epoch": 0.27, "grad_norm": 0.9223364403973837, "learning_rate": 1.7202900339763066e-05, "loss": 0.6575, "step": 3282 }, { "epoch": 0.27, "grad_norm": 0.929898055654162, "learning_rate": 1.7201074015058226e-05, "loss": 0.5548, "step": 3283 }, { "epoch": 0.27, "grad_norm": 0.9324984663522081, "learning_rate": 1.7199247191325347e-05, "loss": 0.6458, "step": 3284 }, { "epoch": 0.27, "grad_norm": 0.9534813809660702, "learning_rate": 1.7197419868691022e-05, "loss": 0.5562, "step": 3285 }, { "epoch": 0.27, "grad_norm": 0.953733613695882, "learning_rate": 1.719559204728188e-05, "loss": 0.6061, "step": 3286 }, { "epoch": 0.27, "grad_norm": 0.9526278381395721, "learning_rate": 1.7193763727224596e-05, "loss": 0.6133, "step": 3287 }, { "epoch": 0.27, "grad_norm": 0.9974347688291391, "learning_rate": 1.719193490864587e-05, "loss": 0.611, "step": 3288 }, { "epoch": 0.27, "grad_norm": 0.9452782503892461, "learning_rate": 1.719010559167243e-05, "loss": 0.5824, "step": 3289 }, { "epoch": 0.27, "grad_norm": 0.8358480245034036, "learning_rate": 1.7188275776431048e-05, "loss": 0.5364, "step": 3290 }, { "epoch": 0.27, "grad_norm": 0.918407324135863, "learning_rate": 1.7186445463048533e-05, "loss": 0.5966, "step": 3291 }, { "epoch": 0.27, "grad_norm": 0.9047942682262397, "learning_rate": 1.7184614651651723e-05, "loss": 0.5637, "step": 3292 }, { "epoch": 0.27, "grad_norm": 0.9728046891966552, "learning_rate": 1.718278334236749e-05, "loss": 0.6377, "step": 3293 }, { "epoch": 0.27, "grad_norm": 0.9405103120350403, "learning_rate": 1.7180951535322742e-05, "loss": 0.5708, "step": 3294 }, { "epoch": 0.27, "grad_norm": 0.8591298849891674, "learning_rate": 1.717911923064442e-05, "loss": 0.5249, "step": 3295 }, { "epoch": 0.27, "grad_norm": 0.8752323427602121, "learning_rate": 1.7177286428459505e-05, "loss": 0.5671, "step": 3296 }, { "epoch": 0.27, "grad_norm": 0.9414564811464446, "learning_rate": 1.717545312889501e-05, "loss": 0.5671, "step": 3297 }, { "epoch": 0.27, "grad_norm": 1.02907718626476, "learning_rate": 1.7173619332077972e-05, "loss": 0.6245, "step": 3298 }, { "epoch": 0.27, "grad_norm": 0.883595641347089, "learning_rate": 1.717178503813548e-05, "loss": 0.58, "step": 3299 }, { "epoch": 0.27, "grad_norm": 0.9260784685712767, "learning_rate": 1.7169950247194646e-05, "loss": 0.5683, "step": 3300 }, { "epoch": 0.27, "grad_norm": 0.9018370832687636, "learning_rate": 1.716811495938262e-05, "loss": 0.6049, "step": 3301 }, { "epoch": 0.27, "grad_norm": 0.9249549260329984, "learning_rate": 1.716627917482658e-05, "loss": 0.5928, "step": 3302 }, { "epoch": 0.27, "grad_norm": 0.950760435439383, "learning_rate": 1.716444289365376e-05, "loss": 0.592, "step": 3303 }, { "epoch": 0.27, "grad_norm": 1.0057267824279712, "learning_rate": 1.7162606115991395e-05, "loss": 0.6063, "step": 3304 }, { "epoch": 0.27, "grad_norm": 0.9109873091350366, "learning_rate": 1.7160768841966785e-05, "loss": 0.5169, "step": 3305 }, { "epoch": 0.27, "grad_norm": 0.9393355629622872, "learning_rate": 1.7158931071707242e-05, "loss": 0.6203, "step": 3306 }, { "epoch": 0.27, "grad_norm": 0.9332032789248998, "learning_rate": 1.7157092805340126e-05, "loss": 0.5535, "step": 3307 }, { "epoch": 0.27, "grad_norm": 0.9745364105146304, "learning_rate": 1.7155254042992827e-05, "loss": 0.554, "step": 3308 }, { "epoch": 0.27, "grad_norm": 0.8993080723884361, "learning_rate": 1.715341478479277e-05, "loss": 0.5725, "step": 3309 }, { "epoch": 0.27, "grad_norm": 0.9404230807613553, "learning_rate": 1.715157503086741e-05, "loss": 0.5964, "step": 3310 }, { "epoch": 0.27, "grad_norm": 1.0544615187533903, "learning_rate": 1.7149734781344247e-05, "loss": 0.6241, "step": 3311 }, { "epoch": 0.27, "grad_norm": 0.9740583185354336, "learning_rate": 1.7147894036350804e-05, "loss": 0.559, "step": 3312 }, { "epoch": 0.27, "grad_norm": 0.906313386144417, "learning_rate": 1.7146052796014646e-05, "loss": 0.5712, "step": 3313 }, { "epoch": 0.27, "grad_norm": 0.922204166116946, "learning_rate": 1.7144211060463368e-05, "loss": 0.5948, "step": 3314 }, { "epoch": 0.27, "grad_norm": 0.9019297900829841, "learning_rate": 1.7142368829824602e-05, "loss": 0.5747, "step": 3315 }, { "epoch": 0.27, "grad_norm": 0.8758645949867254, "learning_rate": 1.714052610422601e-05, "loss": 0.575, "step": 3316 }, { "epoch": 0.27, "grad_norm": 0.9339216019045296, "learning_rate": 1.7138682883795292e-05, "loss": 0.5893, "step": 3317 }, { "epoch": 0.27, "grad_norm": 0.892853688298582, "learning_rate": 1.713683916866018e-05, "loss": 0.5385, "step": 3318 }, { "epoch": 0.27, "grad_norm": 0.8869662452289221, "learning_rate": 1.7134994958948444e-05, "loss": 0.5819, "step": 3319 }, { "epoch": 0.27, "grad_norm": 0.9363151281743876, "learning_rate": 1.713315025478789e-05, "loss": 0.5378, "step": 3320 }, { "epoch": 0.27, "grad_norm": 0.9454884558940029, "learning_rate": 1.713130505630635e-05, "loss": 0.5796, "step": 3321 }, { "epoch": 0.27, "grad_norm": 0.9600187437555181, "learning_rate": 1.7129459363631692e-05, "loss": 0.5741, "step": 3322 }, { "epoch": 0.27, "grad_norm": 0.9129181561719604, "learning_rate": 1.7127613176891824e-05, "loss": 0.5531, "step": 3323 }, { "epoch": 0.27, "grad_norm": 0.8436607312653893, "learning_rate": 1.7125766496214687e-05, "loss": 0.5581, "step": 3324 }, { "epoch": 0.27, "grad_norm": 0.9634247002821723, "learning_rate": 1.712391932172825e-05, "loss": 0.5728, "step": 3325 }, { "epoch": 0.27, "grad_norm": 0.9110565883221894, "learning_rate": 1.712207165356053e-05, "loss": 0.6041, "step": 3326 }, { "epoch": 0.27, "grad_norm": 0.8991361572067563, "learning_rate": 1.7120223491839553e-05, "loss": 0.5818, "step": 3327 }, { "epoch": 0.27, "grad_norm": 0.9050015040122568, "learning_rate": 1.7118374836693407e-05, "loss": 0.6224, "step": 3328 }, { "epoch": 0.27, "grad_norm": 0.8981409776838637, "learning_rate": 1.71165256882502e-05, "loss": 0.616, "step": 3329 }, { "epoch": 0.27, "grad_norm": 0.9038761710482833, "learning_rate": 1.7114676046638076e-05, "loss": 0.577, "step": 3330 }, { "epoch": 0.27, "grad_norm": 0.9331379208091041, "learning_rate": 1.7112825911985207e-05, "loss": 0.5678, "step": 3331 }, { "epoch": 0.27, "grad_norm": 0.9866541584168005, "learning_rate": 1.7110975284419814e-05, "loss": 0.6266, "step": 3332 }, { "epoch": 0.27, "grad_norm": 0.938458776562073, "learning_rate": 1.7109124164070144e-05, "loss": 0.5473, "step": 3333 }, { "epoch": 0.27, "grad_norm": 0.9282242071375044, "learning_rate": 1.710727255106447e-05, "loss": 0.5782, "step": 3334 }, { "epoch": 0.27, "grad_norm": 0.9217922793401815, "learning_rate": 1.710542044553112e-05, "loss": 0.5536, "step": 3335 }, { "epoch": 0.27, "grad_norm": 0.8435229885105086, "learning_rate": 1.710356784759843e-05, "loss": 0.5292, "step": 3336 }, { "epoch": 0.27, "grad_norm": 0.8888554609533063, "learning_rate": 1.7101714757394792e-05, "loss": 0.6026, "step": 3337 }, { "epoch": 0.27, "grad_norm": 0.9747928015600499, "learning_rate": 1.7099861175048617e-05, "loss": 0.6023, "step": 3338 }, { "epoch": 0.27, "grad_norm": 1.0846321842053752, "learning_rate": 1.7098007100688362e-05, "loss": 0.5917, "step": 3339 }, { "epoch": 0.27, "grad_norm": 0.9185007365976529, "learning_rate": 1.7096152534442515e-05, "loss": 0.5267, "step": 3340 }, { "epoch": 0.27, "grad_norm": 0.8376651511974664, "learning_rate": 1.7094297476439585e-05, "loss": 0.5167, "step": 3341 }, { "epoch": 0.27, "grad_norm": 0.8460925887281229, "learning_rate": 1.7092441926808138e-05, "loss": 0.5659, "step": 3342 }, { "epoch": 0.27, "grad_norm": 0.9936106466309771, "learning_rate": 1.7090585885676753e-05, "loss": 0.5279, "step": 3343 }, { "epoch": 0.27, "grad_norm": 1.064099435320851, "learning_rate": 1.7088729353174054e-05, "loss": 0.5633, "step": 3344 }, { "epoch": 0.27, "grad_norm": 0.9457408843760768, "learning_rate": 1.7086872329428702e-05, "loss": 0.6306, "step": 3345 }, { "epoch": 0.27, "grad_norm": 0.8350392579479472, "learning_rate": 1.708501481456938e-05, "loss": 0.5424, "step": 3346 }, { "epoch": 0.27, "grad_norm": 0.9524852783334381, "learning_rate": 1.7083156808724817e-05, "loss": 0.6225, "step": 3347 }, { "epoch": 0.27, "grad_norm": 0.9605946312330363, "learning_rate": 1.7081298312023773e-05, "loss": 0.5867, "step": 3348 }, { "epoch": 0.27, "grad_norm": 1.0630205296787159, "learning_rate": 1.7079439324595038e-05, "loss": 0.5759, "step": 3349 }, { "epoch": 0.27, "grad_norm": 0.8757051239915971, "learning_rate": 1.7077579846567435e-05, "loss": 0.5896, "step": 3350 }, { "epoch": 0.27, "grad_norm": 0.911951669277883, "learning_rate": 1.7075719878069822e-05, "loss": 0.5148, "step": 3351 }, { "epoch": 0.27, "grad_norm": 0.9017122704150898, "learning_rate": 1.7073859419231104e-05, "loss": 0.5533, "step": 3352 }, { "epoch": 0.27, "grad_norm": 1.130210862193503, "learning_rate": 1.70719984701802e-05, "loss": 0.6358, "step": 3353 }, { "epoch": 0.27, "grad_norm": 1.015208620424021, "learning_rate": 1.7070137031046074e-05, "loss": 0.6288, "step": 3354 }, { "epoch": 0.27, "grad_norm": 0.98786674028703, "learning_rate": 1.7068275101957724e-05, "loss": 0.5977, "step": 3355 }, { "epoch": 0.27, "grad_norm": 0.9258362716954789, "learning_rate": 1.7066412683044176e-05, "loss": 0.5598, "step": 3356 }, { "epoch": 0.27, "grad_norm": 0.9371702688369251, "learning_rate": 1.7064549774434502e-05, "loss": 0.5474, "step": 3357 }, { "epoch": 0.27, "grad_norm": 0.9476739249381467, "learning_rate": 1.7062686376257792e-05, "loss": 0.6134, "step": 3358 }, { "epoch": 0.27, "grad_norm": 0.8500421461497814, "learning_rate": 1.706082248864318e-05, "loss": 0.538, "step": 3359 }, { "epoch": 0.27, "grad_norm": 0.8320273702343286, "learning_rate": 1.7058958111719836e-05, "loss": 0.5137, "step": 3360 }, { "epoch": 0.27, "grad_norm": 0.9597941408747482, "learning_rate": 1.7057093245616953e-05, "loss": 0.6073, "step": 3361 }, { "epoch": 0.27, "grad_norm": 0.9336154644932648, "learning_rate": 1.705522789046377e-05, "loss": 0.5306, "step": 3362 }, { "epoch": 0.27, "grad_norm": 0.9007035249997226, "learning_rate": 1.7053362046389553e-05, "loss": 0.5615, "step": 3363 }, { "epoch": 0.27, "grad_norm": 0.8606210782971025, "learning_rate": 1.7051495713523598e-05, "loss": 0.5333, "step": 3364 }, { "epoch": 0.27, "grad_norm": 1.037431116748183, "learning_rate": 1.7049628891995245e-05, "loss": 0.6167, "step": 3365 }, { "epoch": 0.27, "grad_norm": 0.8689014841123899, "learning_rate": 1.7047761581933867e-05, "loss": 0.5588, "step": 3366 }, { "epoch": 0.27, "grad_norm": 0.8812212259999989, "learning_rate": 1.704589378346886e-05, "loss": 0.5603, "step": 3367 }, { "epoch": 0.27, "grad_norm": 0.9829983554137566, "learning_rate": 1.7044025496729665e-05, "loss": 0.5547, "step": 3368 }, { "epoch": 0.27, "grad_norm": 0.9032978495659435, "learning_rate": 1.7042156721845754e-05, "loss": 0.497, "step": 3369 }, { "epoch": 0.27, "grad_norm": 0.8688514896023216, "learning_rate": 1.7040287458946623e-05, "loss": 0.5186, "step": 3370 }, { "epoch": 0.27, "grad_norm": 0.8515062816030871, "learning_rate": 1.7038417708161817e-05, "loss": 0.58, "step": 3371 }, { "epoch": 0.27, "grad_norm": 0.879654908280097, "learning_rate": 1.7036547469620908e-05, "loss": 0.5517, "step": 3372 }, { "epoch": 0.27, "grad_norm": 0.9111200905537216, "learning_rate": 1.70346767434535e-05, "loss": 0.5802, "step": 3373 }, { "epoch": 0.27, "grad_norm": 0.9598633626437251, "learning_rate": 1.7032805529789233e-05, "loss": 0.5801, "step": 3374 }, { "epoch": 0.27, "grad_norm": 0.9596570514252659, "learning_rate": 1.7030933828757785e-05, "loss": 0.5795, "step": 3375 }, { "epoch": 0.27, "grad_norm": 0.961433589710115, "learning_rate": 1.7029061640488855e-05, "loss": 0.5757, "step": 3376 }, { "epoch": 0.27, "grad_norm": 0.9407600851727718, "learning_rate": 1.702718896511219e-05, "loss": 0.5584, "step": 3377 }, { "epoch": 0.27, "grad_norm": 0.8088236897988393, "learning_rate": 1.7025315802757558e-05, "loss": 0.5237, "step": 3378 }, { "epoch": 0.27, "grad_norm": 0.9058206622747288, "learning_rate": 1.7023442153554776e-05, "loss": 0.554, "step": 3379 }, { "epoch": 0.27, "grad_norm": 0.9935709406567976, "learning_rate": 1.7021568017633683e-05, "loss": 0.6106, "step": 3380 }, { "epoch": 0.27, "grad_norm": 0.9189552590843816, "learning_rate": 1.7019693395124153e-05, "loss": 0.5729, "step": 3381 }, { "epoch": 0.27, "grad_norm": 0.9358733733184447, "learning_rate": 1.70178182861561e-05, "loss": 0.5921, "step": 3382 }, { "epoch": 0.27, "grad_norm": 0.8672815001532652, "learning_rate": 1.701594269085946e-05, "loss": 0.6447, "step": 3383 }, { "epoch": 0.28, "grad_norm": 0.9180923664460106, "learning_rate": 1.701406660936422e-05, "loss": 0.6046, "step": 3384 }, { "epoch": 0.28, "grad_norm": 1.0033780298482418, "learning_rate": 1.7012190041800384e-05, "loss": 0.5508, "step": 3385 }, { "epoch": 0.28, "grad_norm": 0.9146946932023774, "learning_rate": 1.7010312988297993e-05, "loss": 0.589, "step": 3386 }, { "epoch": 0.28, "grad_norm": 0.9725045335715133, "learning_rate": 1.7008435448987134e-05, "loss": 0.6529, "step": 3387 }, { "epoch": 0.28, "grad_norm": 0.9226018976731254, "learning_rate": 1.7006557423997917e-05, "loss": 0.5691, "step": 3388 }, { "epoch": 0.28, "grad_norm": 0.9765084315182954, "learning_rate": 1.7004678913460483e-05, "loss": 0.6344, "step": 3389 }, { "epoch": 0.28, "grad_norm": 1.072975921776912, "learning_rate": 1.7002799917505014e-05, "loss": 0.5671, "step": 3390 }, { "epoch": 0.28, "grad_norm": 0.9540991904709908, "learning_rate": 1.700092043626172e-05, "loss": 0.5818, "step": 3391 }, { "epoch": 0.28, "grad_norm": 0.9795979551718766, "learning_rate": 1.6999040469860852e-05, "loss": 0.5807, "step": 3392 }, { "epoch": 0.28, "grad_norm": 0.8628947844434518, "learning_rate": 1.6997160018432688e-05, "loss": 0.5442, "step": 3393 }, { "epoch": 0.28, "grad_norm": 0.8811809802727653, "learning_rate": 1.6995279082107537e-05, "loss": 0.5267, "step": 3394 }, { "epoch": 0.28, "grad_norm": 0.8814467993180549, "learning_rate": 1.6993397661015754e-05, "loss": 0.5939, "step": 3395 }, { "epoch": 0.28, "grad_norm": 0.892270166357497, "learning_rate": 1.6991515755287715e-05, "loss": 0.5586, "step": 3396 }, { "epoch": 0.28, "grad_norm": 0.9363609939411836, "learning_rate": 1.6989633365053837e-05, "loss": 0.5826, "step": 3397 }, { "epoch": 0.28, "grad_norm": 0.9460263488365313, "learning_rate": 1.6987750490444565e-05, "loss": 0.5738, "step": 3398 }, { "epoch": 0.28, "grad_norm": 0.8830829644992519, "learning_rate": 1.6985867131590383e-05, "loss": 0.6229, "step": 3399 }, { "epoch": 0.28, "grad_norm": 1.0074862329859187, "learning_rate": 1.6983983288621807e-05, "loss": 0.6148, "step": 3400 }, { "epoch": 0.28, "grad_norm": 0.8830992932928085, "learning_rate": 1.6982098961669383e-05, "loss": 0.5447, "step": 3401 }, { "epoch": 0.28, "grad_norm": 0.9669231327429048, "learning_rate": 1.6980214150863692e-05, "loss": 0.5977, "step": 3402 }, { "epoch": 0.28, "grad_norm": 0.9375796919681045, "learning_rate": 1.6978328856335354e-05, "loss": 0.548, "step": 3403 }, { "epoch": 0.28, "grad_norm": 0.8929206405304951, "learning_rate": 1.6976443078215015e-05, "loss": 0.5252, "step": 3404 }, { "epoch": 0.28, "grad_norm": 0.8971001962743499, "learning_rate": 1.697455681663336e-05, "loss": 0.5643, "step": 3405 }, { "epoch": 0.28, "grad_norm": 0.8973473047538455, "learning_rate": 1.69726700717211e-05, "loss": 0.5758, "step": 3406 }, { "epoch": 0.28, "grad_norm": 1.0046297232651533, "learning_rate": 1.6970782843608994e-05, "loss": 0.5857, "step": 3407 }, { "epoch": 0.28, "grad_norm": 0.991860327566595, "learning_rate": 1.6968895132427817e-05, "loss": 0.5205, "step": 3408 }, { "epoch": 0.28, "grad_norm": 0.9344982001418214, "learning_rate": 1.696700693830839e-05, "loss": 0.5794, "step": 3409 }, { "epoch": 0.28, "grad_norm": 0.8598062778766361, "learning_rate": 1.6965118261381557e-05, "loss": 0.5485, "step": 3410 }, { "epoch": 0.28, "grad_norm": 0.9926075241949736, "learning_rate": 1.6963229101778215e-05, "loss": 0.5576, "step": 3411 }, { "epoch": 0.28, "grad_norm": 0.8706724751709342, "learning_rate": 1.696133945962927e-05, "loss": 0.5495, "step": 3412 }, { "epoch": 0.28, "grad_norm": 0.950973621277761, "learning_rate": 1.695944933506567e-05, "loss": 0.5053, "step": 3413 }, { "epoch": 0.28, "grad_norm": 1.0268360922092827, "learning_rate": 1.695755872821841e-05, "loss": 0.666, "step": 3414 }, { "epoch": 0.28, "grad_norm": 0.9053834277128515, "learning_rate": 1.6955667639218497e-05, "loss": 0.5685, "step": 3415 }, { "epoch": 0.28, "grad_norm": 1.082944483643275, "learning_rate": 1.695377606819699e-05, "loss": 0.5695, "step": 3416 }, { "epoch": 0.28, "grad_norm": 0.8542060396820743, "learning_rate": 1.6951884015284966e-05, "loss": 0.5983, "step": 3417 }, { "epoch": 0.28, "grad_norm": 0.9432598922230787, "learning_rate": 1.694999148061355e-05, "loss": 0.5653, "step": 3418 }, { "epoch": 0.28, "grad_norm": 0.9270649827866421, "learning_rate": 1.6948098464313886e-05, "loss": 0.5379, "step": 3419 }, { "epoch": 0.28, "grad_norm": 1.075767072500345, "learning_rate": 1.6946204966517165e-05, "loss": 0.5268, "step": 3420 }, { "epoch": 0.28, "grad_norm": 0.8705734453427241, "learning_rate": 1.6944310987354597e-05, "loss": 0.5886, "step": 3421 }, { "epoch": 0.28, "grad_norm": 0.9925730390794844, "learning_rate": 1.6942416526957438e-05, "loss": 0.6483, "step": 3422 }, { "epoch": 0.28, "grad_norm": 0.907248303985114, "learning_rate": 1.694052158545697e-05, "loss": 0.5032, "step": 3423 }, { "epoch": 0.28, "grad_norm": 0.9043231411925035, "learning_rate": 1.6938626162984516e-05, "loss": 0.5614, "step": 3424 }, { "epoch": 0.28, "grad_norm": 0.9461336445647923, "learning_rate": 1.6936730259671423e-05, "loss": 0.5063, "step": 3425 }, { "epoch": 0.28, "grad_norm": 0.9913856016305469, "learning_rate": 1.6934833875649074e-05, "loss": 0.5747, "step": 3426 }, { "epoch": 0.28, "grad_norm": 0.9253569554243637, "learning_rate": 1.693293701104889e-05, "loss": 0.5865, "step": 3427 }, { "epoch": 0.28, "grad_norm": 0.8644437828373465, "learning_rate": 1.693103966600232e-05, "loss": 0.5782, "step": 3428 }, { "epoch": 0.28, "grad_norm": 0.925353380941438, "learning_rate": 1.692914184064085e-05, "loss": 0.5667, "step": 3429 }, { "epoch": 0.28, "grad_norm": 0.9749291183376588, "learning_rate": 1.6927243535095995e-05, "loss": 0.5387, "step": 3430 }, { "epoch": 0.28, "grad_norm": 0.8819179929608888, "learning_rate": 1.6925344749499308e-05, "loss": 0.5265, "step": 3431 }, { "epoch": 0.28, "grad_norm": 0.8728789653879663, "learning_rate": 1.6923445483982376e-05, "loss": 0.5412, "step": 3432 }, { "epoch": 0.28, "grad_norm": 0.9042520287309848, "learning_rate": 1.6921545738676807e-05, "loss": 0.5677, "step": 3433 }, { "epoch": 0.28, "grad_norm": 0.9097494183903341, "learning_rate": 1.691964551371426e-05, "loss": 0.5969, "step": 3434 }, { "epoch": 0.28, "grad_norm": 0.9853158768766778, "learning_rate": 1.691774480922642e-05, "loss": 0.5842, "step": 3435 }, { "epoch": 0.28, "grad_norm": 0.8166520864997642, "learning_rate": 1.6915843625344997e-05, "loss": 0.5148, "step": 3436 }, { "epoch": 0.28, "grad_norm": 0.8609842442951521, "learning_rate": 1.6913941962201747e-05, "loss": 0.5372, "step": 3437 }, { "epoch": 0.28, "grad_norm": 0.8727021376495987, "learning_rate": 1.691203981992845e-05, "loss": 0.6025, "step": 3438 }, { "epoch": 0.28, "grad_norm": 0.9009716445148898, "learning_rate": 1.6910137198656925e-05, "loss": 0.5617, "step": 3439 }, { "epoch": 0.28, "grad_norm": 0.9791987469560515, "learning_rate": 1.6908234098519024e-05, "loss": 0.5147, "step": 3440 }, { "epoch": 0.28, "grad_norm": 0.9245555800454982, "learning_rate": 1.6906330519646622e-05, "loss": 0.6286, "step": 3441 }, { "epoch": 0.28, "grad_norm": 0.9655064426884022, "learning_rate": 1.6904426462171647e-05, "loss": 0.6019, "step": 3442 }, { "epoch": 0.28, "grad_norm": 0.8731797030694629, "learning_rate": 1.690252192622604e-05, "loss": 0.5452, "step": 3443 }, { "epoch": 0.28, "grad_norm": 0.930713438796544, "learning_rate": 1.6900616911941783e-05, "loss": 0.5488, "step": 3444 }, { "epoch": 0.28, "grad_norm": 0.9807267518738538, "learning_rate": 1.6898711419450897e-05, "loss": 0.5992, "step": 3445 }, { "epoch": 0.28, "grad_norm": 0.9244327054086132, "learning_rate": 1.689680544888543e-05, "loss": 0.5108, "step": 3446 }, { "epoch": 0.28, "grad_norm": 0.8835917131112488, "learning_rate": 1.6894899000377462e-05, "loss": 0.536, "step": 3447 }, { "epoch": 0.28, "grad_norm": 0.9100881276299515, "learning_rate": 1.689299207405911e-05, "loss": 0.6109, "step": 3448 }, { "epoch": 0.28, "grad_norm": 0.9843100087872552, "learning_rate": 1.6891084670062517e-05, "loss": 0.5497, "step": 3449 }, { "epoch": 0.28, "grad_norm": 0.9185017074722286, "learning_rate": 1.6889176788519876e-05, "loss": 0.5741, "step": 3450 }, { "epoch": 0.28, "grad_norm": 0.8697110048282695, "learning_rate": 1.6887268429563387e-05, "loss": 0.5425, "step": 3451 }, { "epoch": 0.28, "grad_norm": 1.0280757547635149, "learning_rate": 1.688535959332531e-05, "loss": 0.6179, "step": 3452 }, { "epoch": 0.28, "grad_norm": 1.0022941901330085, "learning_rate": 1.688345027993792e-05, "loss": 0.6289, "step": 3453 }, { "epoch": 0.28, "grad_norm": 1.0396302382621723, "learning_rate": 1.6881540489533527e-05, "loss": 0.6032, "step": 3454 }, { "epoch": 0.28, "grad_norm": 0.9853194918842026, "learning_rate": 1.6879630222244487e-05, "loss": 0.6601, "step": 3455 }, { "epoch": 0.28, "grad_norm": 0.9947368061129188, "learning_rate": 1.6877719478203172e-05, "loss": 0.4907, "step": 3456 }, { "epoch": 0.28, "grad_norm": 0.8532268987285451, "learning_rate": 1.6875808257541998e-05, "loss": 0.5378, "step": 3457 }, { "epoch": 0.28, "grad_norm": 0.9071315028978325, "learning_rate": 1.6873896560393413e-05, "loss": 0.5627, "step": 3458 }, { "epoch": 0.28, "grad_norm": 0.9136588626603805, "learning_rate": 1.687198438688989e-05, "loss": 0.5592, "step": 3459 }, { "epoch": 0.28, "grad_norm": 0.9719371861308499, "learning_rate": 1.6870071737163948e-05, "loss": 0.5675, "step": 3460 }, { "epoch": 0.28, "grad_norm": 0.9505609502388157, "learning_rate": 1.6868158611348124e-05, "loss": 0.6057, "step": 3461 }, { "epoch": 0.28, "grad_norm": 0.8039359178601575, "learning_rate": 1.6866245009575e-05, "loss": 0.4808, "step": 3462 }, { "epoch": 0.28, "grad_norm": 0.9390886461339968, "learning_rate": 1.686433093197719e-05, "loss": 0.6191, "step": 3463 }, { "epoch": 0.28, "grad_norm": 0.9434824431556162, "learning_rate": 1.686241637868734e-05, "loss": 0.5357, "step": 3464 }, { "epoch": 0.28, "grad_norm": 0.9336655413258358, "learning_rate": 1.6860501349838114e-05, "loss": 0.545, "step": 3465 }, { "epoch": 0.28, "grad_norm": 0.9940741569523487, "learning_rate": 1.685858584556223e-05, "loss": 0.5828, "step": 3466 }, { "epoch": 0.28, "grad_norm": 0.9147782411036046, "learning_rate": 1.6856669865992437e-05, "loss": 0.5497, "step": 3467 }, { "epoch": 0.28, "grad_norm": 0.846982347796428, "learning_rate": 1.68547534112615e-05, "loss": 0.5163, "step": 3468 }, { "epoch": 0.28, "grad_norm": 0.9498171544263488, "learning_rate": 1.685283648150223e-05, "loss": 0.6197, "step": 3469 }, { "epoch": 0.28, "grad_norm": 0.9199470650867926, "learning_rate": 1.6850919076847474e-05, "loss": 0.6108, "step": 3470 }, { "epoch": 0.28, "grad_norm": 1.0394969040093853, "learning_rate": 1.68490011974301e-05, "loss": 0.5443, "step": 3471 }, { "epoch": 0.28, "grad_norm": 0.8709749528807426, "learning_rate": 1.684708284338302e-05, "loss": 0.5751, "step": 3472 }, { "epoch": 0.28, "grad_norm": 0.855026890162834, "learning_rate": 1.684516401483917e-05, "loss": 0.5543, "step": 3473 }, { "epoch": 0.28, "grad_norm": 0.9374281824638894, "learning_rate": 1.6843244711931526e-05, "loss": 0.6044, "step": 3474 }, { "epoch": 0.28, "grad_norm": 0.9238721362463175, "learning_rate": 1.6841324934793096e-05, "loss": 0.558, "step": 3475 }, { "epoch": 0.28, "grad_norm": 0.9072585266971033, "learning_rate": 1.6839404683556914e-05, "loss": 0.4736, "step": 3476 }, { "epoch": 0.28, "grad_norm": 0.9143915587444463, "learning_rate": 1.6837483958356054e-05, "loss": 0.6134, "step": 3477 }, { "epoch": 0.28, "grad_norm": 0.983794665294586, "learning_rate": 1.6835562759323622e-05, "loss": 0.5457, "step": 3478 }, { "epoch": 0.28, "grad_norm": 0.9357786768311311, "learning_rate": 1.683364108659275e-05, "loss": 0.6289, "step": 3479 }, { "epoch": 0.28, "grad_norm": 0.8728876774010064, "learning_rate": 1.6831718940296617e-05, "loss": 0.5035, "step": 3480 }, { "epoch": 0.28, "grad_norm": 0.8528829004574374, "learning_rate": 1.6829796320568416e-05, "loss": 0.6151, "step": 3481 }, { "epoch": 0.28, "grad_norm": 0.954694895551522, "learning_rate": 1.6827873227541393e-05, "loss": 0.6269, "step": 3482 }, { "epoch": 0.28, "grad_norm": 0.9438303988784065, "learning_rate": 1.6825949661348812e-05, "loss": 0.5902, "step": 3483 }, { "epoch": 0.28, "grad_norm": 0.9450631524045531, "learning_rate": 1.682402562212397e-05, "loss": 0.5956, "step": 3484 }, { "epoch": 0.28, "grad_norm": 1.0221615000328568, "learning_rate": 1.6822101110000207e-05, "loss": 0.6432, "step": 3485 }, { "epoch": 0.28, "grad_norm": 0.9340166290868366, "learning_rate": 1.6820176125110886e-05, "loss": 0.6274, "step": 3486 }, { "epoch": 0.28, "grad_norm": 0.9159751842545095, "learning_rate": 1.681825066758941e-05, "loss": 0.5367, "step": 3487 }, { "epoch": 0.28, "grad_norm": 0.8341778683720619, "learning_rate": 1.6816324737569215e-05, "loss": 0.5904, "step": 3488 }, { "epoch": 0.28, "grad_norm": 0.9721777267639525, "learning_rate": 1.681439833518376e-05, "loss": 0.5366, "step": 3489 }, { "epoch": 0.28, "grad_norm": 0.8214085641113914, "learning_rate": 1.681247146056654e-05, "loss": 0.6098, "step": 3490 }, { "epoch": 0.28, "grad_norm": 0.8811256460597657, "learning_rate": 1.6810544113851096e-05, "loss": 0.5111, "step": 3491 }, { "epoch": 0.28, "grad_norm": 0.9484985396662438, "learning_rate": 1.6808616295170983e-05, "loss": 0.5921, "step": 3492 }, { "epoch": 0.28, "grad_norm": 0.8813089051357941, "learning_rate": 1.6806688004659803e-05, "loss": 0.4919, "step": 3493 }, { "epoch": 0.28, "grad_norm": 0.9816279349466661, "learning_rate": 1.6804759242451177e-05, "loss": 0.5854, "step": 3494 }, { "epoch": 0.28, "grad_norm": 0.862825517856782, "learning_rate": 1.6802830008678777e-05, "loss": 0.5217, "step": 3495 }, { "epoch": 0.28, "grad_norm": 0.9204080613604179, "learning_rate": 1.6800900303476286e-05, "loss": 0.5527, "step": 3496 }, { "epoch": 0.28, "grad_norm": 0.9825728162668876, "learning_rate": 1.679897012697744e-05, "loss": 0.5336, "step": 3497 }, { "epoch": 0.28, "grad_norm": 0.9982188434933679, "learning_rate": 1.6797039479315994e-05, "loss": 0.6479, "step": 3498 }, { "epoch": 0.28, "grad_norm": 0.913708551786946, "learning_rate": 1.679510836062574e-05, "loss": 0.5342, "step": 3499 }, { "epoch": 0.28, "grad_norm": 0.9274094365407106, "learning_rate": 1.6793176771040504e-05, "loss": 0.5949, "step": 3500 }, { "epoch": 0.28, "grad_norm": 0.9273663856313644, "learning_rate": 1.6791244710694144e-05, "loss": 0.5546, "step": 3501 }, { "epoch": 0.28, "grad_norm": 0.98301986004304, "learning_rate": 1.678931217972055e-05, "loss": 0.4835, "step": 3502 }, { "epoch": 0.28, "grad_norm": 0.84103745533568, "learning_rate": 1.6787379178253642e-05, "loss": 0.4985, "step": 3503 }, { "epoch": 0.28, "grad_norm": 0.9698588777438701, "learning_rate": 1.678544570642738e-05, "loss": 0.5881, "step": 3504 }, { "epoch": 0.28, "grad_norm": 0.951776115340784, "learning_rate": 1.6783511764375745e-05, "loss": 0.6007, "step": 3505 }, { "epoch": 0.28, "grad_norm": 0.9741451637827968, "learning_rate": 1.678157735223277e-05, "loss": 0.5965, "step": 3506 }, { "epoch": 0.29, "grad_norm": 0.9065524129003147, "learning_rate": 1.6779642470132487e-05, "loss": 0.5405, "step": 3507 }, { "epoch": 0.29, "grad_norm": 0.9250130238196568, "learning_rate": 1.6777707118209004e-05, "loss": 0.6035, "step": 3508 }, { "epoch": 0.29, "grad_norm": 0.9393499737202737, "learning_rate": 1.6775771296596427e-05, "loss": 0.5909, "step": 3509 }, { "epoch": 0.29, "grad_norm": 0.8589539160299299, "learning_rate": 1.677383500542891e-05, "loss": 0.5071, "step": 3510 }, { "epoch": 0.29, "grad_norm": 0.8916436165229539, "learning_rate": 1.6771898244840636e-05, "loss": 0.5841, "step": 3511 }, { "epoch": 0.29, "grad_norm": 0.8795522853651708, "learning_rate": 1.676996101496582e-05, "loss": 0.5698, "step": 3512 }, { "epoch": 0.29, "grad_norm": 0.9140986536382631, "learning_rate": 1.6768023315938708e-05, "loss": 0.6178, "step": 3513 }, { "epoch": 0.29, "grad_norm": 1.0834371963750848, "learning_rate": 1.6766085147893583e-05, "loss": 0.6471, "step": 3514 }, { "epoch": 0.29, "grad_norm": 0.935683335427111, "learning_rate": 1.6764146510964762e-05, "loss": 0.6731, "step": 3515 }, { "epoch": 0.29, "grad_norm": 0.8334877532242587, "learning_rate": 1.676220740528659e-05, "loss": 0.4929, "step": 3516 }, { "epoch": 0.29, "grad_norm": 0.8479848617399672, "learning_rate": 1.676026783099344e-05, "loss": 0.5383, "step": 3517 }, { "epoch": 0.29, "grad_norm": 0.9399644753508193, "learning_rate": 1.6758327788219722e-05, "loss": 0.5711, "step": 3518 }, { "epoch": 0.29, "grad_norm": 0.8976978401914122, "learning_rate": 1.6756387277099885e-05, "loss": 0.5118, "step": 3519 }, { "epoch": 0.29, "grad_norm": 0.9096358011034394, "learning_rate": 1.6754446297768404e-05, "loss": 0.5343, "step": 3520 }, { "epoch": 0.29, "grad_norm": 0.8756936077029496, "learning_rate": 1.6752504850359785e-05, "loss": 0.5871, "step": 3521 }, { "epoch": 0.29, "grad_norm": 0.8481418927380738, "learning_rate": 1.6750562935008572e-05, "loss": 0.5126, "step": 3522 }, { "epoch": 0.29, "grad_norm": 0.875692294080384, "learning_rate": 1.6748620551849333e-05, "loss": 0.6195, "step": 3523 }, { "epoch": 0.29, "grad_norm": 0.9289899034178508, "learning_rate": 1.6746677701016675e-05, "loss": 0.6014, "step": 3524 }, { "epoch": 0.29, "grad_norm": 0.798198379662321, "learning_rate": 1.674473438264524e-05, "loss": 0.5155, "step": 3525 }, { "epoch": 0.29, "grad_norm": 0.8364506278435189, "learning_rate": 1.674279059686969e-05, "loss": 0.5638, "step": 3526 }, { "epoch": 0.29, "grad_norm": 0.9674610184024626, "learning_rate": 1.6740846343824734e-05, "loss": 0.5908, "step": 3527 }, { "epoch": 0.29, "grad_norm": 0.8897752692194084, "learning_rate": 1.6738901623645107e-05, "loss": 0.5301, "step": 3528 }, { "epoch": 0.29, "grad_norm": 1.0070755438073646, "learning_rate": 1.6736956436465573e-05, "loss": 0.6214, "step": 3529 }, { "epoch": 0.29, "grad_norm": 0.853310413688308, "learning_rate": 1.6735010782420934e-05, "loss": 0.5819, "step": 3530 }, { "epoch": 0.29, "grad_norm": 1.002604659974855, "learning_rate": 1.6733064661646023e-05, "loss": 0.5757, "step": 3531 }, { "epoch": 0.29, "grad_norm": 0.8825996930380876, "learning_rate": 1.67311180742757e-05, "loss": 0.5913, "step": 3532 }, { "epoch": 0.29, "grad_norm": 0.9185858297292431, "learning_rate": 1.672917102044487e-05, "loss": 0.6152, "step": 3533 }, { "epoch": 0.29, "grad_norm": 0.898173400038949, "learning_rate": 1.6727223500288458e-05, "loss": 0.6118, "step": 3534 }, { "epoch": 0.29, "grad_norm": 1.0034091029734553, "learning_rate": 1.672527551394142e-05, "loss": 0.5808, "step": 3535 }, { "epoch": 0.29, "grad_norm": 0.882816022561754, "learning_rate": 1.6723327061538753e-05, "loss": 0.5979, "step": 3536 }, { "epoch": 0.29, "grad_norm": 0.8911535124949506, "learning_rate": 1.672137814321549e-05, "loss": 0.5396, "step": 3537 }, { "epoch": 0.29, "grad_norm": 0.8796186206450183, "learning_rate": 1.6719428759106676e-05, "loss": 0.5201, "step": 3538 }, { "epoch": 0.29, "grad_norm": 0.9169457655302597, "learning_rate": 1.6717478909347417e-05, "loss": 0.5862, "step": 3539 }, { "epoch": 0.29, "grad_norm": 0.9814266566832116, "learning_rate": 1.671552859407282e-05, "loss": 0.6225, "step": 3540 }, { "epoch": 0.29, "grad_norm": 0.8622223722404974, "learning_rate": 1.6713577813418058e-05, "loss": 0.5983, "step": 3541 }, { "epoch": 0.29, "grad_norm": 0.9993934642157488, "learning_rate": 1.67116265675183e-05, "loss": 0.5768, "step": 3542 }, { "epoch": 0.29, "grad_norm": 0.9092726740215555, "learning_rate": 1.6709674856508775e-05, "loss": 0.565, "step": 3543 }, { "epoch": 0.29, "grad_norm": 0.9232881931366909, "learning_rate": 1.6707722680524735e-05, "loss": 0.5864, "step": 3544 }, { "epoch": 0.29, "grad_norm": 0.9530394049860118, "learning_rate": 1.6705770039701464e-05, "loss": 0.5408, "step": 3545 }, { "epoch": 0.29, "grad_norm": 0.9506612173293659, "learning_rate": 1.670381693417428e-05, "loss": 0.6284, "step": 3546 }, { "epoch": 0.29, "grad_norm": 0.8434394278434862, "learning_rate": 1.6701863364078524e-05, "loss": 0.5427, "step": 3547 }, { "epoch": 0.29, "grad_norm": 0.9724131503049097, "learning_rate": 1.6699909329549583e-05, "loss": 0.4966, "step": 3548 }, { "epoch": 0.29, "grad_norm": 0.9893191331651435, "learning_rate": 1.669795483072287e-05, "loss": 0.5897, "step": 3549 }, { "epoch": 0.29, "grad_norm": 0.8701298054577694, "learning_rate": 1.6695999867733824e-05, "loss": 0.5424, "step": 3550 }, { "epoch": 0.29, "grad_norm": 0.9389970593482257, "learning_rate": 1.669404444071793e-05, "loss": 0.6622, "step": 3551 }, { "epoch": 0.29, "grad_norm": 0.9024202866939837, "learning_rate": 1.6692088549810695e-05, "loss": 0.4986, "step": 3552 }, { "epoch": 0.29, "grad_norm": 0.9196638626707861, "learning_rate": 1.6690132195147655e-05, "loss": 0.5415, "step": 3553 }, { "epoch": 0.29, "grad_norm": 0.9315837796528372, "learning_rate": 1.668817537686439e-05, "loss": 0.5866, "step": 3554 }, { "epoch": 0.29, "grad_norm": 0.9883947924463582, "learning_rate": 1.6686218095096506e-05, "loss": 0.5928, "step": 3555 }, { "epoch": 0.29, "grad_norm": 0.9627628774229461, "learning_rate": 1.6684260349979637e-05, "loss": 0.5831, "step": 3556 }, { "epoch": 0.29, "grad_norm": 0.9476768527246094, "learning_rate": 1.6682302141649452e-05, "loss": 0.5994, "step": 3557 }, { "epoch": 0.29, "grad_norm": 0.9485565562128317, "learning_rate": 1.668034347024166e-05, "loss": 0.5723, "step": 3558 }, { "epoch": 0.29, "grad_norm": 0.9760330608078868, "learning_rate": 1.667838433589199e-05, "loss": 0.5349, "step": 3559 }, { "epoch": 0.29, "grad_norm": 0.8864957985018198, "learning_rate": 1.6676424738736208e-05, "loss": 0.5838, "step": 3560 }, { "epoch": 0.29, "grad_norm": 0.9307026791978568, "learning_rate": 1.6674464678910117e-05, "loss": 0.5658, "step": 3561 }, { "epoch": 0.29, "grad_norm": 0.9044244733439744, "learning_rate": 1.667250415654954e-05, "loss": 0.5329, "step": 3562 }, { "epoch": 0.29, "grad_norm": 0.9358310455208178, "learning_rate": 1.6670543171790347e-05, "loss": 0.6028, "step": 3563 }, { "epoch": 0.29, "grad_norm": 0.9945180142864156, "learning_rate": 1.6668581724768423e-05, "loss": 0.5394, "step": 3564 }, { "epoch": 0.29, "grad_norm": 0.8602807961739487, "learning_rate": 1.6666619815619703e-05, "loss": 0.539, "step": 3565 }, { "epoch": 0.29, "grad_norm": 0.8948693345748759, "learning_rate": 1.6664657444480145e-05, "loss": 0.6245, "step": 3566 }, { "epoch": 0.29, "grad_norm": 0.8565794868024554, "learning_rate": 1.666269461148574e-05, "loss": 0.4878, "step": 3567 }, { "epoch": 0.29, "grad_norm": 0.866446067109164, "learning_rate": 1.6660731316772503e-05, "loss": 0.5559, "step": 3568 }, { "epoch": 0.29, "grad_norm": 0.9300978205223425, "learning_rate": 1.6658767560476494e-05, "loss": 0.5421, "step": 3569 }, { "epoch": 0.29, "grad_norm": 0.8743919086457418, "learning_rate": 1.6656803342733804e-05, "loss": 0.6016, "step": 3570 }, { "epoch": 0.29, "grad_norm": 0.9887159020971205, "learning_rate": 1.6654838663680542e-05, "loss": 0.5941, "step": 3571 }, { "epoch": 0.29, "grad_norm": 0.9935905572056851, "learning_rate": 1.6652873523452867e-05, "loss": 0.5825, "step": 3572 }, { "epoch": 0.29, "grad_norm": 0.9917578015678681, "learning_rate": 1.6650907922186958e-05, "loss": 0.5464, "step": 3573 }, { "epoch": 0.29, "grad_norm": 0.8836985763212661, "learning_rate": 1.6648941860019028e-05, "loss": 0.5667, "step": 3574 }, { "epoch": 0.29, "grad_norm": 0.8837790954049046, "learning_rate": 1.6646975337085323e-05, "loss": 0.5275, "step": 3575 }, { "epoch": 0.29, "grad_norm": 1.0543057172385015, "learning_rate": 1.6645008353522122e-05, "loss": 0.6152, "step": 3576 }, { "epoch": 0.29, "grad_norm": 0.9161591991044842, "learning_rate": 1.6643040909465743e-05, "loss": 0.5601, "step": 3577 }, { "epoch": 0.29, "grad_norm": 0.9601951864694586, "learning_rate": 1.6641073005052516e-05, "loss": 0.5857, "step": 3578 }, { "epoch": 0.29, "grad_norm": 0.9765714977543812, "learning_rate": 1.663910464041882e-05, "loss": 0.5903, "step": 3579 }, { "epoch": 0.29, "grad_norm": 0.8749919554487148, "learning_rate": 1.6637135815701066e-05, "loss": 0.5547, "step": 3580 }, { "epoch": 0.29, "grad_norm": 0.943868588563792, "learning_rate": 1.663516653103568e-05, "loss": 0.5624, "step": 3581 }, { "epoch": 0.29, "grad_norm": 0.9406057711354047, "learning_rate": 1.6633196786559143e-05, "loss": 0.5454, "step": 3582 }, { "epoch": 0.29, "grad_norm": 0.9602637556293583, "learning_rate": 1.6631226582407954e-05, "loss": 0.6411, "step": 3583 }, { "epoch": 0.29, "grad_norm": 0.8818966099422427, "learning_rate": 1.662925591871864e-05, "loss": 0.5759, "step": 3584 }, { "epoch": 0.29, "grad_norm": 1.0002142171935597, "learning_rate": 1.6627284795627777e-05, "loss": 0.5652, "step": 3585 }, { "epoch": 0.29, "grad_norm": 0.8726073504452184, "learning_rate": 1.6625313213271953e-05, "loss": 0.5773, "step": 3586 }, { "epoch": 0.29, "grad_norm": 0.9105240364055169, "learning_rate": 1.66233411717878e-05, "loss": 0.4951, "step": 3587 }, { "epoch": 0.29, "grad_norm": 0.8467521769576934, "learning_rate": 1.6621368671311973e-05, "loss": 0.5547, "step": 3588 }, { "epoch": 0.29, "grad_norm": 0.9618991888429478, "learning_rate": 1.6619395711981183e-05, "loss": 0.64, "step": 3589 }, { "epoch": 0.29, "grad_norm": 0.9131903387125504, "learning_rate": 1.661742229393213e-05, "loss": 0.6045, "step": 3590 }, { "epoch": 0.29, "grad_norm": 0.9734975634683273, "learning_rate": 1.6615448417301588e-05, "loss": 0.6041, "step": 3591 }, { "epoch": 0.29, "grad_norm": 0.9554196630977829, "learning_rate": 1.6613474082226338e-05, "loss": 0.6134, "step": 3592 }, { "epoch": 0.29, "grad_norm": 1.0722452324867415, "learning_rate": 1.6611499288843194e-05, "loss": 0.6594, "step": 3593 }, { "epoch": 0.29, "grad_norm": 0.8659926589103515, "learning_rate": 1.660952403728902e-05, "loss": 0.5241, "step": 3594 }, { "epoch": 0.29, "grad_norm": 1.0027663664973614, "learning_rate": 1.6607548327700694e-05, "loss": 0.671, "step": 3595 }, { "epoch": 0.29, "grad_norm": 0.8707474663912872, "learning_rate": 1.6605572160215126e-05, "loss": 0.5783, "step": 3596 }, { "epoch": 0.29, "grad_norm": 1.0087074986539963, "learning_rate": 1.6603595534969266e-05, "loss": 0.638, "step": 3597 }, { "epoch": 0.29, "grad_norm": 0.9712820924532131, "learning_rate": 1.6601618452100098e-05, "loss": 0.6031, "step": 3598 }, { "epoch": 0.29, "grad_norm": 0.9282173136440852, "learning_rate": 1.659964091174462e-05, "loss": 0.5563, "step": 3599 }, { "epoch": 0.29, "grad_norm": 0.9858593134307094, "learning_rate": 1.6597662914039885e-05, "loss": 0.5695, "step": 3600 }, { "epoch": 0.29, "grad_norm": 0.9790081161566879, "learning_rate": 1.659568445912296e-05, "loss": 0.6717, "step": 3601 }, { "epoch": 0.29, "grad_norm": 0.92396838455597, "learning_rate": 1.6593705547130955e-05, "loss": 0.5798, "step": 3602 }, { "epoch": 0.29, "grad_norm": 0.8781891791299089, "learning_rate": 1.6591726178201e-05, "loss": 0.5568, "step": 3603 }, { "epoch": 0.29, "grad_norm": 0.9532094188863465, "learning_rate": 1.6589746352470275e-05, "loss": 0.5711, "step": 3604 }, { "epoch": 0.29, "grad_norm": 0.9605308425776762, "learning_rate": 1.6587766070075965e-05, "loss": 0.5674, "step": 3605 }, { "epoch": 0.29, "grad_norm": 0.9075877004257134, "learning_rate": 1.6585785331155312e-05, "loss": 0.5982, "step": 3606 }, { "epoch": 0.29, "grad_norm": 0.9778240081915002, "learning_rate": 1.6583804135845582e-05, "loss": 0.6316, "step": 3607 }, { "epoch": 0.29, "grad_norm": 0.8063905073185061, "learning_rate": 1.658182248428406e-05, "loss": 0.5393, "step": 3608 }, { "epoch": 0.29, "grad_norm": 0.9095242303970597, "learning_rate": 1.6579840376608076e-05, "loss": 0.5845, "step": 3609 }, { "epoch": 0.29, "grad_norm": 0.9260816695897505, "learning_rate": 1.6577857812954994e-05, "loss": 0.5652, "step": 3610 }, { "epoch": 0.29, "grad_norm": 0.998827823223787, "learning_rate": 1.65758747934622e-05, "loss": 0.528, "step": 3611 }, { "epoch": 0.29, "grad_norm": 0.9114557389191057, "learning_rate": 1.6573891318267113e-05, "loss": 0.629, "step": 3612 }, { "epoch": 0.29, "grad_norm": 0.9823821174946192, "learning_rate": 1.6571907387507194e-05, "loss": 0.6058, "step": 3613 }, { "epoch": 0.29, "grad_norm": 1.0511732864639134, "learning_rate": 1.6569923001319916e-05, "loss": 0.5967, "step": 3614 }, { "epoch": 0.29, "grad_norm": 0.9686847202243349, "learning_rate": 1.6567938159842807e-05, "loss": 0.6398, "step": 3615 }, { "epoch": 0.29, "grad_norm": 0.8933691431087123, "learning_rate": 1.6565952863213407e-05, "loss": 0.5597, "step": 3616 }, { "epoch": 0.29, "grad_norm": 0.8945158188311984, "learning_rate": 1.65639671115693e-05, "loss": 0.5664, "step": 3617 }, { "epoch": 0.29, "grad_norm": 0.8154853016245658, "learning_rate": 1.6561980905048087e-05, "loss": 0.5101, "step": 3618 }, { "epoch": 0.29, "grad_norm": 0.8273776997836692, "learning_rate": 1.6559994243787427e-05, "loss": 0.4892, "step": 3619 }, { "epoch": 0.29, "grad_norm": 1.0302635716889421, "learning_rate": 1.655800712792498e-05, "loss": 0.5976, "step": 3620 }, { "epoch": 0.29, "grad_norm": 1.0043279125071218, "learning_rate": 1.6556019557598453e-05, "loss": 0.5788, "step": 3621 }, { "epoch": 0.29, "grad_norm": 0.9753388668576959, "learning_rate": 1.6554031532945588e-05, "loss": 0.6504, "step": 3622 }, { "epoch": 0.29, "grad_norm": 0.9597580412921606, "learning_rate": 1.6552043054104153e-05, "loss": 0.5883, "step": 3623 }, { "epoch": 0.29, "grad_norm": 1.0242495713982198, "learning_rate": 1.6550054121211946e-05, "loss": 0.5753, "step": 3624 }, { "epoch": 0.29, "grad_norm": 0.7932872760131129, "learning_rate": 1.6548064734406798e-05, "loss": 0.4609, "step": 3625 }, { "epoch": 0.29, "grad_norm": 0.9463995135467075, "learning_rate": 1.654607489382657e-05, "loss": 0.588, "step": 3626 }, { "epoch": 0.29, "grad_norm": 0.9566650907230213, "learning_rate": 1.654408459960916e-05, "loss": 0.6569, "step": 3627 }, { "epoch": 0.29, "grad_norm": 0.9198226238212484, "learning_rate": 1.6542093851892493e-05, "loss": 0.5615, "step": 3628 }, { "epoch": 0.29, "grad_norm": 0.9137048670369079, "learning_rate": 1.654010265081452e-05, "loss": 0.5889, "step": 3629 }, { "epoch": 0.3, "grad_norm": 1.0686054851272107, "learning_rate": 1.653811099651324e-05, "loss": 0.5917, "step": 3630 }, { "epoch": 0.3, "grad_norm": 0.8316623722805385, "learning_rate": 1.6536118889126665e-05, "loss": 0.5438, "step": 3631 }, { "epoch": 0.3, "grad_norm": 0.900597140469948, "learning_rate": 1.6534126328792846e-05, "loss": 0.5737, "step": 3632 }, { "epoch": 0.3, "grad_norm": 0.983236693684268, "learning_rate": 1.653213331564987e-05, "loss": 0.5542, "step": 3633 }, { "epoch": 0.3, "grad_norm": 0.8923703352786861, "learning_rate": 1.653013984983585e-05, "loss": 0.5854, "step": 3634 }, { "epoch": 0.3, "grad_norm": 0.860331550092371, "learning_rate": 1.6528145931488934e-05, "loss": 0.4834, "step": 3635 }, { "epoch": 0.3, "grad_norm": 0.934322417674138, "learning_rate": 1.6526151560747294e-05, "loss": 0.5552, "step": 3636 }, { "epoch": 0.3, "grad_norm": 1.0653721164248258, "learning_rate": 1.6524156737749132e-05, "loss": 0.6018, "step": 3637 }, { "epoch": 0.3, "grad_norm": 0.8718089486691982, "learning_rate": 1.6522161462632705e-05, "loss": 0.5718, "step": 3638 }, { "epoch": 0.3, "grad_norm": 0.9025461147431527, "learning_rate": 1.6520165735536268e-05, "loss": 0.4984, "step": 3639 }, { "epoch": 0.3, "grad_norm": 1.0448045557872034, "learning_rate": 1.651816955659813e-05, "loss": 0.6191, "step": 3640 }, { "epoch": 0.3, "grad_norm": 0.9643104817920352, "learning_rate": 1.6516172925956624e-05, "loss": 0.5801, "step": 3641 }, { "epoch": 0.3, "grad_norm": 0.8573755137208006, "learning_rate": 1.6514175843750112e-05, "loss": 0.5469, "step": 3642 }, { "epoch": 0.3, "grad_norm": 0.9168281111480824, "learning_rate": 1.6512178310116994e-05, "loss": 0.498, "step": 3643 }, { "epoch": 0.3, "grad_norm": 0.9980497424146102, "learning_rate": 1.6510180325195696e-05, "loss": 0.6049, "step": 3644 }, { "epoch": 0.3, "grad_norm": 0.8352927441471474, "learning_rate": 1.6508181889124678e-05, "loss": 0.562, "step": 3645 }, { "epoch": 0.3, "grad_norm": 0.9098235155117534, "learning_rate": 1.650618300204242e-05, "loss": 0.5712, "step": 3646 }, { "epoch": 0.3, "grad_norm": 0.9318970570266037, "learning_rate": 1.6504183664087458e-05, "loss": 0.5857, "step": 3647 }, { "epoch": 0.3, "grad_norm": 0.9322556307904534, "learning_rate": 1.6502183875398335e-05, "loss": 0.4988, "step": 3648 }, { "epoch": 0.3, "grad_norm": 0.8689832946583307, "learning_rate": 1.6500183636113637e-05, "loss": 0.5214, "step": 3649 }, { "epoch": 0.3, "grad_norm": 0.9994019497128986, "learning_rate": 1.649818294637198e-05, "loss": 0.589, "step": 3650 }, { "epoch": 0.3, "grad_norm": 0.9892900604091126, "learning_rate": 1.6496181806312005e-05, "loss": 0.5811, "step": 3651 }, { "epoch": 0.3, "grad_norm": 0.8117548953306063, "learning_rate": 1.6494180216072397e-05, "loss": 0.5841, "step": 3652 }, { "epoch": 0.3, "grad_norm": 0.9213653596995112, "learning_rate": 1.649217817579186e-05, "loss": 0.5614, "step": 3653 }, { "epoch": 0.3, "grad_norm": 0.988994948049542, "learning_rate": 1.6490175685609133e-05, "loss": 0.5912, "step": 3654 }, { "epoch": 0.3, "grad_norm": 0.9457260692079452, "learning_rate": 1.6488172745662984e-05, "loss": 0.5493, "step": 3655 }, { "epoch": 0.3, "grad_norm": 0.8973934121192513, "learning_rate": 1.6486169356092224e-05, "loss": 0.6416, "step": 3656 }, { "epoch": 0.3, "grad_norm": 1.0003701542148136, "learning_rate": 1.648416551703568e-05, "loss": 0.601, "step": 3657 }, { "epoch": 0.3, "grad_norm": 0.9375819350879385, "learning_rate": 1.6482161228632217e-05, "loss": 0.5633, "step": 3658 }, { "epoch": 0.3, "grad_norm": 0.8715810769204608, "learning_rate": 1.648015649102073e-05, "loss": 0.5143, "step": 3659 }, { "epoch": 0.3, "grad_norm": 0.956343085905747, "learning_rate": 1.6478151304340144e-05, "loss": 0.598, "step": 3660 }, { "epoch": 0.3, "grad_norm": 0.8466436290465038, "learning_rate": 1.647614566872942e-05, "loss": 0.5448, "step": 3661 }, { "epoch": 0.3, "grad_norm": 1.0018889533958015, "learning_rate": 1.6474139584327548e-05, "loss": 0.6558, "step": 3662 }, { "epoch": 0.3, "grad_norm": 0.9768304608041964, "learning_rate": 1.647213305127354e-05, "loss": 0.4928, "step": 3663 }, { "epoch": 0.3, "grad_norm": 0.9476325871815199, "learning_rate": 1.6470126069706456e-05, "loss": 0.6763, "step": 3664 }, { "epoch": 0.3, "grad_norm": 0.9116612300802762, "learning_rate": 1.6468118639765376e-05, "loss": 0.5577, "step": 3665 }, { "epoch": 0.3, "grad_norm": 0.9705610218173512, "learning_rate": 1.646611076158941e-05, "loss": 0.6202, "step": 3666 }, { "epoch": 0.3, "grad_norm": 0.9044189603954035, "learning_rate": 1.6464102435317702e-05, "loss": 0.5508, "step": 3667 }, { "epoch": 0.3, "grad_norm": 0.8682864121536775, "learning_rate": 1.6462093661089432e-05, "loss": 0.643, "step": 3668 }, { "epoch": 0.3, "grad_norm": 0.9042250301625764, "learning_rate": 1.64600844390438e-05, "loss": 0.614, "step": 3669 }, { "epoch": 0.3, "grad_norm": 0.915033666481628, "learning_rate": 1.6458074769320046e-05, "loss": 0.5764, "step": 3670 }, { "epoch": 0.3, "grad_norm": 0.8218929072923022, "learning_rate": 1.6456064652057443e-05, "loss": 0.5159, "step": 3671 }, { "epoch": 0.3, "grad_norm": 0.8778388677902503, "learning_rate": 1.6454054087395284e-05, "loss": 0.5133, "step": 3672 }, { "epoch": 0.3, "grad_norm": 0.8389863239156715, "learning_rate": 1.6452043075472898e-05, "loss": 0.5176, "step": 3673 }, { "epoch": 0.3, "grad_norm": 0.9435807760681507, "learning_rate": 1.6450031616429655e-05, "loss": 0.6082, "step": 3674 }, { "epoch": 0.3, "grad_norm": 0.9169409518546056, "learning_rate": 1.6448019710404938e-05, "loss": 0.554, "step": 3675 }, { "epoch": 0.3, "grad_norm": 0.9801572359431846, "learning_rate": 1.6446007357538178e-05, "loss": 0.5588, "step": 3676 }, { "epoch": 0.3, "grad_norm": 0.9262934877911382, "learning_rate": 1.6443994557968826e-05, "loss": 0.5349, "step": 3677 }, { "epoch": 0.3, "grad_norm": 0.9778690714497104, "learning_rate": 1.6441981311836363e-05, "loss": 0.5439, "step": 3678 }, { "epoch": 0.3, "grad_norm": 0.9550533350922927, "learning_rate": 1.643996761928031e-05, "loss": 0.603, "step": 3679 }, { "epoch": 0.3, "grad_norm": 0.9514029700405338, "learning_rate": 1.6437953480440217e-05, "loss": 0.6207, "step": 3680 }, { "epoch": 0.3, "grad_norm": 0.9208468742014804, "learning_rate": 1.6435938895455653e-05, "loss": 0.5565, "step": 3681 }, { "epoch": 0.3, "grad_norm": 0.9614563981238538, "learning_rate": 1.6433923864466235e-05, "loss": 0.5365, "step": 3682 }, { "epoch": 0.3, "grad_norm": 1.0040116203536171, "learning_rate": 1.6431908387611604e-05, "loss": 0.5948, "step": 3683 }, { "epoch": 0.3, "grad_norm": 0.8657093495585827, "learning_rate": 1.642989246503142e-05, "loss": 0.5876, "step": 3684 }, { "epoch": 0.3, "grad_norm": 0.9683313092980244, "learning_rate": 1.6427876096865394e-05, "loss": 0.5575, "step": 3685 }, { "epoch": 0.3, "grad_norm": 0.9735112189429884, "learning_rate": 1.6425859283253255e-05, "loss": 0.6037, "step": 3686 }, { "epoch": 0.3, "grad_norm": 0.8483864927774368, "learning_rate": 1.642384202433477e-05, "loss": 0.5197, "step": 3687 }, { "epoch": 0.3, "grad_norm": 1.0289501655311173, "learning_rate": 1.6421824320249732e-05, "loss": 0.5883, "step": 3688 }, { "epoch": 0.3, "grad_norm": 0.8065027388887831, "learning_rate": 1.641980617113796e-05, "loss": 0.5542, "step": 3689 }, { "epoch": 0.3, "grad_norm": 0.9721774427241994, "learning_rate": 1.6417787577139317e-05, "loss": 0.5892, "step": 3690 }, { "epoch": 0.3, "grad_norm": 0.903531584027664, "learning_rate": 1.641576853839369e-05, "loss": 0.5987, "step": 3691 }, { "epoch": 0.3, "grad_norm": 0.9247698353933245, "learning_rate": 1.641374905504099e-05, "loss": 0.5728, "step": 3692 }, { "epoch": 0.3, "grad_norm": 0.9114264057369905, "learning_rate": 1.641172912722117e-05, "loss": 0.5938, "step": 3693 }, { "epoch": 0.3, "grad_norm": 0.9351345133128272, "learning_rate": 1.640970875507421e-05, "loss": 0.5918, "step": 3694 }, { "epoch": 0.3, "grad_norm": 0.847331838532848, "learning_rate": 1.640768793874012e-05, "loss": 0.5788, "step": 3695 }, { "epoch": 0.3, "grad_norm": 0.9834974607562615, "learning_rate": 1.6405666678358934e-05, "loss": 0.553, "step": 3696 }, { "epoch": 0.3, "grad_norm": 0.9635080252497199, "learning_rate": 1.6403644974070732e-05, "loss": 0.5437, "step": 3697 }, { "epoch": 0.3, "grad_norm": 0.9081995856472028, "learning_rate": 1.6401622826015616e-05, "loss": 0.5518, "step": 3698 }, { "epoch": 0.3, "grad_norm": 0.8448121229095162, "learning_rate": 1.6399600234333716e-05, "loss": 0.5327, "step": 3699 }, { "epoch": 0.3, "grad_norm": 0.9396638616959802, "learning_rate": 1.6397577199165192e-05, "loss": 0.6707, "step": 3700 }, { "epoch": 0.3, "grad_norm": 0.8932577772825789, "learning_rate": 1.639555372065025e-05, "loss": 0.5025, "step": 3701 }, { "epoch": 0.3, "grad_norm": 0.9081954188369703, "learning_rate": 1.6393529798929103e-05, "loss": 0.5871, "step": 3702 }, { "epoch": 0.3, "grad_norm": 0.8853290866239732, "learning_rate": 1.639150543414201e-05, "loss": 0.5313, "step": 3703 }, { "epoch": 0.3, "grad_norm": 0.9233280166174495, "learning_rate": 1.6389480626429262e-05, "loss": 0.5942, "step": 3704 }, { "epoch": 0.3, "grad_norm": 0.9463063765768046, "learning_rate": 1.6387455375931174e-05, "loss": 0.5392, "step": 3705 }, { "epoch": 0.3, "grad_norm": 0.8730763067649788, "learning_rate": 1.6385429682788095e-05, "loss": 0.544, "step": 3706 }, { "epoch": 0.3, "grad_norm": 0.9316457082221453, "learning_rate": 1.63834035471404e-05, "loss": 0.5758, "step": 3707 }, { "epoch": 0.3, "grad_norm": 0.9542199542196371, "learning_rate": 1.6381376969128508e-05, "loss": 0.6123, "step": 3708 }, { "epoch": 0.3, "grad_norm": 0.9416443659815187, "learning_rate": 1.6379349948892845e-05, "loss": 0.4821, "step": 3709 }, { "epoch": 0.3, "grad_norm": 0.9333823224097688, "learning_rate": 1.6377322486573892e-05, "loss": 0.602, "step": 3710 }, { "epoch": 0.3, "grad_norm": 0.8409452002423059, "learning_rate": 1.637529458231215e-05, "loss": 0.5059, "step": 3711 }, { "epoch": 0.3, "grad_norm": 0.9280610886053118, "learning_rate": 1.637326623624814e-05, "loss": 0.6287, "step": 3712 }, { "epoch": 0.3, "grad_norm": 0.9096302289339779, "learning_rate": 1.637123744852244e-05, "loss": 0.5049, "step": 3713 }, { "epoch": 0.3, "grad_norm": 1.0049544036616702, "learning_rate": 1.6369208219275635e-05, "loss": 0.6629, "step": 3714 }, { "epoch": 0.3, "grad_norm": 0.9013101190306692, "learning_rate": 1.6367178548648347e-05, "loss": 0.5969, "step": 3715 }, { "epoch": 0.3, "grad_norm": 1.0162996039363488, "learning_rate": 1.6365148436781235e-05, "loss": 0.5464, "step": 3716 }, { "epoch": 0.3, "grad_norm": 1.0263918742129827, "learning_rate": 1.6363117883814986e-05, "loss": 0.6456, "step": 3717 }, { "epoch": 0.3, "grad_norm": 0.860333941053944, "learning_rate": 1.6361086889890307e-05, "loss": 0.5763, "step": 3718 }, { "epoch": 0.3, "grad_norm": 0.8528743046913408, "learning_rate": 1.635905545514795e-05, "loss": 0.5548, "step": 3719 }, { "epoch": 0.3, "grad_norm": 0.9660427612544795, "learning_rate": 1.635702357972869e-05, "loss": 0.6462, "step": 3720 }, { "epoch": 0.3, "grad_norm": 0.9134307436287689, "learning_rate": 1.6354991263773338e-05, "loss": 0.5735, "step": 3721 }, { "epoch": 0.3, "grad_norm": 0.902943882911617, "learning_rate": 1.6352958507422727e-05, "loss": 0.53, "step": 3722 }, { "epoch": 0.3, "grad_norm": 0.9348713243516186, "learning_rate": 1.635092531081772e-05, "loss": 0.6143, "step": 3723 }, { "epoch": 0.3, "grad_norm": 0.8468682556085794, "learning_rate": 1.634889167409923e-05, "loss": 0.4976, "step": 3724 }, { "epoch": 0.3, "grad_norm": 0.8311154744443643, "learning_rate": 1.6346857597408174e-05, "loss": 0.455, "step": 3725 }, { "epoch": 0.3, "grad_norm": 0.8780250093580937, "learning_rate": 1.634482308088552e-05, "loss": 0.579, "step": 3726 }, { "epoch": 0.3, "grad_norm": 0.8297776086320948, "learning_rate": 1.6342788124672255e-05, "loss": 0.5381, "step": 3727 }, { "epoch": 0.3, "grad_norm": 0.9539167630401393, "learning_rate": 1.634075272890939e-05, "loss": 0.5416, "step": 3728 }, { "epoch": 0.3, "grad_norm": 1.0019019974902386, "learning_rate": 1.6338716893737995e-05, "loss": 0.6171, "step": 3729 }, { "epoch": 0.3, "grad_norm": 0.9788484051442959, "learning_rate": 1.6336680619299138e-05, "loss": 0.5867, "step": 3730 }, { "epoch": 0.3, "grad_norm": 0.9222090771321823, "learning_rate": 1.6334643905733932e-05, "loss": 0.5908, "step": 3731 }, { "epoch": 0.3, "grad_norm": 0.876664958229384, "learning_rate": 1.633260675318353e-05, "loss": 0.5143, "step": 3732 }, { "epoch": 0.3, "grad_norm": 0.8933905718102726, "learning_rate": 1.633056916178909e-05, "loss": 0.5128, "step": 3733 }, { "epoch": 0.3, "grad_norm": 0.8924163368771747, "learning_rate": 1.6328531131691823e-05, "loss": 0.5776, "step": 3734 }, { "epoch": 0.3, "grad_norm": 0.8375540811736663, "learning_rate": 1.6326492663032964e-05, "loss": 0.5809, "step": 3735 }, { "epoch": 0.3, "grad_norm": 0.9864446004257903, "learning_rate": 1.6324453755953772e-05, "loss": 0.6347, "step": 3736 }, { "epoch": 0.3, "grad_norm": 0.9735173503758445, "learning_rate": 1.6322414410595548e-05, "loss": 0.5306, "step": 3737 }, { "epoch": 0.3, "grad_norm": 0.9369967945864461, "learning_rate": 1.6320374627099612e-05, "loss": 0.5496, "step": 3738 }, { "epoch": 0.3, "grad_norm": 0.8931827555344554, "learning_rate": 1.631833440560732e-05, "loss": 0.5092, "step": 3739 }, { "epoch": 0.3, "grad_norm": 1.0429319179996712, "learning_rate": 1.631629374626006e-05, "loss": 0.5573, "step": 3740 }, { "epoch": 0.3, "grad_norm": 1.0092123786527019, "learning_rate": 1.6314252649199244e-05, "loss": 0.5645, "step": 3741 }, { "epoch": 0.3, "grad_norm": 0.9221786043515009, "learning_rate": 1.6312211114566322e-05, "loss": 0.5584, "step": 3742 }, { "epoch": 0.3, "grad_norm": 0.8863761142956672, "learning_rate": 1.6310169142502767e-05, "loss": 0.525, "step": 3743 }, { "epoch": 0.3, "grad_norm": 0.9347174132990639, "learning_rate": 1.6308126733150088e-05, "loss": 0.6126, "step": 3744 }, { "epoch": 0.3, "grad_norm": 1.0632686311470156, "learning_rate": 1.6306083886649823e-05, "loss": 0.6301, "step": 3745 }, { "epoch": 0.3, "grad_norm": 1.0074442161292392, "learning_rate": 1.6304040603143537e-05, "loss": 0.5749, "step": 3746 }, { "epoch": 0.3, "grad_norm": 0.9485310866229572, "learning_rate": 1.6301996882772828e-05, "loss": 0.6036, "step": 3747 }, { "epoch": 0.3, "grad_norm": 0.9259055405982872, "learning_rate": 1.6299952725679325e-05, "loss": 0.6138, "step": 3748 }, { "epoch": 0.3, "grad_norm": 0.9216543019525134, "learning_rate": 1.6297908132004688e-05, "loss": 0.5322, "step": 3749 }, { "epoch": 0.3, "grad_norm": 0.8867586367561826, "learning_rate": 1.6295863101890603e-05, "loss": 0.5266, "step": 3750 }, { "epoch": 0.3, "grad_norm": 0.930621927448145, "learning_rate": 1.6293817635478787e-05, "loss": 0.5562, "step": 3751 }, { "epoch": 0.3, "grad_norm": 0.834710674438063, "learning_rate": 1.629177173291099e-05, "loss": 0.5912, "step": 3752 }, { "epoch": 0.31, "grad_norm": 0.945039577387616, "learning_rate": 1.6289725394328998e-05, "loss": 0.5675, "step": 3753 }, { "epoch": 0.31, "grad_norm": 1.0046881856885677, "learning_rate": 1.6287678619874614e-05, "loss": 0.5822, "step": 3754 }, { "epoch": 0.31, "grad_norm": 0.9444310869381672, "learning_rate": 1.628563140968968e-05, "loss": 0.6171, "step": 3755 }, { "epoch": 0.31, "grad_norm": 0.8936053185510102, "learning_rate": 1.6283583763916062e-05, "loss": 0.5566, "step": 3756 }, { "epoch": 0.31, "grad_norm": 0.851601255245641, "learning_rate": 1.6281535682695663e-05, "loss": 0.5427, "step": 3757 }, { "epoch": 0.31, "grad_norm": 0.9061461756195821, "learning_rate": 1.6279487166170412e-05, "loss": 0.4949, "step": 3758 }, { "epoch": 0.31, "grad_norm": 0.9271870381103675, "learning_rate": 1.627743821448227e-05, "loss": 0.5722, "step": 3759 }, { "epoch": 0.31, "grad_norm": 1.0091095183053302, "learning_rate": 1.6275388827773235e-05, "loss": 0.5942, "step": 3760 }, { "epoch": 0.31, "grad_norm": 0.8860061586258776, "learning_rate": 1.627333900618531e-05, "loss": 0.5878, "step": 3761 }, { "epoch": 0.31, "grad_norm": 0.9550515256655273, "learning_rate": 1.627128874986056e-05, "loss": 0.534, "step": 3762 }, { "epoch": 0.31, "grad_norm": 0.9466939591667332, "learning_rate": 1.626923805894107e-05, "loss": 0.5818, "step": 3763 }, { "epoch": 0.31, "grad_norm": 0.8593634170820906, "learning_rate": 1.6267186933568934e-05, "loss": 0.623, "step": 3764 }, { "epoch": 0.31, "grad_norm": 0.9055110244963029, "learning_rate": 1.6265135373886303e-05, "loss": 0.5561, "step": 3765 }, { "epoch": 0.31, "grad_norm": 0.9522349882853491, "learning_rate": 1.6263083380035352e-05, "loss": 0.6138, "step": 3766 }, { "epoch": 0.31, "grad_norm": 0.9804021740033947, "learning_rate": 1.6261030952158275e-05, "loss": 0.6136, "step": 3767 }, { "epoch": 0.31, "grad_norm": 0.9771039368379923, "learning_rate": 1.625897809039731e-05, "loss": 0.5797, "step": 3768 }, { "epoch": 0.31, "grad_norm": 0.993286275378227, "learning_rate": 1.625692479489471e-05, "loss": 0.6008, "step": 3769 }, { "epoch": 0.31, "grad_norm": 0.9382597208111868, "learning_rate": 1.6254871065792776e-05, "loss": 0.6317, "step": 3770 }, { "epoch": 0.31, "grad_norm": 0.9130802421175084, "learning_rate": 1.6252816903233822e-05, "loss": 0.6397, "step": 3771 }, { "epoch": 0.31, "grad_norm": 0.8870309436502534, "learning_rate": 1.6250762307360206e-05, "loss": 0.5487, "step": 3772 }, { "epoch": 0.31, "grad_norm": 0.8630507613282848, "learning_rate": 1.6248707278314303e-05, "loss": 0.6063, "step": 3773 }, { "epoch": 0.31, "grad_norm": 0.8204465669044464, "learning_rate": 1.6246651816238533e-05, "loss": 0.5359, "step": 3774 }, { "epoch": 0.31, "grad_norm": 0.8791486581655621, "learning_rate": 1.6244595921275327e-05, "loss": 0.5696, "step": 3775 }, { "epoch": 0.31, "grad_norm": 0.8884869209713185, "learning_rate": 1.624253959356717e-05, "loss": 0.5646, "step": 3776 }, { "epoch": 0.31, "grad_norm": 0.8825282203386668, "learning_rate": 1.6240482833256548e-05, "loss": 0.5236, "step": 3777 }, { "epoch": 0.31, "grad_norm": 0.9366188651664406, "learning_rate": 1.6238425640486005e-05, "loss": 0.5926, "step": 3778 }, { "epoch": 0.31, "grad_norm": 0.9339163870662345, "learning_rate": 1.62363680153981e-05, "loss": 0.5729, "step": 3779 }, { "epoch": 0.31, "grad_norm": 0.9361957987894465, "learning_rate": 1.623430995813542e-05, "loss": 0.6206, "step": 3780 }, { "epoch": 0.31, "grad_norm": 0.9142015976316188, "learning_rate": 1.6232251468840593e-05, "loss": 0.6301, "step": 3781 }, { "epoch": 0.31, "grad_norm": 0.8585683173377849, "learning_rate": 1.6230192547656264e-05, "loss": 0.5282, "step": 3782 }, { "epoch": 0.31, "grad_norm": 0.8833783575926998, "learning_rate": 1.622813319472512e-05, "loss": 0.5974, "step": 3783 }, { "epoch": 0.31, "grad_norm": 0.8608490199464518, "learning_rate": 1.622607341018987e-05, "loss": 0.5524, "step": 3784 }, { "epoch": 0.31, "grad_norm": 0.8848172429593518, "learning_rate": 1.622401319419325e-05, "loss": 0.5606, "step": 3785 }, { "epoch": 0.31, "grad_norm": 0.8042328269827678, "learning_rate": 1.6221952546878044e-05, "loss": 0.5154, "step": 3786 }, { "epoch": 0.31, "grad_norm": 0.8958394235761792, "learning_rate": 1.621989146838704e-05, "loss": 0.6145, "step": 3787 }, { "epoch": 0.31, "grad_norm": 0.8922850703740579, "learning_rate": 1.6217829958863077e-05, "loss": 0.5611, "step": 3788 }, { "epoch": 0.31, "grad_norm": 0.9013966060877183, "learning_rate": 1.6215768018449015e-05, "loss": 0.5549, "step": 3789 }, { "epoch": 0.31, "grad_norm": 0.9854311867726121, "learning_rate": 1.621370564728774e-05, "loss": 0.5854, "step": 3790 }, { "epoch": 0.31, "grad_norm": 0.9686953885478913, "learning_rate": 1.6211642845522173e-05, "loss": 0.5728, "step": 3791 }, { "epoch": 0.31, "grad_norm": 0.848322047870251, "learning_rate": 1.620957961329527e-05, "loss": 0.573, "step": 3792 }, { "epoch": 0.31, "grad_norm": 0.9168811248245282, "learning_rate": 1.620751595075001e-05, "loss": 0.5687, "step": 3793 }, { "epoch": 0.31, "grad_norm": 0.8563048407779456, "learning_rate": 1.6205451858029392e-05, "loss": 0.5997, "step": 3794 }, { "epoch": 0.31, "grad_norm": 0.9474923997403427, "learning_rate": 1.620338733527647e-05, "loss": 0.5352, "step": 3795 }, { "epoch": 0.31, "grad_norm": 0.9299568257108631, "learning_rate": 1.6201322382634307e-05, "loss": 0.5198, "step": 3796 }, { "epoch": 0.31, "grad_norm": 0.9148308383657838, "learning_rate": 1.6199257000246004e-05, "loss": 0.5742, "step": 3797 }, { "epoch": 0.31, "grad_norm": 0.9363226570594413, "learning_rate": 1.6197191188254692e-05, "loss": 0.5975, "step": 3798 }, { "epoch": 0.31, "grad_norm": 1.0230584582962952, "learning_rate": 1.6195124946803527e-05, "loss": 0.6454, "step": 3799 }, { "epoch": 0.31, "grad_norm": 0.8742058995001382, "learning_rate": 1.6193058276035696e-05, "loss": 0.545, "step": 3800 }, { "epoch": 0.31, "grad_norm": 0.8711637158069439, "learning_rate": 1.6190991176094416e-05, "loss": 0.6298, "step": 3801 }, { "epoch": 0.31, "grad_norm": 0.9418798072740513, "learning_rate": 1.6188923647122946e-05, "loss": 0.5593, "step": 3802 }, { "epoch": 0.31, "grad_norm": 1.0387832078071524, "learning_rate": 1.6186855689264556e-05, "loss": 0.6113, "step": 3803 }, { "epoch": 0.31, "grad_norm": 0.926162906855895, "learning_rate": 1.618478730266255e-05, "loss": 0.572, "step": 3804 }, { "epoch": 0.31, "grad_norm": 0.885613716903195, "learning_rate": 1.6182718487460274e-05, "loss": 0.5771, "step": 3805 }, { "epoch": 0.31, "grad_norm": 0.943864432630467, "learning_rate": 1.618064924380109e-05, "loss": 0.5278, "step": 3806 }, { "epoch": 0.31, "grad_norm": 0.9306896144782546, "learning_rate": 1.6178579571828392e-05, "loss": 0.5818, "step": 3807 }, { "epoch": 0.31, "grad_norm": 0.9218870472956118, "learning_rate": 1.6176509471685616e-05, "loss": 0.5568, "step": 3808 }, { "epoch": 0.31, "grad_norm": 0.8969001171613205, "learning_rate": 1.6174438943516206e-05, "loss": 0.5828, "step": 3809 }, { "epoch": 0.31, "grad_norm": 0.9669693971910587, "learning_rate": 1.617236798746366e-05, "loss": 0.58, "step": 3810 }, { "epoch": 0.31, "grad_norm": 0.9136607722559579, "learning_rate": 1.6170296603671483e-05, "loss": 0.5508, "step": 3811 }, { "epoch": 0.31, "grad_norm": 0.8605010059912521, "learning_rate": 1.6168224792283226e-05, "loss": 0.5363, "step": 3812 }, { "epoch": 0.31, "grad_norm": 0.9501176650973622, "learning_rate": 1.616615255344246e-05, "loss": 0.5826, "step": 3813 }, { "epoch": 0.31, "grad_norm": 1.0011978284197116, "learning_rate": 1.6164079887292795e-05, "loss": 0.6175, "step": 3814 }, { "epoch": 0.31, "grad_norm": 0.8646956502967698, "learning_rate": 1.6162006793977858e-05, "loss": 0.5554, "step": 3815 }, { "epoch": 0.31, "grad_norm": 0.8299241484026281, "learning_rate": 1.615993327364132e-05, "loss": 0.57, "step": 3816 }, { "epoch": 0.31, "grad_norm": 0.9461613392861088, "learning_rate": 1.6157859326426865e-05, "loss": 0.6507, "step": 3817 }, { "epoch": 0.31, "grad_norm": 0.9065221382423563, "learning_rate": 1.6155784952478227e-05, "loss": 0.544, "step": 3818 }, { "epoch": 0.31, "grad_norm": 0.9547433861128523, "learning_rate": 1.6153710151939145e-05, "loss": 0.5617, "step": 3819 }, { "epoch": 0.31, "grad_norm": 0.9843060940520709, "learning_rate": 1.615163492495341e-05, "loss": 0.6266, "step": 3820 }, { "epoch": 0.31, "grad_norm": 0.9216513794765518, "learning_rate": 1.6149559271664835e-05, "loss": 0.5406, "step": 3821 }, { "epoch": 0.31, "grad_norm": 0.8997307743019719, "learning_rate": 1.6147483192217252e-05, "loss": 0.5437, "step": 3822 }, { "epoch": 0.31, "grad_norm": 0.8841110559575263, "learning_rate": 1.614540668675454e-05, "loss": 0.5412, "step": 3823 }, { "epoch": 0.31, "grad_norm": 0.8190597944819852, "learning_rate": 1.6143329755420592e-05, "loss": 0.5199, "step": 3824 }, { "epoch": 0.31, "grad_norm": 0.8811575545591398, "learning_rate": 1.6141252398359347e-05, "loss": 0.5517, "step": 3825 }, { "epoch": 0.31, "grad_norm": 0.9495478226528136, "learning_rate": 1.6139174615714753e-05, "loss": 0.6203, "step": 3826 }, { "epoch": 0.31, "grad_norm": 0.8797998768085065, "learning_rate": 1.6137096407630805e-05, "loss": 0.5824, "step": 3827 }, { "epoch": 0.31, "grad_norm": 0.9478838526700144, "learning_rate": 1.613501777425152e-05, "loss": 0.5624, "step": 3828 }, { "epoch": 0.31, "grad_norm": 0.8509726001493284, "learning_rate": 1.6132938715720946e-05, "loss": 0.5516, "step": 3829 }, { "epoch": 0.31, "grad_norm": 0.9303984729844625, "learning_rate": 1.6130859232183155e-05, "loss": 0.5794, "step": 3830 }, { "epoch": 0.31, "grad_norm": 0.8570119246962986, "learning_rate": 1.612877932378226e-05, "loss": 0.5966, "step": 3831 }, { "epoch": 0.31, "grad_norm": 0.903863068393604, "learning_rate": 1.6126698990662393e-05, "loss": 0.6146, "step": 3832 }, { "epoch": 0.31, "grad_norm": 0.910826964802132, "learning_rate": 1.6124618232967722e-05, "loss": 0.5759, "step": 3833 }, { "epoch": 0.31, "grad_norm": 0.8503801686254258, "learning_rate": 1.6122537050842443e-05, "loss": 0.5785, "step": 3834 }, { "epoch": 0.31, "grad_norm": 0.8646395305483849, "learning_rate": 1.612045544443077e-05, "loss": 0.5956, "step": 3835 }, { "epoch": 0.31, "grad_norm": 0.8979625542240427, "learning_rate": 1.611837341387697e-05, "loss": 0.5682, "step": 3836 }, { "epoch": 0.31, "grad_norm": 0.877739368460374, "learning_rate": 1.6116290959325318e-05, "loss": 0.567, "step": 3837 }, { "epoch": 0.31, "grad_norm": 0.8941178925109462, "learning_rate": 1.6114208080920125e-05, "loss": 0.4819, "step": 3838 }, { "epoch": 0.31, "grad_norm": 0.9798634018310648, "learning_rate": 1.6112124778805734e-05, "loss": 0.6096, "step": 3839 }, { "epoch": 0.31, "grad_norm": 0.9032849513071829, "learning_rate": 1.611004105312652e-05, "loss": 0.4802, "step": 3840 }, { "epoch": 0.31, "grad_norm": 0.9156165559397718, "learning_rate": 1.610795690402688e-05, "loss": 0.5825, "step": 3841 }, { "epoch": 0.31, "grad_norm": 0.9257049329284944, "learning_rate": 1.6105872331651245e-05, "loss": 0.5467, "step": 3842 }, { "epoch": 0.31, "grad_norm": 0.9240778610764073, "learning_rate": 1.610378733614407e-05, "loss": 0.6246, "step": 3843 }, { "epoch": 0.31, "grad_norm": 0.9901370283969468, "learning_rate": 1.6101701917649852e-05, "loss": 0.6262, "step": 3844 }, { "epoch": 0.31, "grad_norm": 1.0349073072890402, "learning_rate": 1.60996160763131e-05, "loss": 0.5461, "step": 3845 }, { "epoch": 0.31, "grad_norm": 0.8941247919258477, "learning_rate": 1.6097529812278364e-05, "loss": 0.5748, "step": 3846 }, { "epoch": 0.31, "grad_norm": 0.8401322694110076, "learning_rate": 1.6095443125690222e-05, "loss": 0.4761, "step": 3847 }, { "epoch": 0.31, "grad_norm": 0.9457057504482168, "learning_rate": 1.609335601669328e-05, "loss": 0.6018, "step": 3848 }, { "epoch": 0.31, "grad_norm": 0.898059756360942, "learning_rate": 1.6091268485432165e-05, "loss": 0.6018, "step": 3849 }, { "epoch": 0.31, "grad_norm": 0.9574558404478161, "learning_rate": 1.6089180532051552e-05, "loss": 0.5048, "step": 3850 }, { "epoch": 0.31, "grad_norm": 0.9188939124978395, "learning_rate": 1.6087092156696127e-05, "loss": 0.5522, "step": 3851 }, { "epoch": 0.31, "grad_norm": 0.9691554158650906, "learning_rate": 1.6085003359510616e-05, "loss": 0.5651, "step": 3852 }, { "epoch": 0.31, "grad_norm": 0.9953360714177077, "learning_rate": 1.6082914140639768e-05, "loss": 0.5312, "step": 3853 }, { "epoch": 0.31, "grad_norm": 0.9516548297556112, "learning_rate": 1.6080824500228367e-05, "loss": 0.5831, "step": 3854 }, { "epoch": 0.31, "grad_norm": 0.9744591097344041, "learning_rate": 1.607873443842122e-05, "loss": 0.5357, "step": 3855 }, { "epoch": 0.31, "grad_norm": 0.9916786491593069, "learning_rate": 1.607664395536317e-05, "loss": 0.5184, "step": 3856 }, { "epoch": 0.31, "grad_norm": 1.0783830257128157, "learning_rate": 1.6074553051199084e-05, "loss": 0.589, "step": 3857 }, { "epoch": 0.31, "grad_norm": 0.9327083156351761, "learning_rate": 1.6072461726073856e-05, "loss": 0.6027, "step": 3858 }, { "epoch": 0.31, "grad_norm": 1.072289447924501, "learning_rate": 1.6070369980132425e-05, "loss": 0.6064, "step": 3859 }, { "epoch": 0.31, "grad_norm": 0.8299534240728982, "learning_rate": 1.6068277813519733e-05, "loss": 0.5044, "step": 3860 }, { "epoch": 0.31, "grad_norm": 0.9522751323222246, "learning_rate": 1.606618522638077e-05, "loss": 0.537, "step": 3861 }, { "epoch": 0.31, "grad_norm": 0.9331646723727852, "learning_rate": 1.6064092218860553e-05, "loss": 0.5846, "step": 3862 }, { "epoch": 0.31, "grad_norm": 0.9322832535527018, "learning_rate": 1.6061998791104125e-05, "loss": 0.5319, "step": 3863 }, { "epoch": 0.31, "grad_norm": 0.8936974228375246, "learning_rate": 1.6059904943256557e-05, "loss": 0.5557, "step": 3864 }, { "epoch": 0.31, "grad_norm": 0.8527992025507102, "learning_rate": 1.605781067546295e-05, "loss": 0.5803, "step": 3865 }, { "epoch": 0.31, "grad_norm": 0.858642960030146, "learning_rate": 1.605571598786844e-05, "loss": 0.5272, "step": 3866 }, { "epoch": 0.31, "grad_norm": 0.7971785297359876, "learning_rate": 1.605362088061818e-05, "loss": 0.5018, "step": 3867 }, { "epoch": 0.31, "grad_norm": 0.8986389122506443, "learning_rate": 1.6051525353857364e-05, "loss": 0.5706, "step": 3868 }, { "epoch": 0.31, "grad_norm": 0.9365303829334433, "learning_rate": 1.604942940773121e-05, "loss": 0.6165, "step": 3869 }, { "epoch": 0.31, "grad_norm": 1.0017125459415919, "learning_rate": 1.604733304238496e-05, "loss": 0.5739, "step": 3870 }, { "epoch": 0.31, "grad_norm": 0.9031465462671923, "learning_rate": 1.60452362579639e-05, "loss": 0.6438, "step": 3871 }, { "epoch": 0.31, "grad_norm": 1.0046701734849914, "learning_rate": 1.6043139054613326e-05, "loss": 0.5714, "step": 3872 }, { "epoch": 0.31, "grad_norm": 0.9879187604061295, "learning_rate": 1.6041041432478573e-05, "loss": 0.5833, "step": 3873 }, { "epoch": 0.31, "grad_norm": 0.9316863553413524, "learning_rate": 1.603894339170501e-05, "loss": 0.6159, "step": 3874 }, { "epoch": 0.31, "grad_norm": 0.946948808147859, "learning_rate": 1.6036844932438028e-05, "loss": 0.5334, "step": 3875 }, { "epoch": 0.32, "grad_norm": 0.997715143378706, "learning_rate": 1.603474605482305e-05, "loss": 0.6068, "step": 3876 }, { "epoch": 0.32, "grad_norm": 1.0769994222963795, "learning_rate": 1.6032646759005515e-05, "loss": 0.6482, "step": 3877 }, { "epoch": 0.32, "grad_norm": 0.9817520043293302, "learning_rate": 1.6030547045130912e-05, "loss": 0.5853, "step": 3878 }, { "epoch": 0.32, "grad_norm": 0.9648721660602143, "learning_rate": 1.6028446913344754e-05, "loss": 0.5616, "step": 3879 }, { "epoch": 0.32, "grad_norm": 0.9278261013834327, "learning_rate": 1.6026346363792565e-05, "loss": 0.5881, "step": 3880 }, { "epoch": 0.32, "grad_norm": 0.976882951080119, "learning_rate": 1.6024245396619923e-05, "loss": 0.5802, "step": 3881 }, { "epoch": 0.32, "grad_norm": 0.982550166661968, "learning_rate": 1.6022144011972415e-05, "loss": 0.588, "step": 3882 }, { "epoch": 0.32, "grad_norm": 0.8778654274821055, "learning_rate": 1.6020042209995674e-05, "loss": 0.5717, "step": 3883 }, { "epoch": 0.32, "grad_norm": 0.9761246649548841, "learning_rate": 1.601793999083534e-05, "loss": 0.5979, "step": 3884 }, { "epoch": 0.32, "grad_norm": 0.9127386804169086, "learning_rate": 1.601583735463711e-05, "loss": 0.5232, "step": 3885 }, { "epoch": 0.32, "grad_norm": 0.943428272432976, "learning_rate": 1.6013734301546682e-05, "loss": 0.596, "step": 3886 }, { "epoch": 0.32, "grad_norm": 0.8873528540453057, "learning_rate": 1.6011630831709802e-05, "loss": 0.4994, "step": 3887 }, { "epoch": 0.32, "grad_norm": 1.0020297517768617, "learning_rate": 1.6009526945272243e-05, "loss": 0.6594, "step": 3888 }, { "epoch": 0.32, "grad_norm": 0.969974182360959, "learning_rate": 1.600742264237979e-05, "loss": 0.4831, "step": 3889 }, { "epoch": 0.32, "grad_norm": 0.8565424953400176, "learning_rate": 1.600531792317828e-05, "loss": 0.5725, "step": 3890 }, { "epoch": 0.32, "grad_norm": 0.943954280881943, "learning_rate": 1.6003212787813566e-05, "loss": 0.5465, "step": 3891 }, { "epoch": 0.32, "grad_norm": 0.8430972074442779, "learning_rate": 1.6001107236431525e-05, "loss": 0.5275, "step": 3892 }, { "epoch": 0.32, "grad_norm": 0.8757053789419513, "learning_rate": 1.5999001269178082e-05, "loss": 0.5166, "step": 3893 }, { "epoch": 0.32, "grad_norm": 1.0103937818297317, "learning_rate": 1.5996894886199167e-05, "loss": 0.578, "step": 3894 }, { "epoch": 0.32, "grad_norm": 0.9004178001640264, "learning_rate": 1.599478808764076e-05, "loss": 0.6012, "step": 3895 }, { "epoch": 0.32, "grad_norm": 0.9064418582095709, "learning_rate": 1.5992680873648852e-05, "loss": 0.5688, "step": 3896 }, { "epoch": 0.32, "grad_norm": 0.913659760784911, "learning_rate": 1.5990573244369478e-05, "loss": 0.5659, "step": 3897 }, { "epoch": 0.32, "grad_norm": 1.0124956917343046, "learning_rate": 1.5988465199948692e-05, "loss": 0.6485, "step": 3898 }, { "epoch": 0.32, "grad_norm": 0.8901834527319934, "learning_rate": 1.5986356740532577e-05, "loss": 0.5982, "step": 3899 }, { "epoch": 0.32, "grad_norm": 0.8764696457969312, "learning_rate": 1.5984247866267253e-05, "loss": 0.5469, "step": 3900 }, { "epoch": 0.32, "grad_norm": 0.9406358564079779, "learning_rate": 1.5982138577298857e-05, "loss": 0.5012, "step": 3901 }, { "epoch": 0.32, "grad_norm": 0.8906184319109551, "learning_rate": 1.5980028873773563e-05, "loss": 0.5248, "step": 3902 }, { "epoch": 0.32, "grad_norm": 0.9257365052996489, "learning_rate": 1.5977918755837576e-05, "loss": 0.5771, "step": 3903 }, { "epoch": 0.32, "grad_norm": 0.8697721075412118, "learning_rate": 1.5975808223637117e-05, "loss": 0.6153, "step": 3904 }, { "epoch": 0.32, "grad_norm": 0.9582866753626065, "learning_rate": 1.5973697277318452e-05, "loss": 0.6421, "step": 3905 }, { "epoch": 0.32, "grad_norm": 0.9999765624853517, "learning_rate": 1.5971585917027864e-05, "loss": 0.6416, "step": 3906 }, { "epoch": 0.32, "grad_norm": 0.9084148944426773, "learning_rate": 1.596947414291167e-05, "loss": 0.5444, "step": 3907 }, { "epoch": 0.32, "grad_norm": 0.9706443582125923, "learning_rate": 1.5967361955116207e-05, "loss": 0.5201, "step": 3908 }, { "epoch": 0.32, "grad_norm": 0.9972933163735651, "learning_rate": 1.596524935378786e-05, "loss": 0.5934, "step": 3909 }, { "epoch": 0.32, "grad_norm": 0.8746715492924646, "learning_rate": 1.5963136339073023e-05, "loss": 0.5472, "step": 3910 }, { "epoch": 0.32, "grad_norm": 0.8610502255050615, "learning_rate": 1.5961022911118124e-05, "loss": 0.6129, "step": 3911 }, { "epoch": 0.32, "grad_norm": 0.908968711863703, "learning_rate": 1.5958909070069627e-05, "loss": 0.5596, "step": 3912 }, { "epoch": 0.32, "grad_norm": 0.8760887786049828, "learning_rate": 1.5956794816074015e-05, "loss": 0.5631, "step": 3913 }, { "epoch": 0.32, "grad_norm": 0.9159331172803, "learning_rate": 1.5954680149277807e-05, "loss": 0.5787, "step": 3914 }, { "epoch": 0.32, "grad_norm": 0.8659525657404382, "learning_rate": 1.5952565069827544e-05, "loss": 0.47, "step": 3915 }, { "epoch": 0.32, "grad_norm": 0.8250608098886822, "learning_rate": 1.5950449577869807e-05, "loss": 0.5444, "step": 3916 }, { "epoch": 0.32, "grad_norm": 0.8310286356585561, "learning_rate": 1.594833367355119e-05, "loss": 0.4955, "step": 3917 }, { "epoch": 0.32, "grad_norm": 0.940905437018659, "learning_rate": 1.5946217357018322e-05, "loss": 0.61, "step": 3918 }, { "epoch": 0.32, "grad_norm": 0.9313010060308121, "learning_rate": 1.594410062841787e-05, "loss": 0.5724, "step": 3919 }, { "epoch": 0.32, "grad_norm": 0.9309610664870516, "learning_rate": 1.5941983487896515e-05, "loss": 0.5331, "step": 3920 }, { "epoch": 0.32, "grad_norm": 0.8901671550634485, "learning_rate": 1.5939865935600976e-05, "loss": 0.5906, "step": 3921 }, { "epoch": 0.32, "grad_norm": 0.9543630353086977, "learning_rate": 1.5937747971677996e-05, "loss": 0.5637, "step": 3922 }, { "epoch": 0.32, "grad_norm": 0.8939676646410317, "learning_rate": 1.5935629596274345e-05, "loss": 0.5744, "step": 3923 }, { "epoch": 0.32, "grad_norm": 0.8888549200288502, "learning_rate": 1.593351080953683e-05, "loss": 0.5727, "step": 3924 }, { "epoch": 0.32, "grad_norm": 0.8968565334675708, "learning_rate": 1.5931391611612283e-05, "loss": 0.5727, "step": 3925 }, { "epoch": 0.32, "grad_norm": 0.9970235070283097, "learning_rate": 1.5929272002647554e-05, "loss": 0.6307, "step": 3926 }, { "epoch": 0.32, "grad_norm": 0.8795770579019754, "learning_rate": 1.5927151982789535e-05, "loss": 0.5426, "step": 3927 }, { "epoch": 0.32, "grad_norm": 0.8724285002585467, "learning_rate": 1.592503155218514e-05, "loss": 0.5284, "step": 3928 }, { "epoch": 0.32, "grad_norm": 0.909354024769601, "learning_rate": 1.592291071098132e-05, "loss": 0.5683, "step": 3929 }, { "epoch": 0.32, "grad_norm": 1.004209989415013, "learning_rate": 1.5920789459325034e-05, "loss": 0.563, "step": 3930 }, { "epoch": 0.32, "grad_norm": 0.9043095354424483, "learning_rate": 1.5918667797363295e-05, "loss": 0.5364, "step": 3931 }, { "epoch": 0.32, "grad_norm": 0.9581734823349979, "learning_rate": 1.5916545725243124e-05, "loss": 0.5751, "step": 3932 }, { "epoch": 0.32, "grad_norm": 0.9672287773164598, "learning_rate": 1.5914423243111582e-05, "loss": 0.5979, "step": 3933 }, { "epoch": 0.32, "grad_norm": 0.9256755293346672, "learning_rate": 1.591230035111576e-05, "loss": 0.5826, "step": 3934 }, { "epoch": 0.32, "grad_norm": 0.9089934348766084, "learning_rate": 1.5910177049402762e-05, "loss": 0.579, "step": 3935 }, { "epoch": 0.32, "grad_norm": 0.9235722253896989, "learning_rate": 1.5908053338119743e-05, "loss": 0.5909, "step": 3936 }, { "epoch": 0.32, "grad_norm": 0.9773177163752066, "learning_rate": 1.590592921741386e-05, "loss": 0.6234, "step": 3937 }, { "epoch": 0.32, "grad_norm": 0.9177701352719017, "learning_rate": 1.5903804687432325e-05, "loss": 0.5361, "step": 3938 }, { "epoch": 0.32, "grad_norm": 0.8916835330713223, "learning_rate": 1.5901679748322367e-05, "loss": 0.5138, "step": 3939 }, { "epoch": 0.32, "grad_norm": 1.0161600179707424, "learning_rate": 1.5899554400231233e-05, "loss": 0.545, "step": 3940 }, { "epoch": 0.32, "grad_norm": 0.9160973920565795, "learning_rate": 1.5897428643306207e-05, "loss": 0.5696, "step": 3941 }, { "epoch": 0.32, "grad_norm": 0.9168375963244876, "learning_rate": 1.5895302477694614e-05, "loss": 0.5171, "step": 3942 }, { "epoch": 0.32, "grad_norm": 0.9596387856101649, "learning_rate": 1.5893175903543788e-05, "loss": 0.557, "step": 3943 }, { "epoch": 0.32, "grad_norm": 0.8806056535691328, "learning_rate": 1.5891048921001094e-05, "loss": 0.5787, "step": 3944 }, { "epoch": 0.32, "grad_norm": 0.9184353595815069, "learning_rate": 1.5888921530213938e-05, "loss": 0.5489, "step": 3945 }, { "epoch": 0.32, "grad_norm": 0.9008051585907707, "learning_rate": 1.5886793731329743e-05, "loss": 0.5742, "step": 3946 }, { "epoch": 0.32, "grad_norm": 0.9214844580354372, "learning_rate": 1.5884665524495965e-05, "loss": 0.633, "step": 3947 }, { "epoch": 0.32, "grad_norm": 1.0020293010722414, "learning_rate": 1.5882536909860086e-05, "loss": 0.5728, "step": 3948 }, { "epoch": 0.32, "grad_norm": 0.8578081842433017, "learning_rate": 1.5880407887569617e-05, "loss": 0.5171, "step": 3949 }, { "epoch": 0.32, "grad_norm": 0.8587555597349382, "learning_rate": 1.5878278457772095e-05, "loss": 0.5575, "step": 3950 }, { "epoch": 0.32, "grad_norm": 1.0361111128796203, "learning_rate": 1.5876148620615094e-05, "loss": 0.5906, "step": 3951 }, { "epoch": 0.32, "grad_norm": 0.8893338118813109, "learning_rate": 1.5874018376246204e-05, "loss": 0.5418, "step": 3952 }, { "epoch": 0.32, "grad_norm": 0.9951994215390055, "learning_rate": 1.587188772481305e-05, "loss": 0.6434, "step": 3953 }, { "epoch": 0.32, "grad_norm": 0.8760987314596554, "learning_rate": 1.586975666646328e-05, "loss": 0.5719, "step": 3954 }, { "epoch": 0.32, "grad_norm": 0.9741708146173176, "learning_rate": 1.586762520134459e-05, "loss": 0.5114, "step": 3955 }, { "epoch": 0.32, "grad_norm": 0.9053015926217912, "learning_rate": 1.586549332960467e-05, "loss": 0.5896, "step": 3956 }, { "epoch": 0.32, "grad_norm": 1.0322490493095118, "learning_rate": 1.586336105139127e-05, "loss": 0.6103, "step": 3957 }, { "epoch": 0.32, "grad_norm": 0.912987608954962, "learning_rate": 1.5861228366852148e-05, "loss": 0.5358, "step": 3958 }, { "epoch": 0.32, "grad_norm": 0.945625272219544, "learning_rate": 1.58590952761351e-05, "loss": 0.547, "step": 3959 }, { "epoch": 0.32, "grad_norm": 1.0679542684563517, "learning_rate": 1.5856961779387945e-05, "loss": 0.5971, "step": 3960 }, { "epoch": 0.32, "grad_norm": 0.8642824221120665, "learning_rate": 1.5854827876758535e-05, "loss": 0.5196, "step": 3961 }, { "epoch": 0.32, "grad_norm": 0.990179303335577, "learning_rate": 1.5852693568394743e-05, "loss": 0.5959, "step": 3962 }, { "epoch": 0.32, "grad_norm": 0.8666422528701844, "learning_rate": 1.585055885444448e-05, "loss": 0.4666, "step": 3963 }, { "epoch": 0.32, "grad_norm": 0.9884825573976187, "learning_rate": 1.584842373505568e-05, "loss": 0.5883, "step": 3964 }, { "epoch": 0.32, "grad_norm": 0.8598450286175446, "learning_rate": 1.58462882103763e-05, "loss": 0.5026, "step": 3965 }, { "epoch": 0.32, "grad_norm": 0.8665646932315791, "learning_rate": 1.5844152280554333e-05, "loss": 0.4779, "step": 3966 }, { "epoch": 0.32, "grad_norm": 0.9650599543426119, "learning_rate": 1.5842015945737798e-05, "loss": 0.6127, "step": 3967 }, { "epoch": 0.32, "grad_norm": 0.8378359622706862, "learning_rate": 1.583987920607474e-05, "loss": 0.5244, "step": 3968 }, { "epoch": 0.32, "grad_norm": 0.9662255575979611, "learning_rate": 1.583774206171323e-05, "loss": 0.6374, "step": 3969 }, { "epoch": 0.32, "grad_norm": 0.8429884326613644, "learning_rate": 1.5835604512801375e-05, "loss": 0.553, "step": 3970 }, { "epoch": 0.32, "grad_norm": 0.9376564970901118, "learning_rate": 1.5833466559487305e-05, "loss": 0.5976, "step": 3971 }, { "epoch": 0.32, "grad_norm": 0.9324971037844922, "learning_rate": 1.5831328201919175e-05, "loss": 0.6169, "step": 3972 }, { "epoch": 0.32, "grad_norm": 1.0128425702126547, "learning_rate": 1.5829189440245175e-05, "loss": 0.6142, "step": 3973 }, { "epoch": 0.32, "grad_norm": 1.0372645855687352, "learning_rate": 1.5827050274613512e-05, "loss": 0.6401, "step": 3974 }, { "epoch": 0.32, "grad_norm": 0.9380399027238666, "learning_rate": 1.5824910705172437e-05, "loss": 0.5905, "step": 3975 }, { "epoch": 0.32, "grad_norm": 0.9415163524530699, "learning_rate": 1.5822770732070222e-05, "loss": 0.5514, "step": 3976 }, { "epoch": 0.32, "grad_norm": 0.866600935516856, "learning_rate": 1.5820630355455155e-05, "loss": 0.5763, "step": 3977 }, { "epoch": 0.32, "grad_norm": 0.9152366905386166, "learning_rate": 1.5818489575475564e-05, "loss": 0.5363, "step": 3978 }, { "epoch": 0.32, "grad_norm": 0.9422393689041517, "learning_rate": 1.5816348392279814e-05, "loss": 0.6887, "step": 3979 }, { "epoch": 0.32, "grad_norm": 0.9066783559458047, "learning_rate": 1.5814206806016273e-05, "loss": 0.6047, "step": 3980 }, { "epoch": 0.32, "grad_norm": 0.9600494430271688, "learning_rate": 1.581206481683336e-05, "loss": 0.6092, "step": 3981 }, { "epoch": 0.32, "grad_norm": 0.8981869604826622, "learning_rate": 1.580992242487951e-05, "loss": 0.5736, "step": 3982 }, { "epoch": 0.32, "grad_norm": 0.8496609966141786, "learning_rate": 1.580777963030319e-05, "loss": 0.5108, "step": 3983 }, { "epoch": 0.32, "grad_norm": 0.8995294057437336, "learning_rate": 1.5805636433252892e-05, "loss": 0.5595, "step": 3984 }, { "epoch": 0.32, "grad_norm": 0.8649395089955623, "learning_rate": 1.5803492833877143e-05, "loss": 0.5506, "step": 3985 }, { "epoch": 0.32, "grad_norm": 0.9194681120880075, "learning_rate": 1.5801348832324483e-05, "loss": 0.5092, "step": 3986 }, { "epoch": 0.32, "grad_norm": 0.9022113242229857, "learning_rate": 1.5799204428743497e-05, "loss": 0.5777, "step": 3987 }, { "epoch": 0.32, "grad_norm": 0.9521786178423841, "learning_rate": 1.5797059623282787e-05, "loss": 0.5979, "step": 3988 }, { "epoch": 0.32, "grad_norm": 0.975208854083481, "learning_rate": 1.5794914416090988e-05, "loss": 0.5444, "step": 3989 }, { "epoch": 0.32, "grad_norm": 1.0034484704900803, "learning_rate": 1.579276880731676e-05, "loss": 0.6227, "step": 3990 }, { "epoch": 0.32, "grad_norm": 1.0597278667233871, "learning_rate": 1.579062279710879e-05, "loss": 0.6034, "step": 3991 }, { "epoch": 0.32, "grad_norm": 0.8784580768653654, "learning_rate": 1.57884763856158e-05, "loss": 0.5405, "step": 3992 }, { "epoch": 0.32, "grad_norm": 0.9531035237183869, "learning_rate": 1.5786329572986527e-05, "loss": 0.6006, "step": 3993 }, { "epoch": 0.32, "grad_norm": 0.9197563734914485, "learning_rate": 1.578418235936975e-05, "loss": 0.5324, "step": 3994 }, { "epoch": 0.32, "grad_norm": 0.9454280511184845, "learning_rate": 1.5782034744914264e-05, "loss": 0.5458, "step": 3995 }, { "epoch": 0.32, "grad_norm": 0.8474779750455188, "learning_rate": 1.57798867297689e-05, "loss": 0.5221, "step": 3996 }, { "epoch": 0.32, "grad_norm": 0.9766094107559433, "learning_rate": 1.5777738314082514e-05, "loss": 0.6148, "step": 3997 }, { "epoch": 0.32, "grad_norm": 0.9217892458525768, "learning_rate": 1.5775589498003984e-05, "loss": 0.5567, "step": 3998 }, { "epoch": 0.33, "grad_norm": 0.9409235290685977, "learning_rate": 1.5773440281682226e-05, "loss": 0.5966, "step": 3999 }, { "epoch": 0.33, "grad_norm": 0.904426711240461, "learning_rate": 1.577129066526618e-05, "loss": 0.553, "step": 4000 }, { "epoch": 0.33, "grad_norm": 0.9370662463188123, "learning_rate": 1.5769140648904806e-05, "loss": 0.5454, "step": 4001 }, { "epoch": 0.33, "grad_norm": 0.8846935856523952, "learning_rate": 1.5766990232747106e-05, "loss": 0.5365, "step": 4002 }, { "epoch": 0.33, "grad_norm": 0.9434664117402615, "learning_rate": 1.5764839416942097e-05, "loss": 0.5992, "step": 4003 }, { "epoch": 0.33, "grad_norm": 1.3096888551499557, "learning_rate": 1.576268820163883e-05, "loss": 0.521, "step": 4004 }, { "epoch": 0.33, "grad_norm": 0.9698155276466377, "learning_rate": 1.576053658698638e-05, "loss": 0.6299, "step": 4005 }, { "epoch": 0.33, "grad_norm": 0.9371335851184125, "learning_rate": 1.5758384573133857e-05, "loss": 0.5855, "step": 4006 }, { "epoch": 0.33, "grad_norm": 1.0166311336574105, "learning_rate": 1.5756232160230388e-05, "loss": 0.6351, "step": 4007 }, { "epoch": 0.33, "grad_norm": 0.9017351451582668, "learning_rate": 1.5754079348425137e-05, "loss": 0.5309, "step": 4008 }, { "epoch": 0.33, "grad_norm": 1.0872595226025266, "learning_rate": 1.575192613786729e-05, "loss": 0.5589, "step": 4009 }, { "epoch": 0.33, "grad_norm": 0.9429036522959239, "learning_rate": 1.574977252870607e-05, "loss": 0.5597, "step": 4010 }, { "epoch": 0.33, "grad_norm": 1.006473815437657, "learning_rate": 1.5747618521090706e-05, "loss": 0.6188, "step": 4011 }, { "epoch": 0.33, "grad_norm": 0.9831280828790899, "learning_rate": 1.574546411517048e-05, "loss": 0.5073, "step": 4012 }, { "epoch": 0.33, "grad_norm": 0.9614882582761742, "learning_rate": 1.5743309311094687e-05, "loss": 0.5916, "step": 4013 }, { "epoch": 0.33, "grad_norm": 0.9834440770393784, "learning_rate": 1.574115410901265e-05, "loss": 0.5796, "step": 4014 }, { "epoch": 0.33, "grad_norm": 0.862865878132095, "learning_rate": 1.573899850907373e-05, "loss": 0.5184, "step": 4015 }, { "epoch": 0.33, "grad_norm": 0.9369647382877944, "learning_rate": 1.5736842511427302e-05, "loss": 0.5914, "step": 4016 }, { "epoch": 0.33, "grad_norm": 0.8918627312384875, "learning_rate": 1.5734686116222775e-05, "loss": 0.502, "step": 4017 }, { "epoch": 0.33, "grad_norm": 0.976668862121526, "learning_rate": 1.573252932360959e-05, "loss": 0.5887, "step": 4018 }, { "epoch": 0.33, "grad_norm": 0.8604324291777963, "learning_rate": 1.5730372133737206e-05, "loss": 0.5676, "step": 4019 }, { "epoch": 0.33, "grad_norm": 0.9355398542457567, "learning_rate": 1.5728214546755117e-05, "loss": 0.5784, "step": 4020 }, { "epoch": 0.33, "grad_norm": 1.0053396355524888, "learning_rate": 1.572605656281284e-05, "loss": 0.5561, "step": 4021 }, { "epoch": 0.33, "grad_norm": 0.8865428977414068, "learning_rate": 1.572389818205992e-05, "loss": 0.5344, "step": 4022 }, { "epoch": 0.33, "grad_norm": 0.917616140853694, "learning_rate": 1.5721739404645937e-05, "loss": 0.5448, "step": 4023 }, { "epoch": 0.33, "grad_norm": 2.0039270053413536, "learning_rate": 1.5719580230720485e-05, "loss": 0.5648, "step": 4024 }, { "epoch": 0.33, "grad_norm": 0.8715238705686509, "learning_rate": 1.57174206604332e-05, "loss": 0.6044, "step": 4025 }, { "epoch": 0.33, "grad_norm": 0.9609013329719558, "learning_rate": 1.571526069393373e-05, "loss": 0.5953, "step": 4026 }, { "epoch": 0.33, "grad_norm": 0.9475604172383607, "learning_rate": 1.5713100331371768e-05, "loss": 0.5605, "step": 4027 }, { "epoch": 0.33, "grad_norm": 0.8078213940067273, "learning_rate": 1.5710939572897018e-05, "loss": 0.5468, "step": 4028 }, { "epoch": 0.33, "grad_norm": 0.9691697196000543, "learning_rate": 1.570877841865922e-05, "loss": 0.5711, "step": 4029 }, { "epoch": 0.33, "grad_norm": 1.005924058700349, "learning_rate": 1.5706616868808142e-05, "loss": 0.5627, "step": 4030 }, { "epoch": 0.33, "grad_norm": 0.8603428183105295, "learning_rate": 1.5704454923493577e-05, "loss": 0.5344, "step": 4031 }, { "epoch": 0.33, "grad_norm": 1.0494686065684906, "learning_rate": 1.570229258286535e-05, "loss": 0.5895, "step": 4032 }, { "epoch": 0.33, "grad_norm": 0.8563575513133033, "learning_rate": 1.5700129847073298e-05, "loss": 0.5631, "step": 4033 }, { "epoch": 0.33, "grad_norm": 1.0982265801325355, "learning_rate": 1.5697966716267308e-05, "loss": 0.6157, "step": 4034 }, { "epoch": 0.33, "grad_norm": 0.8426234383113499, "learning_rate": 1.5695803190597275e-05, "loss": 0.5765, "step": 4035 }, { "epoch": 0.33, "grad_norm": 0.9077511704024396, "learning_rate": 1.5693639270213138e-05, "loss": 0.5276, "step": 4036 }, { "epoch": 0.33, "grad_norm": 0.8654428007933888, "learning_rate": 1.5691474955264848e-05, "loss": 0.4938, "step": 4037 }, { "epoch": 0.33, "grad_norm": 0.9792980677325999, "learning_rate": 1.568931024590239e-05, "loss": 0.5882, "step": 4038 }, { "epoch": 0.33, "grad_norm": 1.0353408848047134, "learning_rate": 1.568714514227578e-05, "loss": 0.6413, "step": 4039 }, { "epoch": 0.33, "grad_norm": 0.8556489487154524, "learning_rate": 1.5684979644535053e-05, "loss": 0.5847, "step": 4040 }, { "epoch": 0.33, "grad_norm": 0.9468726518340129, "learning_rate": 1.5682813752830284e-05, "loss": 0.5498, "step": 4041 }, { "epoch": 0.33, "grad_norm": 0.9710687607873638, "learning_rate": 1.568064746731156e-05, "loss": 0.5876, "step": 4042 }, { "epoch": 0.33, "grad_norm": 0.8165640547630525, "learning_rate": 1.5678480788129003e-05, "loss": 0.5817, "step": 4043 }, { "epoch": 0.33, "grad_norm": 0.8358262932296227, "learning_rate": 1.567631371543277e-05, "loss": 0.5138, "step": 4044 }, { "epoch": 0.33, "grad_norm": 0.8043492548198713, "learning_rate": 1.5674146249373027e-05, "loss": 0.5326, "step": 4045 }, { "epoch": 0.33, "grad_norm": 0.8762669905406324, "learning_rate": 1.5671978390099985e-05, "loss": 0.5032, "step": 4046 }, { "epoch": 0.33, "grad_norm": 0.962549549006086, "learning_rate": 1.566981013776387e-05, "loss": 0.5345, "step": 4047 }, { "epoch": 0.33, "grad_norm": 0.8584925617245548, "learning_rate": 1.5667641492514942e-05, "loss": 0.4845, "step": 4048 }, { "epoch": 0.33, "grad_norm": 0.9768208555080496, "learning_rate": 1.5665472454503484e-05, "loss": 0.6085, "step": 4049 }, { "epoch": 0.33, "grad_norm": 0.8910712523051837, "learning_rate": 1.566330302387981e-05, "loss": 0.5695, "step": 4050 }, { "epoch": 0.33, "grad_norm": 0.8993957926471542, "learning_rate": 1.566113320079426e-05, "loss": 0.5671, "step": 4051 }, { "epoch": 0.33, "grad_norm": 0.9509042340172726, "learning_rate": 1.5658962985397202e-05, "loss": 0.5668, "step": 4052 }, { "epoch": 0.33, "grad_norm": 0.8274785334369242, "learning_rate": 1.565679237783903e-05, "loss": 0.5048, "step": 4053 }, { "epoch": 0.33, "grad_norm": 0.8602365961998485, "learning_rate": 1.565462137827016e-05, "loss": 0.5589, "step": 4054 }, { "epoch": 0.33, "grad_norm": 0.9312739718943414, "learning_rate": 1.5652449986841048e-05, "loss": 0.574, "step": 4055 }, { "epoch": 0.33, "grad_norm": 0.8972888462688227, "learning_rate": 1.5650278203702162e-05, "loss": 0.5738, "step": 4056 }, { "epoch": 0.33, "grad_norm": 0.9849922783424293, "learning_rate": 1.564810602900401e-05, "loss": 0.6366, "step": 4057 }, { "epoch": 0.33, "grad_norm": 0.9266568698973598, "learning_rate": 1.564593346289712e-05, "loss": 0.5807, "step": 4058 }, { "epoch": 0.33, "grad_norm": 0.9689728059389319, "learning_rate": 1.564376050553205e-05, "loss": 0.6002, "step": 4059 }, { "epoch": 0.33, "grad_norm": 0.9657283266055987, "learning_rate": 1.564158715705938e-05, "loss": 0.5452, "step": 4060 }, { "epoch": 0.33, "grad_norm": 0.9252710654263362, "learning_rate": 1.563941341762973e-05, "loss": 0.6325, "step": 4061 }, { "epoch": 0.33, "grad_norm": 0.9067245498427078, "learning_rate": 1.5637239287393725e-05, "loss": 0.601, "step": 4062 }, { "epoch": 0.33, "grad_norm": 0.9155849496185144, "learning_rate": 1.5635064766502042e-05, "loss": 0.5615, "step": 4063 }, { "epoch": 0.33, "grad_norm": 0.8936140240745216, "learning_rate": 1.563288985510537e-05, "loss": 0.5471, "step": 4064 }, { "epoch": 0.33, "grad_norm": 0.942245882106811, "learning_rate": 1.5630714553354425e-05, "loss": 0.5919, "step": 4065 }, { "epoch": 0.33, "grad_norm": 0.9363613460777342, "learning_rate": 1.5628538861399956e-05, "loss": 0.5929, "step": 4066 }, { "epoch": 0.33, "grad_norm": 1.0024484438838643, "learning_rate": 1.5626362779392738e-05, "loss": 0.6062, "step": 4067 }, { "epoch": 0.33, "grad_norm": 0.8589020510416536, "learning_rate": 1.562418630748357e-05, "loss": 0.5201, "step": 4068 }, { "epoch": 0.33, "grad_norm": 0.859399789929067, "learning_rate": 1.5622009445823274e-05, "loss": 0.5157, "step": 4069 }, { "epoch": 0.33, "grad_norm": 0.9655223138192177, "learning_rate": 1.5619832194562716e-05, "loss": 0.5716, "step": 4070 }, { "epoch": 0.33, "grad_norm": 0.9628342084902699, "learning_rate": 1.561765455385277e-05, "loss": 0.5767, "step": 4071 }, { "epoch": 0.33, "grad_norm": 0.9572679965465991, "learning_rate": 1.5615476523844346e-05, "loss": 0.5719, "step": 4072 }, { "epoch": 0.33, "grad_norm": 0.8314228385114546, "learning_rate": 1.5613298104688383e-05, "loss": 0.4915, "step": 4073 }, { "epoch": 0.33, "grad_norm": 0.8856555588832423, "learning_rate": 1.5611119296535836e-05, "loss": 0.4953, "step": 4074 }, { "epoch": 0.33, "grad_norm": 0.8825697950973398, "learning_rate": 1.56089400995377e-05, "loss": 0.5186, "step": 4075 }, { "epoch": 0.33, "grad_norm": 1.0123348773261922, "learning_rate": 1.560676051384499e-05, "loss": 0.6382, "step": 4076 }, { "epoch": 0.33, "grad_norm": 0.9949471643486758, "learning_rate": 1.560458053960875e-05, "loss": 0.5926, "step": 4077 }, { "epoch": 0.33, "grad_norm": 0.9208384971441007, "learning_rate": 1.560240017698005e-05, "loss": 0.5815, "step": 4078 }, { "epoch": 0.33, "grad_norm": 1.0898988852025966, "learning_rate": 1.5600219426109986e-05, "loss": 0.6395, "step": 4079 }, { "epoch": 0.33, "grad_norm": 0.9118273390877075, "learning_rate": 1.5598038287149684e-05, "loss": 0.6092, "step": 4080 }, { "epoch": 0.33, "grad_norm": 0.8525027875549489, "learning_rate": 1.5595856760250296e-05, "loss": 0.5162, "step": 4081 }, { "epoch": 0.33, "grad_norm": 0.9278775945269585, "learning_rate": 1.5593674845562994e-05, "loss": 0.5954, "step": 4082 }, { "epoch": 0.33, "grad_norm": 1.330704959252747, "learning_rate": 1.559149254323899e-05, "loss": 0.6494, "step": 4083 }, { "epoch": 0.33, "grad_norm": 0.9244176454658858, "learning_rate": 1.558930985342951e-05, "loss": 0.5549, "step": 4084 }, { "epoch": 0.33, "grad_norm": 0.9455312227753272, "learning_rate": 1.5587126776285818e-05, "loss": 0.6124, "step": 4085 }, { "epoch": 0.33, "grad_norm": 0.9137367270037585, "learning_rate": 1.5584943311959197e-05, "loss": 0.5225, "step": 4086 }, { "epoch": 0.33, "grad_norm": 0.9787600920662352, "learning_rate": 1.5582759460600952e-05, "loss": 0.5767, "step": 4087 }, { "epoch": 0.33, "grad_norm": 0.8621461831215322, "learning_rate": 1.5580575222362435e-05, "loss": 0.5406, "step": 4088 }, { "epoch": 0.33, "grad_norm": 0.8804137554755774, "learning_rate": 1.5578390597395e-05, "loss": 0.5362, "step": 4089 }, { "epoch": 0.33, "grad_norm": 0.8723709928624246, "learning_rate": 1.5576205585850052e-05, "loss": 0.4771, "step": 4090 }, { "epoch": 0.33, "grad_norm": 0.9936210472826803, "learning_rate": 1.5574020187878994e-05, "loss": 0.603, "step": 4091 }, { "epoch": 0.33, "grad_norm": 1.0090338812951476, "learning_rate": 1.557183440363329e-05, "loss": 0.6171, "step": 4092 }, { "epoch": 0.33, "grad_norm": 0.8932855373353723, "learning_rate": 1.5569648233264395e-05, "loss": 0.5652, "step": 4093 }, { "epoch": 0.33, "grad_norm": 0.8737280526971734, "learning_rate": 1.556746167692382e-05, "loss": 0.5322, "step": 4094 }, { "epoch": 0.33, "grad_norm": 0.9708220775517294, "learning_rate": 1.5565274734763094e-05, "loss": 0.5867, "step": 4095 }, { "epoch": 0.33, "grad_norm": 0.9697497442178686, "learning_rate": 1.5563087406933762e-05, "loss": 0.5511, "step": 4096 }, { "epoch": 0.33, "grad_norm": 0.9903770214845385, "learning_rate": 1.5560899693587405e-05, "loss": 0.6059, "step": 4097 }, { "epoch": 0.33, "grad_norm": 0.9203756944219564, "learning_rate": 1.5558711594875634e-05, "loss": 0.4967, "step": 4098 }, { "epoch": 0.33, "grad_norm": 0.942205169285279, "learning_rate": 1.555652311095008e-05, "loss": 0.5684, "step": 4099 }, { "epoch": 0.33, "grad_norm": 0.9600421552335939, "learning_rate": 1.5554334241962403e-05, "loss": 0.5922, "step": 4100 }, { "epoch": 0.33, "grad_norm": 0.9174980936365247, "learning_rate": 1.5552144988064292e-05, "loss": 0.5991, "step": 4101 }, { "epoch": 0.33, "grad_norm": 0.8450449715915833, "learning_rate": 1.5549955349407456e-05, "loss": 0.5217, "step": 4102 }, { "epoch": 0.33, "grad_norm": 0.8690691429197966, "learning_rate": 1.5547765326143634e-05, "loss": 0.5026, "step": 4103 }, { "epoch": 0.33, "grad_norm": 1.0294790378362721, "learning_rate": 1.5545574918424602e-05, "loss": 0.6469, "step": 4104 }, { "epoch": 0.33, "grad_norm": 0.8553897917778106, "learning_rate": 1.5543384126402144e-05, "loss": 0.5178, "step": 4105 }, { "epoch": 0.33, "grad_norm": 0.963476645302054, "learning_rate": 1.554119295022808e-05, "loss": 0.5804, "step": 4106 }, { "epoch": 0.33, "grad_norm": 0.8640671675579383, "learning_rate": 1.5539001390054265e-05, "loss": 0.5261, "step": 4107 }, { "epoch": 0.33, "grad_norm": 0.9682608000185056, "learning_rate": 1.5536809446032562e-05, "loss": 0.6721, "step": 4108 }, { "epoch": 0.33, "grad_norm": 0.8860882499843543, "learning_rate": 1.5534617118314882e-05, "loss": 0.5743, "step": 4109 }, { "epoch": 0.33, "grad_norm": 0.9069502189222868, "learning_rate": 1.553242440705314e-05, "loss": 0.5516, "step": 4110 }, { "epoch": 0.33, "grad_norm": 0.8936217039676828, "learning_rate": 1.5530231312399294e-05, "loss": 0.5435, "step": 4111 }, { "epoch": 0.33, "grad_norm": 0.9706173889045864, "learning_rate": 1.5528037834505322e-05, "loss": 0.5701, "step": 4112 }, { "epoch": 0.33, "grad_norm": 0.9178913160396679, "learning_rate": 1.5525843973523237e-05, "loss": 0.5524, "step": 4113 }, { "epoch": 0.33, "grad_norm": 0.8145499993607632, "learning_rate": 1.552364972960506e-05, "loss": 0.5285, "step": 4114 }, { "epoch": 0.33, "grad_norm": 1.016269762503253, "learning_rate": 1.552145510290286e-05, "loss": 0.5321, "step": 4115 }, { "epoch": 0.33, "grad_norm": 0.908990601168826, "learning_rate": 1.5519260093568717e-05, "loss": 0.5547, "step": 4116 }, { "epoch": 0.33, "grad_norm": 0.9328424886518241, "learning_rate": 1.5517064701754744e-05, "loss": 0.6041, "step": 4117 }, { "epoch": 0.33, "grad_norm": 0.9016063684659464, "learning_rate": 1.5514868927613084e-05, "loss": 0.5758, "step": 4118 }, { "epoch": 0.33, "grad_norm": 0.8420443749720986, "learning_rate": 1.5512672771295898e-05, "loss": 0.5029, "step": 4119 }, { "epoch": 0.33, "grad_norm": 0.9588642379929366, "learning_rate": 1.5510476232955376e-05, "loss": 0.588, "step": 4120 }, { "epoch": 0.33, "grad_norm": 0.9003302000099962, "learning_rate": 1.5508279312743742e-05, "loss": 0.5722, "step": 4121 }, { "epoch": 0.34, "grad_norm": 0.877570962504461, "learning_rate": 1.5506082010813237e-05, "loss": 0.4978, "step": 4122 }, { "epoch": 0.34, "grad_norm": 0.9989799140340428, "learning_rate": 1.550388432731613e-05, "loss": 0.5816, "step": 4123 }, { "epoch": 0.34, "grad_norm": 0.8644779147210184, "learning_rate": 1.550168626240472e-05, "loss": 0.5078, "step": 4124 }, { "epoch": 0.34, "grad_norm": 0.9713792974934381, "learning_rate": 1.549948781623134e-05, "loss": 0.6444, "step": 4125 }, { "epoch": 0.34, "grad_norm": 0.9157006453630713, "learning_rate": 1.5497288988948326e-05, "loss": 0.5919, "step": 4126 }, { "epoch": 0.34, "grad_norm": 0.9740797882912834, "learning_rate": 1.5495089780708062e-05, "loss": 0.6244, "step": 4127 }, { "epoch": 0.34, "grad_norm": 0.9011107822348161, "learning_rate": 1.5492890191662954e-05, "loss": 0.6247, "step": 4128 }, { "epoch": 0.34, "grad_norm": 1.0004936320232625, "learning_rate": 1.5490690221965424e-05, "loss": 0.5899, "step": 4129 }, { "epoch": 0.34, "grad_norm": 0.9769522662893291, "learning_rate": 1.5488489871767928e-05, "loss": 0.5603, "step": 4130 }, { "epoch": 0.34, "grad_norm": 0.9531619489136708, "learning_rate": 1.5486289141222955e-05, "loss": 0.546, "step": 4131 }, { "epoch": 0.34, "grad_norm": 0.8588746366213443, "learning_rate": 1.5484088030483015e-05, "loss": 0.548, "step": 4132 }, { "epoch": 0.34, "grad_norm": 0.9155465841984228, "learning_rate": 1.5481886539700636e-05, "loss": 0.4894, "step": 4133 }, { "epoch": 0.34, "grad_norm": 0.924147240967755, "learning_rate": 1.5479684669028384e-05, "loss": 0.5814, "step": 4134 }, { "epoch": 0.34, "grad_norm": 0.8825588133700512, "learning_rate": 1.5477482418618844e-05, "loss": 0.625, "step": 4135 }, { "epoch": 0.34, "grad_norm": 1.0442236829760536, "learning_rate": 1.547527978862463e-05, "loss": 0.6079, "step": 4136 }, { "epoch": 0.34, "grad_norm": 0.9227304530173703, "learning_rate": 1.5473076779198385e-05, "loss": 0.5721, "step": 4137 }, { "epoch": 0.34, "grad_norm": 1.0268902487716691, "learning_rate": 1.547087339049277e-05, "loss": 0.6532, "step": 4138 }, { "epoch": 0.34, "grad_norm": 0.9282329564893024, "learning_rate": 1.5468669622660487e-05, "loss": 0.5388, "step": 4139 }, { "epoch": 0.34, "grad_norm": 0.9878857633469456, "learning_rate": 1.5466465475854246e-05, "loss": 0.5986, "step": 4140 }, { "epoch": 0.34, "grad_norm": 1.0095349326177143, "learning_rate": 1.54642609502268e-05, "loss": 0.5704, "step": 4141 }, { "epoch": 0.34, "grad_norm": 0.8335990919695064, "learning_rate": 1.5462056045930912e-05, "loss": 0.5412, "step": 4142 }, { "epoch": 0.34, "grad_norm": 0.9715117775274271, "learning_rate": 1.5459850763119386e-05, "loss": 0.5278, "step": 4143 }, { "epoch": 0.34, "grad_norm": 0.9114149090614846, "learning_rate": 1.5457645101945046e-05, "loss": 0.5223, "step": 4144 }, { "epoch": 0.34, "grad_norm": 0.858592720845518, "learning_rate": 1.5455439062560743e-05, "loss": 0.5791, "step": 4145 }, { "epoch": 0.34, "grad_norm": 0.8942255917626747, "learning_rate": 1.5453232645119348e-05, "loss": 0.5552, "step": 4146 }, { "epoch": 0.34, "grad_norm": 0.8826034287798566, "learning_rate": 1.5451025849773773e-05, "loss": 0.5712, "step": 4147 }, { "epoch": 0.34, "grad_norm": 0.9978211602085963, "learning_rate": 1.544881867667694e-05, "loss": 0.587, "step": 4148 }, { "epoch": 0.34, "grad_norm": 0.9534289695842948, "learning_rate": 1.5446611125981804e-05, "loss": 0.5829, "step": 4149 }, { "epoch": 0.34, "grad_norm": 0.9171045438073293, "learning_rate": 1.5444403197841345e-05, "loss": 0.6322, "step": 4150 }, { "epoch": 0.34, "grad_norm": 0.9103267468648931, "learning_rate": 1.5442194892408583e-05, "loss": 0.5276, "step": 4151 }, { "epoch": 0.34, "grad_norm": 0.9318835958199996, "learning_rate": 1.5439986209836532e-05, "loss": 0.5889, "step": 4152 }, { "epoch": 0.34, "grad_norm": 0.9935823989772233, "learning_rate": 1.5437777150278268e-05, "loss": 0.5639, "step": 4153 }, { "epoch": 0.34, "grad_norm": 1.0297272006608487, "learning_rate": 1.543556771388687e-05, "loss": 0.6397, "step": 4154 }, { "epoch": 0.34, "grad_norm": 0.9775512865777397, "learning_rate": 1.543335790081545e-05, "loss": 0.5728, "step": 4155 }, { "epoch": 0.34, "grad_norm": 0.8264711311001698, "learning_rate": 1.5431147711217147e-05, "loss": 0.5647, "step": 4156 }, { "epoch": 0.34, "grad_norm": 0.7969675893425409, "learning_rate": 1.5428937145245126e-05, "loss": 0.5264, "step": 4157 }, { "epoch": 0.34, "grad_norm": 0.9149958151946387, "learning_rate": 1.542672620305257e-05, "loss": 0.5749, "step": 4158 }, { "epoch": 0.34, "grad_norm": 0.8931961491044399, "learning_rate": 1.542451488479271e-05, "loss": 0.5328, "step": 4159 }, { "epoch": 0.34, "grad_norm": 0.8109905646646205, "learning_rate": 1.5422303190618776e-05, "loss": 0.5047, "step": 4160 }, { "epoch": 0.34, "grad_norm": 0.9536185768596526, "learning_rate": 1.5420091120684042e-05, "loss": 0.5677, "step": 4161 }, { "epoch": 0.34, "grad_norm": 0.9340327406436755, "learning_rate": 1.54178786751418e-05, "loss": 0.5517, "step": 4162 }, { "epoch": 0.34, "grad_norm": 0.8801478055544735, "learning_rate": 1.541566585414537e-05, "loss": 0.5112, "step": 4163 }, { "epoch": 0.34, "grad_norm": 0.9203855454083592, "learning_rate": 1.5413452657848104e-05, "loss": 0.5836, "step": 4164 }, { "epoch": 0.34, "grad_norm": 0.9008530027903892, "learning_rate": 1.5411239086403367e-05, "loss": 0.5972, "step": 4165 }, { "epoch": 0.34, "grad_norm": 1.0356456643420995, "learning_rate": 1.540902513996456e-05, "loss": 0.597, "step": 4166 }, { "epoch": 0.34, "grad_norm": 0.8433055793215191, "learning_rate": 1.5406810818685113e-05, "loss": 0.5037, "step": 4167 }, { "epoch": 0.34, "grad_norm": 1.0015286153463043, "learning_rate": 1.5404596122718473e-05, "loss": 0.581, "step": 4168 }, { "epoch": 0.34, "grad_norm": 0.9712135995909356, "learning_rate": 1.540238105221811e-05, "loss": 0.5599, "step": 4169 }, { "epoch": 0.34, "grad_norm": 0.9707839063979106, "learning_rate": 1.5400165607337534e-05, "loss": 0.5304, "step": 4170 }, { "epoch": 0.34, "grad_norm": 0.9938465618035094, "learning_rate": 1.5397949788230275e-05, "loss": 0.575, "step": 4171 }, { "epoch": 0.34, "grad_norm": 1.1246282226906894, "learning_rate": 1.539573359504988e-05, "loss": 0.5929, "step": 4172 }, { "epoch": 0.34, "grad_norm": 0.952586657296879, "learning_rate": 1.539351702794993e-05, "loss": 0.5333, "step": 4173 }, { "epoch": 0.34, "grad_norm": 0.976592921038152, "learning_rate": 1.539130008708404e-05, "loss": 0.6206, "step": 4174 }, { "epoch": 0.34, "grad_norm": 0.8593233700589394, "learning_rate": 1.538908277260583e-05, "loss": 0.5927, "step": 4175 }, { "epoch": 0.34, "grad_norm": 0.9321633186484158, "learning_rate": 1.538686508466897e-05, "loss": 0.6076, "step": 4176 }, { "epoch": 0.34, "grad_norm": 0.9736653930607452, "learning_rate": 1.5384647023427136e-05, "loss": 0.6351, "step": 4177 }, { "epoch": 0.34, "grad_norm": 1.0073840250099277, "learning_rate": 1.538242858903404e-05, "loss": 0.5281, "step": 4178 }, { "epoch": 0.34, "grad_norm": 0.9670824565967566, "learning_rate": 1.538020978164341e-05, "loss": 0.5519, "step": 4179 }, { "epoch": 0.34, "grad_norm": 0.9003903583858006, "learning_rate": 1.5377990601409022e-05, "loss": 0.5621, "step": 4180 }, { "epoch": 0.34, "grad_norm": 1.07375811358448, "learning_rate": 1.5375771048484657e-05, "loss": 0.5801, "step": 4181 }, { "epoch": 0.34, "grad_norm": 0.9829663735472544, "learning_rate": 1.5373551123024123e-05, "loss": 0.6094, "step": 4182 }, { "epoch": 0.34, "grad_norm": 0.8686452093382149, "learning_rate": 1.537133082518126e-05, "loss": 0.5506, "step": 4183 }, { "epoch": 0.34, "grad_norm": 0.9804770241218727, "learning_rate": 1.536911015510994e-05, "loss": 0.5503, "step": 4184 }, { "epoch": 0.34, "grad_norm": 0.8689620504507469, "learning_rate": 1.5366889112964044e-05, "loss": 0.5606, "step": 4185 }, { "epoch": 0.34, "grad_norm": 0.880493881682588, "learning_rate": 1.5364667698897498e-05, "loss": 0.5309, "step": 4186 }, { "epoch": 0.34, "grad_norm": 0.9549420150814182, "learning_rate": 1.5362445913064238e-05, "loss": 0.5554, "step": 4187 }, { "epoch": 0.34, "grad_norm": 0.9255714148261335, "learning_rate": 1.536022375561823e-05, "loss": 0.5805, "step": 4188 }, { "epoch": 0.34, "grad_norm": 0.9160529514074018, "learning_rate": 1.535800122671347e-05, "loss": 0.5463, "step": 4189 }, { "epoch": 0.34, "grad_norm": 0.9220363895496618, "learning_rate": 1.535577832650398e-05, "loss": 0.4973, "step": 4190 }, { "epoch": 0.34, "grad_norm": 0.8529340143381954, "learning_rate": 1.53535550551438e-05, "loss": 0.5442, "step": 4191 }, { "epoch": 0.34, "grad_norm": 0.9997574057268795, "learning_rate": 1.5351331412787004e-05, "loss": 0.6065, "step": 4192 }, { "epoch": 0.34, "grad_norm": 0.9291109892458387, "learning_rate": 1.534910739958769e-05, "loss": 0.5558, "step": 4193 }, { "epoch": 0.34, "grad_norm": 0.9131668497103859, "learning_rate": 1.5346883015699976e-05, "loss": 0.5957, "step": 4194 }, { "epoch": 0.34, "grad_norm": 0.9013737322445415, "learning_rate": 1.5344658261278013e-05, "loss": 0.6395, "step": 4195 }, { "epoch": 0.34, "grad_norm": 0.8451928896140083, "learning_rate": 1.5342433136475972e-05, "loss": 0.5138, "step": 4196 }, { "epoch": 0.34, "grad_norm": 0.9204352581964403, "learning_rate": 1.5340207641448054e-05, "loss": 0.5535, "step": 4197 }, { "epoch": 0.34, "grad_norm": 0.9285526995779052, "learning_rate": 1.5337981776348484e-05, "loss": 0.52, "step": 4198 }, { "epoch": 0.34, "grad_norm": 0.96092597621691, "learning_rate": 1.533575554133151e-05, "loss": 0.5847, "step": 4199 }, { "epoch": 0.34, "grad_norm": 0.8841256267260578, "learning_rate": 1.533352893655141e-05, "loss": 0.565, "step": 4200 }, { "epoch": 0.34, "grad_norm": 0.9820667038604786, "learning_rate": 1.5331301962162485e-05, "loss": 0.6118, "step": 4201 }, { "epoch": 0.34, "grad_norm": 0.90455636810397, "learning_rate": 1.5329074618319063e-05, "loss": 0.5453, "step": 4202 }, { "epoch": 0.34, "grad_norm": 1.0119288125177617, "learning_rate": 1.5326846905175497e-05, "loss": 0.5862, "step": 4203 }, { "epoch": 0.34, "grad_norm": 0.9461879744824403, "learning_rate": 1.5324618822886167e-05, "loss": 0.6043, "step": 4204 }, { "epoch": 0.34, "grad_norm": 0.939134668597495, "learning_rate": 1.5322390371605473e-05, "loss": 0.577, "step": 4205 }, { "epoch": 0.34, "grad_norm": 0.9843968714772094, "learning_rate": 1.532016155148785e-05, "loss": 0.5525, "step": 4206 }, { "epoch": 0.34, "grad_norm": 0.9416378125162804, "learning_rate": 1.531793236268775e-05, "loss": 0.561, "step": 4207 }, { "epoch": 0.34, "grad_norm": 0.9043162067384275, "learning_rate": 1.531570280535965e-05, "loss": 0.4951, "step": 4208 }, { "epoch": 0.34, "grad_norm": 0.953135119133098, "learning_rate": 1.5313472879658066e-05, "loss": 0.5997, "step": 4209 }, { "epoch": 0.34, "grad_norm": 0.9904782885873062, "learning_rate": 1.531124258573752e-05, "loss": 0.5398, "step": 4210 }, { "epoch": 0.34, "grad_norm": 0.9141887774274177, "learning_rate": 1.530901192375258e-05, "loss": 0.5488, "step": 4211 }, { "epoch": 0.34, "grad_norm": 0.9307386669857576, "learning_rate": 1.530678089385782e-05, "loss": 0.5841, "step": 4212 }, { "epoch": 0.34, "grad_norm": 0.9567495658547867, "learning_rate": 1.5304549496207848e-05, "loss": 0.5479, "step": 4213 }, { "epoch": 0.34, "grad_norm": 0.9676728012807723, "learning_rate": 1.5302317730957305e-05, "loss": 0.5915, "step": 4214 }, { "epoch": 0.34, "grad_norm": 0.9185599117650312, "learning_rate": 1.5300085598260843e-05, "loss": 0.5809, "step": 4215 }, { "epoch": 0.34, "grad_norm": 0.8203698746978695, "learning_rate": 1.5297853098273148e-05, "loss": 0.5176, "step": 4216 }, { "epoch": 0.34, "grad_norm": 0.9929516274827571, "learning_rate": 1.529562023114894e-05, "loss": 0.5683, "step": 4217 }, { "epoch": 0.34, "grad_norm": 0.9217222912479962, "learning_rate": 1.5293386997042943e-05, "loss": 0.5301, "step": 4218 }, { "epoch": 0.34, "grad_norm": 0.8681487591067317, "learning_rate": 1.5291153396109925e-05, "loss": 0.5411, "step": 4219 }, { "epoch": 0.34, "grad_norm": 0.9882312511442278, "learning_rate": 1.5288919428504668e-05, "loss": 0.6358, "step": 4220 }, { "epoch": 0.34, "grad_norm": 0.9066244268708632, "learning_rate": 1.5286685094381984e-05, "loss": 0.5119, "step": 4221 }, { "epoch": 0.34, "grad_norm": 0.9038436185292014, "learning_rate": 1.5284450393896713e-05, "loss": 0.5951, "step": 4222 }, { "epoch": 0.34, "grad_norm": 0.9642323326452018, "learning_rate": 1.528221532720372e-05, "loss": 0.546, "step": 4223 }, { "epoch": 0.34, "grad_norm": 0.9956160521476128, "learning_rate": 1.5279979894457887e-05, "loss": 0.6047, "step": 4224 }, { "epoch": 0.34, "grad_norm": 0.8354483533906525, "learning_rate": 1.5277744095814132e-05, "loss": 0.5119, "step": 4225 }, { "epoch": 0.34, "grad_norm": 0.9604418200300491, "learning_rate": 1.5275507931427392e-05, "loss": 0.5881, "step": 4226 }, { "epoch": 0.34, "grad_norm": 0.9005288726215351, "learning_rate": 1.5273271401452633e-05, "loss": 0.5507, "step": 4227 }, { "epoch": 0.34, "grad_norm": 0.9390955908421621, "learning_rate": 1.5271034506044838e-05, "loss": 0.5362, "step": 4228 }, { "epoch": 0.34, "grad_norm": 1.004379660532751, "learning_rate": 1.5268797245359035e-05, "loss": 0.5816, "step": 4229 }, { "epoch": 0.34, "grad_norm": 0.988809361388353, "learning_rate": 1.5266559619550254e-05, "loss": 0.5677, "step": 4230 }, { "epoch": 0.34, "grad_norm": 0.8965461566573829, "learning_rate": 1.526432162877356e-05, "loss": 0.5972, "step": 4231 }, { "epoch": 0.34, "grad_norm": 0.8899439036923005, "learning_rate": 1.526208327318405e-05, "loss": 0.5549, "step": 4232 }, { "epoch": 0.34, "grad_norm": 0.8831555105843113, "learning_rate": 1.5259844552936833e-05, "loss": 0.5199, "step": 4233 }, { "epoch": 0.34, "grad_norm": 0.9053002337578933, "learning_rate": 1.5257605468187056e-05, "loss": 0.5575, "step": 4234 }, { "epoch": 0.34, "grad_norm": 1.076438001507276, "learning_rate": 1.5255366019089883e-05, "loss": 0.4907, "step": 4235 }, { "epoch": 0.34, "grad_norm": 0.9969810516684846, "learning_rate": 1.5253126205800506e-05, "loss": 0.6066, "step": 4236 }, { "epoch": 0.34, "grad_norm": 0.9872274573817507, "learning_rate": 1.525088602847414e-05, "loss": 0.5654, "step": 4237 }, { "epoch": 0.34, "grad_norm": 0.8572405337777793, "learning_rate": 1.5248645487266036e-05, "loss": 0.5443, "step": 4238 }, { "epoch": 0.34, "grad_norm": 0.8459905204744106, "learning_rate": 1.5246404582331451e-05, "loss": 0.5223, "step": 4239 }, { "epoch": 0.34, "grad_norm": 0.8089099757270066, "learning_rate": 1.5244163313825684e-05, "loss": 0.5629, "step": 4240 }, { "epoch": 0.34, "grad_norm": 0.8868275652220692, "learning_rate": 1.524192168190405e-05, "loss": 0.4984, "step": 4241 }, { "epoch": 0.34, "grad_norm": 0.9299705286995178, "learning_rate": 1.5239679686721892e-05, "loss": 0.5544, "step": 4242 }, { "epoch": 0.34, "grad_norm": 1.032961198809958, "learning_rate": 1.5237437328434581e-05, "loss": 0.6112, "step": 4243 }, { "epoch": 0.34, "grad_norm": 0.8999846365877913, "learning_rate": 1.5235194607197508e-05, "loss": 0.545, "step": 4244 }, { "epoch": 0.35, "grad_norm": 0.9422703636135591, "learning_rate": 1.5232951523166092e-05, "loss": 0.5203, "step": 4245 }, { "epoch": 0.35, "grad_norm": 0.8820713677107922, "learning_rate": 1.5230708076495777e-05, "loss": 0.5232, "step": 4246 }, { "epoch": 0.35, "grad_norm": 0.9241143950002292, "learning_rate": 1.5228464267342036e-05, "loss": 0.5773, "step": 4247 }, { "epoch": 0.35, "grad_norm": 0.9115901855811518, "learning_rate": 1.5226220095860353e-05, "loss": 0.5443, "step": 4248 }, { "epoch": 0.35, "grad_norm": 0.9692553383520067, "learning_rate": 1.5223975562206255e-05, "loss": 0.5583, "step": 4249 }, { "epoch": 0.35, "grad_norm": 1.0601455956061403, "learning_rate": 1.5221730666535285e-05, "loss": 0.6298, "step": 4250 }, { "epoch": 0.35, "grad_norm": 0.7742548362033347, "learning_rate": 1.5219485409003013e-05, "loss": 0.5086, "step": 4251 }, { "epoch": 0.35, "grad_norm": 1.0254340546298504, "learning_rate": 1.5217239789765028e-05, "loss": 0.5832, "step": 4252 }, { "epoch": 0.35, "grad_norm": 0.8661718359978385, "learning_rate": 1.5214993808976956e-05, "loss": 0.5454, "step": 4253 }, { "epoch": 0.35, "grad_norm": 0.9429372489488378, "learning_rate": 1.5212747466794437e-05, "loss": 0.5834, "step": 4254 }, { "epoch": 0.35, "grad_norm": 0.9280200691809544, "learning_rate": 1.5210500763373142e-05, "loss": 0.5302, "step": 4255 }, { "epoch": 0.35, "grad_norm": 0.9484388180375084, "learning_rate": 1.5208253698868766e-05, "loss": 0.6313, "step": 4256 }, { "epoch": 0.35, "grad_norm": 0.9170638988041517, "learning_rate": 1.5206006273437031e-05, "loss": 0.6159, "step": 4257 }, { "epoch": 0.35, "grad_norm": 1.0063274313357442, "learning_rate": 1.5203758487233677e-05, "loss": 0.5816, "step": 4258 }, { "epoch": 0.35, "grad_norm": 0.8113862998367966, "learning_rate": 1.5201510340414473e-05, "loss": 0.5125, "step": 4259 }, { "epoch": 0.35, "grad_norm": 0.958709181326403, "learning_rate": 1.5199261833135222e-05, "loss": 0.5564, "step": 4260 }, { "epoch": 0.35, "grad_norm": 0.8211454039458304, "learning_rate": 1.5197012965551733e-05, "loss": 0.5896, "step": 4261 }, { "epoch": 0.35, "grad_norm": 0.8304508772583704, "learning_rate": 1.5194763737819856e-05, "loss": 0.5553, "step": 4262 }, { "epoch": 0.35, "grad_norm": 0.8899457437890497, "learning_rate": 1.519251415009546e-05, "loss": 0.5524, "step": 4263 }, { "epoch": 0.35, "grad_norm": 0.9641008384200902, "learning_rate": 1.5190264202534442e-05, "loss": 0.5872, "step": 4264 }, { "epoch": 0.35, "grad_norm": 1.0481063603542773, "learning_rate": 1.5188013895292715e-05, "loss": 0.6124, "step": 4265 }, { "epoch": 0.35, "grad_norm": 1.0437724166588191, "learning_rate": 1.5185763228526226e-05, "loss": 0.6339, "step": 4266 }, { "epoch": 0.35, "grad_norm": 0.8858211683890729, "learning_rate": 1.5183512202390951e-05, "loss": 0.5898, "step": 4267 }, { "epoch": 0.35, "grad_norm": 0.8905028036724305, "learning_rate": 1.518126081704287e-05, "loss": 0.5172, "step": 4268 }, { "epoch": 0.35, "grad_norm": 0.8816742044266888, "learning_rate": 1.5179009072638016e-05, "loss": 0.5368, "step": 4269 }, { "epoch": 0.35, "grad_norm": 0.8944448914022917, "learning_rate": 1.5176756969332428e-05, "loss": 0.554, "step": 4270 }, { "epoch": 0.35, "grad_norm": 0.8512773616826556, "learning_rate": 1.5174504507282168e-05, "loss": 0.485, "step": 4271 }, { "epoch": 0.35, "grad_norm": 0.9369207232240409, "learning_rate": 1.517225168664334e-05, "loss": 0.5814, "step": 4272 }, { "epoch": 0.35, "grad_norm": 0.8926504243414912, "learning_rate": 1.5169998507572057e-05, "loss": 0.5155, "step": 4273 }, { "epoch": 0.35, "grad_norm": 0.9501844007981542, "learning_rate": 1.5167744970224463e-05, "loss": 0.5562, "step": 4274 }, { "epoch": 0.35, "grad_norm": 0.8999480245495015, "learning_rate": 1.5165491074756723e-05, "loss": 0.5411, "step": 4275 }, { "epoch": 0.35, "grad_norm": 0.8527653264746171, "learning_rate": 1.5163236821325037e-05, "loss": 0.5883, "step": 4276 }, { "epoch": 0.35, "grad_norm": 0.8691186223661405, "learning_rate": 1.5160982210085621e-05, "loss": 0.5269, "step": 4277 }, { "epoch": 0.35, "grad_norm": 0.933540473664518, "learning_rate": 1.515872724119471e-05, "loss": 0.5769, "step": 4278 }, { "epoch": 0.35, "grad_norm": 0.9157217785033557, "learning_rate": 1.5156471914808582e-05, "loss": 0.542, "step": 4279 }, { "epoch": 0.35, "grad_norm": 0.9699683221638261, "learning_rate": 1.5154216231083522e-05, "loss": 0.5454, "step": 4280 }, { "epoch": 0.35, "grad_norm": 0.8620056534267265, "learning_rate": 1.515196019017585e-05, "loss": 0.4944, "step": 4281 }, { "epoch": 0.35, "grad_norm": 0.8463023180790658, "learning_rate": 1.5149703792241903e-05, "loss": 0.509, "step": 4282 }, { "epoch": 0.35, "grad_norm": 0.7887007205212211, "learning_rate": 1.5147447037438055e-05, "loss": 0.4653, "step": 4283 }, { "epoch": 0.35, "grad_norm": 0.896276367645115, "learning_rate": 1.5145189925920694e-05, "loss": 0.5098, "step": 4284 }, { "epoch": 0.35, "grad_norm": 0.9270094024131633, "learning_rate": 1.514293245784623e-05, "loss": 0.5909, "step": 4285 }, { "epoch": 0.35, "grad_norm": 0.9105702606018491, "learning_rate": 1.514067463337111e-05, "loss": 0.5257, "step": 4286 }, { "epoch": 0.35, "grad_norm": 0.9046292918014648, "learning_rate": 1.5138416452651803e-05, "loss": 0.5459, "step": 4287 }, { "epoch": 0.35, "grad_norm": 0.8185620723801201, "learning_rate": 1.5136157915844787e-05, "loss": 0.4844, "step": 4288 }, { "epoch": 0.35, "grad_norm": 0.9212215102695174, "learning_rate": 1.5133899023106584e-05, "loss": 0.551, "step": 4289 }, { "epoch": 0.35, "grad_norm": 0.8689911637720217, "learning_rate": 1.5131639774593737e-05, "loss": 0.535, "step": 4290 }, { "epoch": 0.35, "grad_norm": 0.9015688037502202, "learning_rate": 1.5129380170462802e-05, "loss": 0.5815, "step": 4291 }, { "epoch": 0.35, "grad_norm": 0.8854452628551268, "learning_rate": 1.512712021087037e-05, "loss": 0.5552, "step": 4292 }, { "epoch": 0.35, "grad_norm": 0.8375236940962522, "learning_rate": 1.5124859895973058e-05, "loss": 0.5811, "step": 4293 }, { "epoch": 0.35, "grad_norm": 1.0396346487059804, "learning_rate": 1.51225992259275e-05, "loss": 0.548, "step": 4294 }, { "epoch": 0.35, "grad_norm": 0.9574274447907284, "learning_rate": 1.5120338200890356e-05, "loss": 0.5502, "step": 4295 }, { "epoch": 0.35, "grad_norm": 0.86965100414926, "learning_rate": 1.5118076821018322e-05, "loss": 0.5442, "step": 4296 }, { "epoch": 0.35, "grad_norm": 0.8883625419684474, "learning_rate": 1.5115815086468103e-05, "loss": 0.5568, "step": 4297 }, { "epoch": 0.35, "grad_norm": 0.8950422831418536, "learning_rate": 1.511355299739643e-05, "loss": 0.5023, "step": 4298 }, { "epoch": 0.35, "grad_norm": 1.0147768322049326, "learning_rate": 1.511129055396008e-05, "loss": 0.5961, "step": 4299 }, { "epoch": 0.35, "grad_norm": 0.905224107471028, "learning_rate": 1.510902775631582e-05, "loss": 0.542, "step": 4300 }, { "epoch": 0.35, "grad_norm": 0.9008359298426322, "learning_rate": 1.510676460462047e-05, "loss": 0.5406, "step": 4301 }, { "epoch": 0.35, "grad_norm": 1.0409019164334707, "learning_rate": 1.5104501099030864e-05, "loss": 0.6519, "step": 4302 }, { "epoch": 0.35, "grad_norm": 0.92608839006507, "learning_rate": 1.5102237239703858e-05, "loss": 0.4852, "step": 4303 }, { "epoch": 0.35, "grad_norm": 0.8558223846325838, "learning_rate": 1.509997302679634e-05, "loss": 0.545, "step": 4304 }, { "epoch": 0.35, "grad_norm": 0.9071792434994643, "learning_rate": 1.5097708460465214e-05, "loss": 0.5647, "step": 4305 }, { "epoch": 0.35, "grad_norm": 0.8243596727202004, "learning_rate": 1.5095443540867412e-05, "loss": 0.4917, "step": 4306 }, { "epoch": 0.35, "grad_norm": 0.8528546266985911, "learning_rate": 1.5093178268159892e-05, "loss": 0.5034, "step": 4307 }, { "epoch": 0.35, "grad_norm": 0.8606279197401389, "learning_rate": 1.5090912642499635e-05, "loss": 0.5733, "step": 4308 }, { "epoch": 0.35, "grad_norm": 0.9107650353024779, "learning_rate": 1.5088646664043652e-05, "loss": 0.5562, "step": 4309 }, { "epoch": 0.35, "grad_norm": 0.8475845684717342, "learning_rate": 1.5086380332948962e-05, "loss": 0.5049, "step": 4310 }, { "epoch": 0.35, "grad_norm": 0.900875716155614, "learning_rate": 1.5084113649372634e-05, "loss": 0.5144, "step": 4311 }, { "epoch": 0.35, "grad_norm": 0.9098977051510746, "learning_rate": 1.5081846613471736e-05, "loss": 0.5301, "step": 4312 }, { "epoch": 0.35, "grad_norm": 0.9173494547586318, "learning_rate": 1.5079579225403373e-05, "loss": 0.5734, "step": 4313 }, { "epoch": 0.35, "grad_norm": 0.8997686442804093, "learning_rate": 1.507731148532468e-05, "loss": 0.6031, "step": 4314 }, { "epoch": 0.35, "grad_norm": 1.0035543282799402, "learning_rate": 1.5075043393392799e-05, "loss": 0.594, "step": 4315 }, { "epoch": 0.35, "grad_norm": 0.9416183936432486, "learning_rate": 1.5072774949764916e-05, "loss": 0.563, "step": 4316 }, { "epoch": 0.35, "grad_norm": 1.111424103703773, "learning_rate": 1.5070506154598228e-05, "loss": 0.6619, "step": 4317 }, { "epoch": 0.35, "grad_norm": 0.9708532135401428, "learning_rate": 1.5068237008049963e-05, "loss": 0.5821, "step": 4318 }, { "epoch": 0.35, "grad_norm": 0.914665472381504, "learning_rate": 1.5065967510277366e-05, "loss": 0.5453, "step": 4319 }, { "epoch": 0.35, "grad_norm": 0.8798308983590678, "learning_rate": 1.5063697661437713e-05, "loss": 0.5403, "step": 4320 }, { "epoch": 0.35, "grad_norm": 0.8131661194438965, "learning_rate": 1.5061427461688306e-05, "loss": 0.4882, "step": 4321 }, { "epoch": 0.35, "grad_norm": 0.9820694718040851, "learning_rate": 1.5059156911186465e-05, "loss": 0.5487, "step": 4322 }, { "epoch": 0.35, "grad_norm": 1.0046413759018036, "learning_rate": 1.5056886010089536e-05, "loss": 0.6133, "step": 4323 }, { "epoch": 0.35, "grad_norm": 0.8878118116815175, "learning_rate": 1.5054614758554896e-05, "loss": 0.653, "step": 4324 }, { "epoch": 0.35, "grad_norm": 1.2206966797071999, "learning_rate": 1.5052343156739933e-05, "loss": 0.6036, "step": 4325 }, { "epoch": 0.35, "grad_norm": 0.9180573056939928, "learning_rate": 1.5050071204802073e-05, "loss": 0.5868, "step": 4326 }, { "epoch": 0.35, "grad_norm": 1.0217753319934397, "learning_rate": 1.5047798902898756e-05, "loss": 0.622, "step": 4327 }, { "epoch": 0.35, "grad_norm": 0.8974867336494516, "learning_rate": 1.5045526251187452e-05, "loss": 0.5798, "step": 4328 }, { "epoch": 0.35, "grad_norm": 0.9152499171753111, "learning_rate": 1.5043253249825656e-05, "loss": 0.6113, "step": 4329 }, { "epoch": 0.35, "grad_norm": 0.9093691066885794, "learning_rate": 1.5040979898970883e-05, "loss": 0.588, "step": 4330 }, { "epoch": 0.35, "grad_norm": 1.1133396282471304, "learning_rate": 1.5038706198780673e-05, "loss": 0.5973, "step": 4331 }, { "epoch": 0.35, "grad_norm": 0.9198753057184388, "learning_rate": 1.5036432149412592e-05, "loss": 0.5804, "step": 4332 }, { "epoch": 0.35, "grad_norm": 1.0969343843678707, "learning_rate": 1.5034157751024232e-05, "loss": 0.6763, "step": 4333 }, { "epoch": 0.35, "grad_norm": 0.9186493768039867, "learning_rate": 1.5031883003773206e-05, "loss": 0.5646, "step": 4334 }, { "epoch": 0.35, "grad_norm": 0.9582047355056563, "learning_rate": 1.502960790781715e-05, "loss": 0.5473, "step": 4335 }, { "epoch": 0.35, "grad_norm": 0.8762162500326096, "learning_rate": 1.5027332463313729e-05, "loss": 0.622, "step": 4336 }, { "epoch": 0.35, "grad_norm": 0.8232637771036523, "learning_rate": 1.5025056670420624e-05, "loss": 0.5395, "step": 4337 }, { "epoch": 0.35, "grad_norm": 0.90982048874615, "learning_rate": 1.502278052929555e-05, "loss": 0.5762, "step": 4338 }, { "epoch": 0.35, "grad_norm": 0.9655525218679801, "learning_rate": 1.5020504040096241e-05, "loss": 0.5324, "step": 4339 }, { "epoch": 0.35, "grad_norm": 0.7943921727751669, "learning_rate": 1.5018227202980455e-05, "loss": 0.4902, "step": 4340 }, { "epoch": 0.35, "grad_norm": 1.0548661431756876, "learning_rate": 1.5015950018105976e-05, "loss": 0.4857, "step": 4341 }, { "epoch": 0.35, "grad_norm": 0.9307571470045382, "learning_rate": 1.5013672485630611e-05, "loss": 0.6182, "step": 4342 }, { "epoch": 0.35, "grad_norm": 0.929896560472335, "learning_rate": 1.5011394605712188e-05, "loss": 0.4729, "step": 4343 }, { "epoch": 0.35, "grad_norm": 0.8867515976963221, "learning_rate": 1.5009116378508564e-05, "loss": 0.5139, "step": 4344 }, { "epoch": 0.35, "grad_norm": 1.02226268665367, "learning_rate": 1.5006837804177618e-05, "loss": 0.556, "step": 4345 }, { "epoch": 0.35, "grad_norm": 0.9191762225421793, "learning_rate": 1.5004558882877254e-05, "loss": 0.6007, "step": 4346 }, { "epoch": 0.35, "grad_norm": 0.9514399880147593, "learning_rate": 1.5002279614765396e-05, "loss": 0.5309, "step": 4347 }, { "epoch": 0.35, "grad_norm": 0.9494607283066798, "learning_rate": 1.5000000000000002e-05, "loss": 0.4908, "step": 4348 }, { "epoch": 0.35, "grad_norm": 1.1928182550584672, "learning_rate": 1.499772003873904e-05, "loss": 0.5622, "step": 4349 }, { "epoch": 0.35, "grad_norm": 0.9683394095413308, "learning_rate": 1.4995439731140512e-05, "loss": 0.548, "step": 4350 }, { "epoch": 0.35, "grad_norm": 1.0536520054444802, "learning_rate": 1.4993159077362445e-05, "loss": 0.6328, "step": 4351 }, { "epoch": 0.35, "grad_norm": 0.9355912661637906, "learning_rate": 1.499087807756288e-05, "loss": 0.5965, "step": 4352 }, { "epoch": 0.35, "grad_norm": 0.9159385022666104, "learning_rate": 1.4988596731899889e-05, "loss": 0.6053, "step": 4353 }, { "epoch": 0.35, "grad_norm": 0.9298009190003068, "learning_rate": 1.4986315040531574e-05, "loss": 0.5606, "step": 4354 }, { "epoch": 0.35, "grad_norm": 0.9000952798543325, "learning_rate": 1.4984033003616047e-05, "loss": 0.603, "step": 4355 }, { "epoch": 0.35, "grad_norm": 0.98380076433322, "learning_rate": 1.4981750621311453e-05, "loss": 0.5949, "step": 4356 }, { "epoch": 0.35, "grad_norm": 0.9435040964843473, "learning_rate": 1.4979467893775963e-05, "loss": 0.534, "step": 4357 }, { "epoch": 0.35, "grad_norm": 0.9903874881684613, "learning_rate": 1.4977184821167764e-05, "loss": 0.545, "step": 4358 }, { "epoch": 0.35, "grad_norm": 0.9306842224405125, "learning_rate": 1.4974901403645068e-05, "loss": 0.4705, "step": 4359 }, { "epoch": 0.35, "grad_norm": 0.879005834633366, "learning_rate": 1.497261764136612e-05, "loss": 0.5459, "step": 4360 }, { "epoch": 0.35, "grad_norm": 0.8701678877283088, "learning_rate": 1.4970333534489179e-05, "loss": 0.6014, "step": 4361 }, { "epoch": 0.35, "grad_norm": 0.7909788159248554, "learning_rate": 1.4968049083172534e-05, "loss": 0.5239, "step": 4362 }, { "epoch": 0.35, "grad_norm": 0.8666354990733092, "learning_rate": 1.4965764287574494e-05, "loss": 0.5672, "step": 4363 }, { "epoch": 0.35, "grad_norm": 0.9548946836991843, "learning_rate": 1.4963479147853393e-05, "loss": 0.5818, "step": 4364 }, { "epoch": 0.35, "grad_norm": 1.0469311919480322, "learning_rate": 1.496119366416759e-05, "loss": 0.6307, "step": 4365 }, { "epoch": 0.35, "grad_norm": 0.9131536614559036, "learning_rate": 1.4958907836675467e-05, "loss": 0.5407, "step": 4366 }, { "epoch": 0.35, "grad_norm": 0.9119903308980676, "learning_rate": 1.495662166553543e-05, "loss": 0.5048, "step": 4367 }, { "epoch": 0.36, "grad_norm": 1.139918394904237, "learning_rate": 1.4954335150905905e-05, "loss": 0.5712, "step": 4368 }, { "epoch": 0.36, "grad_norm": 0.982964987239106, "learning_rate": 1.4952048292945352e-05, "loss": 0.5777, "step": 4369 }, { "epoch": 0.36, "grad_norm": 0.9613085225608329, "learning_rate": 1.4949761091812243e-05, "loss": 0.5306, "step": 4370 }, { "epoch": 0.36, "grad_norm": 1.0194065477967498, "learning_rate": 1.4947473547665081e-05, "loss": 0.6194, "step": 4371 }, { "epoch": 0.36, "grad_norm": 0.8825081634776647, "learning_rate": 1.4945185660662391e-05, "loss": 0.5097, "step": 4372 }, { "epoch": 0.36, "grad_norm": 0.9650839534634333, "learning_rate": 1.4942897430962722e-05, "loss": 0.4978, "step": 4373 }, { "epoch": 0.36, "grad_norm": 1.0318679026761992, "learning_rate": 1.494060885872464e-05, "loss": 0.5238, "step": 4374 }, { "epoch": 0.36, "grad_norm": 0.8864586334831703, "learning_rate": 1.4938319944106754e-05, "loss": 0.5226, "step": 4375 }, { "epoch": 0.36, "grad_norm": 1.0151435374926832, "learning_rate": 1.4936030687267672e-05, "loss": 0.5641, "step": 4376 }, { "epoch": 0.36, "grad_norm": 0.9680320885205316, "learning_rate": 1.4933741088366043e-05, "loss": 0.6272, "step": 4377 }, { "epoch": 0.36, "grad_norm": 1.0063656243311683, "learning_rate": 1.4931451147560534e-05, "loss": 0.5885, "step": 4378 }, { "epoch": 0.36, "grad_norm": 0.8969813806528744, "learning_rate": 1.492916086500983e-05, "loss": 0.5366, "step": 4379 }, { "epoch": 0.36, "grad_norm": 0.89570113690597, "learning_rate": 1.4926870240872652e-05, "loss": 0.5287, "step": 4380 }, { "epoch": 0.36, "grad_norm": 1.6806229089415208, "learning_rate": 1.4924579275307738e-05, "loss": 0.5136, "step": 4381 }, { "epoch": 0.36, "grad_norm": 0.8772278715634582, "learning_rate": 1.492228796847385e-05, "loss": 0.5823, "step": 4382 }, { "epoch": 0.36, "grad_norm": 1.0148013614453049, "learning_rate": 1.4919996320529768e-05, "loss": 0.6213, "step": 4383 }, { "epoch": 0.36, "grad_norm": 0.9820137070849825, "learning_rate": 1.4917704331634306e-05, "loss": 0.5012, "step": 4384 }, { "epoch": 0.36, "grad_norm": 0.8983543389621446, "learning_rate": 1.49154120019463e-05, "loss": 0.5577, "step": 4385 }, { "epoch": 0.36, "grad_norm": 0.839493694424055, "learning_rate": 1.4913119331624597e-05, "loss": 0.5441, "step": 4386 }, { "epoch": 0.36, "grad_norm": 0.9193981006433436, "learning_rate": 1.4910826320828085e-05, "loss": 0.5411, "step": 4387 }, { "epoch": 0.36, "grad_norm": 1.0090728948785002, "learning_rate": 1.4908532969715663e-05, "loss": 0.5255, "step": 4388 }, { "epoch": 0.36, "grad_norm": 0.9836631247147275, "learning_rate": 1.490623927844626e-05, "loss": 0.5497, "step": 4389 }, { "epoch": 0.36, "grad_norm": 0.8860894415981879, "learning_rate": 1.4903945247178828e-05, "loss": 0.5318, "step": 4390 }, { "epoch": 0.36, "grad_norm": 0.8670465037749358, "learning_rate": 1.4901650876072342e-05, "loss": 0.5369, "step": 4391 }, { "epoch": 0.36, "grad_norm": 0.9953631170549834, "learning_rate": 1.4899356165285794e-05, "loss": 0.5545, "step": 4392 }, { "epoch": 0.36, "grad_norm": 0.9063420930863816, "learning_rate": 1.489706111497821e-05, "loss": 0.5357, "step": 4393 }, { "epoch": 0.36, "grad_norm": 0.881755479709398, "learning_rate": 1.489476572530864e-05, "loss": 0.5343, "step": 4394 }, { "epoch": 0.36, "grad_norm": 0.8193129452548926, "learning_rate": 1.489246999643614e-05, "loss": 0.4722, "step": 4395 }, { "epoch": 0.36, "grad_norm": 0.8783440703065661, "learning_rate": 1.489017392851981e-05, "loss": 0.5433, "step": 4396 }, { "epoch": 0.36, "grad_norm": 0.8547465151818406, "learning_rate": 1.488787752171877e-05, "loss": 0.5481, "step": 4397 }, { "epoch": 0.36, "grad_norm": 0.9133780786690582, "learning_rate": 1.4885580776192149e-05, "loss": 0.5353, "step": 4398 }, { "epoch": 0.36, "grad_norm": 0.8782420674843047, "learning_rate": 1.4883283692099114e-05, "loss": 0.5447, "step": 4399 }, { "epoch": 0.36, "grad_norm": 0.9650825987663942, "learning_rate": 1.488098626959885e-05, "loss": 0.6096, "step": 4400 }, { "epoch": 0.36, "grad_norm": 0.9457461408325257, "learning_rate": 1.4878688508850567e-05, "loss": 0.5685, "step": 4401 }, { "epoch": 0.36, "grad_norm": 0.8396109468560583, "learning_rate": 1.4876390410013498e-05, "loss": 0.5542, "step": 4402 }, { "epoch": 0.36, "grad_norm": 1.10276618772219, "learning_rate": 1.48740919732469e-05, "loss": 0.6523, "step": 4403 }, { "epoch": 0.36, "grad_norm": 0.8469604534554713, "learning_rate": 1.487179319871005e-05, "loss": 0.5672, "step": 4404 }, { "epoch": 0.36, "grad_norm": 0.9119790850830539, "learning_rate": 1.4869494086562253e-05, "loss": 0.6194, "step": 4405 }, { "epoch": 0.36, "grad_norm": 0.887038406000676, "learning_rate": 1.4867194636962836e-05, "loss": 0.5182, "step": 4406 }, { "epoch": 0.36, "grad_norm": 1.0079039365291105, "learning_rate": 1.4864894850071147e-05, "loss": 0.5612, "step": 4407 }, { "epoch": 0.36, "grad_norm": 0.8390473208179717, "learning_rate": 1.486259472604656e-05, "loss": 0.5347, "step": 4408 }, { "epoch": 0.36, "grad_norm": 0.9686207645890165, "learning_rate": 1.4860294265048474e-05, "loss": 0.5913, "step": 4409 }, { "epoch": 0.36, "grad_norm": 1.0015965610679087, "learning_rate": 1.4857993467236303e-05, "loss": 0.6163, "step": 4410 }, { "epoch": 0.36, "grad_norm": 0.9101404073900543, "learning_rate": 1.4855692332769494e-05, "loss": 0.537, "step": 4411 }, { "epoch": 0.36, "grad_norm": 0.9229731168365843, "learning_rate": 1.4853390861807518e-05, "loss": 0.5235, "step": 4412 }, { "epoch": 0.36, "grad_norm": 0.8095758622279768, "learning_rate": 1.4851089054509852e-05, "loss": 0.5163, "step": 4413 }, { "epoch": 0.36, "grad_norm": 0.9142132578654567, "learning_rate": 1.4848786911036023e-05, "loss": 0.5656, "step": 4414 }, { "epoch": 0.36, "grad_norm": 1.0076384074827356, "learning_rate": 1.4846484431545562e-05, "loss": 0.6286, "step": 4415 }, { "epoch": 0.36, "grad_norm": 1.0666728554711093, "learning_rate": 1.4844181616198028e-05, "loss": 0.598, "step": 4416 }, { "epoch": 0.36, "grad_norm": 0.9561431209856557, "learning_rate": 1.4841878465153006e-05, "loss": 0.6087, "step": 4417 }, { "epoch": 0.36, "grad_norm": 0.8399907289741158, "learning_rate": 1.4839574978570098e-05, "loss": 0.5377, "step": 4418 }, { "epoch": 0.36, "grad_norm": 0.9088906220770147, "learning_rate": 1.4837271156608938e-05, "loss": 0.5189, "step": 4419 }, { "epoch": 0.36, "grad_norm": 0.907192125612477, "learning_rate": 1.4834966999429179e-05, "loss": 0.5094, "step": 4420 }, { "epoch": 0.36, "grad_norm": 0.9549375328800763, "learning_rate": 1.4832662507190493e-05, "loss": 0.5861, "step": 4421 }, { "epoch": 0.36, "grad_norm": 0.8195915273699955, "learning_rate": 1.4830357680052586e-05, "loss": 0.448, "step": 4422 }, { "epoch": 0.36, "grad_norm": 0.8457199060127676, "learning_rate": 1.4828052518175172e-05, "loss": 0.5053, "step": 4423 }, { "epoch": 0.36, "grad_norm": 0.9655078403174225, "learning_rate": 1.4825747021718002e-05, "loss": 0.5754, "step": 4424 }, { "epoch": 0.36, "grad_norm": 0.8993455107136055, "learning_rate": 1.4823441190840844e-05, "loss": 0.5951, "step": 4425 }, { "epoch": 0.36, "grad_norm": 0.9695646262062783, "learning_rate": 1.4821135025703491e-05, "loss": 0.5539, "step": 4426 }, { "epoch": 0.36, "grad_norm": 0.9340908557152009, "learning_rate": 1.4818828526465755e-05, "loss": 0.5463, "step": 4427 }, { "epoch": 0.36, "grad_norm": 0.9484184703205448, "learning_rate": 1.4816521693287477e-05, "loss": 0.5511, "step": 4428 }, { "epoch": 0.36, "grad_norm": 0.8090461904484498, "learning_rate": 1.481421452632852e-05, "loss": 0.5391, "step": 4429 }, { "epoch": 0.36, "grad_norm": 0.9068706093760909, "learning_rate": 1.4811907025748764e-05, "loss": 0.531, "step": 4430 }, { "epoch": 0.36, "grad_norm": 0.8404838158009921, "learning_rate": 1.480959919170812e-05, "loss": 0.5492, "step": 4431 }, { "epoch": 0.36, "grad_norm": 0.8289162524796344, "learning_rate": 1.4807291024366519e-05, "loss": 0.5513, "step": 4432 }, { "epoch": 0.36, "grad_norm": 0.8394841803055377, "learning_rate": 1.4804982523883915e-05, "loss": 0.5078, "step": 4433 }, { "epoch": 0.36, "grad_norm": 0.9250383568107076, "learning_rate": 1.4802673690420281e-05, "loss": 0.5885, "step": 4434 }, { "epoch": 0.36, "grad_norm": 0.9495083386718011, "learning_rate": 1.4800364524135622e-05, "loss": 0.5778, "step": 4435 }, { "epoch": 0.36, "grad_norm": 0.9536242637144737, "learning_rate": 1.4798055025189962e-05, "loss": 0.6067, "step": 4436 }, { "epoch": 0.36, "grad_norm": 0.8677917692119125, "learning_rate": 1.4795745193743341e-05, "loss": 0.5587, "step": 4437 }, { "epoch": 0.36, "grad_norm": 0.8866934244445475, "learning_rate": 1.4793435029955832e-05, "loss": 0.5103, "step": 4438 }, { "epoch": 0.36, "grad_norm": 1.0042081691754168, "learning_rate": 1.4791124533987529e-05, "loss": 0.6197, "step": 4439 }, { "epoch": 0.36, "grad_norm": 0.869082561205368, "learning_rate": 1.4788813705998543e-05, "loss": 0.6103, "step": 4440 }, { "epoch": 0.36, "grad_norm": 0.9407422204488816, "learning_rate": 1.4786502546149015e-05, "loss": 0.5694, "step": 4441 }, { "epoch": 0.36, "grad_norm": 0.9278704826539301, "learning_rate": 1.4784191054599109e-05, "loss": 0.5968, "step": 4442 }, { "epoch": 0.36, "grad_norm": 0.9588874501973558, "learning_rate": 1.4781879231509005e-05, "loss": 0.5519, "step": 4443 }, { "epoch": 0.36, "grad_norm": 0.8591868818411956, "learning_rate": 1.4779567077038912e-05, "loss": 0.5607, "step": 4444 }, { "epoch": 0.36, "grad_norm": 0.8701437695360021, "learning_rate": 1.477725459134906e-05, "loss": 0.5065, "step": 4445 }, { "epoch": 0.36, "grad_norm": 0.9023479514656566, "learning_rate": 1.4774941774599703e-05, "loss": 0.5855, "step": 4446 }, { "epoch": 0.36, "grad_norm": 0.8904185731123726, "learning_rate": 1.4772628626951114e-05, "loss": 0.5713, "step": 4447 }, { "epoch": 0.36, "grad_norm": 0.8535558813189821, "learning_rate": 1.4770315148563596e-05, "loss": 0.5409, "step": 4448 }, { "epoch": 0.36, "grad_norm": 0.962305319537762, "learning_rate": 1.4768001339597471e-05, "loss": 0.5573, "step": 4449 }, { "epoch": 0.36, "grad_norm": 0.9126805318858925, "learning_rate": 1.4765687200213079e-05, "loss": 0.5475, "step": 4450 }, { "epoch": 0.36, "grad_norm": 0.9893873429771054, "learning_rate": 1.4763372730570793e-05, "loss": 0.6411, "step": 4451 }, { "epoch": 0.36, "grad_norm": 0.935005237685736, "learning_rate": 1.4761057930831002e-05, "loss": 0.5588, "step": 4452 }, { "epoch": 0.36, "grad_norm": 0.9159587474165702, "learning_rate": 1.475874280115412e-05, "loss": 0.5548, "step": 4453 }, { "epoch": 0.36, "grad_norm": 0.9045849178952995, "learning_rate": 1.475642734170058e-05, "loss": 0.5059, "step": 4454 }, { "epoch": 0.36, "grad_norm": 0.9272647834317185, "learning_rate": 1.475411155263085e-05, "loss": 0.5701, "step": 4455 }, { "epoch": 0.36, "grad_norm": 0.8679504825088333, "learning_rate": 1.47517954341054e-05, "loss": 0.5504, "step": 4456 }, { "epoch": 0.36, "grad_norm": 0.8784123628733355, "learning_rate": 1.4749478986284743e-05, "loss": 0.5651, "step": 4457 }, { "epoch": 0.36, "grad_norm": 0.7613363838766739, "learning_rate": 1.4747162209329408e-05, "loss": 0.5165, "step": 4458 }, { "epoch": 0.36, "grad_norm": 1.014684584701171, "learning_rate": 1.474484510339994e-05, "loss": 0.6104, "step": 4459 }, { "epoch": 0.36, "grad_norm": 0.8678082577950834, "learning_rate": 1.4742527668656915e-05, "loss": 0.5358, "step": 4460 }, { "epoch": 0.36, "grad_norm": 0.9801279027102768, "learning_rate": 1.474020990526093e-05, "loss": 0.5773, "step": 4461 }, { "epoch": 0.36, "grad_norm": 0.829818263982647, "learning_rate": 1.4737891813372605e-05, "loss": 0.5223, "step": 4462 }, { "epoch": 0.36, "grad_norm": 0.8525757596323112, "learning_rate": 1.4735573393152576e-05, "loss": 0.5602, "step": 4463 }, { "epoch": 0.36, "grad_norm": 0.9103375069043098, "learning_rate": 1.4733254644761514e-05, "loss": 0.5933, "step": 4464 }, { "epoch": 0.36, "grad_norm": 0.9087295841386293, "learning_rate": 1.4730935568360103e-05, "loss": 0.6359, "step": 4465 }, { "epoch": 0.36, "grad_norm": 0.9403445729528365, "learning_rate": 1.4728616164109051e-05, "loss": 0.5755, "step": 4466 }, { "epoch": 0.36, "grad_norm": 0.879590804392101, "learning_rate": 1.4726296432169095e-05, "loss": 0.5774, "step": 4467 }, { "epoch": 0.36, "grad_norm": 0.9251101238642484, "learning_rate": 1.472397637270099e-05, "loss": 0.5488, "step": 4468 }, { "epoch": 0.36, "grad_norm": 0.9440786095944539, "learning_rate": 1.472165598586551e-05, "loss": 0.5501, "step": 4469 }, { "epoch": 0.36, "grad_norm": 0.892311824110475, "learning_rate": 1.4719335271823461e-05, "loss": 0.5545, "step": 4470 }, { "epoch": 0.36, "grad_norm": 0.9222735252882099, "learning_rate": 1.4717014230735661e-05, "loss": 0.5671, "step": 4471 }, { "epoch": 0.36, "grad_norm": 0.8774550769864838, "learning_rate": 1.471469286276296e-05, "loss": 0.5856, "step": 4472 }, { "epoch": 0.36, "grad_norm": 0.8945744492384955, "learning_rate": 1.4712371168066227e-05, "loss": 0.5554, "step": 4473 }, { "epoch": 0.36, "grad_norm": 0.8632486488540376, "learning_rate": 1.4710049146806348e-05, "loss": 0.5571, "step": 4474 }, { "epoch": 0.36, "grad_norm": 0.9075970561265309, "learning_rate": 1.4707726799144245e-05, "loss": 0.5557, "step": 4475 }, { "epoch": 0.36, "grad_norm": 0.8623183406655154, "learning_rate": 1.4705404125240849e-05, "loss": 0.562, "step": 4476 }, { "epoch": 0.36, "grad_norm": 0.8993540931201318, "learning_rate": 1.470308112525712e-05, "loss": 0.5011, "step": 4477 }, { "epoch": 0.36, "grad_norm": 0.9070876790888006, "learning_rate": 1.470075779935404e-05, "loss": 0.5386, "step": 4478 }, { "epoch": 0.36, "grad_norm": 0.894769666418227, "learning_rate": 1.4698434147692618e-05, "loss": 0.6098, "step": 4479 }, { "epoch": 0.36, "grad_norm": 0.89583961043636, "learning_rate": 1.4696110170433873e-05, "loss": 0.5813, "step": 4480 }, { "epoch": 0.36, "grad_norm": 0.8861955251651111, "learning_rate": 1.469378586773886e-05, "loss": 0.565, "step": 4481 }, { "epoch": 0.36, "grad_norm": 0.9275619282158948, "learning_rate": 1.4691461239768649e-05, "loss": 0.5474, "step": 4482 }, { "epoch": 0.36, "grad_norm": 0.9046505136392399, "learning_rate": 1.4689136286684335e-05, "loss": 0.5759, "step": 4483 }, { "epoch": 0.36, "grad_norm": 0.9305391975015446, "learning_rate": 1.4686811008647037e-05, "loss": 0.5531, "step": 4484 }, { "epoch": 0.36, "grad_norm": 0.9903532887324388, "learning_rate": 1.4684485405817897e-05, "loss": 0.578, "step": 4485 }, { "epoch": 0.36, "grad_norm": 0.905053324048193, "learning_rate": 1.4682159478358067e-05, "loss": 0.5344, "step": 4486 }, { "epoch": 0.36, "grad_norm": 0.9706423919522855, "learning_rate": 1.467983322642874e-05, "loss": 0.5948, "step": 4487 }, { "epoch": 0.36, "grad_norm": 0.9127425851510094, "learning_rate": 1.4677506650191124e-05, "loss": 0.5651, "step": 4488 }, { "epoch": 0.36, "grad_norm": 0.9640169198932111, "learning_rate": 1.4675179749806444e-05, "loss": 0.5849, "step": 4489 }, { "epoch": 0.36, "grad_norm": 0.8409429176257024, "learning_rate": 1.4672852525435954e-05, "loss": 0.5098, "step": 4490 }, { "epoch": 0.37, "grad_norm": 0.9086225149968713, "learning_rate": 1.4670524977240929e-05, "loss": 0.5602, "step": 4491 }, { "epoch": 0.37, "grad_norm": 0.9895660646217311, "learning_rate": 1.4668197105382667e-05, "loss": 0.6056, "step": 4492 }, { "epoch": 0.37, "grad_norm": 0.9110056402144131, "learning_rate": 1.4665868910022485e-05, "loss": 0.569, "step": 4493 }, { "epoch": 0.37, "grad_norm": 0.964291622484286, "learning_rate": 1.4663540391321726e-05, "loss": 0.5913, "step": 4494 }, { "epoch": 0.37, "grad_norm": 1.0321238359078417, "learning_rate": 1.4661211549441756e-05, "loss": 0.6189, "step": 4495 }, { "epoch": 0.37, "grad_norm": 0.8883996584082586, "learning_rate": 1.465888238454396e-05, "loss": 0.5703, "step": 4496 }, { "epoch": 0.37, "grad_norm": 0.9221703556073723, "learning_rate": 1.4656552896789746e-05, "loss": 0.5558, "step": 4497 }, { "epoch": 0.37, "grad_norm": 0.8746129298054462, "learning_rate": 1.4654223086340547e-05, "loss": 0.488, "step": 4498 }, { "epoch": 0.37, "grad_norm": 0.9292851665842938, "learning_rate": 1.4651892953357816e-05, "loss": 0.5466, "step": 4499 }, { "epoch": 0.37, "grad_norm": 0.9489222789168605, "learning_rate": 1.4649562498003032e-05, "loss": 0.6406, "step": 4500 }, { "epoch": 0.37, "grad_norm": 0.9472256638031281, "learning_rate": 1.4647231720437687e-05, "loss": 0.5931, "step": 4501 }, { "epoch": 0.37, "grad_norm": 0.8927187601605701, "learning_rate": 1.4644900620823308e-05, "loss": 0.5251, "step": 4502 }, { "epoch": 0.37, "grad_norm": 0.9092235192734804, "learning_rate": 1.4642569199321436e-05, "loss": 0.5596, "step": 4503 }, { "epoch": 0.37, "grad_norm": 0.9911171371540515, "learning_rate": 1.4640237456093636e-05, "loss": 0.6254, "step": 4504 }, { "epoch": 0.37, "grad_norm": 0.8902682685340019, "learning_rate": 1.4637905391301496e-05, "loss": 0.5597, "step": 4505 }, { "epoch": 0.37, "grad_norm": 0.9779813845697346, "learning_rate": 1.4635573005106627e-05, "loss": 0.5155, "step": 4506 }, { "epoch": 0.37, "grad_norm": 0.8593153982172275, "learning_rate": 1.4633240297670661e-05, "loss": 0.5848, "step": 4507 }, { "epoch": 0.37, "grad_norm": 0.9417037319181273, "learning_rate": 1.463090726915525e-05, "loss": 0.5036, "step": 4508 }, { "epoch": 0.37, "grad_norm": 0.826959279039463, "learning_rate": 1.4628573919722073e-05, "loss": 0.5068, "step": 4509 }, { "epoch": 0.37, "grad_norm": 0.9122443267097082, "learning_rate": 1.4626240249532833e-05, "loss": 0.5697, "step": 4510 }, { "epoch": 0.37, "grad_norm": 0.911386237979581, "learning_rate": 1.4623906258749243e-05, "loss": 0.5511, "step": 4511 }, { "epoch": 0.37, "grad_norm": 0.9836468674217567, "learning_rate": 1.462157194753305e-05, "loss": 0.6439, "step": 4512 }, { "epoch": 0.37, "grad_norm": 0.9245030210290315, "learning_rate": 1.4619237316046024e-05, "loss": 0.5938, "step": 4513 }, { "epoch": 0.37, "grad_norm": 0.8823041615405315, "learning_rate": 1.4616902364449947e-05, "loss": 0.5385, "step": 4514 }, { "epoch": 0.37, "grad_norm": 0.9282146018943377, "learning_rate": 1.4614567092906631e-05, "loss": 0.5289, "step": 4515 }, { "epoch": 0.37, "grad_norm": 0.8513409879459296, "learning_rate": 1.4612231501577912e-05, "loss": 0.4712, "step": 4516 }, { "epoch": 0.37, "grad_norm": 0.8970738325052732, "learning_rate": 1.4609895590625635e-05, "loss": 0.5656, "step": 4517 }, { "epoch": 0.37, "grad_norm": 0.8977821484073438, "learning_rate": 1.4607559360211688e-05, "loss": 0.5297, "step": 4518 }, { "epoch": 0.37, "grad_norm": 0.8660669647290293, "learning_rate": 1.460522281049796e-05, "loss": 0.5524, "step": 4519 }, { "epoch": 0.37, "grad_norm": 0.8910709052637866, "learning_rate": 1.460288594164638e-05, "loss": 0.5052, "step": 4520 }, { "epoch": 0.37, "grad_norm": 0.8727890717084368, "learning_rate": 1.4600548753818884e-05, "loss": 0.4904, "step": 4521 }, { "epoch": 0.37, "grad_norm": 0.9238031222868925, "learning_rate": 1.4598211247177443e-05, "loss": 0.5652, "step": 4522 }, { "epoch": 0.37, "grad_norm": 0.9175522190299712, "learning_rate": 1.459587342188404e-05, "loss": 0.5368, "step": 4523 }, { "epoch": 0.37, "grad_norm": 0.9529440524959538, "learning_rate": 1.4593535278100684e-05, "loss": 0.5888, "step": 4524 }, { "epoch": 0.37, "grad_norm": 0.8746329567698374, "learning_rate": 1.4591196815989407e-05, "loss": 0.5749, "step": 4525 }, { "epoch": 0.37, "grad_norm": 0.9699853208903029, "learning_rate": 1.4588858035712266e-05, "loss": 0.5508, "step": 4526 }, { "epoch": 0.37, "grad_norm": 0.9555329839997029, "learning_rate": 1.4586518937431332e-05, "loss": 0.5743, "step": 4527 }, { "epoch": 0.37, "grad_norm": 0.9458608579519454, "learning_rate": 1.4584179521308703e-05, "loss": 0.608, "step": 4528 }, { "epoch": 0.37, "grad_norm": 0.8527041686893861, "learning_rate": 1.45818397875065e-05, "loss": 0.473, "step": 4529 }, { "epoch": 0.37, "grad_norm": 0.8928812741342821, "learning_rate": 1.4579499736186864e-05, "loss": 0.5538, "step": 4530 }, { "epoch": 0.37, "grad_norm": 0.9120715539821165, "learning_rate": 1.4577159367511959e-05, "loss": 0.4966, "step": 4531 }, { "epoch": 0.37, "grad_norm": 1.0404997402846254, "learning_rate": 1.457481868164397e-05, "loss": 0.5259, "step": 4532 }, { "epoch": 0.37, "grad_norm": 0.9213843354801686, "learning_rate": 1.45724776787451e-05, "loss": 0.5097, "step": 4533 }, { "epoch": 0.37, "grad_norm": 1.1301394046908915, "learning_rate": 1.4570136358977589e-05, "loss": 0.5866, "step": 4534 }, { "epoch": 0.37, "grad_norm": 0.8993396951953636, "learning_rate": 1.456779472250368e-05, "loss": 0.6042, "step": 4535 }, { "epoch": 0.37, "grad_norm": 0.9597726253126289, "learning_rate": 1.4565452769485644e-05, "loss": 0.6088, "step": 4536 }, { "epoch": 0.37, "grad_norm": 0.9236890364028195, "learning_rate": 1.4563110500085786e-05, "loss": 0.5624, "step": 4537 }, { "epoch": 0.37, "grad_norm": 0.8956434628330934, "learning_rate": 1.4560767914466417e-05, "loss": 0.5511, "step": 4538 }, { "epoch": 0.37, "grad_norm": 0.9639572643026926, "learning_rate": 1.4558425012789873e-05, "loss": 0.5579, "step": 4539 }, { "epoch": 0.37, "grad_norm": 0.9794697866313931, "learning_rate": 1.4556081795218525e-05, "loss": 0.5269, "step": 4540 }, { "epoch": 0.37, "grad_norm": 0.8887634982521235, "learning_rate": 1.4553738261914742e-05, "loss": 0.5336, "step": 4541 }, { "epoch": 0.37, "grad_norm": 0.9542512904237881, "learning_rate": 1.4551394413040942e-05, "loss": 0.5485, "step": 4542 }, { "epoch": 0.37, "grad_norm": 0.994243489296905, "learning_rate": 1.4549050248759546e-05, "loss": 0.5648, "step": 4543 }, { "epoch": 0.37, "grad_norm": 0.973290658485551, "learning_rate": 1.4546705769233003e-05, "loss": 0.5819, "step": 4544 }, { "epoch": 0.37, "grad_norm": 0.9707765308575302, "learning_rate": 1.4544360974623781e-05, "loss": 0.5097, "step": 4545 }, { "epoch": 0.37, "grad_norm": 0.9736719646068369, "learning_rate": 1.4542015865094377e-05, "loss": 0.5682, "step": 4546 }, { "epoch": 0.37, "grad_norm": 0.9026646459372545, "learning_rate": 1.4539670440807298e-05, "loss": 0.5721, "step": 4547 }, { "epoch": 0.37, "grad_norm": 0.9847250280289744, "learning_rate": 1.4537324701925088e-05, "loss": 0.632, "step": 4548 }, { "epoch": 0.37, "grad_norm": 0.9816038435971146, "learning_rate": 1.4534978648610301e-05, "loss": 0.5988, "step": 4549 }, { "epoch": 0.37, "grad_norm": 1.0238723489848807, "learning_rate": 1.4532632281025514e-05, "loss": 0.6407, "step": 4550 }, { "epoch": 0.37, "grad_norm": 0.8774092216491454, "learning_rate": 1.4530285599333332e-05, "loss": 0.5169, "step": 4551 }, { "epoch": 0.37, "grad_norm": 0.8977191065810496, "learning_rate": 1.4527938603696376e-05, "loss": 0.5758, "step": 4552 }, { "epoch": 0.37, "grad_norm": 0.8679359206799987, "learning_rate": 1.452559129427729e-05, "loss": 0.5466, "step": 4553 }, { "epoch": 0.37, "grad_norm": 0.8862085480729563, "learning_rate": 1.4523243671238741e-05, "loss": 0.5008, "step": 4554 }, { "epoch": 0.37, "grad_norm": 0.8560466744924718, "learning_rate": 1.4520895734743419e-05, "loss": 0.5498, "step": 4555 }, { "epoch": 0.37, "grad_norm": 0.870445799003857, "learning_rate": 1.4518547484954033e-05, "loss": 0.5446, "step": 4556 }, { "epoch": 0.37, "grad_norm": 0.9915550536321615, "learning_rate": 1.4516198922033313e-05, "loss": 0.5891, "step": 4557 }, { "epoch": 0.37, "grad_norm": 0.9766747506463757, "learning_rate": 1.4513850046144015e-05, "loss": 0.5856, "step": 4558 }, { "epoch": 0.37, "grad_norm": 1.1299468463134879, "learning_rate": 1.451150085744891e-05, "loss": 0.5327, "step": 4559 }, { "epoch": 0.37, "grad_norm": 0.876075346471893, "learning_rate": 1.45091513561108e-05, "loss": 0.5676, "step": 4560 }, { "epoch": 0.37, "grad_norm": 0.9026216340846099, "learning_rate": 1.4506801542292501e-05, "loss": 0.4733, "step": 4561 }, { "epoch": 0.37, "grad_norm": 0.9271284281161375, "learning_rate": 1.450445141615685e-05, "loss": 0.6201, "step": 4562 }, { "epoch": 0.37, "grad_norm": 0.9643881947320083, "learning_rate": 1.4502100977866713e-05, "loss": 0.63, "step": 4563 }, { "epoch": 0.37, "grad_norm": 0.9016319613024337, "learning_rate": 1.449975022758497e-05, "loss": 0.5579, "step": 4564 }, { "epoch": 0.37, "grad_norm": 0.8301182624895908, "learning_rate": 1.449739916547453e-05, "loss": 0.5391, "step": 4565 }, { "epoch": 0.37, "grad_norm": 1.0885222691950676, "learning_rate": 1.4495047791698316e-05, "loss": 0.5193, "step": 4566 }, { "epoch": 0.37, "grad_norm": 0.8532968406074554, "learning_rate": 1.4492696106419275e-05, "loss": 0.5216, "step": 4567 }, { "epoch": 0.37, "grad_norm": 0.9434837561969038, "learning_rate": 1.4490344109800382e-05, "loss": 0.5765, "step": 4568 }, { "epoch": 0.37, "grad_norm": 0.8803884583946759, "learning_rate": 1.4487991802004625e-05, "loss": 0.486, "step": 4569 }, { "epoch": 0.37, "grad_norm": 0.8000039279991835, "learning_rate": 1.4485639183195014e-05, "loss": 0.4766, "step": 4570 }, { "epoch": 0.37, "grad_norm": 0.9594388992920403, "learning_rate": 1.448328625353459e-05, "loss": 0.5695, "step": 4571 }, { "epoch": 0.37, "grad_norm": 0.8484072973197494, "learning_rate": 1.4480933013186403e-05, "loss": 0.5692, "step": 4572 }, { "epoch": 0.37, "grad_norm": 1.1050447422521597, "learning_rate": 1.4478579462313533e-05, "loss": 0.5268, "step": 4573 }, { "epoch": 0.37, "grad_norm": 0.8681889570895173, "learning_rate": 1.4476225601079078e-05, "loss": 0.5614, "step": 4574 }, { "epoch": 0.37, "grad_norm": 0.9827528961085784, "learning_rate": 1.447387142964616e-05, "loss": 0.6487, "step": 4575 }, { "epoch": 0.37, "grad_norm": 0.9192293189477148, "learning_rate": 1.4471516948177921e-05, "loss": 0.5596, "step": 4576 }, { "epoch": 0.37, "grad_norm": 0.8507728172540402, "learning_rate": 1.4469162156837521e-05, "loss": 0.458, "step": 4577 }, { "epoch": 0.37, "grad_norm": 0.9116255169315678, "learning_rate": 1.4466807055788152e-05, "loss": 0.543, "step": 4578 }, { "epoch": 0.37, "grad_norm": 0.8972062853518864, "learning_rate": 1.4464451645193013e-05, "loss": 0.5706, "step": 4579 }, { "epoch": 0.37, "grad_norm": 0.8844699891420403, "learning_rate": 1.4462095925215336e-05, "loss": 0.5486, "step": 4580 }, { "epoch": 0.37, "grad_norm": 0.8469427622135454, "learning_rate": 1.445973989601837e-05, "loss": 0.5295, "step": 4581 }, { "epoch": 0.37, "grad_norm": 1.0531060450779732, "learning_rate": 1.4457383557765385e-05, "loss": 0.6393, "step": 4582 }, { "epoch": 0.37, "grad_norm": 0.8996525214835874, "learning_rate": 1.4455026910619672e-05, "loss": 0.5354, "step": 4583 }, { "epoch": 0.37, "grad_norm": 0.8842873124765084, "learning_rate": 1.4452669954744545e-05, "loss": 0.5528, "step": 4584 }, { "epoch": 0.37, "grad_norm": 0.8787924380403497, "learning_rate": 1.4450312690303342e-05, "loss": 0.629, "step": 4585 }, { "epoch": 0.37, "grad_norm": 0.8125932722275822, "learning_rate": 1.4447955117459414e-05, "loss": 0.4836, "step": 4586 }, { "epoch": 0.37, "grad_norm": 0.8355003410967605, "learning_rate": 1.4445597236376143e-05, "loss": 0.5161, "step": 4587 }, { "epoch": 0.37, "grad_norm": 0.890935498183146, "learning_rate": 1.4443239047216928e-05, "loss": 0.5175, "step": 4588 }, { "epoch": 0.37, "grad_norm": 0.9152569502319765, "learning_rate": 1.4440880550145187e-05, "loss": 0.5548, "step": 4589 }, { "epoch": 0.37, "grad_norm": 0.9338050629782046, "learning_rate": 1.4438521745324363e-05, "loss": 0.605, "step": 4590 }, { "epoch": 0.37, "grad_norm": 0.8610689574026248, "learning_rate": 1.4436162632917918e-05, "loss": 0.5304, "step": 4591 }, { "epoch": 0.37, "grad_norm": 0.8372614133811489, "learning_rate": 1.4433803213089341e-05, "loss": 0.5228, "step": 4592 }, { "epoch": 0.37, "grad_norm": 0.8240098780319006, "learning_rate": 1.443144348600213e-05, "loss": 0.5509, "step": 4593 }, { "epoch": 0.37, "grad_norm": 0.9114321836325133, "learning_rate": 1.442908345181982e-05, "loss": 0.5135, "step": 4594 }, { "epoch": 0.37, "grad_norm": 0.8340969162814056, "learning_rate": 1.442672311070595e-05, "loss": 0.5039, "step": 4595 }, { "epoch": 0.37, "grad_norm": 0.886391196202956, "learning_rate": 1.44243624628241e-05, "loss": 0.5364, "step": 4596 }, { "epoch": 0.37, "grad_norm": 0.9536427603728262, "learning_rate": 1.4422001508337853e-05, "loss": 0.5255, "step": 4597 }, { "epoch": 0.37, "grad_norm": 0.9276882420801554, "learning_rate": 1.4419640247410827e-05, "loss": 0.5701, "step": 4598 }, { "epoch": 0.37, "grad_norm": 0.8960716066915557, "learning_rate": 1.4417278680206647e-05, "loss": 0.594, "step": 4599 }, { "epoch": 0.37, "grad_norm": 0.9832722742030179, "learning_rate": 1.4414916806888976e-05, "loss": 0.6219, "step": 4600 }, { "epoch": 0.37, "grad_norm": 0.9796662164059784, "learning_rate": 1.4412554627621487e-05, "loss": 0.5509, "step": 4601 }, { "epoch": 0.37, "grad_norm": 0.9263311552853005, "learning_rate": 1.4410192142567874e-05, "loss": 0.4821, "step": 4602 }, { "epoch": 0.37, "grad_norm": 0.9106880002039524, "learning_rate": 1.4407829351891858e-05, "loss": 0.5703, "step": 4603 }, { "epoch": 0.37, "grad_norm": 0.8117373702202885, "learning_rate": 1.4405466255757178e-05, "loss": 0.5324, "step": 4604 }, { "epoch": 0.37, "grad_norm": 0.9051324540084974, "learning_rate": 1.4403102854327595e-05, "loss": 0.5474, "step": 4605 }, { "epoch": 0.37, "grad_norm": 0.8988441607793508, "learning_rate": 1.4400739147766887e-05, "loss": 0.5718, "step": 4606 }, { "epoch": 0.37, "grad_norm": 0.9418428015077235, "learning_rate": 1.4398375136238864e-05, "loss": 0.5627, "step": 4607 }, { "epoch": 0.37, "grad_norm": 0.8659909136750779, "learning_rate": 1.439601081990734e-05, "loss": 0.5206, "step": 4608 }, { "epoch": 0.37, "grad_norm": 0.9068140592385949, "learning_rate": 1.4393646198936169e-05, "loss": 0.5892, "step": 4609 }, { "epoch": 0.37, "grad_norm": 0.9967771784960222, "learning_rate": 1.4391281273489216e-05, "loss": 0.6452, "step": 4610 }, { "epoch": 0.37, "grad_norm": 0.8349913229312583, "learning_rate": 1.438891604373036e-05, "loss": 0.5053, "step": 4611 }, { "epoch": 0.37, "grad_norm": 0.8885153877992091, "learning_rate": 1.4386550509823515e-05, "loss": 0.5386, "step": 4612 }, { "epoch": 0.37, "grad_norm": 0.8717848449955791, "learning_rate": 1.4384184671932616e-05, "loss": 0.4851, "step": 4613 }, { "epoch": 0.38, "grad_norm": 0.962862508614908, "learning_rate": 1.4381818530221604e-05, "loss": 0.6124, "step": 4614 }, { "epoch": 0.38, "grad_norm": 0.8910489915347034, "learning_rate": 1.4379452084854455e-05, "loss": 0.538, "step": 4615 }, { "epoch": 0.38, "grad_norm": 0.90704637704644, "learning_rate": 1.4377085335995165e-05, "loss": 0.5425, "step": 4616 }, { "epoch": 0.38, "grad_norm": 0.8258031697206105, "learning_rate": 1.4374718283807738e-05, "loss": 0.5118, "step": 4617 }, { "epoch": 0.38, "grad_norm": 0.8838282368452399, "learning_rate": 1.4372350928456218e-05, "loss": 0.5603, "step": 4618 }, { "epoch": 0.38, "grad_norm": 0.8717929142089527, "learning_rate": 1.4369983270104654e-05, "loss": 0.5141, "step": 4619 }, { "epoch": 0.38, "grad_norm": 0.9120544658922455, "learning_rate": 1.436761530891713e-05, "loss": 0.6108, "step": 4620 }, { "epoch": 0.38, "grad_norm": 0.9513750558154765, "learning_rate": 1.4365247045057732e-05, "loss": 0.5394, "step": 4621 }, { "epoch": 0.38, "grad_norm": 0.9271504423453096, "learning_rate": 1.4362878478690595e-05, "loss": 0.6059, "step": 4622 }, { "epoch": 0.38, "grad_norm": 0.935788851587579, "learning_rate": 1.4360509609979842e-05, "loss": 0.5753, "step": 4623 }, { "epoch": 0.38, "grad_norm": 0.9640035939735568, "learning_rate": 1.4358140439089644e-05, "loss": 0.5396, "step": 4624 }, { "epoch": 0.38, "grad_norm": 1.0144514931115078, "learning_rate": 1.435577096618418e-05, "loss": 0.5673, "step": 4625 }, { "epoch": 0.38, "grad_norm": 1.0271339489351563, "learning_rate": 1.435340119142765e-05, "loss": 0.5648, "step": 4626 }, { "epoch": 0.38, "grad_norm": 0.8403509294262873, "learning_rate": 1.4351031114984277e-05, "loss": 0.4805, "step": 4627 }, { "epoch": 0.38, "grad_norm": 0.9565545155982719, "learning_rate": 1.434866073701831e-05, "loss": 0.553, "step": 4628 }, { "epoch": 0.38, "grad_norm": 0.9695005048984443, "learning_rate": 1.434629005769401e-05, "loss": 0.5715, "step": 4629 }, { "epoch": 0.38, "grad_norm": 0.977899291178307, "learning_rate": 1.4343919077175662e-05, "loss": 0.5849, "step": 4630 }, { "epoch": 0.38, "grad_norm": 0.884291550415101, "learning_rate": 1.434154779562758e-05, "loss": 0.5444, "step": 4631 }, { "epoch": 0.38, "grad_norm": 0.8531258142757846, "learning_rate": 1.4339176213214084e-05, "loss": 0.5753, "step": 4632 }, { "epoch": 0.38, "grad_norm": 0.8577345206288213, "learning_rate": 1.4336804330099525e-05, "loss": 0.5059, "step": 4633 }, { "epoch": 0.38, "grad_norm": 0.7924570235333922, "learning_rate": 1.4334432146448272e-05, "loss": 0.5033, "step": 4634 }, { "epoch": 0.38, "grad_norm": 1.014824293404601, "learning_rate": 1.433205966242472e-05, "loss": 0.6218, "step": 4635 }, { "epoch": 0.38, "grad_norm": 0.9407577361679119, "learning_rate": 1.4329686878193271e-05, "loss": 0.5733, "step": 4636 }, { "epoch": 0.38, "grad_norm": 0.8702144654770304, "learning_rate": 1.4327313793918362e-05, "loss": 0.602, "step": 4637 }, { "epoch": 0.38, "grad_norm": 0.8761690276292466, "learning_rate": 1.432494040976445e-05, "loss": 0.5269, "step": 4638 }, { "epoch": 0.38, "grad_norm": 0.929781433069658, "learning_rate": 1.4322566725895998e-05, "loss": 0.5445, "step": 4639 }, { "epoch": 0.38, "grad_norm": 0.7992557146672608, "learning_rate": 1.432019274247751e-05, "loss": 0.5139, "step": 4640 }, { "epoch": 0.38, "grad_norm": 0.773596192547856, "learning_rate": 1.4317818459673496e-05, "loss": 0.4513, "step": 4641 }, { "epoch": 0.38, "grad_norm": 0.9862725969212202, "learning_rate": 1.4315443877648494e-05, "loss": 0.6312, "step": 4642 }, { "epoch": 0.38, "grad_norm": 0.9346377617043576, "learning_rate": 1.4313068996567054e-05, "loss": 0.5526, "step": 4643 }, { "epoch": 0.38, "grad_norm": 0.9096616854418217, "learning_rate": 1.4310693816593766e-05, "loss": 0.5035, "step": 4644 }, { "epoch": 0.38, "grad_norm": 0.8896567937140818, "learning_rate": 1.4308318337893214e-05, "loss": 0.5757, "step": 4645 }, { "epoch": 0.38, "grad_norm": 0.8989146692626034, "learning_rate": 1.4305942560630025e-05, "loss": 0.5482, "step": 4646 }, { "epoch": 0.38, "grad_norm": 0.9001044151809969, "learning_rate": 1.4303566484968836e-05, "loss": 0.5518, "step": 4647 }, { "epoch": 0.38, "grad_norm": 0.8284432728019371, "learning_rate": 1.4301190111074306e-05, "loss": 0.5221, "step": 4648 }, { "epoch": 0.38, "grad_norm": 0.8823025857272053, "learning_rate": 1.4298813439111116e-05, "loss": 0.5092, "step": 4649 }, { "epoch": 0.38, "grad_norm": 0.9638802531589524, "learning_rate": 1.4296436469243968e-05, "loss": 0.5593, "step": 4650 }, { "epoch": 0.38, "grad_norm": 0.8957597475760447, "learning_rate": 1.4294059201637584e-05, "loss": 0.6216, "step": 4651 }, { "epoch": 0.38, "grad_norm": 0.9922406505863793, "learning_rate": 1.4291681636456706e-05, "loss": 0.585, "step": 4652 }, { "epoch": 0.38, "grad_norm": 0.9067841463591434, "learning_rate": 1.42893037738661e-05, "loss": 0.5457, "step": 4653 }, { "epoch": 0.38, "grad_norm": 0.8603834578523885, "learning_rate": 1.4286925614030542e-05, "loss": 0.5728, "step": 4654 }, { "epoch": 0.38, "grad_norm": 0.9463071681212694, "learning_rate": 1.4284547157114846e-05, "loss": 0.5982, "step": 4655 }, { "epoch": 0.38, "grad_norm": 0.8547064534994039, "learning_rate": 1.4282168403283829e-05, "loss": 0.5284, "step": 4656 }, { "epoch": 0.38, "grad_norm": 0.883673974873926, "learning_rate": 1.4279789352702342e-05, "loss": 0.5759, "step": 4657 }, { "epoch": 0.38, "grad_norm": 0.9929381579362356, "learning_rate": 1.4277410005535249e-05, "loss": 0.5804, "step": 4658 }, { "epoch": 0.38, "grad_norm": 0.8075944223992195, "learning_rate": 1.4275030361947438e-05, "loss": 0.5404, "step": 4659 }, { "epoch": 0.38, "grad_norm": 0.9575072735956454, "learning_rate": 1.427265042210381e-05, "loss": 0.598, "step": 4660 }, { "epoch": 0.38, "grad_norm": 0.9730461139752252, "learning_rate": 1.4270270186169301e-05, "loss": 0.5552, "step": 4661 }, { "epoch": 0.38, "grad_norm": 0.8627026160167858, "learning_rate": 1.4267889654308858e-05, "loss": 0.5816, "step": 4662 }, { "epoch": 0.38, "grad_norm": 0.8134697842892554, "learning_rate": 1.4265508826687442e-05, "loss": 0.5051, "step": 4663 }, { "epoch": 0.38, "grad_norm": 0.8604152441105043, "learning_rate": 1.4263127703470053e-05, "loss": 0.5473, "step": 4664 }, { "epoch": 0.38, "grad_norm": 0.8854386728519589, "learning_rate": 1.4260746284821694e-05, "loss": 0.5699, "step": 4665 }, { "epoch": 0.38, "grad_norm": 0.8920693498069167, "learning_rate": 1.4258364570907395e-05, "loss": 0.5398, "step": 4666 }, { "epoch": 0.38, "grad_norm": 0.9962386036036195, "learning_rate": 1.4255982561892207e-05, "loss": 0.6245, "step": 4667 }, { "epoch": 0.38, "grad_norm": 0.8323716865619504, "learning_rate": 1.4253600257941208e-05, "loss": 0.5058, "step": 4668 }, { "epoch": 0.38, "grad_norm": 0.8699456667642135, "learning_rate": 1.4251217659219481e-05, "loss": 0.6105, "step": 4669 }, { "epoch": 0.38, "grad_norm": 0.9889583687179783, "learning_rate": 1.4248834765892139e-05, "loss": 0.5819, "step": 4670 }, { "epoch": 0.38, "grad_norm": 0.9126491520550216, "learning_rate": 1.4246451578124321e-05, "loss": 0.56, "step": 4671 }, { "epoch": 0.38, "grad_norm": 0.9134335213125362, "learning_rate": 1.4244068096081172e-05, "loss": 0.5301, "step": 4672 }, { "epoch": 0.38, "grad_norm": 0.9273990704818516, "learning_rate": 1.4241684319927869e-05, "loss": 0.4937, "step": 4673 }, { "epoch": 0.38, "grad_norm": 0.9268357832113618, "learning_rate": 1.4239300249829606e-05, "loss": 0.5696, "step": 4674 }, { "epoch": 0.38, "grad_norm": 0.8527064171200383, "learning_rate": 1.4236915885951592e-05, "loss": 0.553, "step": 4675 }, { "epoch": 0.38, "grad_norm": 0.9562426729410712, "learning_rate": 1.4234531228459069e-05, "loss": 0.5627, "step": 4676 }, { "epoch": 0.38, "grad_norm": 0.9183753162569681, "learning_rate": 1.4232146277517289e-05, "loss": 0.5514, "step": 4677 }, { "epoch": 0.38, "grad_norm": 0.9248669788204029, "learning_rate": 1.4229761033291523e-05, "loss": 0.5248, "step": 4678 }, { "epoch": 0.38, "grad_norm": 0.9321721977272228, "learning_rate": 1.422737549594707e-05, "loss": 0.537, "step": 4679 }, { "epoch": 0.38, "grad_norm": 0.8532668316951978, "learning_rate": 1.4224989665649248e-05, "loss": 0.5675, "step": 4680 }, { "epoch": 0.38, "grad_norm": 0.8601920380266586, "learning_rate": 1.4222603542563385e-05, "loss": 0.4955, "step": 4681 }, { "epoch": 0.38, "grad_norm": 0.869794347959696, "learning_rate": 1.4220217126854842e-05, "loss": 0.5749, "step": 4682 }, { "epoch": 0.38, "grad_norm": 0.972823849878353, "learning_rate": 1.4217830418689e-05, "loss": 0.558, "step": 4683 }, { "epoch": 0.38, "grad_norm": 0.8801348435436541, "learning_rate": 1.4215443418231248e-05, "loss": 0.5038, "step": 4684 }, { "epoch": 0.38, "grad_norm": 0.8921011812497122, "learning_rate": 1.4213056125647005e-05, "loss": 0.5297, "step": 4685 }, { "epoch": 0.38, "grad_norm": 0.9004852636949913, "learning_rate": 1.4210668541101713e-05, "loss": 0.529, "step": 4686 }, { "epoch": 0.38, "grad_norm": 1.1573939656494636, "learning_rate": 1.4208280664760823e-05, "loss": 0.5374, "step": 4687 }, { "epoch": 0.38, "grad_norm": 0.8283523390582402, "learning_rate": 1.4205892496789816e-05, "loss": 0.5117, "step": 4688 }, { "epoch": 0.38, "grad_norm": 0.8123582346827237, "learning_rate": 1.4203504037354192e-05, "loss": 0.5004, "step": 4689 }, { "epoch": 0.38, "grad_norm": 1.0285817266191963, "learning_rate": 1.4201115286619464e-05, "loss": 0.6204, "step": 4690 }, { "epoch": 0.38, "grad_norm": 0.8752059281230673, "learning_rate": 1.4198726244751173e-05, "loss": 0.5893, "step": 4691 }, { "epoch": 0.38, "grad_norm": 0.9099369702348551, "learning_rate": 1.4196336911914878e-05, "loss": 0.5702, "step": 4692 }, { "epoch": 0.38, "grad_norm": 0.8990956246038558, "learning_rate": 1.419394728827616e-05, "loss": 0.5603, "step": 4693 }, { "epoch": 0.38, "grad_norm": 0.9622419853620431, "learning_rate": 1.419155737400061e-05, "loss": 0.5678, "step": 4694 }, { "epoch": 0.38, "grad_norm": 0.8860431919599383, "learning_rate": 1.4189167169253855e-05, "loss": 0.4666, "step": 4695 }, { "epoch": 0.38, "grad_norm": 0.8895897423278988, "learning_rate": 1.4186776674201533e-05, "loss": 0.5501, "step": 4696 }, { "epoch": 0.38, "grad_norm": 0.920359064516808, "learning_rate": 1.4184385889009298e-05, "loss": 0.5521, "step": 4697 }, { "epoch": 0.38, "grad_norm": 0.8188258847505276, "learning_rate": 1.4181994813842831e-05, "loss": 0.54, "step": 4698 }, { "epoch": 0.38, "grad_norm": 0.9810026176463054, "learning_rate": 1.4179603448867836e-05, "loss": 0.5882, "step": 4699 }, { "epoch": 0.38, "grad_norm": 0.9442240534749564, "learning_rate": 1.4177211794250027e-05, "loss": 0.5584, "step": 4700 }, { "epoch": 0.38, "grad_norm": 0.9776295396882877, "learning_rate": 1.4174819850155148e-05, "loss": 0.5444, "step": 4701 }, { "epoch": 0.38, "grad_norm": 0.8815069781560079, "learning_rate": 1.4172427616748955e-05, "loss": 0.521, "step": 4702 }, { "epoch": 0.38, "grad_norm": 0.9598488602858573, "learning_rate": 1.417003509419723e-05, "loss": 0.5725, "step": 4703 }, { "epoch": 0.38, "grad_norm": 0.88162059281092, "learning_rate": 1.416764228266577e-05, "loss": 0.5713, "step": 4704 }, { "epoch": 0.38, "grad_norm": 0.9011421926797095, "learning_rate": 1.4165249182320401e-05, "loss": 0.5621, "step": 4705 }, { "epoch": 0.38, "grad_norm": 0.8956175379198454, "learning_rate": 1.4162855793326955e-05, "loss": 0.5187, "step": 4706 }, { "epoch": 0.38, "grad_norm": 0.8036258385169617, "learning_rate": 1.4160462115851292e-05, "loss": 0.5419, "step": 4707 }, { "epoch": 0.38, "grad_norm": 0.9202754450019062, "learning_rate": 1.4158068150059302e-05, "loss": 0.5333, "step": 4708 }, { "epoch": 0.38, "grad_norm": 0.896061684455917, "learning_rate": 1.415567389611687e-05, "loss": 0.5587, "step": 4709 }, { "epoch": 0.38, "grad_norm": 0.9454485617565561, "learning_rate": 1.4153279354189927e-05, "loss": 0.5858, "step": 4710 }, { "epoch": 0.38, "grad_norm": 0.9237005211467425, "learning_rate": 1.415088452444441e-05, "loss": 0.527, "step": 4711 }, { "epoch": 0.38, "grad_norm": 0.9193006748691501, "learning_rate": 1.4148489407046274e-05, "loss": 0.5338, "step": 4712 }, { "epoch": 0.38, "grad_norm": 0.8406689889011487, "learning_rate": 1.4146094002161501e-05, "loss": 0.5876, "step": 4713 }, { "epoch": 0.38, "grad_norm": 0.8588860468374754, "learning_rate": 1.4143698309956096e-05, "loss": 0.5727, "step": 4714 }, { "epoch": 0.38, "grad_norm": 0.9456318467829123, "learning_rate": 1.414130233059607e-05, "loss": 0.5408, "step": 4715 }, { "epoch": 0.38, "grad_norm": 0.8965301895023075, "learning_rate": 1.4138906064247467e-05, "loss": 0.5793, "step": 4716 }, { "epoch": 0.38, "grad_norm": 0.9239643757250254, "learning_rate": 1.4136509511076347e-05, "loss": 0.6473, "step": 4717 }, { "epoch": 0.38, "grad_norm": 0.9022228686323259, "learning_rate": 1.4134112671248783e-05, "loss": 0.4977, "step": 4718 }, { "epoch": 0.38, "grad_norm": 0.7949190841588801, "learning_rate": 1.4131715544930878e-05, "loss": 0.5347, "step": 4719 }, { "epoch": 0.38, "grad_norm": 0.9740355679714173, "learning_rate": 1.4129318132288752e-05, "loss": 0.5923, "step": 4720 }, { "epoch": 0.38, "grad_norm": 0.7708498171790932, "learning_rate": 1.4126920433488542e-05, "loss": 0.5214, "step": 4721 }, { "epoch": 0.38, "grad_norm": 0.7865502977843916, "learning_rate": 1.4124522448696407e-05, "loss": 0.5338, "step": 4722 }, { "epoch": 0.38, "grad_norm": 0.8597659959517485, "learning_rate": 1.4122124178078522e-05, "loss": 0.56, "step": 4723 }, { "epoch": 0.38, "grad_norm": 0.8978989392823783, "learning_rate": 1.4119725621801093e-05, "loss": 0.5677, "step": 4724 }, { "epoch": 0.38, "grad_norm": 0.9157028799721416, "learning_rate": 1.411732678003033e-05, "loss": 0.6368, "step": 4725 }, { "epoch": 0.38, "grad_norm": 0.9925066776480296, "learning_rate": 1.411492765293247e-05, "loss": 0.5487, "step": 4726 }, { "epoch": 0.38, "grad_norm": 0.9113747897282498, "learning_rate": 1.4112528240673779e-05, "loss": 0.5318, "step": 4727 }, { "epoch": 0.38, "grad_norm": 0.8589278502208879, "learning_rate": 1.4110128543420527e-05, "loss": 0.555, "step": 4728 }, { "epoch": 0.38, "grad_norm": 0.950191960101158, "learning_rate": 1.410772856133901e-05, "loss": 0.5285, "step": 4729 }, { "epoch": 0.38, "grad_norm": 0.9338525505422465, "learning_rate": 1.4105328294595549e-05, "loss": 0.4941, "step": 4730 }, { "epoch": 0.38, "grad_norm": 0.9615369635942083, "learning_rate": 1.4102927743356481e-05, "loss": 0.6274, "step": 4731 }, { "epoch": 0.38, "grad_norm": 0.953732288802404, "learning_rate": 1.4100526907788157e-05, "loss": 0.5737, "step": 4732 }, { "epoch": 0.38, "grad_norm": 1.0032491436975954, "learning_rate": 1.4098125788056955e-05, "loss": 0.5614, "step": 4733 }, { "epoch": 0.38, "grad_norm": 0.889558640015762, "learning_rate": 1.4095724384329272e-05, "loss": 0.5449, "step": 4734 }, { "epoch": 0.38, "grad_norm": 0.8956467014404496, "learning_rate": 1.4093322696771523e-05, "loss": 0.561, "step": 4735 }, { "epoch": 0.38, "grad_norm": 0.8551706931065691, "learning_rate": 1.409092072555014e-05, "loss": 0.598, "step": 4736 }, { "epoch": 0.38, "grad_norm": 0.8548839800087088, "learning_rate": 1.408851847083158e-05, "loss": 0.5593, "step": 4737 }, { "epoch": 0.39, "grad_norm": 0.8901266047668741, "learning_rate": 1.4086115932782316e-05, "loss": 0.5516, "step": 4738 }, { "epoch": 0.39, "grad_norm": 0.8561960107371271, "learning_rate": 1.4083713111568841e-05, "loss": 0.5536, "step": 4739 }, { "epoch": 0.39, "grad_norm": 0.9031494970498228, "learning_rate": 1.4081310007357673e-05, "loss": 0.5372, "step": 4740 }, { "epoch": 0.39, "grad_norm": 0.9312334703767707, "learning_rate": 1.4078906620315343e-05, "loss": 0.5603, "step": 4741 }, { "epoch": 0.39, "grad_norm": 0.8599402331636063, "learning_rate": 1.4076502950608397e-05, "loss": 0.544, "step": 4742 }, { "epoch": 0.39, "grad_norm": 0.8292929383194493, "learning_rate": 1.4074098998403414e-05, "loss": 0.4634, "step": 4743 }, { "epoch": 0.39, "grad_norm": 0.815490200853265, "learning_rate": 1.4071694763866988e-05, "loss": 0.538, "step": 4744 }, { "epoch": 0.39, "grad_norm": 0.894925411565493, "learning_rate": 1.406929024716572e-05, "loss": 0.6092, "step": 4745 }, { "epoch": 0.39, "grad_norm": 0.963522106511423, "learning_rate": 1.4066885448466252e-05, "loss": 0.5206, "step": 4746 }, { "epoch": 0.39, "grad_norm": 0.8876060513326824, "learning_rate": 1.406448036793523e-05, "loss": 0.5827, "step": 4747 }, { "epoch": 0.39, "grad_norm": 0.9248326157942703, "learning_rate": 1.406207500573932e-05, "loss": 0.5604, "step": 4748 }, { "epoch": 0.39, "grad_norm": 0.8549469819980957, "learning_rate": 1.4059669362045216e-05, "loss": 0.5983, "step": 4749 }, { "epoch": 0.39, "grad_norm": 0.9122826272105434, "learning_rate": 1.4057263437019631e-05, "loss": 0.5094, "step": 4750 }, { "epoch": 0.39, "grad_norm": 0.841912287988438, "learning_rate": 1.4054857230829284e-05, "loss": 0.526, "step": 4751 }, { "epoch": 0.39, "grad_norm": 0.8528346965856147, "learning_rate": 1.4052450743640926e-05, "loss": 0.5588, "step": 4752 }, { "epoch": 0.39, "grad_norm": 0.9629513938454854, "learning_rate": 1.4050043975621328e-05, "loss": 0.5635, "step": 4753 }, { "epoch": 0.39, "grad_norm": 0.9684907374115393, "learning_rate": 1.4047636926937278e-05, "loss": 0.5735, "step": 4754 }, { "epoch": 0.39, "grad_norm": 0.9225498384798198, "learning_rate": 1.4045229597755574e-05, "loss": 0.5369, "step": 4755 }, { "epoch": 0.39, "grad_norm": 0.8387551385225303, "learning_rate": 1.404282198824305e-05, "loss": 0.5656, "step": 4756 }, { "epoch": 0.39, "grad_norm": 0.9722381067509114, "learning_rate": 1.4040414098566548e-05, "loss": 0.6086, "step": 4757 }, { "epoch": 0.39, "grad_norm": 0.8585789480518193, "learning_rate": 1.4038005928892932e-05, "loss": 0.5008, "step": 4758 }, { "epoch": 0.39, "grad_norm": 0.9809877521334563, "learning_rate": 1.4035597479389088e-05, "loss": 0.5988, "step": 4759 }, { "epoch": 0.39, "grad_norm": 0.9476898779708328, "learning_rate": 1.4033188750221918e-05, "loss": 0.595, "step": 4760 }, { "epoch": 0.39, "grad_norm": 0.9190154556053546, "learning_rate": 1.4030779741558345e-05, "loss": 0.5272, "step": 4761 }, { "epoch": 0.39, "grad_norm": 0.8986826250041086, "learning_rate": 1.402837045356531e-05, "loss": 0.6074, "step": 4762 }, { "epoch": 0.39, "grad_norm": 0.9669448931268747, "learning_rate": 1.4025960886409777e-05, "loss": 0.5329, "step": 4763 }, { "epoch": 0.39, "grad_norm": 0.8597820710103147, "learning_rate": 1.4023551040258726e-05, "loss": 0.5572, "step": 4764 }, { "epoch": 0.39, "grad_norm": 0.9632057604242157, "learning_rate": 1.4021140915279157e-05, "loss": 0.5526, "step": 4765 }, { "epoch": 0.39, "grad_norm": 0.8111326793433378, "learning_rate": 1.4018730511638087e-05, "loss": 0.4572, "step": 4766 }, { "epoch": 0.39, "grad_norm": 0.8599006225258523, "learning_rate": 1.4016319829502559e-05, "loss": 0.5486, "step": 4767 }, { "epoch": 0.39, "grad_norm": 0.9054438326651731, "learning_rate": 1.4013908869039627e-05, "loss": 0.585, "step": 4768 }, { "epoch": 0.39, "grad_norm": 0.815284889579051, "learning_rate": 1.4011497630416375e-05, "loss": 0.5256, "step": 4769 }, { "epoch": 0.39, "grad_norm": 1.0681117562803168, "learning_rate": 1.4009086113799892e-05, "loss": 0.5563, "step": 4770 }, { "epoch": 0.39, "grad_norm": 0.9004269746499685, "learning_rate": 1.4006674319357298e-05, "loss": 0.5582, "step": 4771 }, { "epoch": 0.39, "grad_norm": 0.9434517262368302, "learning_rate": 1.400426224725573e-05, "loss": 0.5648, "step": 4772 }, { "epoch": 0.39, "grad_norm": 0.8505463208839589, "learning_rate": 1.4001849897662337e-05, "loss": 0.5259, "step": 4773 }, { "epoch": 0.39, "grad_norm": 0.8630515762899552, "learning_rate": 1.3999437270744296e-05, "loss": 0.5718, "step": 4774 }, { "epoch": 0.39, "grad_norm": 0.9791660948704696, "learning_rate": 1.3997024366668802e-05, "loss": 0.6139, "step": 4775 }, { "epoch": 0.39, "grad_norm": 0.9069769270170313, "learning_rate": 1.3994611185603062e-05, "loss": 0.5569, "step": 4776 }, { "epoch": 0.39, "grad_norm": 0.9980723952054046, "learning_rate": 1.399219772771431e-05, "loss": 0.5669, "step": 4777 }, { "epoch": 0.39, "grad_norm": 0.8644689973208419, "learning_rate": 1.3989783993169798e-05, "loss": 0.536, "step": 4778 }, { "epoch": 0.39, "grad_norm": 0.879292682310879, "learning_rate": 1.3987369982136794e-05, "loss": 0.5866, "step": 4779 }, { "epoch": 0.39, "grad_norm": 0.9060036514914707, "learning_rate": 1.3984955694782584e-05, "loss": 0.5524, "step": 4780 }, { "epoch": 0.39, "grad_norm": 0.8978917858742707, "learning_rate": 1.3982541131274485e-05, "loss": 0.5106, "step": 4781 }, { "epoch": 0.39, "grad_norm": 0.8626340658742684, "learning_rate": 1.3980126291779814e-05, "loss": 0.5919, "step": 4782 }, { "epoch": 0.39, "grad_norm": 0.8853907390891486, "learning_rate": 1.3977711176465923e-05, "loss": 0.5066, "step": 4783 }, { "epoch": 0.39, "grad_norm": 0.9719894539559228, "learning_rate": 1.3975295785500176e-05, "loss": 0.6066, "step": 4784 }, { "epoch": 0.39, "grad_norm": 0.8420668650366185, "learning_rate": 1.3972880119049954e-05, "loss": 0.5569, "step": 4785 }, { "epoch": 0.39, "grad_norm": 1.0819288026025051, "learning_rate": 1.3970464177282665e-05, "loss": 0.6061, "step": 4786 }, { "epoch": 0.39, "grad_norm": 0.8661264684608802, "learning_rate": 1.3968047960365733e-05, "loss": 0.529, "step": 4787 }, { "epoch": 0.39, "grad_norm": 0.9256001465669449, "learning_rate": 1.3965631468466593e-05, "loss": 0.5234, "step": 4788 }, { "epoch": 0.39, "grad_norm": 0.9031084374098787, "learning_rate": 1.3963214701752714e-05, "loss": 0.5153, "step": 4789 }, { "epoch": 0.39, "grad_norm": 0.9398534523407612, "learning_rate": 1.396079766039157e-05, "loss": 0.5374, "step": 4790 }, { "epoch": 0.39, "grad_norm": 0.9908903392902311, "learning_rate": 1.3958380344550659e-05, "loss": 0.5367, "step": 4791 }, { "epoch": 0.39, "grad_norm": 0.953551175148032, "learning_rate": 1.3955962754397505e-05, "loss": 0.4969, "step": 4792 }, { "epoch": 0.39, "grad_norm": 0.908420529771096, "learning_rate": 1.395354489009964e-05, "loss": 0.5562, "step": 4793 }, { "epoch": 0.39, "grad_norm": 0.8407068398016209, "learning_rate": 1.3951126751824618e-05, "loss": 0.4925, "step": 4794 }, { "epoch": 0.39, "grad_norm": 1.0293422320380614, "learning_rate": 1.3948708339740019e-05, "loss": 0.5454, "step": 4795 }, { "epoch": 0.39, "grad_norm": 0.9554664617714606, "learning_rate": 1.3946289654013435e-05, "loss": 0.6564, "step": 4796 }, { "epoch": 0.39, "grad_norm": 1.0877753621417259, "learning_rate": 1.3943870694812475e-05, "loss": 0.5433, "step": 4797 }, { "epoch": 0.39, "grad_norm": 0.8856524373788587, "learning_rate": 1.3941451462304778e-05, "loss": 0.553, "step": 4798 }, { "epoch": 0.39, "grad_norm": 0.9286230008806331, "learning_rate": 1.393903195665799e-05, "loss": 0.5859, "step": 4799 }, { "epoch": 0.39, "grad_norm": 0.9510367799250085, "learning_rate": 1.393661217803978e-05, "loss": 0.5666, "step": 4800 }, { "epoch": 0.39, "grad_norm": 0.9448143716639735, "learning_rate": 1.3934192126617838e-05, "loss": 0.5982, "step": 4801 }, { "epoch": 0.39, "grad_norm": 0.9379935538051243, "learning_rate": 1.3931771802559875e-05, "loss": 0.5737, "step": 4802 }, { "epoch": 0.39, "grad_norm": 0.7776618529565054, "learning_rate": 1.3929351206033607e-05, "loss": 0.4976, "step": 4803 }, { "epoch": 0.39, "grad_norm": 0.8568590847033114, "learning_rate": 1.392693033720679e-05, "loss": 0.4904, "step": 4804 }, { "epoch": 0.39, "grad_norm": 0.8834314638024736, "learning_rate": 1.3924509196247185e-05, "loss": 0.579, "step": 4805 }, { "epoch": 0.39, "grad_norm": 0.8940184195213572, "learning_rate": 1.392208778332257e-05, "loss": 0.6216, "step": 4806 }, { "epoch": 0.39, "grad_norm": 0.9112868877658168, "learning_rate": 1.3919666098600753e-05, "loss": 0.5378, "step": 4807 }, { "epoch": 0.39, "grad_norm": 0.9216500150943909, "learning_rate": 1.3917244142249551e-05, "loss": 0.5763, "step": 4808 }, { "epoch": 0.39, "grad_norm": 0.893654843861322, "learning_rate": 1.3914821914436805e-05, "loss": 0.5533, "step": 4809 }, { "epoch": 0.39, "grad_norm": 0.8614911309355482, "learning_rate": 1.3912399415330371e-05, "loss": 0.5161, "step": 4810 }, { "epoch": 0.39, "grad_norm": 0.9230499012788027, "learning_rate": 1.3909976645098131e-05, "loss": 0.5516, "step": 4811 }, { "epoch": 0.39, "grad_norm": 0.9496706748160366, "learning_rate": 1.3907553603907974e-05, "loss": 0.5344, "step": 4812 }, { "epoch": 0.39, "grad_norm": 0.8650379902912981, "learning_rate": 1.3905130291927822e-05, "loss": 0.5882, "step": 4813 }, { "epoch": 0.39, "grad_norm": 0.998972087470307, "learning_rate": 1.3902706709325603e-05, "loss": 0.5186, "step": 4814 }, { "epoch": 0.39, "grad_norm": 0.8508144263902477, "learning_rate": 1.3900282856269271e-05, "loss": 0.5418, "step": 4815 }, { "epoch": 0.39, "grad_norm": 1.0025157638908624, "learning_rate": 1.3897858732926794e-05, "loss": 0.5818, "step": 4816 }, { "epoch": 0.39, "grad_norm": 1.0516850697205595, "learning_rate": 1.3895434339466167e-05, "loss": 0.6107, "step": 4817 }, { "epoch": 0.39, "grad_norm": 0.9658348227769601, "learning_rate": 1.3893009676055395e-05, "loss": 0.6161, "step": 4818 }, { "epoch": 0.39, "grad_norm": 1.0289714416989588, "learning_rate": 1.3890584742862508e-05, "loss": 0.5631, "step": 4819 }, { "epoch": 0.39, "grad_norm": 0.9460224670710317, "learning_rate": 1.3888159540055544e-05, "loss": 0.5635, "step": 4820 }, { "epoch": 0.39, "grad_norm": 0.9800985935542305, "learning_rate": 1.3885734067802576e-05, "loss": 0.5972, "step": 4821 }, { "epoch": 0.39, "grad_norm": 0.9354319412514424, "learning_rate": 1.3883308326271682e-05, "loss": 0.546, "step": 4822 }, { "epoch": 0.39, "grad_norm": 0.9247901332190117, "learning_rate": 1.3880882315630968e-05, "loss": 0.6057, "step": 4823 }, { "epoch": 0.39, "grad_norm": 0.8687178724760034, "learning_rate": 1.387845603604855e-05, "loss": 0.5477, "step": 4824 }, { "epoch": 0.39, "grad_norm": 0.9721356384986916, "learning_rate": 1.387602948769257e-05, "loss": 0.5753, "step": 4825 }, { "epoch": 0.39, "grad_norm": 0.9264169462238878, "learning_rate": 1.3873602670731184e-05, "loss": 0.5709, "step": 4826 }, { "epoch": 0.39, "grad_norm": 0.8798247430588303, "learning_rate": 1.387117558533257e-05, "loss": 0.5449, "step": 4827 }, { "epoch": 0.39, "grad_norm": 1.027001023657605, "learning_rate": 1.3868748231664918e-05, "loss": 0.5988, "step": 4828 }, { "epoch": 0.39, "grad_norm": 0.8553348754174525, "learning_rate": 1.3866320609896449e-05, "loss": 0.5224, "step": 4829 }, { "epoch": 0.39, "grad_norm": 0.9102274530812277, "learning_rate": 1.3863892720195389e-05, "loss": 0.6143, "step": 4830 }, { "epoch": 0.39, "grad_norm": 0.8638981385026545, "learning_rate": 1.3861464562729992e-05, "loss": 0.4728, "step": 4831 }, { "epoch": 0.39, "grad_norm": 0.8269196063569016, "learning_rate": 1.3859036137668525e-05, "loss": 0.5418, "step": 4832 }, { "epoch": 0.39, "grad_norm": 1.0036783596030001, "learning_rate": 1.3856607445179278e-05, "loss": 0.6036, "step": 4833 }, { "epoch": 0.39, "grad_norm": 0.9065362519579458, "learning_rate": 1.3854178485430554e-05, "loss": 0.5078, "step": 4834 }, { "epoch": 0.39, "grad_norm": 1.048824179044158, "learning_rate": 1.3851749258590679e-05, "loss": 0.6005, "step": 4835 }, { "epoch": 0.39, "grad_norm": 0.8302691187789433, "learning_rate": 1.3849319764828e-05, "loss": 0.4975, "step": 4836 }, { "epoch": 0.39, "grad_norm": 0.9031582569220298, "learning_rate": 1.3846890004310873e-05, "loss": 0.5631, "step": 4837 }, { "epoch": 0.39, "grad_norm": 0.9177195645049511, "learning_rate": 1.3844459977207683e-05, "loss": 0.5383, "step": 4838 }, { "epoch": 0.39, "grad_norm": 0.949269225994463, "learning_rate": 1.3842029683686826e-05, "loss": 0.5748, "step": 4839 }, { "epoch": 0.39, "grad_norm": 0.937659934137604, "learning_rate": 1.3839599123916718e-05, "loss": 0.5762, "step": 4840 }, { "epoch": 0.39, "grad_norm": 0.8996926326469921, "learning_rate": 1.3837168298065798e-05, "loss": 0.61, "step": 4841 }, { "epoch": 0.39, "grad_norm": 0.94884621874138, "learning_rate": 1.3834737206302519e-05, "loss": 0.6057, "step": 4842 }, { "epoch": 0.39, "grad_norm": 0.923036476443756, "learning_rate": 1.3832305848795352e-05, "loss": 0.5301, "step": 4843 }, { "epoch": 0.39, "grad_norm": 0.8541275293194943, "learning_rate": 1.382987422571279e-05, "loss": 0.5602, "step": 4844 }, { "epoch": 0.39, "grad_norm": 0.9501069778376648, "learning_rate": 1.382744233722334e-05, "loss": 0.5255, "step": 4845 }, { "epoch": 0.39, "grad_norm": 0.9630480388424695, "learning_rate": 1.382501018349553e-05, "loss": 0.5897, "step": 4846 }, { "epoch": 0.39, "grad_norm": 0.8762902360405572, "learning_rate": 1.3822577764697908e-05, "loss": 0.5017, "step": 4847 }, { "epoch": 0.39, "grad_norm": 1.043217051840068, "learning_rate": 1.3820145080999038e-05, "loss": 0.575, "step": 4848 }, { "epoch": 0.39, "grad_norm": 0.9439300127928049, "learning_rate": 1.3817712132567503e-05, "loss": 0.5904, "step": 4849 }, { "epoch": 0.39, "grad_norm": 0.8881934762338176, "learning_rate": 1.3815278919571901e-05, "loss": 0.5236, "step": 4850 }, { "epoch": 0.39, "grad_norm": 0.9482363818913019, "learning_rate": 1.3812845442180857e-05, "loss": 0.6188, "step": 4851 }, { "epoch": 0.39, "grad_norm": 1.0714832359599773, "learning_rate": 1.3810411700563005e-05, "loss": 0.5725, "step": 4852 }, { "epoch": 0.39, "grad_norm": 0.9972613620841716, "learning_rate": 1.3807977694887003e-05, "loss": 0.6182, "step": 4853 }, { "epoch": 0.39, "grad_norm": 0.9624229426930314, "learning_rate": 1.3805543425321524e-05, "loss": 0.5063, "step": 4854 }, { "epoch": 0.39, "grad_norm": 0.8981685605321771, "learning_rate": 1.3803108892035259e-05, "loss": 0.522, "step": 4855 }, { "epoch": 0.39, "grad_norm": 0.8918230664294677, "learning_rate": 1.3800674095196922e-05, "loss": 0.5192, "step": 4856 }, { "epoch": 0.39, "grad_norm": 1.021530752522361, "learning_rate": 1.3798239034975243e-05, "loss": 0.5427, "step": 4857 }, { "epoch": 0.39, "grad_norm": 0.8679200195770674, "learning_rate": 1.3795803711538966e-05, "loss": 0.5067, "step": 4858 }, { "epoch": 0.39, "grad_norm": 1.0076351564977803, "learning_rate": 1.3793368125056859e-05, "loss": 0.6147, "step": 4859 }, { "epoch": 0.39, "grad_norm": 0.9477492129643318, "learning_rate": 1.3790932275697708e-05, "loss": 0.5777, "step": 4860 }, { "epoch": 0.4, "grad_norm": 0.9912531451536267, "learning_rate": 1.378849616363031e-05, "loss": 0.5575, "step": 4861 }, { "epoch": 0.4, "grad_norm": 1.008150805432256, "learning_rate": 1.3786059789023487e-05, "loss": 0.5991, "step": 4862 }, { "epoch": 0.4, "grad_norm": 0.967448144120769, "learning_rate": 1.3783623152046084e-05, "loss": 0.5237, "step": 4863 }, { "epoch": 0.4, "grad_norm": 1.0518336967776385, "learning_rate": 1.3781186252866948e-05, "loss": 0.5971, "step": 4864 }, { "epoch": 0.4, "grad_norm": 0.9082645655876715, "learning_rate": 1.377874909165496e-05, "loss": 0.5647, "step": 4865 }, { "epoch": 0.4, "grad_norm": 0.9465929099003216, "learning_rate": 1.3776311668579012e-05, "loss": 0.5546, "step": 4866 }, { "epoch": 0.4, "grad_norm": 0.8694075318857712, "learning_rate": 1.3773873983808014e-05, "loss": 0.5255, "step": 4867 }, { "epoch": 0.4, "grad_norm": 0.9269426482677092, "learning_rate": 1.3771436037510897e-05, "loss": 0.5775, "step": 4868 }, { "epoch": 0.4, "grad_norm": 0.9833980689058247, "learning_rate": 1.3768997829856608e-05, "loss": 0.5893, "step": 4869 }, { "epoch": 0.4, "grad_norm": 0.9336236085410238, "learning_rate": 1.3766559361014113e-05, "loss": 0.5903, "step": 4870 }, { "epoch": 0.4, "grad_norm": 0.8591684244273873, "learning_rate": 1.3764120631152395e-05, "loss": 0.5636, "step": 4871 }, { "epoch": 0.4, "grad_norm": 0.896987510351119, "learning_rate": 1.3761681640440455e-05, "loss": 0.5898, "step": 4872 }, { "epoch": 0.4, "grad_norm": 0.9260583363210217, "learning_rate": 1.3759242389047315e-05, "loss": 0.5162, "step": 4873 }, { "epoch": 0.4, "grad_norm": 0.9281921574947163, "learning_rate": 1.375680287714201e-05, "loss": 0.6638, "step": 4874 }, { "epoch": 0.4, "grad_norm": 0.9548686164779019, "learning_rate": 1.37543631048936e-05, "loss": 0.5717, "step": 4875 }, { "epoch": 0.4, "grad_norm": 0.939319463753251, "learning_rate": 1.3751923072471159e-05, "loss": 0.6109, "step": 4876 }, { "epoch": 0.4, "grad_norm": 0.9370066407879878, "learning_rate": 1.3749482780043773e-05, "loss": 0.4715, "step": 4877 }, { "epoch": 0.4, "grad_norm": 0.9853199068327506, "learning_rate": 1.3747042227780557e-05, "loss": 0.5326, "step": 4878 }, { "epoch": 0.4, "grad_norm": 0.9239784176479834, "learning_rate": 1.3744601415850637e-05, "loss": 0.5714, "step": 4879 }, { "epoch": 0.4, "grad_norm": 0.910935042749798, "learning_rate": 1.3742160344423164e-05, "loss": 0.5666, "step": 4880 }, { "epoch": 0.4, "grad_norm": 0.9811225149068671, "learning_rate": 1.3739719013667297e-05, "loss": 0.6076, "step": 4881 }, { "epoch": 0.4, "grad_norm": 0.9149100233787881, "learning_rate": 1.3737277423752218e-05, "loss": 0.5563, "step": 4882 }, { "epoch": 0.4, "grad_norm": 0.9053166838456309, "learning_rate": 1.373483557484713e-05, "loss": 0.4929, "step": 4883 }, { "epoch": 0.4, "grad_norm": 0.7872734502680776, "learning_rate": 1.3732393467121247e-05, "loss": 0.4891, "step": 4884 }, { "epoch": 0.4, "grad_norm": 0.8742286431117344, "learning_rate": 1.372995110074381e-05, "loss": 0.5329, "step": 4885 }, { "epoch": 0.4, "grad_norm": 0.8672043564902082, "learning_rate": 1.3727508475884071e-05, "loss": 0.5895, "step": 4886 }, { "epoch": 0.4, "grad_norm": 0.8414643393353751, "learning_rate": 1.3725065592711299e-05, "loss": 0.4682, "step": 4887 }, { "epoch": 0.4, "grad_norm": 0.8875840210510944, "learning_rate": 1.3722622451394784e-05, "loss": 0.6033, "step": 4888 }, { "epoch": 0.4, "grad_norm": 0.8779128007665025, "learning_rate": 1.3720179052103836e-05, "loss": 0.5105, "step": 4889 }, { "epoch": 0.4, "grad_norm": 0.9046433815894597, "learning_rate": 1.3717735395007786e-05, "loss": 0.4746, "step": 4890 }, { "epoch": 0.4, "grad_norm": 0.9050851117821455, "learning_rate": 1.3715291480275963e-05, "loss": 0.5518, "step": 4891 }, { "epoch": 0.4, "grad_norm": 0.9681695345357252, "learning_rate": 1.3712847308077737e-05, "loss": 0.5641, "step": 4892 }, { "epoch": 0.4, "grad_norm": 0.9480998717859649, "learning_rate": 1.3710402878582487e-05, "loss": 0.5638, "step": 4893 }, { "epoch": 0.4, "grad_norm": 1.010665403165591, "learning_rate": 1.3707958191959609e-05, "loss": 0.5386, "step": 4894 }, { "epoch": 0.4, "grad_norm": 0.9721875042908399, "learning_rate": 1.3705513248378517e-05, "loss": 0.605, "step": 4895 }, { "epoch": 0.4, "grad_norm": 0.8790064569896765, "learning_rate": 1.3703068048008645e-05, "loss": 0.4673, "step": 4896 }, { "epoch": 0.4, "grad_norm": 0.8314633413004586, "learning_rate": 1.3700622591019439e-05, "loss": 0.5181, "step": 4897 }, { "epoch": 0.4, "grad_norm": 0.8243806242658588, "learning_rate": 1.3698176877580372e-05, "loss": 0.5087, "step": 4898 }, { "epoch": 0.4, "grad_norm": 0.889784596263977, "learning_rate": 1.3695730907860925e-05, "loss": 0.5393, "step": 4899 }, { "epoch": 0.4, "grad_norm": 0.9543436360355445, "learning_rate": 1.3693284682030608e-05, "loss": 0.5735, "step": 4900 }, { "epoch": 0.4, "grad_norm": 0.8393976604033532, "learning_rate": 1.3690838200258936e-05, "loss": 0.5944, "step": 4901 }, { "epoch": 0.4, "grad_norm": 1.0040665844972214, "learning_rate": 1.368839146271545e-05, "loss": 0.6325, "step": 4902 }, { "epoch": 0.4, "grad_norm": 0.8661069998555928, "learning_rate": 1.368594446956971e-05, "loss": 0.549, "step": 4903 }, { "epoch": 0.4, "grad_norm": 0.980762907582011, "learning_rate": 1.3683497220991286e-05, "loss": 0.5668, "step": 4904 }, { "epoch": 0.4, "grad_norm": 0.8303767586905748, "learning_rate": 1.3681049717149773e-05, "loss": 0.5261, "step": 4905 }, { "epoch": 0.4, "grad_norm": 1.0368179040107803, "learning_rate": 1.3678601958214779e-05, "loss": 0.527, "step": 4906 }, { "epoch": 0.4, "grad_norm": 0.9249813464997794, "learning_rate": 1.367615394435593e-05, "loss": 0.6134, "step": 4907 }, { "epoch": 0.4, "grad_norm": 0.8358321020913861, "learning_rate": 1.3673705675742875e-05, "loss": 0.5125, "step": 4908 }, { "epoch": 0.4, "grad_norm": 1.0496005259301522, "learning_rate": 1.3671257152545277e-05, "loss": 0.6041, "step": 4909 }, { "epoch": 0.4, "grad_norm": 0.946126940263957, "learning_rate": 1.3668808374932812e-05, "loss": 0.572, "step": 4910 }, { "epoch": 0.4, "grad_norm": 0.848440255461486, "learning_rate": 1.3666359343075182e-05, "loss": 0.5319, "step": 4911 }, { "epoch": 0.4, "grad_norm": 0.8872846916857009, "learning_rate": 1.3663910057142102e-05, "loss": 0.5519, "step": 4912 }, { "epoch": 0.4, "grad_norm": 1.0799104995159567, "learning_rate": 1.3661460517303304e-05, "loss": 0.6399, "step": 4913 }, { "epoch": 0.4, "grad_norm": 0.8940584447880352, "learning_rate": 1.3659010723728542e-05, "loss": 0.5479, "step": 4914 }, { "epoch": 0.4, "grad_norm": 0.9056599511334902, "learning_rate": 1.3656560676587583e-05, "loss": 0.623, "step": 4915 }, { "epoch": 0.4, "grad_norm": 0.8597632452723915, "learning_rate": 1.3654110376050209e-05, "loss": 0.5268, "step": 4916 }, { "epoch": 0.4, "grad_norm": 0.9576599634253601, "learning_rate": 1.3651659822286227e-05, "loss": 0.5411, "step": 4917 }, { "epoch": 0.4, "grad_norm": 0.9237904664858961, "learning_rate": 1.364920901546546e-05, "loss": 0.5251, "step": 4918 }, { "epoch": 0.4, "grad_norm": 0.8414244460916807, "learning_rate": 1.3646757955757746e-05, "loss": 0.533, "step": 4919 }, { "epoch": 0.4, "grad_norm": 0.9113629315058073, "learning_rate": 1.3644306643332939e-05, "loss": 0.5311, "step": 4920 }, { "epoch": 0.4, "grad_norm": 0.903990767127771, "learning_rate": 1.3641855078360914e-05, "loss": 0.5323, "step": 4921 }, { "epoch": 0.4, "grad_norm": 0.9785840692406184, "learning_rate": 1.3639403261011563e-05, "loss": 0.5502, "step": 4922 }, { "epoch": 0.4, "grad_norm": 0.9387542745252239, "learning_rate": 1.3636951191454792e-05, "loss": 0.5561, "step": 4923 }, { "epoch": 0.4, "grad_norm": 0.899595616734336, "learning_rate": 1.3634498869860533e-05, "loss": 0.4931, "step": 4924 }, { "epoch": 0.4, "grad_norm": 0.8787075359867942, "learning_rate": 1.3632046296398724e-05, "loss": 0.5138, "step": 4925 }, { "epoch": 0.4, "grad_norm": 0.8436660881527684, "learning_rate": 1.3629593471239328e-05, "loss": 0.5069, "step": 4926 }, { "epoch": 0.4, "grad_norm": 0.9229983052966406, "learning_rate": 1.3627140394552326e-05, "loss": 0.5417, "step": 4927 }, { "epoch": 0.4, "grad_norm": 0.9687480936383686, "learning_rate": 1.3624687066507709e-05, "loss": 0.6037, "step": 4928 }, { "epoch": 0.4, "grad_norm": 0.862557622655597, "learning_rate": 1.3622233487275493e-05, "loss": 0.5621, "step": 4929 }, { "epoch": 0.4, "grad_norm": 0.8746701383679512, "learning_rate": 1.3619779657025714e-05, "loss": 0.574, "step": 4930 }, { "epoch": 0.4, "grad_norm": 0.9638263263332979, "learning_rate": 1.3617325575928414e-05, "loss": 0.5793, "step": 4931 }, { "epoch": 0.4, "grad_norm": 0.8753934118629156, "learning_rate": 1.3614871244153655e-05, "loss": 0.506, "step": 4932 }, { "epoch": 0.4, "grad_norm": 0.9135589434822865, "learning_rate": 1.3612416661871532e-05, "loss": 0.5127, "step": 4933 }, { "epoch": 0.4, "grad_norm": 0.9604594234952577, "learning_rate": 1.3609961829252133e-05, "loss": 0.5363, "step": 4934 }, { "epoch": 0.4, "grad_norm": 1.028473474567092, "learning_rate": 1.3607506746465584e-05, "loss": 0.5779, "step": 4935 }, { "epoch": 0.4, "grad_norm": 0.902538042974246, "learning_rate": 1.360505141368202e-05, "loss": 0.5267, "step": 4936 }, { "epoch": 0.4, "grad_norm": 0.8992938909760883, "learning_rate": 1.3602595831071586e-05, "loss": 0.5842, "step": 4937 }, { "epoch": 0.4, "grad_norm": 0.8780589847799097, "learning_rate": 1.3600139998804459e-05, "loss": 0.5513, "step": 4938 }, { "epoch": 0.4, "grad_norm": 0.9142510765861755, "learning_rate": 1.359768391705082e-05, "loss": 0.5406, "step": 4939 }, { "epoch": 0.4, "grad_norm": 0.9567959566085759, "learning_rate": 1.3595227585980881e-05, "loss": 0.5539, "step": 4940 }, { "epoch": 0.4, "grad_norm": 0.8807388652451061, "learning_rate": 1.3592771005764857e-05, "loss": 0.5587, "step": 4941 }, { "epoch": 0.4, "grad_norm": 0.8651113785797417, "learning_rate": 1.3590314176572989e-05, "loss": 0.5546, "step": 4942 }, { "epoch": 0.4, "grad_norm": 0.8171458610105073, "learning_rate": 1.3587857098575534e-05, "loss": 0.5267, "step": 4943 }, { "epoch": 0.4, "grad_norm": 1.0893608287859775, "learning_rate": 1.3585399771942764e-05, "loss": 0.5818, "step": 4944 }, { "epoch": 0.4, "grad_norm": 0.9605917712592748, "learning_rate": 1.358294219684497e-05, "loss": 0.5289, "step": 4945 }, { "epoch": 0.4, "grad_norm": 0.7972262798819203, "learning_rate": 1.3580484373452462e-05, "loss": 0.5292, "step": 4946 }, { "epoch": 0.4, "grad_norm": 0.9419043795441053, "learning_rate": 1.357802630193556e-05, "loss": 0.5731, "step": 4947 }, { "epoch": 0.4, "grad_norm": 0.8941534978079186, "learning_rate": 1.357556798246461e-05, "loss": 0.51, "step": 4948 }, { "epoch": 0.4, "grad_norm": 0.9127635202406887, "learning_rate": 1.357310941520997e-05, "loss": 0.5553, "step": 4949 }, { "epoch": 0.4, "grad_norm": 0.9002276388779009, "learning_rate": 1.3570650600342017e-05, "loss": 0.5921, "step": 4950 }, { "epoch": 0.4, "grad_norm": 0.8717763667596604, "learning_rate": 1.3568191538031146e-05, "loss": 0.5518, "step": 4951 }, { "epoch": 0.4, "grad_norm": 0.8191098322900104, "learning_rate": 1.3565732228447766e-05, "loss": 0.4965, "step": 4952 }, { "epoch": 0.4, "grad_norm": 0.9373761418635452, "learning_rate": 1.3563272671762304e-05, "loss": 0.5527, "step": 4953 }, { "epoch": 0.4, "grad_norm": 0.9116380184176788, "learning_rate": 1.3560812868145206e-05, "loss": 0.5502, "step": 4954 }, { "epoch": 0.4, "grad_norm": 0.9782949235050161, "learning_rate": 1.3558352817766935e-05, "loss": 0.5888, "step": 4955 }, { "epoch": 0.4, "grad_norm": 0.8729097488069214, "learning_rate": 1.355589252079797e-05, "loss": 0.585, "step": 4956 }, { "epoch": 0.4, "grad_norm": 0.92953206689005, "learning_rate": 1.3553431977408809e-05, "loss": 0.6116, "step": 4957 }, { "epoch": 0.4, "grad_norm": 0.9161281687717558, "learning_rate": 1.3550971187769964e-05, "loss": 0.582, "step": 4958 }, { "epoch": 0.4, "grad_norm": 0.9544317740681652, "learning_rate": 1.3548510152051963e-05, "loss": 0.5308, "step": 4959 }, { "epoch": 0.4, "grad_norm": 0.9300834006161792, "learning_rate": 1.3546048870425356e-05, "loss": 0.5908, "step": 4960 }, { "epoch": 0.4, "grad_norm": 0.8682564450672268, "learning_rate": 1.3543587343060712e-05, "loss": 0.5659, "step": 4961 }, { "epoch": 0.4, "grad_norm": 0.8507907896472288, "learning_rate": 1.3541125570128603e-05, "loss": 0.5334, "step": 4962 }, { "epoch": 0.4, "grad_norm": 0.8841622079095046, "learning_rate": 1.3538663551799636e-05, "loss": 0.6271, "step": 4963 }, { "epoch": 0.4, "grad_norm": 0.8433659287109949, "learning_rate": 1.3536201288244425e-05, "loss": 0.5682, "step": 4964 }, { "epoch": 0.4, "grad_norm": 0.9244820516318176, "learning_rate": 1.3533738779633597e-05, "loss": 0.5897, "step": 4965 }, { "epoch": 0.4, "grad_norm": 0.7981635507567023, "learning_rate": 1.3531276026137807e-05, "loss": 0.4975, "step": 4966 }, { "epoch": 0.4, "grad_norm": 1.234573810119623, "learning_rate": 1.3528813027927724e-05, "loss": 0.56, "step": 4967 }, { "epoch": 0.4, "grad_norm": 0.9720942100727602, "learning_rate": 1.3526349785174025e-05, "loss": 0.5927, "step": 4968 }, { "epoch": 0.4, "grad_norm": 0.9249900333619757, "learning_rate": 1.3523886298047412e-05, "loss": 0.5049, "step": 4969 }, { "epoch": 0.4, "grad_norm": 0.8817644529264543, "learning_rate": 1.3521422566718609e-05, "loss": 0.5317, "step": 4970 }, { "epoch": 0.4, "grad_norm": 0.8806141034443261, "learning_rate": 1.3518958591358345e-05, "loss": 0.4908, "step": 4971 }, { "epoch": 0.4, "grad_norm": 0.9637460271329971, "learning_rate": 1.3516494372137368e-05, "loss": 0.5397, "step": 4972 }, { "epoch": 0.4, "grad_norm": 0.9061409021519361, "learning_rate": 1.3514029909226454e-05, "loss": 0.5826, "step": 4973 }, { "epoch": 0.4, "grad_norm": 0.9483299745746715, "learning_rate": 1.3511565202796381e-05, "loss": 0.5759, "step": 4974 }, { "epoch": 0.4, "grad_norm": 0.9451773146461331, "learning_rate": 1.3509100253017958e-05, "loss": 0.5814, "step": 4975 }, { "epoch": 0.4, "grad_norm": 0.8567772541508121, "learning_rate": 1.3506635060062e-05, "loss": 0.5104, "step": 4976 }, { "epoch": 0.4, "grad_norm": 0.8383479350018068, "learning_rate": 1.350416962409934e-05, "loss": 0.5064, "step": 4977 }, { "epoch": 0.4, "grad_norm": 0.8912113016113629, "learning_rate": 1.3501703945300832e-05, "loss": 0.5262, "step": 4978 }, { "epoch": 0.4, "grad_norm": 0.9870956538767125, "learning_rate": 1.349923802383735e-05, "loss": 0.6047, "step": 4979 }, { "epoch": 0.4, "grad_norm": 0.977475323915746, "learning_rate": 1.3496771859879774e-05, "loss": 0.5306, "step": 4980 }, { "epoch": 0.4, "grad_norm": 0.9128506647381485, "learning_rate": 1.349430545359901e-05, "loss": 0.5356, "step": 4981 }, { "epoch": 0.4, "grad_norm": 0.9323166542632595, "learning_rate": 1.349183880516598e-05, "loss": 0.5505, "step": 4982 }, { "epoch": 0.4, "grad_norm": 0.9276052946168218, "learning_rate": 1.3489371914751616e-05, "loss": 0.5412, "step": 4983 }, { "epoch": 0.41, "grad_norm": 0.9888331579532674, "learning_rate": 1.3486904782526876e-05, "loss": 0.5327, "step": 4984 }, { "epoch": 0.41, "grad_norm": 0.8900191506817302, "learning_rate": 1.3484437408662725e-05, "loss": 0.5338, "step": 4985 }, { "epoch": 0.41, "grad_norm": 0.8606918204173645, "learning_rate": 1.3481969793330151e-05, "loss": 0.5094, "step": 4986 }, { "epoch": 0.41, "grad_norm": 0.9423971372459556, "learning_rate": 1.3479501936700161e-05, "loss": 0.5849, "step": 4987 }, { "epoch": 0.41, "grad_norm": 0.9037119495482759, "learning_rate": 1.3477033838943774e-05, "loss": 0.5204, "step": 4988 }, { "epoch": 0.41, "grad_norm": 1.0464546656121712, "learning_rate": 1.3474565500232025e-05, "loss": 0.6279, "step": 4989 }, { "epoch": 0.41, "grad_norm": 1.0047842505728581, "learning_rate": 1.3472096920735966e-05, "loss": 0.6017, "step": 4990 }, { "epoch": 0.41, "grad_norm": 0.9207006697256435, "learning_rate": 1.3469628100626678e-05, "loss": 0.5511, "step": 4991 }, { "epoch": 0.41, "grad_norm": 0.9042762572590564, "learning_rate": 1.3467159040075233e-05, "loss": 0.5697, "step": 4992 }, { "epoch": 0.41, "grad_norm": 0.9258256902752167, "learning_rate": 1.3464689739252741e-05, "loss": 0.5919, "step": 4993 }, { "epoch": 0.41, "grad_norm": 0.8510889422046063, "learning_rate": 1.346222019833033e-05, "loss": 0.5575, "step": 4994 }, { "epoch": 0.41, "grad_norm": 0.923790834710621, "learning_rate": 1.3459750417479125e-05, "loss": 0.5767, "step": 4995 }, { "epoch": 0.41, "grad_norm": 0.9655024203049409, "learning_rate": 1.3457280396870285e-05, "loss": 0.5502, "step": 4996 }, { "epoch": 0.41, "grad_norm": 0.8978053659559004, "learning_rate": 1.3454810136674983e-05, "loss": 0.5429, "step": 4997 }, { "epoch": 0.41, "grad_norm": 0.9173149006811787, "learning_rate": 1.34523396370644e-05, "loss": 0.5388, "step": 4998 }, { "epoch": 0.41, "grad_norm": 0.8803052233081673, "learning_rate": 1.3449868898209743e-05, "loss": 0.5308, "step": 4999 }, { "epoch": 0.41, "grad_norm": 0.9213276267952032, "learning_rate": 1.3447397920282232e-05, "loss": 0.5615, "step": 5000 }, { "epoch": 0.41, "grad_norm": 1.0679262404551646, "learning_rate": 1.3444926703453102e-05, "loss": 0.5619, "step": 5001 }, { "epoch": 0.41, "grad_norm": 0.8507200211182417, "learning_rate": 1.3442455247893608e-05, "loss": 0.5086, "step": 5002 }, { "epoch": 0.41, "grad_norm": 0.9397594122523698, "learning_rate": 1.3439983553775018e-05, "loss": 0.5929, "step": 5003 }, { "epoch": 0.41, "grad_norm": 0.8575371004987608, "learning_rate": 1.3437511621268622e-05, "loss": 0.4855, "step": 5004 }, { "epoch": 0.41, "grad_norm": 0.8893823279593794, "learning_rate": 1.3435039450545718e-05, "loss": 0.5046, "step": 5005 }, { "epoch": 0.41, "grad_norm": 0.9879627215267409, "learning_rate": 1.3432567041777624e-05, "loss": 0.5308, "step": 5006 }, { "epoch": 0.41, "grad_norm": 0.8223979075210116, "learning_rate": 1.3430094395135682e-05, "loss": 0.4693, "step": 5007 }, { "epoch": 0.41, "grad_norm": 1.0213441059322579, "learning_rate": 1.342762151079124e-05, "loss": 0.5125, "step": 5008 }, { "epoch": 0.41, "grad_norm": 0.8893830497058866, "learning_rate": 1.3425148388915668e-05, "loss": 0.529, "step": 5009 }, { "epoch": 0.41, "grad_norm": 0.8453904170689711, "learning_rate": 1.3422675029680352e-05, "loss": 0.5243, "step": 5010 }, { "epoch": 0.41, "grad_norm": 0.9235327456897732, "learning_rate": 1.342020143325669e-05, "loss": 0.5161, "step": 5011 }, { "epoch": 0.41, "grad_norm": 0.934246128185342, "learning_rate": 1.3417727599816101e-05, "loss": 0.5572, "step": 5012 }, { "epoch": 0.41, "grad_norm": 0.843513392770215, "learning_rate": 1.3415253529530026e-05, "loss": 0.5877, "step": 5013 }, { "epoch": 0.41, "grad_norm": 0.8979152627392312, "learning_rate": 1.3412779222569907e-05, "loss": 0.5576, "step": 5014 }, { "epoch": 0.41, "grad_norm": 0.9027225679116367, "learning_rate": 1.3410304679107214e-05, "loss": 0.5798, "step": 5015 }, { "epoch": 0.41, "grad_norm": 0.9785640983126853, "learning_rate": 1.3407829899313435e-05, "loss": 0.4948, "step": 5016 }, { "epoch": 0.41, "grad_norm": 0.9026415340866438, "learning_rate": 1.3405354883360064e-05, "loss": 0.6247, "step": 5017 }, { "epoch": 0.41, "grad_norm": 0.9659391475419029, "learning_rate": 1.3402879631418621e-05, "loss": 0.5754, "step": 5018 }, { "epoch": 0.41, "grad_norm": 0.909795574321967, "learning_rate": 1.3400404143660639e-05, "loss": 0.5855, "step": 5019 }, { "epoch": 0.41, "grad_norm": 0.9365576954027569, "learning_rate": 1.3397928420257664e-05, "loss": 0.572, "step": 5020 }, { "epoch": 0.41, "grad_norm": 0.9481969842800917, "learning_rate": 1.3395452461381265e-05, "loss": 0.5493, "step": 5021 }, { "epoch": 0.41, "grad_norm": 0.9159633447540382, "learning_rate": 1.3392976267203024e-05, "loss": 0.6018, "step": 5022 }, { "epoch": 0.41, "grad_norm": 0.929746076040423, "learning_rate": 1.3390499837894533e-05, "loss": 0.5801, "step": 5023 }, { "epoch": 0.41, "grad_norm": 0.9262633274121936, "learning_rate": 1.3388023173627413e-05, "loss": 0.5262, "step": 5024 }, { "epoch": 0.41, "grad_norm": 0.9079531784297293, "learning_rate": 1.3385546274573294e-05, "loss": 0.5131, "step": 5025 }, { "epoch": 0.41, "grad_norm": 1.161142792927715, "learning_rate": 1.3383069140903816e-05, "loss": 0.5189, "step": 5026 }, { "epoch": 0.41, "grad_norm": 0.9383572230897598, "learning_rate": 1.3380591772790652e-05, "loss": 0.5646, "step": 5027 }, { "epoch": 0.41, "grad_norm": 0.8725547375928622, "learning_rate": 1.3378114170405473e-05, "loss": 0.536, "step": 5028 }, { "epoch": 0.41, "grad_norm": 0.9961487700807296, "learning_rate": 1.3375636333919981e-05, "loss": 0.5026, "step": 5029 }, { "epoch": 0.41, "grad_norm": 0.9560730427420159, "learning_rate": 1.3373158263505886e-05, "loss": 0.5708, "step": 5030 }, { "epoch": 0.41, "grad_norm": 0.889332135064333, "learning_rate": 1.3370679959334911e-05, "loss": 0.4974, "step": 5031 }, { "epoch": 0.41, "grad_norm": 0.9112265983457682, "learning_rate": 1.336820142157881e-05, "loss": 0.5586, "step": 5032 }, { "epoch": 0.41, "grad_norm": 1.0140920595535092, "learning_rate": 1.3365722650409336e-05, "loss": 0.6051, "step": 5033 }, { "epoch": 0.41, "grad_norm": 0.8265136838840981, "learning_rate": 1.3363243645998265e-05, "loss": 0.5112, "step": 5034 }, { "epoch": 0.41, "grad_norm": 0.8664416233893417, "learning_rate": 1.3360764408517398e-05, "loss": 0.5771, "step": 5035 }, { "epoch": 0.41, "grad_norm": 0.8928211824327998, "learning_rate": 1.3358284938138532e-05, "loss": 0.552, "step": 5036 }, { "epoch": 0.41, "grad_norm": 0.8708038249180292, "learning_rate": 1.3355805235033503e-05, "loss": 0.5487, "step": 5037 }, { "epoch": 0.41, "grad_norm": 0.8648901980307919, "learning_rate": 1.3353325299374147e-05, "loss": 0.5156, "step": 5038 }, { "epoch": 0.41, "grad_norm": 0.9253251558176558, "learning_rate": 1.3350845131332322e-05, "loss": 0.5946, "step": 5039 }, { "epoch": 0.41, "grad_norm": 0.9087741111085298, "learning_rate": 1.33483647310799e-05, "loss": 0.5366, "step": 5040 }, { "epoch": 0.41, "grad_norm": 0.9031770607268709, "learning_rate": 1.3345884098788775e-05, "loss": 0.5449, "step": 5041 }, { "epoch": 0.41, "grad_norm": 0.9048945632273513, "learning_rate": 1.334340323463085e-05, "loss": 0.5902, "step": 5042 }, { "epoch": 0.41, "grad_norm": 0.928909059359663, "learning_rate": 1.3340922138778042e-05, "loss": 0.5314, "step": 5043 }, { "epoch": 0.41, "grad_norm": 0.9285281521455355, "learning_rate": 1.3338440811402298e-05, "loss": 0.5399, "step": 5044 }, { "epoch": 0.41, "grad_norm": 1.0519660440082719, "learning_rate": 1.3335959252675566e-05, "loss": 0.6227, "step": 5045 }, { "epoch": 0.41, "grad_norm": 0.8580949517974473, "learning_rate": 1.3333477462769814e-05, "loss": 0.5642, "step": 5046 }, { "epoch": 0.41, "grad_norm": 0.9121537302597602, "learning_rate": 1.333099544185703e-05, "loss": 0.5169, "step": 5047 }, { "epoch": 0.41, "grad_norm": 0.8995134871690074, "learning_rate": 1.332851319010922e-05, "loss": 0.5968, "step": 5048 }, { "epoch": 0.41, "grad_norm": 0.9135632096680434, "learning_rate": 1.3326030707698399e-05, "loss": 0.5906, "step": 5049 }, { "epoch": 0.41, "grad_norm": 0.9046364651883315, "learning_rate": 1.3323547994796597e-05, "loss": 0.6055, "step": 5050 }, { "epoch": 0.41, "grad_norm": 0.9203931898005878, "learning_rate": 1.3321065051575868e-05, "loss": 0.5704, "step": 5051 }, { "epoch": 0.41, "grad_norm": 0.8673245017777986, "learning_rate": 1.3318581878208279e-05, "loss": 0.4961, "step": 5052 }, { "epoch": 0.41, "grad_norm": 0.9076849454851069, "learning_rate": 1.3316098474865905e-05, "loss": 0.4916, "step": 5053 }, { "epoch": 0.41, "grad_norm": 0.9212244712246338, "learning_rate": 1.331361484172085e-05, "loss": 0.5526, "step": 5054 }, { "epoch": 0.41, "grad_norm": 1.046665168224382, "learning_rate": 1.3311130978945228e-05, "loss": 0.5865, "step": 5055 }, { "epoch": 0.41, "grad_norm": 0.8503823922686983, "learning_rate": 1.3308646886711163e-05, "loss": 0.449, "step": 5056 }, { "epoch": 0.41, "grad_norm": 0.9493204815760115, "learning_rate": 1.3306162565190805e-05, "loss": 0.6048, "step": 5057 }, { "epoch": 0.41, "grad_norm": 0.9107820615188169, "learning_rate": 1.3303678014556316e-05, "loss": 0.5662, "step": 5058 }, { "epoch": 0.41, "grad_norm": 0.9714108115018177, "learning_rate": 1.3301193234979865e-05, "loss": 0.5288, "step": 5059 }, { "epoch": 0.41, "grad_norm": 0.9419033164293819, "learning_rate": 1.3298708226633657e-05, "loss": 0.6086, "step": 5060 }, { "epoch": 0.41, "grad_norm": 0.907506779452863, "learning_rate": 1.3296222989689892e-05, "loss": 0.549, "step": 5061 }, { "epoch": 0.41, "grad_norm": 0.9435728748431338, "learning_rate": 1.3293737524320798e-05, "loss": 0.6021, "step": 5062 }, { "epoch": 0.41, "grad_norm": 0.9155414972518737, "learning_rate": 1.3291251830698615e-05, "loss": 0.5394, "step": 5063 }, { "epoch": 0.41, "grad_norm": 1.016370420589111, "learning_rate": 1.3288765908995598e-05, "loss": 0.5631, "step": 5064 }, { "epoch": 0.41, "grad_norm": 0.9096054349595017, "learning_rate": 1.3286279759384022e-05, "loss": 0.543, "step": 5065 }, { "epoch": 0.41, "grad_norm": 0.8356030266153411, "learning_rate": 1.3283793382036175e-05, "loss": 0.5312, "step": 5066 }, { "epoch": 0.41, "grad_norm": 0.9494496729847453, "learning_rate": 1.3281306777124356e-05, "loss": 0.5967, "step": 5067 }, { "epoch": 0.41, "grad_norm": 1.0309264218563654, "learning_rate": 1.3278819944820893e-05, "loss": 0.5769, "step": 5068 }, { "epoch": 0.41, "grad_norm": 0.8414918365636198, "learning_rate": 1.327633288529811e-05, "loss": 0.5492, "step": 5069 }, { "epoch": 0.41, "grad_norm": 0.8900427211041783, "learning_rate": 1.3273845598728367e-05, "loss": 0.5579, "step": 5070 }, { "epoch": 0.41, "grad_norm": 0.9043752020683701, "learning_rate": 1.3271358085284029e-05, "loss": 0.53, "step": 5071 }, { "epoch": 0.41, "grad_norm": 0.9876593193228603, "learning_rate": 1.3268870345137476e-05, "loss": 0.6066, "step": 5072 }, { "epoch": 0.41, "grad_norm": 0.9081892965471735, "learning_rate": 1.3266382378461109e-05, "loss": 0.5406, "step": 5073 }, { "epoch": 0.41, "grad_norm": 0.8612314037801095, "learning_rate": 1.3263894185427339e-05, "loss": 0.5397, "step": 5074 }, { "epoch": 0.41, "grad_norm": 0.9374695390411759, "learning_rate": 1.3261405766208598e-05, "loss": 0.4895, "step": 5075 }, { "epoch": 0.41, "grad_norm": 0.9984321444647333, "learning_rate": 1.3258917120977327e-05, "loss": 0.6127, "step": 5076 }, { "epoch": 0.41, "grad_norm": 0.9179346377204336, "learning_rate": 1.3256428249905998e-05, "loss": 0.5219, "step": 5077 }, { "epoch": 0.41, "grad_norm": 0.9518656711900425, "learning_rate": 1.3253939153167072e-05, "loss": 0.6394, "step": 5078 }, { "epoch": 0.41, "grad_norm": 0.8958542762619648, "learning_rate": 1.3251449830933052e-05, "loss": 0.5355, "step": 5079 }, { "epoch": 0.41, "grad_norm": 0.9534508359693109, "learning_rate": 1.3248960283376441e-05, "loss": 0.579, "step": 5080 }, { "epoch": 0.41, "grad_norm": 0.9344960582822963, "learning_rate": 1.3246470510669766e-05, "loss": 0.5403, "step": 5081 }, { "epoch": 0.41, "grad_norm": 0.9374250380548944, "learning_rate": 1.3243980512985563e-05, "loss": 0.5178, "step": 5082 }, { "epoch": 0.41, "grad_norm": 0.8228686351928819, "learning_rate": 1.3241490290496391e-05, "loss": 0.5492, "step": 5083 }, { "epoch": 0.41, "grad_norm": 0.8978557739748596, "learning_rate": 1.3238999843374814e-05, "loss": 0.5082, "step": 5084 }, { "epoch": 0.41, "grad_norm": 0.9340313155095844, "learning_rate": 1.323650917179342e-05, "loss": 0.5216, "step": 5085 }, { "epoch": 0.41, "grad_norm": 0.8916719591757329, "learning_rate": 1.3234018275924814e-05, "loss": 0.5377, "step": 5086 }, { "epoch": 0.41, "grad_norm": 1.0150853993724218, "learning_rate": 1.3231527155941607e-05, "loss": 0.6204, "step": 5087 }, { "epoch": 0.41, "grad_norm": 0.9273769401626929, "learning_rate": 1.3229035812016438e-05, "loss": 0.5609, "step": 5088 }, { "epoch": 0.41, "grad_norm": 0.9169044311151517, "learning_rate": 1.322654424432195e-05, "loss": 0.5639, "step": 5089 }, { "epoch": 0.41, "grad_norm": 0.9112610005312264, "learning_rate": 1.3224052453030806e-05, "loss": 0.5845, "step": 5090 }, { "epoch": 0.41, "grad_norm": 0.9805041567073909, "learning_rate": 1.3221560438315689e-05, "loss": 0.6059, "step": 5091 }, { "epoch": 0.41, "grad_norm": 0.9047437596079848, "learning_rate": 1.3219068200349292e-05, "loss": 0.5612, "step": 5092 }, { "epoch": 0.41, "grad_norm": 0.9327788511751683, "learning_rate": 1.321657573930432e-05, "loss": 0.5585, "step": 5093 }, { "epoch": 0.41, "grad_norm": 0.906266310194228, "learning_rate": 1.3214083055353504e-05, "loss": 0.5256, "step": 5094 }, { "epoch": 0.41, "grad_norm": 0.9780421675103058, "learning_rate": 1.3211590148669586e-05, "loss": 0.5435, "step": 5095 }, { "epoch": 0.41, "grad_norm": 0.8590529244903038, "learning_rate": 1.3209097019425317e-05, "loss": 0.5427, "step": 5096 }, { "epoch": 0.41, "grad_norm": 0.871310828995196, "learning_rate": 1.3206603667793472e-05, "loss": 0.5254, "step": 5097 }, { "epoch": 0.41, "grad_norm": 0.889387767827606, "learning_rate": 1.3204110093946835e-05, "loss": 0.4817, "step": 5098 }, { "epoch": 0.41, "grad_norm": 0.8515472409039916, "learning_rate": 1.3201616298058214e-05, "loss": 0.5029, "step": 5099 }, { "epoch": 0.41, "grad_norm": 0.9682710945847225, "learning_rate": 1.3199122280300418e-05, "loss": 0.6037, "step": 5100 }, { "epoch": 0.41, "grad_norm": 0.9251328046898974, "learning_rate": 1.319662804084629e-05, "loss": 0.5449, "step": 5101 }, { "epoch": 0.41, "grad_norm": 0.8627728010761364, "learning_rate": 1.3194133579868672e-05, "loss": 0.537, "step": 5102 }, { "epoch": 0.41, "grad_norm": 0.8675483717514128, "learning_rate": 1.319163889754043e-05, "loss": 0.5234, "step": 5103 }, { "epoch": 0.41, "grad_norm": 0.8891470238901393, "learning_rate": 1.3189143994034448e-05, "loss": 0.5632, "step": 5104 }, { "epoch": 0.41, "grad_norm": 0.878464155700256, "learning_rate": 1.318664886952361e-05, "loss": 0.483, "step": 5105 }, { "epoch": 0.41, "grad_norm": 0.9519110168495648, "learning_rate": 1.3184153524180837e-05, "loss": 0.5674, "step": 5106 }, { "epoch": 0.42, "grad_norm": 0.8828085357873369, "learning_rate": 1.3181657958179046e-05, "loss": 0.5326, "step": 5107 }, { "epoch": 0.42, "grad_norm": 1.0078717687703735, "learning_rate": 1.317916217169118e-05, "loss": 0.6089, "step": 5108 }, { "epoch": 0.42, "grad_norm": 0.8520385880997858, "learning_rate": 1.3176666164890195e-05, "loss": 0.5646, "step": 5109 }, { "epoch": 0.42, "grad_norm": 0.871313113477867, "learning_rate": 1.3174169937949066e-05, "loss": 0.4969, "step": 5110 }, { "epoch": 0.42, "grad_norm": 0.8740222761172795, "learning_rate": 1.3171673491040772e-05, "loss": 0.5236, "step": 5111 }, { "epoch": 0.42, "grad_norm": 0.9805578927608394, "learning_rate": 1.3169176824338321e-05, "loss": 0.5786, "step": 5112 }, { "epoch": 0.42, "grad_norm": 0.8763258975337161, "learning_rate": 1.3166679938014728e-05, "loss": 0.5381, "step": 5113 }, { "epoch": 0.42, "grad_norm": 0.8597644001374837, "learning_rate": 1.316418283224302e-05, "loss": 0.5418, "step": 5114 }, { "epoch": 0.42, "grad_norm": 0.97008825068724, "learning_rate": 1.3161685507196251e-05, "loss": 0.5815, "step": 5115 }, { "epoch": 0.42, "grad_norm": 0.9828993498197132, "learning_rate": 1.3159187963047481e-05, "loss": 0.571, "step": 5116 }, { "epoch": 0.42, "grad_norm": 0.8992665461979235, "learning_rate": 1.3156690199969786e-05, "loss": 0.562, "step": 5117 }, { "epoch": 0.42, "grad_norm": 0.9991378355301007, "learning_rate": 1.3154192218136261e-05, "loss": 0.6009, "step": 5118 }, { "epoch": 0.42, "grad_norm": 0.8981685770096789, "learning_rate": 1.3151694017720016e-05, "loss": 0.5287, "step": 5119 }, { "epoch": 0.42, "grad_norm": 0.9229520921845562, "learning_rate": 1.3149195598894167e-05, "loss": 0.5432, "step": 5120 }, { "epoch": 0.42, "grad_norm": 1.31688240689969, "learning_rate": 1.3146696961831858e-05, "loss": 0.526, "step": 5121 }, { "epoch": 0.42, "grad_norm": 0.8872179410218345, "learning_rate": 1.314419810670624e-05, "loss": 0.5431, "step": 5122 }, { "epoch": 0.42, "grad_norm": 0.7965128643545243, "learning_rate": 1.314169903369048e-05, "loss": 0.465, "step": 5123 }, { "epoch": 0.42, "grad_norm": 0.9228116133064066, "learning_rate": 1.3139199742957767e-05, "loss": 0.583, "step": 5124 }, { "epoch": 0.42, "grad_norm": 0.9708210120494751, "learning_rate": 1.3136700234681294e-05, "loss": 0.6015, "step": 5125 }, { "epoch": 0.42, "grad_norm": 0.9379346186917816, "learning_rate": 1.313420050903428e-05, "loss": 0.5597, "step": 5126 }, { "epoch": 0.42, "grad_norm": 0.8583394317632376, "learning_rate": 1.313170056618995e-05, "loss": 0.511, "step": 5127 }, { "epoch": 0.42, "grad_norm": 0.9664397262334126, "learning_rate": 1.3129200406321545e-05, "loss": 0.5993, "step": 5128 }, { "epoch": 0.42, "grad_norm": 0.937342255366917, "learning_rate": 1.312670002960233e-05, "loss": 0.5576, "step": 5129 }, { "epoch": 0.42, "grad_norm": 0.8938685765950013, "learning_rate": 1.3124199436205575e-05, "loss": 0.6095, "step": 5130 }, { "epoch": 0.42, "grad_norm": 0.8795874008243144, "learning_rate": 1.3121698626304574e-05, "loss": 0.5268, "step": 5131 }, { "epoch": 0.42, "grad_norm": 0.9572529534621174, "learning_rate": 1.3119197600072624e-05, "loss": 0.5539, "step": 5132 }, { "epoch": 0.42, "grad_norm": 0.7892642422145666, "learning_rate": 1.3116696357683047e-05, "loss": 0.5267, "step": 5133 }, { "epoch": 0.42, "grad_norm": 0.9124039323834103, "learning_rate": 1.3114194899309175e-05, "loss": 0.5872, "step": 5134 }, { "epoch": 0.42, "grad_norm": 0.9034469389534122, "learning_rate": 1.3111693225124365e-05, "loss": 0.5655, "step": 5135 }, { "epoch": 0.42, "grad_norm": 0.980671478125758, "learning_rate": 1.310919133530197e-05, "loss": 0.5409, "step": 5136 }, { "epoch": 0.42, "grad_norm": 0.7970800799824627, "learning_rate": 1.3106689230015372e-05, "loss": 0.4885, "step": 5137 }, { "epoch": 0.42, "grad_norm": 0.8885303582931813, "learning_rate": 1.310418690943797e-05, "loss": 0.5902, "step": 5138 }, { "epoch": 0.42, "grad_norm": 0.8250161927876943, "learning_rate": 1.3101684373743166e-05, "loss": 0.5207, "step": 5139 }, { "epoch": 0.42, "grad_norm": 0.809632259881539, "learning_rate": 1.3099181623104386e-05, "loss": 0.5252, "step": 5140 }, { "epoch": 0.42, "grad_norm": 0.9036939639535884, "learning_rate": 1.3096678657695072e-05, "loss": 0.5677, "step": 5141 }, { "epoch": 0.42, "grad_norm": 0.8522063526840421, "learning_rate": 1.3094175477688671e-05, "loss": 0.4812, "step": 5142 }, { "epoch": 0.42, "grad_norm": 0.9591812262443219, "learning_rate": 1.3091672083258653e-05, "loss": 0.5718, "step": 5143 }, { "epoch": 0.42, "grad_norm": 0.8796134146527177, "learning_rate": 1.3089168474578504e-05, "loss": 0.5509, "step": 5144 }, { "epoch": 0.42, "grad_norm": 0.9637346590878184, "learning_rate": 1.3086664651821719e-05, "loss": 0.5226, "step": 5145 }, { "epoch": 0.42, "grad_norm": 0.8654354312013556, "learning_rate": 1.308416061516181e-05, "loss": 0.5672, "step": 5146 }, { "epoch": 0.42, "grad_norm": 1.0063143032868171, "learning_rate": 1.3081656364772308e-05, "loss": 0.5448, "step": 5147 }, { "epoch": 0.42, "grad_norm": 0.9170705999289325, "learning_rate": 1.3079151900826752e-05, "loss": 0.5176, "step": 5148 }, { "epoch": 0.42, "grad_norm": 0.9637334144792751, "learning_rate": 1.3076647223498703e-05, "loss": 0.553, "step": 5149 }, { "epoch": 0.42, "grad_norm": 0.9671932172736195, "learning_rate": 1.3074142332961729e-05, "loss": 0.5069, "step": 5150 }, { "epoch": 0.42, "grad_norm": 0.9363712624459859, "learning_rate": 1.3071637229389416e-05, "loss": 0.5368, "step": 5151 }, { "epoch": 0.42, "grad_norm": 0.9080314781366527, "learning_rate": 1.3069131912955368e-05, "loss": 0.5389, "step": 5152 }, { "epoch": 0.42, "grad_norm": 0.848854023646483, "learning_rate": 1.3066626383833203e-05, "loss": 0.5374, "step": 5153 }, { "epoch": 0.42, "grad_norm": 0.991351160584258, "learning_rate": 1.3064120642196549e-05, "loss": 0.5621, "step": 5154 }, { "epoch": 0.42, "grad_norm": 0.9563550446244192, "learning_rate": 1.306161468821905e-05, "loss": 0.572, "step": 5155 }, { "epoch": 0.42, "grad_norm": 0.9125008511353809, "learning_rate": 1.3059108522074373e-05, "loss": 0.5362, "step": 5156 }, { "epoch": 0.42, "grad_norm": 0.8805060313587676, "learning_rate": 1.3056602143936185e-05, "loss": 0.5868, "step": 5157 }, { "epoch": 0.42, "grad_norm": 0.8926377582854529, "learning_rate": 1.3054095553978181e-05, "loss": 0.5372, "step": 5158 }, { "epoch": 0.42, "grad_norm": 0.8709372652470595, "learning_rate": 1.3051588752374067e-05, "loss": 0.5799, "step": 5159 }, { "epoch": 0.42, "grad_norm": 0.9118354309326878, "learning_rate": 1.3049081739297556e-05, "loss": 0.5731, "step": 5160 }, { "epoch": 0.42, "grad_norm": 0.8113643930294744, "learning_rate": 1.3046574514922386e-05, "loss": 0.5097, "step": 5161 }, { "epoch": 0.42, "grad_norm": 0.8474405953530317, "learning_rate": 1.3044067079422304e-05, "loss": 0.5331, "step": 5162 }, { "epoch": 0.42, "grad_norm": 0.9439939773612503, "learning_rate": 1.304155943297107e-05, "loss": 0.5656, "step": 5163 }, { "epoch": 0.42, "grad_norm": 0.8037282697874863, "learning_rate": 1.303905157574247e-05, "loss": 0.5426, "step": 5164 }, { "epoch": 0.42, "grad_norm": 0.879080006172946, "learning_rate": 1.303654350791029e-05, "loss": 0.6089, "step": 5165 }, { "epoch": 0.42, "grad_norm": 0.931439091942248, "learning_rate": 1.3034035229648338e-05, "loss": 0.5938, "step": 5166 }, { "epoch": 0.42, "grad_norm": 0.897235722501806, "learning_rate": 1.3031526741130435e-05, "loss": 0.6158, "step": 5167 }, { "epoch": 0.42, "grad_norm": 0.9778046778570904, "learning_rate": 1.3029018042530421e-05, "loss": 0.616, "step": 5168 }, { "epoch": 0.42, "grad_norm": 0.8938904176131726, "learning_rate": 1.3026509134022143e-05, "loss": 0.5148, "step": 5169 }, { "epoch": 0.42, "grad_norm": 0.7637438153298557, "learning_rate": 1.3024000015779462e-05, "loss": 0.5162, "step": 5170 }, { "epoch": 0.42, "grad_norm": 0.8970184303722865, "learning_rate": 1.3021490687976269e-05, "loss": 0.5411, "step": 5171 }, { "epoch": 0.42, "grad_norm": 0.9379911755413544, "learning_rate": 1.3018981150786445e-05, "loss": 0.5198, "step": 5172 }, { "epoch": 0.42, "grad_norm": 0.9159176187237429, "learning_rate": 1.3016471404383907e-05, "loss": 0.5677, "step": 5173 }, { "epoch": 0.42, "grad_norm": 0.8686715488022813, "learning_rate": 1.3013961448942578e-05, "loss": 0.5728, "step": 5174 }, { "epoch": 0.42, "grad_norm": 1.0833391331559723, "learning_rate": 1.301145128463639e-05, "loss": 0.6127, "step": 5175 }, { "epoch": 0.42, "grad_norm": 0.9097788336027876, "learning_rate": 1.3008940911639302e-05, "loss": 0.5475, "step": 5176 }, { "epoch": 0.42, "grad_norm": 0.9956793165985215, "learning_rate": 1.3006430330125279e-05, "loss": 0.511, "step": 5177 }, { "epoch": 0.42, "grad_norm": 0.9377355682219023, "learning_rate": 1.30039195402683e-05, "loss": 0.5849, "step": 5178 }, { "epoch": 0.42, "grad_norm": 0.875112847377846, "learning_rate": 1.300140854224236e-05, "loss": 0.5154, "step": 5179 }, { "epoch": 0.42, "grad_norm": 0.8801229189319658, "learning_rate": 1.299889733622147e-05, "loss": 0.4734, "step": 5180 }, { "epoch": 0.42, "grad_norm": 0.968504878800029, "learning_rate": 1.2996385922379657e-05, "loss": 0.5744, "step": 5181 }, { "epoch": 0.42, "grad_norm": 0.7574635362509162, "learning_rate": 1.2993874300890956e-05, "loss": 0.4807, "step": 5182 }, { "epoch": 0.42, "grad_norm": 1.0783731403307208, "learning_rate": 1.2991362471929421e-05, "loss": 0.6033, "step": 5183 }, { "epoch": 0.42, "grad_norm": 0.9783506627178276, "learning_rate": 1.2988850435669123e-05, "loss": 0.5147, "step": 5184 }, { "epoch": 0.42, "grad_norm": 0.9580822811329507, "learning_rate": 1.2986338192284136e-05, "loss": 0.6557, "step": 5185 }, { "epoch": 0.42, "grad_norm": 0.953670429295701, "learning_rate": 1.2983825741948564e-05, "loss": 0.5506, "step": 5186 }, { "epoch": 0.42, "grad_norm": 0.9522694105450515, "learning_rate": 1.2981313084836514e-05, "loss": 0.5054, "step": 5187 }, { "epoch": 0.42, "grad_norm": 0.8276032219953149, "learning_rate": 1.2978800221122112e-05, "loss": 0.5091, "step": 5188 }, { "epoch": 0.42, "grad_norm": 0.915831507762252, "learning_rate": 1.2976287150979497e-05, "loss": 0.5395, "step": 5189 }, { "epoch": 0.42, "grad_norm": 0.89869095528428, "learning_rate": 1.297377387458282e-05, "loss": 0.6037, "step": 5190 }, { "epoch": 0.42, "grad_norm": 0.8638883544541025, "learning_rate": 1.2971260392106255e-05, "loss": 0.5695, "step": 5191 }, { "epoch": 0.42, "grad_norm": 0.8361552108274168, "learning_rate": 1.296874670372398e-05, "loss": 0.4965, "step": 5192 }, { "epoch": 0.42, "grad_norm": 0.78892219953772, "learning_rate": 1.2966232809610189e-05, "loss": 0.459, "step": 5193 }, { "epoch": 0.42, "grad_norm": 1.0306619602849194, "learning_rate": 1.2963718709939098e-05, "loss": 0.6183, "step": 5194 }, { "epoch": 0.42, "grad_norm": 0.9698810524692302, "learning_rate": 1.2961204404884928e-05, "loss": 0.6101, "step": 5195 }, { "epoch": 0.42, "grad_norm": 0.8714946620693301, "learning_rate": 1.2958689894621918e-05, "loss": 0.5364, "step": 5196 }, { "epoch": 0.42, "grad_norm": 0.9102783335520639, "learning_rate": 1.2956175179324323e-05, "loss": 0.5288, "step": 5197 }, { "epoch": 0.42, "grad_norm": 0.9513914469151453, "learning_rate": 1.2953660259166413e-05, "loss": 0.5882, "step": 5198 }, { "epoch": 0.42, "grad_norm": 0.9080645066236228, "learning_rate": 1.2951145134322465e-05, "loss": 0.5207, "step": 5199 }, { "epoch": 0.42, "grad_norm": 0.9749830303692215, "learning_rate": 1.2948629804966776e-05, "loss": 0.5629, "step": 5200 }, { "epoch": 0.42, "grad_norm": 1.0249364407270003, "learning_rate": 1.294611427127366e-05, "loss": 0.5952, "step": 5201 }, { "epoch": 0.42, "grad_norm": 0.9204881999222911, "learning_rate": 1.2943598533417437e-05, "loss": 0.5396, "step": 5202 }, { "epoch": 0.42, "grad_norm": 0.8352575903111814, "learning_rate": 1.2941082591572443e-05, "loss": 0.494, "step": 5203 }, { "epoch": 0.42, "grad_norm": 1.0853393787202654, "learning_rate": 1.2938566445913037e-05, "loss": 0.6347, "step": 5204 }, { "epoch": 0.42, "grad_norm": 0.9185748013824929, "learning_rate": 1.2936050096613584e-05, "loss": 0.5317, "step": 5205 }, { "epoch": 0.42, "grad_norm": 0.9245617764417169, "learning_rate": 1.2933533543848462e-05, "loss": 0.5411, "step": 5206 }, { "epoch": 0.42, "grad_norm": 1.0003230199061517, "learning_rate": 1.2931016787792069e-05, "loss": 0.5995, "step": 5207 }, { "epoch": 0.42, "grad_norm": 0.9833957390509364, "learning_rate": 1.292849982861881e-05, "loss": 0.604, "step": 5208 }, { "epoch": 0.42, "grad_norm": 0.9909115252326485, "learning_rate": 1.2925982666503111e-05, "loss": 0.5742, "step": 5209 }, { "epoch": 0.42, "grad_norm": 0.9407003727079608, "learning_rate": 1.2923465301619408e-05, "loss": 0.5392, "step": 5210 }, { "epoch": 0.42, "grad_norm": 0.9458924993894788, "learning_rate": 1.2920947734142155e-05, "loss": 0.5189, "step": 5211 }, { "epoch": 0.42, "grad_norm": 0.9561205767333625, "learning_rate": 1.2918429964245813e-05, "loss": 0.5466, "step": 5212 }, { "epoch": 0.42, "grad_norm": 0.8589889002194464, "learning_rate": 1.2915911992104864e-05, "loss": 0.5233, "step": 5213 }, { "epoch": 0.42, "grad_norm": 0.8304101043631497, "learning_rate": 1.2913393817893803e-05, "loss": 0.4897, "step": 5214 }, { "epoch": 0.42, "grad_norm": 0.9015121090208729, "learning_rate": 1.291087544178713e-05, "loss": 0.5162, "step": 5215 }, { "epoch": 0.42, "grad_norm": 0.8932814665787939, "learning_rate": 1.2908356863959372e-05, "loss": 0.5288, "step": 5216 }, { "epoch": 0.42, "grad_norm": 0.8710258804583972, "learning_rate": 1.2905838084585066e-05, "loss": 0.4831, "step": 5217 }, { "epoch": 0.42, "grad_norm": 0.8989285859062396, "learning_rate": 1.2903319103838756e-05, "loss": 0.5019, "step": 5218 }, { "epoch": 0.42, "grad_norm": 0.9187626899309298, "learning_rate": 1.2900799921895004e-05, "loss": 0.5805, "step": 5219 }, { "epoch": 0.42, "grad_norm": 0.9328702855013249, "learning_rate": 1.2898280538928396e-05, "loss": 0.5927, "step": 5220 }, { "epoch": 0.42, "grad_norm": 0.9985233117601223, "learning_rate": 1.2895760955113514e-05, "loss": 0.5583, "step": 5221 }, { "epoch": 0.42, "grad_norm": 0.9419826483791731, "learning_rate": 1.2893241170624968e-05, "loss": 0.5928, "step": 5222 }, { "epoch": 0.42, "grad_norm": 0.9519831626411481, "learning_rate": 1.2890721185637376e-05, "loss": 0.5869, "step": 5223 }, { "epoch": 0.42, "grad_norm": 0.9777805383448769, "learning_rate": 1.2888201000325368e-05, "loss": 0.5647, "step": 5224 }, { "epoch": 0.42, "grad_norm": 0.8718493503514608, "learning_rate": 1.2885680614863591e-05, "loss": 0.532, "step": 5225 }, { "epoch": 0.42, "grad_norm": 0.9404180060319389, "learning_rate": 1.2883160029426712e-05, "loss": 0.5258, "step": 5226 }, { "epoch": 0.42, "grad_norm": 0.9516912980108595, "learning_rate": 1.2880639244189397e-05, "loss": 0.5553, "step": 5227 }, { "epoch": 0.42, "grad_norm": 0.920813450561122, "learning_rate": 1.2878118259326335e-05, "loss": 0.5373, "step": 5228 }, { "epoch": 0.42, "grad_norm": 0.8585172444562801, "learning_rate": 1.2875597075012236e-05, "loss": 0.5208, "step": 5229 }, { "epoch": 0.43, "grad_norm": 0.8584243934134526, "learning_rate": 1.2873075691421808e-05, "loss": 0.4836, "step": 5230 }, { "epoch": 0.43, "grad_norm": 0.8292944955247089, "learning_rate": 1.2870554108729783e-05, "loss": 0.5396, "step": 5231 }, { "epoch": 0.43, "grad_norm": 0.8223913490116747, "learning_rate": 1.2868032327110904e-05, "loss": 0.4598, "step": 5232 }, { "epoch": 0.43, "grad_norm": 0.8055596776849586, "learning_rate": 1.2865510346739928e-05, "loss": 0.5109, "step": 5233 }, { "epoch": 0.43, "grad_norm": 0.817052218239722, "learning_rate": 1.2862988167791627e-05, "loss": 0.5415, "step": 5234 }, { "epoch": 0.43, "grad_norm": 1.0810268067366446, "learning_rate": 1.2860465790440788e-05, "loss": 0.5753, "step": 5235 }, { "epoch": 0.43, "grad_norm": 0.8402617458059244, "learning_rate": 1.2857943214862205e-05, "loss": 0.4776, "step": 5236 }, { "epoch": 0.43, "grad_norm": 0.8276994257844174, "learning_rate": 1.285542044123069e-05, "loss": 0.5087, "step": 5237 }, { "epoch": 0.43, "grad_norm": 0.8298950139953116, "learning_rate": 1.2852897469721074e-05, "loss": 0.5485, "step": 5238 }, { "epoch": 0.43, "grad_norm": 0.9031430098759592, "learning_rate": 1.2850374300508195e-05, "loss": 0.5088, "step": 5239 }, { "epoch": 0.43, "grad_norm": 0.950323481245847, "learning_rate": 1.2847850933766901e-05, "loss": 0.5373, "step": 5240 }, { "epoch": 0.43, "grad_norm": 1.005940982880705, "learning_rate": 1.2845327369672069e-05, "loss": 0.6226, "step": 5241 }, { "epoch": 0.43, "grad_norm": 1.0628170200163112, "learning_rate": 1.2842803608398568e-05, "loss": 0.5751, "step": 5242 }, { "epoch": 0.43, "grad_norm": 0.9088578999699106, "learning_rate": 1.2840279650121301e-05, "loss": 0.5189, "step": 5243 }, { "epoch": 0.43, "grad_norm": 0.9250960010536801, "learning_rate": 1.2837755495015176e-05, "loss": 0.5714, "step": 5244 }, { "epoch": 0.43, "grad_norm": 0.9393467217764105, "learning_rate": 1.283523114325511e-05, "loss": 0.546, "step": 5245 }, { "epoch": 0.43, "grad_norm": 1.0273850999637157, "learning_rate": 1.283270659501604e-05, "loss": 0.5864, "step": 5246 }, { "epoch": 0.43, "grad_norm": 0.8594473911869244, "learning_rate": 1.2830181850472918e-05, "loss": 0.5368, "step": 5247 }, { "epoch": 0.43, "grad_norm": 0.8807421155459568, "learning_rate": 1.2827656909800701e-05, "loss": 0.5712, "step": 5248 }, { "epoch": 0.43, "grad_norm": 0.8545283767021522, "learning_rate": 1.2825131773174371e-05, "loss": 0.5173, "step": 5249 }, { "epoch": 0.43, "grad_norm": 0.8419064676592511, "learning_rate": 1.2822606440768911e-05, "loss": 0.5258, "step": 5250 }, { "epoch": 0.43, "grad_norm": 0.8260318829592953, "learning_rate": 1.2820080912759334e-05, "loss": 0.4934, "step": 5251 }, { "epoch": 0.43, "grad_norm": 0.9071781523737736, "learning_rate": 1.2817555189320647e-05, "loss": 0.5202, "step": 5252 }, { "epoch": 0.43, "grad_norm": 0.9027757793662681, "learning_rate": 1.2815029270627885e-05, "loss": 0.5303, "step": 5253 }, { "epoch": 0.43, "grad_norm": 1.0212430197446423, "learning_rate": 1.2812503156856093e-05, "loss": 0.6029, "step": 5254 }, { "epoch": 0.43, "grad_norm": 0.8681516837710173, "learning_rate": 1.2809976848180328e-05, "loss": 0.5677, "step": 5255 }, { "epoch": 0.43, "grad_norm": 0.93728669923876, "learning_rate": 1.2807450344775656e-05, "loss": 0.5703, "step": 5256 }, { "epoch": 0.43, "grad_norm": 0.8472921948892896, "learning_rate": 1.2804923646817169e-05, "loss": 0.4963, "step": 5257 }, { "epoch": 0.43, "grad_norm": 0.9384456367903224, "learning_rate": 1.2802396754479958e-05, "loss": 0.5729, "step": 5258 }, { "epoch": 0.43, "grad_norm": 0.9159751306971758, "learning_rate": 1.279986966793914e-05, "loss": 0.5178, "step": 5259 }, { "epoch": 0.43, "grad_norm": 0.9505984994518845, "learning_rate": 1.2797342387369837e-05, "loss": 0.5697, "step": 5260 }, { "epoch": 0.43, "grad_norm": 0.8650825044597112, "learning_rate": 1.279481491294719e-05, "loss": 0.5482, "step": 5261 }, { "epoch": 0.43, "grad_norm": 0.9432578608303249, "learning_rate": 1.2792287244846345e-05, "loss": 0.5622, "step": 5262 }, { "epoch": 0.43, "grad_norm": 0.9072994162122723, "learning_rate": 1.2789759383242471e-05, "loss": 0.5521, "step": 5263 }, { "epoch": 0.43, "grad_norm": 0.927937356858074, "learning_rate": 1.2787231328310744e-05, "loss": 0.5355, "step": 5264 }, { "epoch": 0.43, "grad_norm": 0.8077970985617918, "learning_rate": 1.2784703080226364e-05, "loss": 0.5006, "step": 5265 }, { "epoch": 0.43, "grad_norm": 0.9341490586832795, "learning_rate": 1.2782174639164528e-05, "loss": 0.5911, "step": 5266 }, { "epoch": 0.43, "grad_norm": 0.8463801416391185, "learning_rate": 1.2779646005300457e-05, "loss": 0.5753, "step": 5267 }, { "epoch": 0.43, "grad_norm": 0.8251393824383413, "learning_rate": 1.2777117178809383e-05, "loss": 0.52, "step": 5268 }, { "epoch": 0.43, "grad_norm": 0.9756413025958026, "learning_rate": 1.2774588159866554e-05, "loss": 0.5064, "step": 5269 }, { "epoch": 0.43, "grad_norm": 0.9333713823028544, "learning_rate": 1.2772058948647224e-05, "loss": 0.5589, "step": 5270 }, { "epoch": 0.43, "grad_norm": 0.8904186468309239, "learning_rate": 1.2769529545326669e-05, "loss": 0.5401, "step": 5271 }, { "epoch": 0.43, "grad_norm": 0.7971808339481724, "learning_rate": 1.2766999950080172e-05, "loss": 0.5002, "step": 5272 }, { "epoch": 0.43, "grad_norm": 0.9132419627921222, "learning_rate": 1.2764470163083034e-05, "loss": 0.5205, "step": 5273 }, { "epoch": 0.43, "grad_norm": 0.9284686013582293, "learning_rate": 1.2761940184510564e-05, "loss": 0.6319, "step": 5274 }, { "epoch": 0.43, "grad_norm": 0.8241780076529768, "learning_rate": 1.2759410014538092e-05, "loss": 0.5005, "step": 5275 }, { "epoch": 0.43, "grad_norm": 0.9026954517606373, "learning_rate": 1.275687965334095e-05, "loss": 0.503, "step": 5276 }, { "epoch": 0.43, "grad_norm": 0.8621089733539753, "learning_rate": 1.2754349101094493e-05, "loss": 0.5055, "step": 5277 }, { "epoch": 0.43, "grad_norm": 0.876532065494681, "learning_rate": 1.2751818357974092e-05, "loss": 0.5718, "step": 5278 }, { "epoch": 0.43, "grad_norm": 0.9111254296857755, "learning_rate": 1.2749287424155114e-05, "loss": 0.5633, "step": 5279 }, { "epoch": 0.43, "grad_norm": 0.8105020558905384, "learning_rate": 1.2746756299812959e-05, "loss": 0.5318, "step": 5280 }, { "epoch": 0.43, "grad_norm": 0.8516231862863062, "learning_rate": 1.2744224985123031e-05, "loss": 0.5428, "step": 5281 }, { "epoch": 0.43, "grad_norm": 0.89583325399951, "learning_rate": 1.2741693480260742e-05, "loss": 0.56, "step": 5282 }, { "epoch": 0.43, "grad_norm": 0.9014276458558959, "learning_rate": 1.2739161785401525e-05, "loss": 0.5501, "step": 5283 }, { "epoch": 0.43, "grad_norm": 0.9709594700786953, "learning_rate": 1.2736629900720832e-05, "loss": 0.6068, "step": 5284 }, { "epoch": 0.43, "grad_norm": 0.8386333307614217, "learning_rate": 1.273409782639411e-05, "loss": 0.5139, "step": 5285 }, { "epoch": 0.43, "grad_norm": 0.9843303001964966, "learning_rate": 1.2731565562596833e-05, "loss": 0.5859, "step": 5286 }, { "epoch": 0.43, "grad_norm": 0.9895222868007922, "learning_rate": 1.2729033109504489e-05, "loss": 0.5086, "step": 5287 }, { "epoch": 0.43, "grad_norm": 0.8602660281687846, "learning_rate": 1.2726500467292569e-05, "loss": 0.5303, "step": 5288 }, { "epoch": 0.43, "grad_norm": 0.9087743403337459, "learning_rate": 1.2723967636136582e-05, "loss": 0.5649, "step": 5289 }, { "epoch": 0.43, "grad_norm": 0.841464399939494, "learning_rate": 1.272143461621206e-05, "loss": 0.5266, "step": 5290 }, { "epoch": 0.43, "grad_norm": 0.945482006218386, "learning_rate": 1.2718901407694529e-05, "loss": 0.5942, "step": 5291 }, { "epoch": 0.43, "grad_norm": 1.0264055245686943, "learning_rate": 1.2716368010759541e-05, "loss": 0.625, "step": 5292 }, { "epoch": 0.43, "grad_norm": 0.8925968532334795, "learning_rate": 1.2713834425582665e-05, "loss": 0.5599, "step": 5293 }, { "epoch": 0.43, "grad_norm": 0.9076586798006714, "learning_rate": 1.2711300652339466e-05, "loss": 0.5792, "step": 5294 }, { "epoch": 0.43, "grad_norm": 0.8088359146950926, "learning_rate": 1.2708766691205536e-05, "loss": 0.542, "step": 5295 }, { "epoch": 0.43, "grad_norm": 0.8453489931750089, "learning_rate": 1.270623254235648e-05, "loss": 0.4916, "step": 5296 }, { "epoch": 0.43, "grad_norm": 0.8287437918320928, "learning_rate": 1.2703698205967907e-05, "loss": 0.4938, "step": 5297 }, { "epoch": 0.43, "grad_norm": 1.0493865520195256, "learning_rate": 1.2701163682215447e-05, "loss": 0.5401, "step": 5298 }, { "epoch": 0.43, "grad_norm": 0.8536139904524773, "learning_rate": 1.2698628971274743e-05, "loss": 0.5139, "step": 5299 }, { "epoch": 0.43, "grad_norm": 0.89012269240296, "learning_rate": 1.269609407332144e-05, "loss": 0.5295, "step": 5300 }, { "epoch": 0.43, "grad_norm": 0.9291127537817483, "learning_rate": 1.2693558988531209e-05, "loss": 0.5729, "step": 5301 }, { "epoch": 0.43, "grad_norm": 0.9126294441308344, "learning_rate": 1.2691023717079735e-05, "loss": 0.5836, "step": 5302 }, { "epoch": 0.43, "grad_norm": 0.9426586461545383, "learning_rate": 1.26884882591427e-05, "loss": 0.5436, "step": 5303 }, { "epoch": 0.43, "grad_norm": 0.9323610742444876, "learning_rate": 1.2685952614895813e-05, "loss": 0.5832, "step": 5304 }, { "epoch": 0.43, "grad_norm": 0.8529103894723162, "learning_rate": 1.2683416784514796e-05, "loss": 0.6131, "step": 5305 }, { "epoch": 0.43, "grad_norm": 0.8558498414608312, "learning_rate": 1.2680880768175372e-05, "loss": 0.5317, "step": 5306 }, { "epoch": 0.43, "grad_norm": 0.8504492314757207, "learning_rate": 1.267834456605329e-05, "loss": 0.5051, "step": 5307 }, { "epoch": 0.43, "grad_norm": 0.8464138513156245, "learning_rate": 1.2675808178324305e-05, "loss": 0.4996, "step": 5308 }, { "epoch": 0.43, "grad_norm": 0.8419298654246816, "learning_rate": 1.2673271605164189e-05, "loss": 0.5133, "step": 5309 }, { "epoch": 0.43, "grad_norm": 0.9322900796698717, "learning_rate": 1.2670734846748717e-05, "loss": 0.5407, "step": 5310 }, { "epoch": 0.43, "grad_norm": 0.8430730919042455, "learning_rate": 1.2668197903253694e-05, "loss": 0.5238, "step": 5311 }, { "epoch": 0.43, "grad_norm": 0.9579902652026588, "learning_rate": 1.266566077485492e-05, "loss": 0.5807, "step": 5312 }, { "epoch": 0.43, "grad_norm": 0.9361797772639265, "learning_rate": 1.2663123461728219e-05, "loss": 0.5349, "step": 5313 }, { "epoch": 0.43, "grad_norm": 0.9216298671386612, "learning_rate": 1.2660585964049425e-05, "loss": 0.5433, "step": 5314 }, { "epoch": 0.43, "grad_norm": 0.895439295800641, "learning_rate": 1.2658048281994386e-05, "loss": 0.4908, "step": 5315 }, { "epoch": 0.43, "grad_norm": 0.8042110357612069, "learning_rate": 1.2655510415738954e-05, "loss": 0.459, "step": 5316 }, { "epoch": 0.43, "grad_norm": 0.9505226398868347, "learning_rate": 1.2652972365459008e-05, "loss": 0.5766, "step": 5317 }, { "epoch": 0.43, "grad_norm": 1.0448013742685371, "learning_rate": 1.2650434131330434e-05, "loss": 0.6055, "step": 5318 }, { "epoch": 0.43, "grad_norm": 0.9055457061171901, "learning_rate": 1.2647895713529119e-05, "loss": 0.4885, "step": 5319 }, { "epoch": 0.43, "grad_norm": 0.9932989807705419, "learning_rate": 1.2645357112230983e-05, "loss": 0.5372, "step": 5320 }, { "epoch": 0.43, "grad_norm": 0.8511979663476752, "learning_rate": 1.2642818327611947e-05, "loss": 0.4758, "step": 5321 }, { "epoch": 0.43, "grad_norm": 0.8423055609806502, "learning_rate": 1.2640279359847942e-05, "loss": 0.4833, "step": 5322 }, { "epoch": 0.43, "grad_norm": 0.909840172729127, "learning_rate": 1.2637740209114918e-05, "loss": 0.5762, "step": 5323 }, { "epoch": 0.43, "grad_norm": 0.8666441465222685, "learning_rate": 1.2635200875588843e-05, "loss": 0.5552, "step": 5324 }, { "epoch": 0.43, "grad_norm": 0.8048930938774788, "learning_rate": 1.2632661359445682e-05, "loss": 0.4874, "step": 5325 }, { "epoch": 0.43, "grad_norm": 0.8658373056426494, "learning_rate": 1.2630121660861421e-05, "loss": 0.5751, "step": 5326 }, { "epoch": 0.43, "grad_norm": 0.873960183814025, "learning_rate": 1.2627581780012066e-05, "loss": 0.5404, "step": 5327 }, { "epoch": 0.43, "grad_norm": 0.8694573208813933, "learning_rate": 1.2625041717073623e-05, "loss": 0.5388, "step": 5328 }, { "epoch": 0.43, "grad_norm": 0.8367705185934514, "learning_rate": 1.2622501472222116e-05, "loss": 0.4857, "step": 5329 }, { "epoch": 0.43, "grad_norm": 1.021461286000572, "learning_rate": 1.2619961045633584e-05, "loss": 0.5045, "step": 5330 }, { "epoch": 0.43, "grad_norm": 0.9150106164172199, "learning_rate": 1.2617420437484076e-05, "loss": 0.5622, "step": 5331 }, { "epoch": 0.43, "grad_norm": 0.9566331795578452, "learning_rate": 1.2614879647949652e-05, "loss": 0.5835, "step": 5332 }, { "epoch": 0.43, "grad_norm": 0.860789775611763, "learning_rate": 1.261233867720639e-05, "loss": 0.5326, "step": 5333 }, { "epoch": 0.43, "grad_norm": 0.8448972384896664, "learning_rate": 1.2609797525430374e-05, "loss": 0.4775, "step": 5334 }, { "epoch": 0.43, "grad_norm": 0.847748902308439, "learning_rate": 1.2607256192797702e-05, "loss": 0.5381, "step": 5335 }, { "epoch": 0.43, "grad_norm": 0.8675705368591691, "learning_rate": 1.260471467948449e-05, "loss": 0.4952, "step": 5336 }, { "epoch": 0.43, "grad_norm": 0.8005689733352995, "learning_rate": 1.2602172985666863e-05, "loss": 0.553, "step": 5337 }, { "epoch": 0.43, "grad_norm": 1.074567676112369, "learning_rate": 1.2599631111520956e-05, "loss": 0.4664, "step": 5338 }, { "epoch": 0.43, "grad_norm": 0.8500137656374482, "learning_rate": 1.2597089057222915e-05, "loss": 0.5165, "step": 5339 }, { "epoch": 0.43, "grad_norm": 0.9387099432086206, "learning_rate": 1.2594546822948909e-05, "loss": 0.549, "step": 5340 }, { "epoch": 0.43, "grad_norm": 0.8832865858693016, "learning_rate": 1.259200440887511e-05, "loss": 0.5153, "step": 5341 }, { "epoch": 0.43, "grad_norm": 0.8929926334581008, "learning_rate": 1.2589461815177702e-05, "loss": 0.4826, "step": 5342 }, { "epoch": 0.43, "grad_norm": 1.0259069016436662, "learning_rate": 1.2586919042032889e-05, "loss": 0.5732, "step": 5343 }, { "epoch": 0.43, "grad_norm": 1.0308709360387325, "learning_rate": 1.258437608961688e-05, "loss": 0.555, "step": 5344 }, { "epoch": 0.43, "grad_norm": 0.9248441205920206, "learning_rate": 1.2581832958105902e-05, "loss": 0.5604, "step": 5345 }, { "epoch": 0.43, "grad_norm": 1.0011778449614308, "learning_rate": 1.257928964767619e-05, "loss": 0.5902, "step": 5346 }, { "epoch": 0.43, "grad_norm": 0.9170889642768937, "learning_rate": 1.2576746158503992e-05, "loss": 0.5381, "step": 5347 }, { "epoch": 0.43, "grad_norm": 0.8797931764427254, "learning_rate": 1.257420249076557e-05, "loss": 0.5294, "step": 5348 }, { "epoch": 0.43, "grad_norm": 0.835885065953182, "learning_rate": 1.25716586446372e-05, "loss": 0.4715, "step": 5349 }, { "epoch": 0.43, "grad_norm": 0.8538701496862996, "learning_rate": 1.2569114620295166e-05, "loss": 0.4901, "step": 5350 }, { "epoch": 0.43, "grad_norm": 0.9695748609946032, "learning_rate": 1.2566570417915769e-05, "loss": 0.6038, "step": 5351 }, { "epoch": 0.43, "grad_norm": 0.9125029822928001, "learning_rate": 1.2564026037675317e-05, "loss": 0.5378, "step": 5352 }, { "epoch": 0.44, "grad_norm": 0.8841389114546176, "learning_rate": 1.2561481479750135e-05, "loss": 0.5214, "step": 5353 }, { "epoch": 0.44, "grad_norm": 0.9362021878627022, "learning_rate": 1.2558936744316561e-05, "loss": 0.5825, "step": 5354 }, { "epoch": 0.44, "grad_norm": 0.917462682069863, "learning_rate": 1.2556391831550938e-05, "loss": 0.5607, "step": 5355 }, { "epoch": 0.44, "grad_norm": 0.9527731739824842, "learning_rate": 1.255384674162963e-05, "loss": 0.5483, "step": 5356 }, { "epoch": 0.44, "grad_norm": 0.9007801694808039, "learning_rate": 1.2551301474729008e-05, "loss": 0.5005, "step": 5357 }, { "epoch": 0.44, "grad_norm": 0.8961448090023717, "learning_rate": 1.2548756031025455e-05, "loss": 0.5556, "step": 5358 }, { "epoch": 0.44, "grad_norm": 0.999360309272564, "learning_rate": 1.254621041069537e-05, "loss": 0.5881, "step": 5359 }, { "epoch": 0.44, "grad_norm": 0.8481061045768453, "learning_rate": 1.2543664613915165e-05, "loss": 0.4995, "step": 5360 }, { "epoch": 0.44, "grad_norm": 0.947731835355656, "learning_rate": 1.2541118640861255e-05, "loss": 0.539, "step": 5361 }, { "epoch": 0.44, "grad_norm": 0.8498245240875937, "learning_rate": 1.2538572491710079e-05, "loss": 0.5049, "step": 5362 }, { "epoch": 0.44, "grad_norm": 0.769815234398063, "learning_rate": 1.2536026166638082e-05, "loss": 0.4527, "step": 5363 }, { "epoch": 0.44, "grad_norm": 0.9020547480424845, "learning_rate": 1.2533479665821719e-05, "loss": 0.5664, "step": 5364 }, { "epoch": 0.44, "grad_norm": 0.9948695645265985, "learning_rate": 1.2530932989437463e-05, "loss": 0.5468, "step": 5365 }, { "epoch": 0.44, "grad_norm": 0.8738954752780324, "learning_rate": 1.2528386137661797e-05, "loss": 0.5222, "step": 5366 }, { "epoch": 0.44, "grad_norm": 0.9995522905076301, "learning_rate": 1.2525839110671212e-05, "loss": 0.6052, "step": 5367 }, { "epoch": 0.44, "grad_norm": 1.0371681322258024, "learning_rate": 1.2523291908642219e-05, "loss": 0.5546, "step": 5368 }, { "epoch": 0.44, "grad_norm": 0.8721378928964585, "learning_rate": 1.2520744531751334e-05, "loss": 0.51, "step": 5369 }, { "epoch": 0.44, "grad_norm": 0.8634386973360564, "learning_rate": 1.251819698017509e-05, "loss": 0.5024, "step": 5370 }, { "epoch": 0.44, "grad_norm": 0.9503165783798369, "learning_rate": 1.2515649254090025e-05, "loss": 0.5597, "step": 5371 }, { "epoch": 0.44, "grad_norm": 0.8720813638454793, "learning_rate": 1.2513101353672703e-05, "loss": 0.5894, "step": 5372 }, { "epoch": 0.44, "grad_norm": 0.9695051345654175, "learning_rate": 1.2510553279099684e-05, "loss": 0.5444, "step": 5373 }, { "epoch": 0.44, "grad_norm": 0.8972858005853297, "learning_rate": 1.250800503054755e-05, "loss": 0.556, "step": 5374 }, { "epoch": 0.44, "grad_norm": 0.8021491655858767, "learning_rate": 1.2505456608192889e-05, "loss": 0.5289, "step": 5375 }, { "epoch": 0.44, "grad_norm": 0.9734012631123989, "learning_rate": 1.2502908012212313e-05, "loss": 0.5637, "step": 5376 }, { "epoch": 0.44, "grad_norm": 0.9441552924743251, "learning_rate": 1.2500359242782429e-05, "loss": 0.6188, "step": 5377 }, { "epoch": 0.44, "grad_norm": 0.899529468266094, "learning_rate": 1.2497810300079866e-05, "loss": 0.5009, "step": 5378 }, { "epoch": 0.44, "grad_norm": 0.8728144939932249, "learning_rate": 1.249526118428127e-05, "loss": 0.5664, "step": 5379 }, { "epoch": 0.44, "grad_norm": 0.8267144567594343, "learning_rate": 1.2492711895563281e-05, "loss": 0.5369, "step": 5380 }, { "epoch": 0.44, "grad_norm": 0.873087845840797, "learning_rate": 1.249016243410257e-05, "loss": 0.5279, "step": 5381 }, { "epoch": 0.44, "grad_norm": 0.858248198037963, "learning_rate": 1.2487612800075814e-05, "loss": 0.5452, "step": 5382 }, { "epoch": 0.44, "grad_norm": 0.9861992051149057, "learning_rate": 1.2485062993659696e-05, "loss": 0.5358, "step": 5383 }, { "epoch": 0.44, "grad_norm": 0.8845727809369965, "learning_rate": 1.2482513015030915e-05, "loss": 0.4948, "step": 5384 }, { "epoch": 0.44, "grad_norm": 0.9039360878072947, "learning_rate": 1.2479962864366186e-05, "loss": 0.5673, "step": 5385 }, { "epoch": 0.44, "grad_norm": 0.8615365882769283, "learning_rate": 1.2477412541842231e-05, "loss": 0.5202, "step": 5386 }, { "epoch": 0.44, "grad_norm": 0.7911663726867374, "learning_rate": 1.247486204763578e-05, "loss": 0.4393, "step": 5387 }, { "epoch": 0.44, "grad_norm": 0.9019516455908042, "learning_rate": 1.247231138192359e-05, "loss": 0.5104, "step": 5388 }, { "epoch": 0.44, "grad_norm": 0.9517601590994649, "learning_rate": 1.246976054488241e-05, "loss": 0.5931, "step": 5389 }, { "epoch": 0.44, "grad_norm": 0.8783075827940406, "learning_rate": 1.2467209536689016e-05, "loss": 0.512, "step": 5390 }, { "epoch": 0.44, "grad_norm": 0.9801083493781189, "learning_rate": 1.2464658357520192e-05, "loss": 0.5946, "step": 5391 }, { "epoch": 0.44, "grad_norm": 0.9940969062462093, "learning_rate": 1.2462107007552726e-05, "loss": 0.6072, "step": 5392 }, { "epoch": 0.44, "grad_norm": 0.9240346316696333, "learning_rate": 1.2459555486963431e-05, "loss": 0.5583, "step": 5393 }, { "epoch": 0.44, "grad_norm": 0.9381190189854962, "learning_rate": 1.2457003795929121e-05, "loss": 0.6326, "step": 5394 }, { "epoch": 0.44, "grad_norm": 1.0090297957220815, "learning_rate": 1.2454451934626628e-05, "loss": 0.5168, "step": 5395 }, { "epoch": 0.44, "grad_norm": 0.833215564045747, "learning_rate": 1.2451899903232793e-05, "loss": 0.5483, "step": 5396 }, { "epoch": 0.44, "grad_norm": 0.9284695649603013, "learning_rate": 1.244934770192447e-05, "loss": 0.596, "step": 5397 }, { "epoch": 0.44, "grad_norm": 0.8845855956442529, "learning_rate": 1.2446795330878522e-05, "loss": 0.5107, "step": 5398 }, { "epoch": 0.44, "grad_norm": 1.002581956309397, "learning_rate": 1.244424279027183e-05, "loss": 0.493, "step": 5399 }, { "epoch": 0.44, "grad_norm": 0.8829187422959885, "learning_rate": 1.244169008028128e-05, "loss": 0.4969, "step": 5400 }, { "epoch": 0.44, "grad_norm": 0.9295869401823778, "learning_rate": 1.2439137201083772e-05, "loss": 0.5593, "step": 5401 }, { "epoch": 0.44, "grad_norm": 0.9731943915057911, "learning_rate": 1.243658415285622e-05, "loss": 0.5267, "step": 5402 }, { "epoch": 0.44, "grad_norm": 0.9063916835917486, "learning_rate": 1.243403093577555e-05, "loss": 0.5136, "step": 5403 }, { "epoch": 0.44, "grad_norm": 0.865307935840072, "learning_rate": 1.2431477550018691e-05, "loss": 0.5174, "step": 5404 }, { "epoch": 0.44, "grad_norm": 0.9523780286387016, "learning_rate": 1.2428923995762597e-05, "loss": 0.5624, "step": 5405 }, { "epoch": 0.44, "grad_norm": 0.9867060620553869, "learning_rate": 1.2426370273184226e-05, "loss": 0.5949, "step": 5406 }, { "epoch": 0.44, "grad_norm": 0.9363530752198639, "learning_rate": 1.2423816382460544e-05, "loss": 0.5715, "step": 5407 }, { "epoch": 0.44, "grad_norm": 0.8694286672573258, "learning_rate": 1.2421262323768537e-05, "loss": 0.5034, "step": 5408 }, { "epoch": 0.44, "grad_norm": 0.8299654192646883, "learning_rate": 1.2418708097285202e-05, "loss": 0.4707, "step": 5409 }, { "epoch": 0.44, "grad_norm": 0.9104100825566318, "learning_rate": 1.2416153703187537e-05, "loss": 0.5178, "step": 5410 }, { "epoch": 0.44, "grad_norm": 0.9194277608599328, "learning_rate": 1.2413599141652565e-05, "loss": 0.4901, "step": 5411 }, { "epoch": 0.44, "grad_norm": 0.9041818758818694, "learning_rate": 1.2411044412857317e-05, "loss": 0.54, "step": 5412 }, { "epoch": 0.44, "grad_norm": 1.016449033785623, "learning_rate": 1.2408489516978824e-05, "loss": 0.5814, "step": 5413 }, { "epoch": 0.44, "grad_norm": 0.9805254090857588, "learning_rate": 1.2405934454194146e-05, "loss": 0.5594, "step": 5414 }, { "epoch": 0.44, "grad_norm": 0.9510567274523963, "learning_rate": 1.2403379224680346e-05, "loss": 0.5203, "step": 5415 }, { "epoch": 0.44, "grad_norm": 0.9394727691546123, "learning_rate": 1.2400823828614495e-05, "loss": 0.5802, "step": 5416 }, { "epoch": 0.44, "grad_norm": 0.8411449916095526, "learning_rate": 1.2398268266173683e-05, "loss": 0.4941, "step": 5417 }, { "epoch": 0.44, "grad_norm": 0.975070662691036, "learning_rate": 1.239571253753501e-05, "loss": 0.5484, "step": 5418 }, { "epoch": 0.44, "grad_norm": 0.9109850157859793, "learning_rate": 1.2393156642875579e-05, "loss": 0.4947, "step": 5419 }, { "epoch": 0.44, "grad_norm": 0.8938766973316515, "learning_rate": 1.2390600582372517e-05, "loss": 0.5626, "step": 5420 }, { "epoch": 0.44, "grad_norm": 0.9179313851288792, "learning_rate": 1.2388044356202958e-05, "loss": 0.572, "step": 5421 }, { "epoch": 0.44, "grad_norm": 0.8698575621096661, "learning_rate": 1.2385487964544038e-05, "loss": 0.5427, "step": 5422 }, { "epoch": 0.44, "grad_norm": 0.8987838459695859, "learning_rate": 1.238293140757292e-05, "loss": 0.505, "step": 5423 }, { "epoch": 0.44, "grad_norm": 0.9269681022844656, "learning_rate": 1.2380374685466772e-05, "loss": 0.537, "step": 5424 }, { "epoch": 0.44, "grad_norm": 0.9014675995828588, "learning_rate": 1.2377817798402767e-05, "loss": 0.4887, "step": 5425 }, { "epoch": 0.44, "grad_norm": 0.9120011897073391, "learning_rate": 1.2375260746558098e-05, "loss": 0.5395, "step": 5426 }, { "epoch": 0.44, "grad_norm": 0.934333432226395, "learning_rate": 1.2372703530109967e-05, "loss": 0.5583, "step": 5427 }, { "epoch": 0.44, "grad_norm": 0.990985907641905, "learning_rate": 1.2370146149235585e-05, "loss": 0.5498, "step": 5428 }, { "epoch": 0.44, "grad_norm": 0.9623084732384362, "learning_rate": 1.2367588604112177e-05, "loss": 0.5172, "step": 5429 }, { "epoch": 0.44, "grad_norm": 0.838520536415113, "learning_rate": 1.236503089491698e-05, "loss": 0.5648, "step": 5430 }, { "epoch": 0.44, "grad_norm": 0.8518399557849459, "learning_rate": 1.236247302182724e-05, "loss": 0.5575, "step": 5431 }, { "epoch": 0.44, "grad_norm": 0.8947014247499556, "learning_rate": 1.2359914985020212e-05, "loss": 0.5625, "step": 5432 }, { "epoch": 0.44, "grad_norm": 0.9468389551013499, "learning_rate": 1.2357356784673171e-05, "loss": 0.6043, "step": 5433 }, { "epoch": 0.44, "grad_norm": 0.8053680832226762, "learning_rate": 1.2354798420963396e-05, "loss": 0.4899, "step": 5434 }, { "epoch": 0.44, "grad_norm": 0.9694037917733627, "learning_rate": 1.2352239894068179e-05, "loss": 0.6325, "step": 5435 }, { "epoch": 0.44, "grad_norm": 0.8632305139677751, "learning_rate": 1.2349681204164823e-05, "loss": 0.5214, "step": 5436 }, { "epoch": 0.44, "grad_norm": 0.8563381614510859, "learning_rate": 1.2347122351430645e-05, "loss": 0.528, "step": 5437 }, { "epoch": 0.44, "grad_norm": 0.8623448064496342, "learning_rate": 1.2344563336042967e-05, "loss": 0.5471, "step": 5438 }, { "epoch": 0.44, "grad_norm": 0.8587003201570016, "learning_rate": 1.2342004158179133e-05, "loss": 0.5409, "step": 5439 }, { "epoch": 0.44, "grad_norm": 0.8868847939450765, "learning_rate": 1.2339444818016488e-05, "loss": 0.5818, "step": 5440 }, { "epoch": 0.44, "grad_norm": 0.9258515474662156, "learning_rate": 1.233688531573239e-05, "loss": 0.4862, "step": 5441 }, { "epoch": 0.44, "grad_norm": 0.8677169571698846, "learning_rate": 1.2334325651504214e-05, "loss": 0.5151, "step": 5442 }, { "epoch": 0.44, "grad_norm": 0.870121746459327, "learning_rate": 1.233176582550934e-05, "loss": 0.5004, "step": 5443 }, { "epoch": 0.44, "grad_norm": 0.8453323949166696, "learning_rate": 1.2329205837925162e-05, "loss": 0.4571, "step": 5444 }, { "epoch": 0.44, "grad_norm": 0.8392697669827083, "learning_rate": 1.2326645688929087e-05, "loss": 0.4784, "step": 5445 }, { "epoch": 0.44, "grad_norm": 0.8743550979582336, "learning_rate": 1.2324085378698529e-05, "loss": 0.5231, "step": 5446 }, { "epoch": 0.44, "grad_norm": 0.8375402911183744, "learning_rate": 1.2321524907410916e-05, "loss": 0.5185, "step": 5447 }, { "epoch": 0.44, "grad_norm": 0.90586681912078, "learning_rate": 1.2318964275243683e-05, "loss": 0.5223, "step": 5448 }, { "epoch": 0.44, "grad_norm": 0.9029561290748873, "learning_rate": 1.2316403482374289e-05, "loss": 0.5531, "step": 5449 }, { "epoch": 0.44, "grad_norm": 0.9179206919093462, "learning_rate": 1.2313842528980184e-05, "loss": 0.5461, "step": 5450 }, { "epoch": 0.44, "grad_norm": 0.9248238397243361, "learning_rate": 1.2311281415238842e-05, "loss": 0.5494, "step": 5451 }, { "epoch": 0.44, "grad_norm": 0.8941957605560482, "learning_rate": 1.2308720141327753e-05, "loss": 0.5751, "step": 5452 }, { "epoch": 0.44, "grad_norm": 0.8724227657559777, "learning_rate": 1.2306158707424402e-05, "loss": 0.4947, "step": 5453 }, { "epoch": 0.44, "grad_norm": 0.9910439980193736, "learning_rate": 1.2303597113706301e-05, "loss": 0.5735, "step": 5454 }, { "epoch": 0.44, "grad_norm": 0.764935426291101, "learning_rate": 1.2301035360350964e-05, "loss": 0.4984, "step": 5455 }, { "epoch": 0.44, "grad_norm": 0.9037221761184959, "learning_rate": 1.2298473447535914e-05, "loss": 0.503, "step": 5456 }, { "epoch": 0.44, "grad_norm": 0.8955191193698375, "learning_rate": 1.2295911375438694e-05, "loss": 0.5683, "step": 5457 }, { "epoch": 0.44, "grad_norm": 0.8596766302284218, "learning_rate": 1.2293349144236855e-05, "loss": 0.5369, "step": 5458 }, { "epoch": 0.44, "grad_norm": 0.9915006897890639, "learning_rate": 1.229078675410795e-05, "loss": 0.5778, "step": 5459 }, { "epoch": 0.44, "grad_norm": 0.8705018308535379, "learning_rate": 1.2288224205229557e-05, "loss": 0.4935, "step": 5460 }, { "epoch": 0.44, "grad_norm": 0.8800565338043739, "learning_rate": 1.228566149777926e-05, "loss": 0.5087, "step": 5461 }, { "epoch": 0.44, "grad_norm": 0.9986894910833827, "learning_rate": 1.2283098631934642e-05, "loss": 0.5404, "step": 5462 }, { "epoch": 0.44, "grad_norm": 0.8912962051173339, "learning_rate": 1.2280535607873318e-05, "loss": 0.5032, "step": 5463 }, { "epoch": 0.44, "grad_norm": 0.8038436091058793, "learning_rate": 1.22779724257729e-05, "loss": 0.5227, "step": 5464 }, { "epoch": 0.44, "grad_norm": 0.938614221484701, "learning_rate": 1.227540908581101e-05, "loss": 0.5578, "step": 5465 }, { "epoch": 0.44, "grad_norm": 0.8750121548910289, "learning_rate": 1.227284558816529e-05, "loss": 0.5488, "step": 5466 }, { "epoch": 0.44, "grad_norm": 0.9057698650290454, "learning_rate": 1.2270281933013388e-05, "loss": 0.5608, "step": 5467 }, { "epoch": 0.44, "grad_norm": 0.8328101242823822, "learning_rate": 1.2267718120532958e-05, "loss": 0.5489, "step": 5468 }, { "epoch": 0.44, "grad_norm": 0.9559423602996424, "learning_rate": 1.2265154150901677e-05, "loss": 0.5267, "step": 5469 }, { "epoch": 0.44, "grad_norm": 1.0768815635170421, "learning_rate": 1.2262590024297226e-05, "loss": 0.576, "step": 5470 }, { "epoch": 0.44, "grad_norm": 1.017951933254484, "learning_rate": 1.2260025740897286e-05, "loss": 0.5198, "step": 5471 }, { "epoch": 0.44, "grad_norm": 0.9203291906948043, "learning_rate": 1.225746130087957e-05, "loss": 0.5603, "step": 5472 }, { "epoch": 0.44, "grad_norm": 1.0045960023358238, "learning_rate": 1.2254896704421789e-05, "loss": 0.5984, "step": 5473 }, { "epoch": 0.44, "grad_norm": 0.9114512728903359, "learning_rate": 1.2252331951701665e-05, "loss": 0.5265, "step": 5474 }, { "epoch": 0.44, "grad_norm": 0.8752647547925406, "learning_rate": 1.2249767042896934e-05, "loss": 0.5302, "step": 5475 }, { "epoch": 0.45, "grad_norm": 0.8801933966164635, "learning_rate": 1.2247201978185346e-05, "loss": 0.5267, "step": 5476 }, { "epoch": 0.45, "grad_norm": 0.992872988262125, "learning_rate": 1.224463675774465e-05, "loss": 0.5711, "step": 5477 }, { "epoch": 0.45, "grad_norm": 0.9604426674753421, "learning_rate": 1.224207138175262e-05, "loss": 0.5276, "step": 5478 }, { "epoch": 0.45, "grad_norm": 0.8640035052898114, "learning_rate": 1.2239505850387032e-05, "loss": 0.618, "step": 5479 }, { "epoch": 0.45, "grad_norm": 0.9810432720165322, "learning_rate": 1.2236940163825675e-05, "loss": 0.5615, "step": 5480 }, { "epoch": 0.45, "grad_norm": 0.8961399600071736, "learning_rate": 1.2234374322246348e-05, "loss": 0.5542, "step": 5481 }, { "epoch": 0.45, "grad_norm": 0.879129224347014, "learning_rate": 1.2231808325826862e-05, "loss": 0.548, "step": 5482 }, { "epoch": 0.45, "grad_norm": 0.8514413978104286, "learning_rate": 1.222924217474504e-05, "loss": 0.5043, "step": 5483 }, { "epoch": 0.45, "grad_norm": 1.0162949354532185, "learning_rate": 1.2226675869178713e-05, "loss": 0.6186, "step": 5484 }, { "epoch": 0.45, "grad_norm": 0.9010809840353643, "learning_rate": 1.222410940930572e-05, "loss": 0.4572, "step": 5485 }, { "epoch": 0.45, "grad_norm": 0.9070348230842781, "learning_rate": 1.2221542795303921e-05, "loss": 0.5338, "step": 5486 }, { "epoch": 0.45, "grad_norm": 0.905376480185276, "learning_rate": 1.2218976027351177e-05, "loss": 0.4919, "step": 5487 }, { "epoch": 0.45, "grad_norm": 0.9351701239895585, "learning_rate": 1.221640910562536e-05, "loss": 0.5618, "step": 5488 }, { "epoch": 0.45, "grad_norm": 0.87105543303774, "learning_rate": 1.2213842030304358e-05, "loss": 0.5298, "step": 5489 }, { "epoch": 0.45, "grad_norm": 1.0073370084141626, "learning_rate": 1.221127480156607e-05, "loss": 0.5373, "step": 5490 }, { "epoch": 0.45, "grad_norm": 0.877935805226189, "learning_rate": 1.2208707419588397e-05, "loss": 0.5437, "step": 5491 }, { "epoch": 0.45, "grad_norm": 0.9243438906000147, "learning_rate": 1.220613988454926e-05, "loss": 0.5324, "step": 5492 }, { "epoch": 0.45, "grad_norm": 0.9477932541999349, "learning_rate": 1.2203572196626587e-05, "loss": 0.5556, "step": 5493 }, { "epoch": 0.45, "grad_norm": 0.9184085706505207, "learning_rate": 1.2201004355998312e-05, "loss": 0.5626, "step": 5494 }, { "epoch": 0.45, "grad_norm": 0.8732092279319581, "learning_rate": 1.2198436362842389e-05, "loss": 0.5584, "step": 5495 }, { "epoch": 0.45, "grad_norm": 0.8818231916647266, "learning_rate": 1.2195868217336778e-05, "loss": 0.4974, "step": 5496 }, { "epoch": 0.45, "grad_norm": 0.9253509177877544, "learning_rate": 1.2193299919659444e-05, "loss": 0.507, "step": 5497 }, { "epoch": 0.45, "grad_norm": 0.9976868248539731, "learning_rate": 1.2190731469988372e-05, "loss": 0.6117, "step": 5498 }, { "epoch": 0.45, "grad_norm": 0.8668678379356317, "learning_rate": 1.2188162868501557e-05, "loss": 0.5028, "step": 5499 }, { "epoch": 0.45, "grad_norm": 0.9165726909760753, "learning_rate": 1.2185594115376991e-05, "loss": 0.5579, "step": 5500 }, { "epoch": 0.45, "grad_norm": 0.9154946875925669, "learning_rate": 1.2183025210792692e-05, "loss": 0.5251, "step": 5501 }, { "epoch": 0.45, "grad_norm": 0.8404918378432502, "learning_rate": 1.218045615492668e-05, "loss": 0.5286, "step": 5502 }, { "epoch": 0.45, "grad_norm": 0.8712436361695308, "learning_rate": 1.2177886947956997e-05, "loss": 0.5304, "step": 5503 }, { "epoch": 0.45, "grad_norm": 0.8286927110237194, "learning_rate": 1.2175317590061676e-05, "loss": 0.5094, "step": 5504 }, { "epoch": 0.45, "grad_norm": 0.866565081858643, "learning_rate": 1.2172748081418775e-05, "loss": 0.5399, "step": 5505 }, { "epoch": 0.45, "grad_norm": 0.9016013035316106, "learning_rate": 1.2170178422206362e-05, "loss": 0.5273, "step": 5506 }, { "epoch": 0.45, "grad_norm": 0.8433212719566922, "learning_rate": 1.2167608612602507e-05, "loss": 0.4995, "step": 5507 }, { "epoch": 0.45, "grad_norm": 0.8611469548274269, "learning_rate": 1.2165038652785297e-05, "loss": 0.5434, "step": 5508 }, { "epoch": 0.45, "grad_norm": 0.9387736783283651, "learning_rate": 1.2162468542932832e-05, "loss": 0.5529, "step": 5509 }, { "epoch": 0.45, "grad_norm": 0.7884933758412176, "learning_rate": 1.2159898283223213e-05, "loss": 0.4898, "step": 5510 }, { "epoch": 0.45, "grad_norm": 0.8797535779005713, "learning_rate": 1.2157327873834559e-05, "loss": 0.5357, "step": 5511 }, { "epoch": 0.45, "grad_norm": 0.7845933710931979, "learning_rate": 1.2154757314944997e-05, "loss": 0.4465, "step": 5512 }, { "epoch": 0.45, "grad_norm": 0.8716534801191331, "learning_rate": 1.2152186606732665e-05, "loss": 0.4596, "step": 5513 }, { "epoch": 0.45, "grad_norm": 0.8806006921198266, "learning_rate": 1.2149615749375707e-05, "loss": 0.5238, "step": 5514 }, { "epoch": 0.45, "grad_norm": 0.9519784857876017, "learning_rate": 1.2147044743052288e-05, "loss": 0.5458, "step": 5515 }, { "epoch": 0.45, "grad_norm": 0.8809363718552123, "learning_rate": 1.2144473587940573e-05, "loss": 0.5031, "step": 5516 }, { "epoch": 0.45, "grad_norm": 0.9694116837326615, "learning_rate": 1.2141902284218738e-05, "loss": 0.4856, "step": 5517 }, { "epoch": 0.45, "grad_norm": 1.005998546729219, "learning_rate": 1.2139330832064975e-05, "loss": 0.5647, "step": 5518 }, { "epoch": 0.45, "grad_norm": 0.9363523110506934, "learning_rate": 1.2136759231657485e-05, "loss": 0.514, "step": 5519 }, { "epoch": 0.45, "grad_norm": 0.8834684878319838, "learning_rate": 1.2134187483174474e-05, "loss": 0.5651, "step": 5520 }, { "epoch": 0.45, "grad_norm": 0.9199792993437208, "learning_rate": 1.2131615586794162e-05, "loss": 0.5119, "step": 5521 }, { "epoch": 0.45, "grad_norm": 0.9083253044159368, "learning_rate": 1.2129043542694783e-05, "loss": 0.5347, "step": 5522 }, { "epoch": 0.45, "grad_norm": 0.9079207141814368, "learning_rate": 1.2126471351054574e-05, "loss": 0.5234, "step": 5523 }, { "epoch": 0.45, "grad_norm": 0.8996213537502855, "learning_rate": 1.2123899012051785e-05, "loss": 0.5154, "step": 5524 }, { "epoch": 0.45, "grad_norm": 0.8167759767195947, "learning_rate": 1.212132652586468e-05, "loss": 0.5262, "step": 5525 }, { "epoch": 0.45, "grad_norm": 0.8599043700048763, "learning_rate": 1.211875389267153e-05, "loss": 0.5309, "step": 5526 }, { "epoch": 0.45, "grad_norm": 0.8820657466473959, "learning_rate": 1.211618111265061e-05, "loss": 0.4873, "step": 5527 }, { "epoch": 0.45, "grad_norm": 0.8665427619337003, "learning_rate": 1.2113608185980221e-05, "loss": 0.5374, "step": 5528 }, { "epoch": 0.45, "grad_norm": 0.8908908247445878, "learning_rate": 1.2111035112838657e-05, "loss": 0.4539, "step": 5529 }, { "epoch": 0.45, "grad_norm": 0.8837991806660707, "learning_rate": 1.2108461893404231e-05, "loss": 0.5449, "step": 5530 }, { "epoch": 0.45, "grad_norm": 0.9001160427372054, "learning_rate": 1.210588852785527e-05, "loss": 0.608, "step": 5531 }, { "epoch": 0.45, "grad_norm": 0.9406825613037999, "learning_rate": 1.2103315016370098e-05, "loss": 0.5248, "step": 5532 }, { "epoch": 0.45, "grad_norm": 0.9492003060158064, "learning_rate": 1.2100741359127062e-05, "loss": 0.6211, "step": 5533 }, { "epoch": 0.45, "grad_norm": 0.8165428959386474, "learning_rate": 1.2098167556304514e-05, "loss": 0.5004, "step": 5534 }, { "epoch": 0.45, "grad_norm": 0.9547917538163686, "learning_rate": 1.2095593608080815e-05, "loss": 0.5788, "step": 5535 }, { "epoch": 0.45, "grad_norm": 0.8694750633823777, "learning_rate": 1.2093019514634337e-05, "loss": 0.5144, "step": 5536 }, { "epoch": 0.45, "grad_norm": 0.8783697437957985, "learning_rate": 1.2090445276143466e-05, "loss": 0.4806, "step": 5537 }, { "epoch": 0.45, "grad_norm": 0.8783928877709485, "learning_rate": 1.2087870892786588e-05, "loss": 0.5376, "step": 5538 }, { "epoch": 0.45, "grad_norm": 0.949269064846333, "learning_rate": 1.208529636474211e-05, "loss": 0.5852, "step": 5539 }, { "epoch": 0.45, "grad_norm": 0.9646333868024378, "learning_rate": 1.2082721692188446e-05, "loss": 0.5591, "step": 5540 }, { "epoch": 0.45, "grad_norm": 0.901401626566398, "learning_rate": 1.2080146875304012e-05, "loss": 0.5857, "step": 5541 }, { "epoch": 0.45, "grad_norm": 0.8454219718524448, "learning_rate": 1.2077571914267248e-05, "loss": 0.5336, "step": 5542 }, { "epoch": 0.45, "grad_norm": 0.8808345197786775, "learning_rate": 1.2074996809256594e-05, "loss": 0.5246, "step": 5543 }, { "epoch": 0.45, "grad_norm": 0.8775532655927482, "learning_rate": 1.2072421560450497e-05, "loss": 0.5933, "step": 5544 }, { "epoch": 0.45, "grad_norm": 0.9150474033054725, "learning_rate": 1.2069846168027427e-05, "loss": 0.5064, "step": 5545 }, { "epoch": 0.45, "grad_norm": 0.9070112198834746, "learning_rate": 1.2067270632165856e-05, "loss": 0.5646, "step": 5546 }, { "epoch": 0.45, "grad_norm": 0.8538367633178499, "learning_rate": 1.2064694953044259e-05, "loss": 0.5207, "step": 5547 }, { "epoch": 0.45, "grad_norm": 0.9279054018383129, "learning_rate": 1.2062119130841135e-05, "loss": 0.6147, "step": 5548 }, { "epoch": 0.45, "grad_norm": 1.0040818800527798, "learning_rate": 1.2059543165734986e-05, "loss": 0.6258, "step": 5549 }, { "epoch": 0.45, "grad_norm": 0.9316998246149493, "learning_rate": 1.2056967057904319e-05, "loss": 0.5779, "step": 5550 }, { "epoch": 0.45, "grad_norm": 0.9190557347896983, "learning_rate": 1.2054390807527661e-05, "loss": 0.5837, "step": 5551 }, { "epoch": 0.45, "grad_norm": 0.9606163824300413, "learning_rate": 1.2051814414783544e-05, "loss": 0.501, "step": 5552 }, { "epoch": 0.45, "grad_norm": 0.9640744408170316, "learning_rate": 1.2049237879850506e-05, "loss": 0.535, "step": 5553 }, { "epoch": 0.45, "grad_norm": 0.8788815642734494, "learning_rate": 1.2046661202907101e-05, "loss": 0.4897, "step": 5554 }, { "epoch": 0.45, "grad_norm": 0.7727154304321403, "learning_rate": 1.2044084384131891e-05, "loss": 0.5226, "step": 5555 }, { "epoch": 0.45, "grad_norm": 1.0129556356194376, "learning_rate": 1.2041507423703445e-05, "loss": 0.6603, "step": 5556 }, { "epoch": 0.45, "grad_norm": 0.9331842751781539, "learning_rate": 1.2038930321800346e-05, "loss": 0.5507, "step": 5557 }, { "epoch": 0.45, "grad_norm": 0.9206369409583717, "learning_rate": 1.2036353078601187e-05, "loss": 0.5849, "step": 5558 }, { "epoch": 0.45, "grad_norm": 0.9268985407098875, "learning_rate": 1.2033775694284562e-05, "loss": 0.5296, "step": 5559 }, { "epoch": 0.45, "grad_norm": 0.8851565422818308, "learning_rate": 1.2031198169029084e-05, "loss": 0.5511, "step": 5560 }, { "epoch": 0.45, "grad_norm": 0.7522171063517573, "learning_rate": 1.2028620503013377e-05, "loss": 0.5037, "step": 5561 }, { "epoch": 0.45, "grad_norm": 0.9237884426494588, "learning_rate": 1.2026042696416069e-05, "loss": 0.4642, "step": 5562 }, { "epoch": 0.45, "grad_norm": 0.8747303264360102, "learning_rate": 1.20234647494158e-05, "loss": 0.5071, "step": 5563 }, { "epoch": 0.45, "grad_norm": 0.9304691768041673, "learning_rate": 1.2020886662191216e-05, "loss": 0.5432, "step": 5564 }, { "epoch": 0.45, "grad_norm": 0.983912008294001, "learning_rate": 1.2018308434920983e-05, "loss": 0.628, "step": 5565 }, { "epoch": 0.45, "grad_norm": 0.908796074695498, "learning_rate": 1.201573006778376e-05, "loss": 0.5621, "step": 5566 }, { "epoch": 0.45, "grad_norm": 0.9866490956856787, "learning_rate": 1.2013151560958233e-05, "loss": 0.5905, "step": 5567 }, { "epoch": 0.45, "grad_norm": 0.958082254827949, "learning_rate": 1.2010572914623091e-05, "loss": 0.5013, "step": 5568 }, { "epoch": 0.45, "grad_norm": 0.9013354679959612, "learning_rate": 1.2007994128957029e-05, "loss": 0.4822, "step": 5569 }, { "epoch": 0.45, "grad_norm": 0.8861126057246507, "learning_rate": 1.2005415204138753e-05, "loss": 0.5551, "step": 5570 }, { "epoch": 0.45, "grad_norm": 0.8977421349365613, "learning_rate": 1.2002836140346984e-05, "loss": 0.5817, "step": 5571 }, { "epoch": 0.45, "grad_norm": 0.9163167902421183, "learning_rate": 1.2000256937760446e-05, "loss": 0.5701, "step": 5572 }, { "epoch": 0.45, "grad_norm": 0.8061350723006823, "learning_rate": 1.1997677596557875e-05, "loss": 0.4702, "step": 5573 }, { "epoch": 0.45, "grad_norm": 0.8525210977763026, "learning_rate": 1.1995098116918022e-05, "loss": 0.4862, "step": 5574 }, { "epoch": 0.45, "grad_norm": 0.9739495505934435, "learning_rate": 1.1992518499019637e-05, "loss": 0.5159, "step": 5575 }, { "epoch": 0.45, "grad_norm": 0.8864361775922159, "learning_rate": 1.1989938743041487e-05, "loss": 0.5461, "step": 5576 }, { "epoch": 0.45, "grad_norm": 1.004216155887953, "learning_rate": 1.1987358849162349e-05, "loss": 0.5517, "step": 5577 }, { "epoch": 0.45, "grad_norm": 0.9796249965818342, "learning_rate": 1.1984778817561002e-05, "loss": 0.5026, "step": 5578 }, { "epoch": 0.45, "grad_norm": 0.8592763662836835, "learning_rate": 1.1982198648416245e-05, "loss": 0.5431, "step": 5579 }, { "epoch": 0.45, "grad_norm": 0.8886989571121534, "learning_rate": 1.1979618341906884e-05, "loss": 0.5394, "step": 5580 }, { "epoch": 0.45, "grad_norm": 0.9863795713903801, "learning_rate": 1.1977037898211723e-05, "loss": 0.5857, "step": 5581 }, { "epoch": 0.45, "grad_norm": 0.8654981222560337, "learning_rate": 1.1974457317509591e-05, "loss": 0.5435, "step": 5582 }, { "epoch": 0.45, "grad_norm": 0.8571963875092631, "learning_rate": 1.197187659997932e-05, "loss": 0.5196, "step": 5583 }, { "epoch": 0.45, "grad_norm": 0.9245798277364339, "learning_rate": 1.1969295745799746e-05, "loss": 0.548, "step": 5584 }, { "epoch": 0.45, "grad_norm": 0.8977120983327718, "learning_rate": 1.1966714755149724e-05, "loss": 0.5561, "step": 5585 }, { "epoch": 0.45, "grad_norm": 0.911986785371461, "learning_rate": 1.1964133628208116e-05, "loss": 0.5717, "step": 5586 }, { "epoch": 0.45, "grad_norm": 0.8219590372461145, "learning_rate": 1.196155236515379e-05, "loss": 0.4861, "step": 5587 }, { "epoch": 0.45, "grad_norm": 0.8681933932768588, "learning_rate": 1.1958970966165622e-05, "loss": 0.512, "step": 5588 }, { "epoch": 0.45, "grad_norm": 1.0060914497630464, "learning_rate": 1.1956389431422508e-05, "loss": 0.6282, "step": 5589 }, { "epoch": 0.45, "grad_norm": 1.028934286984717, "learning_rate": 1.1953807761103338e-05, "loss": 0.5839, "step": 5590 }, { "epoch": 0.45, "grad_norm": 0.8952935651557266, "learning_rate": 1.1951225955387025e-05, "loss": 0.5875, "step": 5591 }, { "epoch": 0.45, "grad_norm": 1.1605839344131503, "learning_rate": 1.1948644014452484e-05, "loss": 0.538, "step": 5592 }, { "epoch": 0.45, "grad_norm": 0.9642560605768177, "learning_rate": 1.1946061938478638e-05, "loss": 0.5382, "step": 5593 }, { "epoch": 0.45, "grad_norm": 0.9704106218133949, "learning_rate": 1.1943479727644429e-05, "loss": 0.5499, "step": 5594 }, { "epoch": 0.45, "grad_norm": 0.940747211343841, "learning_rate": 1.19408973821288e-05, "loss": 0.5168, "step": 5595 }, { "epoch": 0.45, "grad_norm": 0.9471035641500342, "learning_rate": 1.1938314902110701e-05, "loss": 0.5431, "step": 5596 }, { "epoch": 0.45, "grad_norm": 0.8100368901211942, "learning_rate": 1.1935732287769099e-05, "loss": 0.5276, "step": 5597 }, { "epoch": 0.45, "grad_norm": 0.9216492266864472, "learning_rate": 1.193314953928297e-05, "loss": 0.5553, "step": 5598 }, { "epoch": 0.46, "grad_norm": 0.9298432323689065, "learning_rate": 1.1930566656831288e-05, "loss": 0.5584, "step": 5599 }, { "epoch": 0.46, "grad_norm": 0.9101378356734033, "learning_rate": 1.1927983640593053e-05, "loss": 0.4693, "step": 5600 }, { "epoch": 0.46, "grad_norm": 0.854826184502737, "learning_rate": 1.192540049074726e-05, "loss": 0.5558, "step": 5601 }, { "epoch": 0.46, "grad_norm": 1.029647115535759, "learning_rate": 1.1922817207472921e-05, "loss": 0.5951, "step": 5602 }, { "epoch": 0.46, "grad_norm": 0.8843012858921222, "learning_rate": 1.1920233790949051e-05, "loss": 0.5116, "step": 5603 }, { "epoch": 0.46, "grad_norm": 0.8733868563514845, "learning_rate": 1.191765024135469e-05, "loss": 0.5402, "step": 5604 }, { "epoch": 0.46, "grad_norm": 0.8751251911535606, "learning_rate": 1.1915066558868865e-05, "loss": 0.4786, "step": 5605 }, { "epoch": 0.46, "grad_norm": 0.8603668761491104, "learning_rate": 1.1912482743670624e-05, "loss": 0.4809, "step": 5606 }, { "epoch": 0.46, "grad_norm": 0.8454520161691437, "learning_rate": 1.1909898795939028e-05, "loss": 0.5249, "step": 5607 }, { "epoch": 0.46, "grad_norm": 0.9231626415447299, "learning_rate": 1.1907314715853138e-05, "loss": 0.4975, "step": 5608 }, { "epoch": 0.46, "grad_norm": 0.9305487251057392, "learning_rate": 1.190473050359203e-05, "loss": 0.5702, "step": 5609 }, { "epoch": 0.46, "grad_norm": 0.9161739295244067, "learning_rate": 1.1902146159334788e-05, "loss": 0.5287, "step": 5610 }, { "epoch": 0.46, "grad_norm": 0.9175136570243649, "learning_rate": 1.1899561683260506e-05, "loss": 0.5298, "step": 5611 }, { "epoch": 0.46, "grad_norm": 0.9511351394030586, "learning_rate": 1.189697707554828e-05, "loss": 0.4931, "step": 5612 }, { "epoch": 0.46, "grad_norm": 0.9041144006702436, "learning_rate": 1.189439233637723e-05, "loss": 0.5236, "step": 5613 }, { "epoch": 0.46, "grad_norm": 0.9583396369583367, "learning_rate": 1.1891807465926467e-05, "loss": 0.5616, "step": 5614 }, { "epoch": 0.46, "grad_norm": 0.8930178278090726, "learning_rate": 1.1889222464375127e-05, "loss": 0.5447, "step": 5615 }, { "epoch": 0.46, "grad_norm": 0.8980531194959175, "learning_rate": 1.1886637331902349e-05, "loss": 0.4901, "step": 5616 }, { "epoch": 0.46, "grad_norm": 0.8984989642229662, "learning_rate": 1.1884052068687273e-05, "loss": 0.5205, "step": 5617 }, { "epoch": 0.46, "grad_norm": 0.9111022679999827, "learning_rate": 1.188146667490906e-05, "loss": 0.5738, "step": 5618 }, { "epoch": 0.46, "grad_norm": 0.9693879175100667, "learning_rate": 1.1878881150746878e-05, "loss": 0.5605, "step": 5619 }, { "epoch": 0.46, "grad_norm": 0.8351788339485661, "learning_rate": 1.1876295496379894e-05, "loss": 0.5249, "step": 5620 }, { "epoch": 0.46, "grad_norm": 0.8617744689704865, "learning_rate": 1.18737097119873e-05, "loss": 0.5436, "step": 5621 }, { "epoch": 0.46, "grad_norm": 0.8774024363332183, "learning_rate": 1.1871123797748285e-05, "loss": 0.5631, "step": 5622 }, { "epoch": 0.46, "grad_norm": 0.8808367572990808, "learning_rate": 1.1868537753842052e-05, "loss": 0.515, "step": 5623 }, { "epoch": 0.46, "grad_norm": 0.9095122786929855, "learning_rate": 1.1865951580447805e-05, "loss": 0.5841, "step": 5624 }, { "epoch": 0.46, "grad_norm": 0.8600399569527147, "learning_rate": 1.1863365277744771e-05, "loss": 0.5722, "step": 5625 }, { "epoch": 0.46, "grad_norm": 1.0620943004279169, "learning_rate": 1.1860778845912177e-05, "loss": 0.6108, "step": 5626 }, { "epoch": 0.46, "grad_norm": 0.9001555709351374, "learning_rate": 1.185819228512926e-05, "loss": 0.5784, "step": 5627 }, { "epoch": 0.46, "grad_norm": 0.8901095149384635, "learning_rate": 1.1855605595575263e-05, "loss": 0.5315, "step": 5628 }, { "epoch": 0.46, "grad_norm": 0.8751412997045999, "learning_rate": 1.1853018777429449e-05, "loss": 0.5046, "step": 5629 }, { "epoch": 0.46, "grad_norm": 0.9685659253082625, "learning_rate": 1.1850431830871075e-05, "loss": 0.6441, "step": 5630 }, { "epoch": 0.46, "grad_norm": 0.8627187869655145, "learning_rate": 1.1847844756079414e-05, "loss": 0.4905, "step": 5631 }, { "epoch": 0.46, "grad_norm": 0.9491405505682985, "learning_rate": 1.1845257553233753e-05, "loss": 0.4991, "step": 5632 }, { "epoch": 0.46, "grad_norm": 0.8990900153819141, "learning_rate": 1.1842670222513379e-05, "loss": 0.5544, "step": 5633 }, { "epoch": 0.46, "grad_norm": 0.8231690363328513, "learning_rate": 1.1840082764097593e-05, "loss": 0.5093, "step": 5634 }, { "epoch": 0.46, "grad_norm": 0.8340209505703706, "learning_rate": 1.1837495178165706e-05, "loss": 0.5191, "step": 5635 }, { "epoch": 0.46, "grad_norm": 0.8824202838144878, "learning_rate": 1.183490746489703e-05, "loss": 0.5046, "step": 5636 }, { "epoch": 0.46, "grad_norm": 0.9022132591342349, "learning_rate": 1.1832319624470895e-05, "loss": 0.5241, "step": 5637 }, { "epoch": 0.46, "grad_norm": 0.8615562704331575, "learning_rate": 1.1829731657066638e-05, "loss": 0.5016, "step": 5638 }, { "epoch": 0.46, "grad_norm": 0.8257423829771974, "learning_rate": 1.1827143562863597e-05, "loss": 0.526, "step": 5639 }, { "epoch": 0.46, "grad_norm": 0.9633143307811811, "learning_rate": 1.1824555342041129e-05, "loss": 0.5566, "step": 5640 }, { "epoch": 0.46, "grad_norm": 0.8663038774802817, "learning_rate": 1.1821966994778594e-05, "loss": 0.4996, "step": 5641 }, { "epoch": 0.46, "grad_norm": 0.7574394729770879, "learning_rate": 1.1819378521255362e-05, "loss": 0.4978, "step": 5642 }, { "epoch": 0.46, "grad_norm": 0.874450535779544, "learning_rate": 1.181678992165081e-05, "loss": 0.5369, "step": 5643 }, { "epoch": 0.46, "grad_norm": 0.8685484962413709, "learning_rate": 1.1814201196144332e-05, "loss": 0.5223, "step": 5644 }, { "epoch": 0.46, "grad_norm": 0.9004285750051082, "learning_rate": 1.181161234491532e-05, "loss": 0.5838, "step": 5645 }, { "epoch": 0.46, "grad_norm": 0.8438898141326269, "learning_rate": 1.1809023368143178e-05, "loss": 0.5334, "step": 5646 }, { "epoch": 0.46, "grad_norm": 0.9248851968313923, "learning_rate": 1.180643426600732e-05, "loss": 0.5461, "step": 5647 }, { "epoch": 0.46, "grad_norm": 0.937478179130208, "learning_rate": 1.1803845038687171e-05, "loss": 0.5556, "step": 5648 }, { "epoch": 0.46, "grad_norm": 0.8211910043207973, "learning_rate": 1.1801255686362161e-05, "loss": 0.4998, "step": 5649 }, { "epoch": 0.46, "grad_norm": 0.9253745602627443, "learning_rate": 1.1798666209211729e-05, "loss": 0.5692, "step": 5650 }, { "epoch": 0.46, "grad_norm": 0.8931107878011122, "learning_rate": 1.1796076607415324e-05, "loss": 0.5769, "step": 5651 }, { "epoch": 0.46, "grad_norm": 1.032722335210612, "learning_rate": 1.1793486881152405e-05, "loss": 0.6256, "step": 5652 }, { "epoch": 0.46, "grad_norm": 0.8894699649744721, "learning_rate": 1.1790897030602436e-05, "loss": 0.461, "step": 5653 }, { "epoch": 0.46, "grad_norm": 0.864199629548567, "learning_rate": 1.1788307055944887e-05, "loss": 0.4806, "step": 5654 }, { "epoch": 0.46, "grad_norm": 0.9561792929043047, "learning_rate": 1.178571695735925e-05, "loss": 0.527, "step": 5655 }, { "epoch": 0.46, "grad_norm": 1.0988366775659597, "learning_rate": 1.178312673502501e-05, "loss": 0.5619, "step": 5656 }, { "epoch": 0.46, "grad_norm": 0.809626546189783, "learning_rate": 1.1780536389121668e-05, "loss": 0.505, "step": 5657 }, { "epoch": 0.46, "grad_norm": 0.8827486792369572, "learning_rate": 1.1777945919828735e-05, "loss": 0.4705, "step": 5658 }, { "epoch": 0.46, "grad_norm": 0.9744348262600504, "learning_rate": 1.1775355327325726e-05, "loss": 0.5681, "step": 5659 }, { "epoch": 0.46, "grad_norm": 0.9503573694547869, "learning_rate": 1.1772764611792167e-05, "loss": 0.588, "step": 5660 }, { "epoch": 0.46, "grad_norm": 0.855926523059355, "learning_rate": 1.1770173773407594e-05, "loss": 0.5295, "step": 5661 }, { "epoch": 0.46, "grad_norm": 0.8577754564930304, "learning_rate": 1.176758281235155e-05, "loss": 0.5658, "step": 5662 }, { "epoch": 0.46, "grad_norm": 0.8640342306312921, "learning_rate": 1.1764991728803582e-05, "loss": 0.4698, "step": 5663 }, { "epoch": 0.46, "grad_norm": 0.8347933853968565, "learning_rate": 1.1762400522943254e-05, "loss": 0.5503, "step": 5664 }, { "epoch": 0.46, "grad_norm": 0.9123725024563369, "learning_rate": 1.1759809194950134e-05, "loss": 0.5921, "step": 5665 }, { "epoch": 0.46, "grad_norm": 0.8766709410074994, "learning_rate": 1.1757217745003797e-05, "loss": 0.5239, "step": 5666 }, { "epoch": 0.46, "grad_norm": 1.0059369165290584, "learning_rate": 1.1754626173283827e-05, "loss": 0.5633, "step": 5667 }, { "epoch": 0.46, "grad_norm": 0.8504046043846838, "learning_rate": 1.1752034479969822e-05, "loss": 0.5828, "step": 5668 }, { "epoch": 0.46, "grad_norm": 0.9058726927790431, "learning_rate": 1.1749442665241382e-05, "loss": 0.5252, "step": 5669 }, { "epoch": 0.46, "grad_norm": 0.862169907036434, "learning_rate": 1.1746850729278114e-05, "loss": 0.5506, "step": 5670 }, { "epoch": 0.46, "grad_norm": 0.9637561441997063, "learning_rate": 1.1744258672259642e-05, "loss": 0.5342, "step": 5671 }, { "epoch": 0.46, "grad_norm": 1.1554538855075298, "learning_rate": 1.174166649436559e-05, "loss": 0.602, "step": 5672 }, { "epoch": 0.46, "grad_norm": 1.1545314272880411, "learning_rate": 1.1739074195775597e-05, "loss": 0.6155, "step": 5673 }, { "epoch": 0.46, "grad_norm": 0.8747730766584026, "learning_rate": 1.1736481776669307e-05, "loss": 0.5385, "step": 5674 }, { "epoch": 0.46, "grad_norm": 0.8670802659680884, "learning_rate": 1.1733889237226364e-05, "loss": 0.5082, "step": 5675 }, { "epoch": 0.46, "grad_norm": 0.8223266345411775, "learning_rate": 1.1731296577626437e-05, "loss": 0.5325, "step": 5676 }, { "epoch": 0.46, "grad_norm": 0.872649722870166, "learning_rate": 1.1728703798049194e-05, "loss": 0.5906, "step": 5677 }, { "epoch": 0.46, "grad_norm": 0.8882721443314949, "learning_rate": 1.172611089867431e-05, "loss": 0.6095, "step": 5678 }, { "epoch": 0.46, "grad_norm": 0.8946066834933611, "learning_rate": 1.1723517879681472e-05, "loss": 0.5391, "step": 5679 }, { "epoch": 0.46, "grad_norm": 0.9320314319421376, "learning_rate": 1.1720924741250378e-05, "loss": 0.5406, "step": 5680 }, { "epoch": 0.46, "grad_norm": 0.8376780591969047, "learning_rate": 1.1718331483560719e-05, "loss": 0.5445, "step": 5681 }, { "epoch": 0.46, "grad_norm": 1.0277180139899027, "learning_rate": 1.1715738106792214e-05, "loss": 0.5892, "step": 5682 }, { "epoch": 0.46, "grad_norm": 1.0513463702008126, "learning_rate": 1.1713144611124583e-05, "loss": 0.573, "step": 5683 }, { "epoch": 0.46, "grad_norm": 0.9765226894666977, "learning_rate": 1.1710550996737548e-05, "loss": 0.5579, "step": 5684 }, { "epoch": 0.46, "grad_norm": 0.860387461075728, "learning_rate": 1.1707957263810845e-05, "loss": 0.5598, "step": 5685 }, { "epoch": 0.46, "grad_norm": 1.0143344858243026, "learning_rate": 1.170536341252422e-05, "loss": 0.579, "step": 5686 }, { "epoch": 0.46, "grad_norm": 0.8562956547995043, "learning_rate": 1.1702769443057425e-05, "loss": 0.4799, "step": 5687 }, { "epoch": 0.46, "grad_norm": 0.8056493289664926, "learning_rate": 1.1700175355590215e-05, "loss": 0.5143, "step": 5688 }, { "epoch": 0.46, "grad_norm": 0.9927643119593819, "learning_rate": 1.1697581150302362e-05, "loss": 0.5658, "step": 5689 }, { "epoch": 0.46, "grad_norm": 0.9375696945093996, "learning_rate": 1.1694986827373642e-05, "loss": 0.5483, "step": 5690 }, { "epoch": 0.46, "grad_norm": 0.9725306054037465, "learning_rate": 1.1692392386983837e-05, "loss": 0.59, "step": 5691 }, { "epoch": 0.46, "grad_norm": 0.9917244244980913, "learning_rate": 1.1689797829312741e-05, "loss": 0.5406, "step": 5692 }, { "epoch": 0.46, "grad_norm": 0.9484530592426267, "learning_rate": 1.1687203154540154e-05, "loss": 0.5242, "step": 5693 }, { "epoch": 0.46, "grad_norm": 0.9394009906033737, "learning_rate": 1.1684608362845886e-05, "loss": 0.5825, "step": 5694 }, { "epoch": 0.46, "grad_norm": 0.8359560568490447, "learning_rate": 1.168201345440975e-05, "loss": 0.4697, "step": 5695 }, { "epoch": 0.46, "grad_norm": 0.8926529091421872, "learning_rate": 1.1679418429411577e-05, "loss": 0.5752, "step": 5696 }, { "epoch": 0.46, "grad_norm": 0.8486560830613838, "learning_rate": 1.1676823288031197e-05, "loss": 0.5451, "step": 5697 }, { "epoch": 0.46, "grad_norm": 0.8495140880211136, "learning_rate": 1.1674228030448447e-05, "loss": 0.5324, "step": 5698 }, { "epoch": 0.46, "grad_norm": 1.0059261820258834, "learning_rate": 1.167163265684318e-05, "loss": 0.6143, "step": 5699 }, { "epoch": 0.46, "grad_norm": 0.9051695413093083, "learning_rate": 1.1669037167395256e-05, "loss": 0.6137, "step": 5700 }, { "epoch": 0.46, "grad_norm": 0.988907191699513, "learning_rate": 1.1666441562284534e-05, "loss": 0.5691, "step": 5701 }, { "epoch": 0.46, "grad_norm": 0.8473370068987643, "learning_rate": 1.166384584169089e-05, "loss": 0.4898, "step": 5702 }, { "epoch": 0.46, "grad_norm": 1.0623212023511857, "learning_rate": 1.1661250005794206e-05, "loss": 0.5344, "step": 5703 }, { "epoch": 0.46, "grad_norm": 0.860164057627655, "learning_rate": 1.1658654054774368e-05, "loss": 0.478, "step": 5704 }, { "epoch": 0.46, "grad_norm": 0.97579313832456, "learning_rate": 1.1656057988811278e-05, "loss": 0.5864, "step": 5705 }, { "epoch": 0.46, "grad_norm": 0.9823768671863748, "learning_rate": 1.1653461808084839e-05, "loss": 0.5735, "step": 5706 }, { "epoch": 0.46, "grad_norm": 0.9273988931839644, "learning_rate": 1.1650865512774959e-05, "loss": 0.4811, "step": 5707 }, { "epoch": 0.46, "grad_norm": 0.955658161290628, "learning_rate": 1.1648269103061567e-05, "loss": 0.5759, "step": 5708 }, { "epoch": 0.46, "grad_norm": 0.9437176531862602, "learning_rate": 1.1645672579124586e-05, "loss": 0.6016, "step": 5709 }, { "epoch": 0.46, "grad_norm": 0.8760405278095192, "learning_rate": 1.1643075941143956e-05, "loss": 0.5191, "step": 5710 }, { "epoch": 0.46, "grad_norm": 0.8942711976734233, "learning_rate": 1.164047918929962e-05, "loss": 0.5556, "step": 5711 }, { "epoch": 0.46, "grad_norm": 0.902951537057684, "learning_rate": 1.1637882323771532e-05, "loss": 0.5103, "step": 5712 }, { "epoch": 0.46, "grad_norm": 0.8573316626768663, "learning_rate": 1.163528534473965e-05, "loss": 0.5217, "step": 5713 }, { "epoch": 0.46, "grad_norm": 1.0027552259507533, "learning_rate": 1.1632688252383948e-05, "loss": 0.5788, "step": 5714 }, { "epoch": 0.46, "grad_norm": 0.8367820712338532, "learning_rate": 1.1630091046884394e-05, "loss": 0.5012, "step": 5715 }, { "epoch": 0.46, "grad_norm": 0.8006736737941973, "learning_rate": 1.1627493728420978e-05, "loss": 0.4689, "step": 5716 }, { "epoch": 0.46, "grad_norm": 0.9051130579150752, "learning_rate": 1.1624896297173693e-05, "loss": 0.6383, "step": 5717 }, { "epoch": 0.46, "grad_norm": 0.8939363733073373, "learning_rate": 1.1622298753322531e-05, "loss": 0.5842, "step": 5718 }, { "epoch": 0.46, "grad_norm": 0.9497539058849697, "learning_rate": 1.1619701097047507e-05, "loss": 0.4973, "step": 5719 }, { "epoch": 0.46, "grad_norm": 0.8778289148182196, "learning_rate": 1.1617103328528634e-05, "loss": 0.6009, "step": 5720 }, { "epoch": 0.46, "grad_norm": 0.8498858417717557, "learning_rate": 1.1614505447945935e-05, "loss": 0.4794, "step": 5721 }, { "epoch": 0.47, "grad_norm": 0.891479859871484, "learning_rate": 1.1611907455479439e-05, "loss": 0.5115, "step": 5722 }, { "epoch": 0.47, "grad_norm": 0.8737329204498719, "learning_rate": 1.1609309351309185e-05, "loss": 0.5272, "step": 5723 }, { "epoch": 0.47, "grad_norm": 0.8746927578986288, "learning_rate": 1.1606711135615223e-05, "loss": 0.5308, "step": 5724 }, { "epoch": 0.47, "grad_norm": 0.8336535581674581, "learning_rate": 1.1604112808577603e-05, "loss": 0.5128, "step": 5725 }, { "epoch": 0.47, "grad_norm": 0.8336680782694602, "learning_rate": 1.160151437037639e-05, "loss": 0.5382, "step": 5726 }, { "epoch": 0.47, "grad_norm": 1.0068426087236186, "learning_rate": 1.159891582119165e-05, "loss": 0.5948, "step": 5727 }, { "epoch": 0.47, "grad_norm": 0.8861976744403234, "learning_rate": 1.159631716120346e-05, "loss": 0.5187, "step": 5728 }, { "epoch": 0.47, "grad_norm": 0.8648814082644428, "learning_rate": 1.1593718390591913e-05, "loss": 0.535, "step": 5729 }, { "epoch": 0.47, "grad_norm": 0.8212720183780278, "learning_rate": 1.159111950953709e-05, "loss": 0.4522, "step": 5730 }, { "epoch": 0.47, "grad_norm": 0.8500083762981806, "learning_rate": 1.1588520518219095e-05, "loss": 0.5048, "step": 5731 }, { "epoch": 0.47, "grad_norm": 0.9552823604332356, "learning_rate": 1.1585921416818042e-05, "loss": 0.5432, "step": 5732 }, { "epoch": 0.47, "grad_norm": 0.9879107801336218, "learning_rate": 1.1583322205514039e-05, "loss": 0.5447, "step": 5733 }, { "epoch": 0.47, "grad_norm": 0.9424763879235039, "learning_rate": 1.158072288448721e-05, "loss": 0.5644, "step": 5734 }, { "epoch": 0.47, "grad_norm": 0.9149517629325111, "learning_rate": 1.1578123453917692e-05, "loss": 0.5157, "step": 5735 }, { "epoch": 0.47, "grad_norm": 0.8057691249709181, "learning_rate": 1.1575523913985614e-05, "loss": 0.5248, "step": 5736 }, { "epoch": 0.47, "grad_norm": 0.8919661942830389, "learning_rate": 1.1572924264871126e-05, "loss": 0.4596, "step": 5737 }, { "epoch": 0.47, "grad_norm": 0.9432369794674959, "learning_rate": 1.1570324506754385e-05, "loss": 0.5393, "step": 5738 }, { "epoch": 0.47, "grad_norm": 0.9501011722952974, "learning_rate": 1.1567724639815546e-05, "loss": 0.597, "step": 5739 }, { "epoch": 0.47, "grad_norm": 1.0192209841846716, "learning_rate": 1.156512466423478e-05, "loss": 0.5543, "step": 5740 }, { "epoch": 0.47, "grad_norm": 0.9118793885972419, "learning_rate": 1.1562524580192265e-05, "loss": 0.5442, "step": 5741 }, { "epoch": 0.47, "grad_norm": 0.8681686671544112, "learning_rate": 1.155992438786818e-05, "loss": 0.5597, "step": 5742 }, { "epoch": 0.47, "grad_norm": 0.8270316553924332, "learning_rate": 1.1557324087442719e-05, "loss": 0.5338, "step": 5743 }, { "epoch": 0.47, "grad_norm": 0.8938029173929268, "learning_rate": 1.1554723679096083e-05, "loss": 0.5791, "step": 5744 }, { "epoch": 0.47, "grad_norm": 1.0399905111718846, "learning_rate": 1.155212316300847e-05, "loss": 0.5945, "step": 5745 }, { "epoch": 0.47, "grad_norm": 0.8980299501605497, "learning_rate": 1.1549522539360103e-05, "loss": 0.5261, "step": 5746 }, { "epoch": 0.47, "grad_norm": 0.9107191322138124, "learning_rate": 1.1546921808331196e-05, "loss": 0.5396, "step": 5747 }, { "epoch": 0.47, "grad_norm": 0.8837694344631738, "learning_rate": 1.1544320970101981e-05, "loss": 0.5647, "step": 5748 }, { "epoch": 0.47, "grad_norm": 1.1697362226170598, "learning_rate": 1.1541720024852692e-05, "loss": 0.5716, "step": 5749 }, { "epoch": 0.47, "grad_norm": 0.9325464531274007, "learning_rate": 1.1539118972763572e-05, "loss": 0.549, "step": 5750 }, { "epoch": 0.47, "grad_norm": 0.910145331273149, "learning_rate": 1.1536517814014876e-05, "loss": 0.5646, "step": 5751 }, { "epoch": 0.47, "grad_norm": 0.9314768239082533, "learning_rate": 1.1533916548786856e-05, "loss": 0.5403, "step": 5752 }, { "epoch": 0.47, "grad_norm": 0.8545528383721573, "learning_rate": 1.153131517725978e-05, "loss": 0.5583, "step": 5753 }, { "epoch": 0.47, "grad_norm": 0.9390530857052009, "learning_rate": 1.1528713699613921e-05, "loss": 0.5674, "step": 5754 }, { "epoch": 0.47, "grad_norm": 0.9354357281115419, "learning_rate": 1.1526112116029555e-05, "loss": 0.5262, "step": 5755 }, { "epoch": 0.47, "grad_norm": 0.8490438361271319, "learning_rate": 1.1523510426686977e-05, "loss": 0.53, "step": 5756 }, { "epoch": 0.47, "grad_norm": 0.8766802437406609, "learning_rate": 1.1520908631766476e-05, "loss": 0.6403, "step": 5757 }, { "epoch": 0.47, "grad_norm": 0.8991187659471015, "learning_rate": 1.1518306731448357e-05, "loss": 0.5231, "step": 5758 }, { "epoch": 0.47, "grad_norm": 0.9545109160032564, "learning_rate": 1.1515704725912926e-05, "loss": 0.5326, "step": 5759 }, { "epoch": 0.47, "grad_norm": 1.010803460178925, "learning_rate": 1.1513102615340505e-05, "loss": 0.5481, "step": 5760 }, { "epoch": 0.47, "grad_norm": 0.9448566756878536, "learning_rate": 1.1510500399911413e-05, "loss": 0.53, "step": 5761 }, { "epoch": 0.47, "grad_norm": 0.889478480629088, "learning_rate": 1.1507898079805984e-05, "loss": 0.5635, "step": 5762 }, { "epoch": 0.47, "grad_norm": 0.7889905138028853, "learning_rate": 1.1505295655204557e-05, "loss": 0.5205, "step": 5763 }, { "epoch": 0.47, "grad_norm": 0.9482501001206173, "learning_rate": 1.1502693126287473e-05, "loss": 0.5474, "step": 5764 }, { "epoch": 0.47, "grad_norm": 0.8962124749836796, "learning_rate": 1.1500090493235088e-05, "loss": 0.5721, "step": 5765 }, { "epoch": 0.47, "grad_norm": 0.9142381851003678, "learning_rate": 1.1497487756227765e-05, "loss": 0.5591, "step": 5766 }, { "epoch": 0.47, "grad_norm": 0.9397027105849795, "learning_rate": 1.1494884915445867e-05, "loss": 0.5431, "step": 5767 }, { "epoch": 0.47, "grad_norm": 0.931798640912055, "learning_rate": 1.1492281971069772e-05, "loss": 0.6008, "step": 5768 }, { "epoch": 0.47, "grad_norm": 0.8497883566831262, "learning_rate": 1.148967892327986e-05, "loss": 0.4807, "step": 5769 }, { "epoch": 0.47, "grad_norm": 1.0284837210866395, "learning_rate": 1.1487075772256517e-05, "loss": 0.5645, "step": 5770 }, { "epoch": 0.47, "grad_norm": 0.9293165286236764, "learning_rate": 1.1484472518180146e-05, "loss": 0.5455, "step": 5771 }, { "epoch": 0.47, "grad_norm": 0.8389218359292604, "learning_rate": 1.1481869161231146e-05, "loss": 0.5453, "step": 5772 }, { "epoch": 0.47, "grad_norm": 0.9159749563956316, "learning_rate": 1.1479265701589924e-05, "loss": 0.5282, "step": 5773 }, { "epoch": 0.47, "grad_norm": 0.9070787010145015, "learning_rate": 1.1476662139436903e-05, "loss": 0.5471, "step": 5774 }, { "epoch": 0.47, "grad_norm": 1.010745817868924, "learning_rate": 1.1474058474952505e-05, "loss": 0.4959, "step": 5775 }, { "epoch": 0.47, "grad_norm": 0.8536666744909506, "learning_rate": 1.1471454708317163e-05, "loss": 0.5393, "step": 5776 }, { "epoch": 0.47, "grad_norm": 0.9417959392783372, "learning_rate": 1.1468850839711314e-05, "loss": 0.5515, "step": 5777 }, { "epoch": 0.47, "grad_norm": 0.9056937867738876, "learning_rate": 1.1466246869315407e-05, "loss": 0.5208, "step": 5778 }, { "epoch": 0.47, "grad_norm": 0.9178472792346383, "learning_rate": 1.1463642797309889e-05, "loss": 0.54, "step": 5779 }, { "epoch": 0.47, "grad_norm": 0.9127826094034939, "learning_rate": 1.1461038623875224e-05, "loss": 0.5335, "step": 5780 }, { "epoch": 0.47, "grad_norm": 0.9142747269421215, "learning_rate": 1.145843434919188e-05, "loss": 0.5424, "step": 5781 }, { "epoch": 0.47, "grad_norm": 0.9521292000322145, "learning_rate": 1.1455829973440328e-05, "loss": 0.4752, "step": 5782 }, { "epoch": 0.47, "grad_norm": 0.9023297612335568, "learning_rate": 1.1453225496801052e-05, "loss": 0.5146, "step": 5783 }, { "epoch": 0.47, "grad_norm": 0.8767471354615514, "learning_rate": 1.1450620919454538e-05, "loss": 0.5128, "step": 5784 }, { "epoch": 0.47, "grad_norm": 0.9506251594156782, "learning_rate": 1.144801624158128e-05, "loss": 0.6153, "step": 5785 }, { "epoch": 0.47, "grad_norm": 0.8936628203180962, "learning_rate": 1.144541146336178e-05, "loss": 0.5134, "step": 5786 }, { "epoch": 0.47, "grad_norm": 0.9560096879745319, "learning_rate": 1.1442806584976549e-05, "loss": 0.547, "step": 5787 }, { "epoch": 0.47, "grad_norm": 0.9683698526031369, "learning_rate": 1.14402016066061e-05, "loss": 0.5176, "step": 5788 }, { "epoch": 0.47, "grad_norm": 0.8838884747303756, "learning_rate": 1.1437596528430956e-05, "loss": 0.5551, "step": 5789 }, { "epoch": 0.47, "grad_norm": 1.0051779630328477, "learning_rate": 1.143499135063165e-05, "loss": 0.6128, "step": 5790 }, { "epoch": 0.47, "grad_norm": 0.9116566361869124, "learning_rate": 1.1432386073388718e-05, "loss": 0.4915, "step": 5791 }, { "epoch": 0.47, "grad_norm": 0.9302519835107923, "learning_rate": 1.1429780696882697e-05, "loss": 0.5214, "step": 5792 }, { "epoch": 0.47, "grad_norm": 0.8898580580646048, "learning_rate": 1.1427175221294145e-05, "loss": 0.524, "step": 5793 }, { "epoch": 0.47, "grad_norm": 0.9166491607210097, "learning_rate": 1.1424569646803616e-05, "loss": 0.5383, "step": 5794 }, { "epoch": 0.47, "grad_norm": 0.9242787204585731, "learning_rate": 1.1421963973591674e-05, "loss": 0.466, "step": 5795 }, { "epoch": 0.47, "grad_norm": 0.896400261572487, "learning_rate": 1.1419358201838888e-05, "loss": 0.5314, "step": 5796 }, { "epoch": 0.47, "grad_norm": 0.9348660459482668, "learning_rate": 1.1416752331725842e-05, "loss": 0.486, "step": 5797 }, { "epoch": 0.47, "grad_norm": 0.9553987352915291, "learning_rate": 1.1414146363433112e-05, "loss": 0.4985, "step": 5798 }, { "epoch": 0.47, "grad_norm": 0.9954151620187609, "learning_rate": 1.1411540297141293e-05, "loss": 0.5526, "step": 5799 }, { "epoch": 0.47, "grad_norm": 0.8631823698760575, "learning_rate": 1.1408934133030985e-05, "loss": 0.5711, "step": 5800 }, { "epoch": 0.47, "grad_norm": 0.8611002496151663, "learning_rate": 1.1406327871282792e-05, "loss": 0.5432, "step": 5801 }, { "epoch": 0.47, "grad_norm": 0.9373509114087041, "learning_rate": 1.1403721512077324e-05, "loss": 0.5104, "step": 5802 }, { "epoch": 0.47, "grad_norm": 0.8651715229849188, "learning_rate": 1.14011150555952e-05, "loss": 0.5173, "step": 5803 }, { "epoch": 0.47, "grad_norm": 0.8591973900203775, "learning_rate": 1.1398508502017047e-05, "loss": 0.5324, "step": 5804 }, { "epoch": 0.47, "grad_norm": 0.9105118528653173, "learning_rate": 1.1395901851523494e-05, "loss": 0.5249, "step": 5805 }, { "epoch": 0.47, "grad_norm": 0.9613722495446257, "learning_rate": 1.1393295104295178e-05, "loss": 0.5782, "step": 5806 }, { "epoch": 0.47, "grad_norm": 0.7954537842824811, "learning_rate": 1.1390688260512755e-05, "loss": 0.5114, "step": 5807 }, { "epoch": 0.47, "grad_norm": 0.9248260565773117, "learning_rate": 1.1388081320356861e-05, "loss": 0.5524, "step": 5808 }, { "epoch": 0.47, "grad_norm": 0.8469594823476866, "learning_rate": 1.1385474284008167e-05, "loss": 0.4896, "step": 5809 }, { "epoch": 0.47, "grad_norm": 0.8069975623735556, "learning_rate": 1.1382867151647333e-05, "loss": 0.5172, "step": 5810 }, { "epoch": 0.47, "grad_norm": 0.9064033476636228, "learning_rate": 1.1380259923455033e-05, "loss": 0.5351, "step": 5811 }, { "epoch": 0.47, "grad_norm": 0.8459661189916762, "learning_rate": 1.1377652599611942e-05, "loss": 0.5321, "step": 5812 }, { "epoch": 0.47, "grad_norm": 0.8653354047603307, "learning_rate": 1.1375045180298749e-05, "loss": 0.5899, "step": 5813 }, { "epoch": 0.47, "grad_norm": 0.9715947135905709, "learning_rate": 1.1372437665696145e-05, "loss": 0.5819, "step": 5814 }, { "epoch": 0.47, "grad_norm": 0.8273124149020723, "learning_rate": 1.136983005598483e-05, "loss": 0.4911, "step": 5815 }, { "epoch": 0.47, "grad_norm": 0.9421996751596458, "learning_rate": 1.1367222351345504e-05, "loss": 0.5739, "step": 5816 }, { "epoch": 0.47, "grad_norm": 0.9107314035512659, "learning_rate": 1.136461455195888e-05, "loss": 0.4639, "step": 5817 }, { "epoch": 0.47, "grad_norm": 0.8399480002609003, "learning_rate": 1.1362006658005684e-05, "loss": 0.4968, "step": 5818 }, { "epoch": 0.47, "grad_norm": 0.9050184961256491, "learning_rate": 1.135939866966663e-05, "loss": 0.5679, "step": 5819 }, { "epoch": 0.47, "grad_norm": 0.8902604436710996, "learning_rate": 1.1356790587122457e-05, "loss": 0.5179, "step": 5820 }, { "epoch": 0.47, "grad_norm": 0.8722050269762409, "learning_rate": 1.13541824105539e-05, "loss": 0.543, "step": 5821 }, { "epoch": 0.47, "grad_norm": 0.9447918438261024, "learning_rate": 1.1351574140141701e-05, "loss": 0.5653, "step": 5822 }, { "epoch": 0.47, "grad_norm": 0.874738767797678, "learning_rate": 1.1348965776066611e-05, "loss": 0.5667, "step": 5823 }, { "epoch": 0.47, "grad_norm": 0.974237757914514, "learning_rate": 1.1346357318509395e-05, "loss": 0.5381, "step": 5824 }, { "epoch": 0.47, "grad_norm": 0.8495673811717946, "learning_rate": 1.1343748767650806e-05, "loss": 0.5473, "step": 5825 }, { "epoch": 0.47, "grad_norm": 0.941721385466632, "learning_rate": 1.1341140123671621e-05, "loss": 0.5479, "step": 5826 }, { "epoch": 0.47, "grad_norm": 0.9913803444659351, "learning_rate": 1.1338531386752618e-05, "loss": 0.5276, "step": 5827 }, { "epoch": 0.47, "grad_norm": 0.8792161364554925, "learning_rate": 1.1335922557074572e-05, "loss": 0.5012, "step": 5828 }, { "epoch": 0.47, "grad_norm": 0.8561321256615947, "learning_rate": 1.133331363481828e-05, "loss": 0.5516, "step": 5829 }, { "epoch": 0.47, "grad_norm": 0.8714825042279836, "learning_rate": 1.133070462016454e-05, "loss": 0.4823, "step": 5830 }, { "epoch": 0.47, "grad_norm": 0.8371557202330407, "learning_rate": 1.1328095513294143e-05, "loss": 0.5371, "step": 5831 }, { "epoch": 0.47, "grad_norm": 0.8041356268035228, "learning_rate": 1.1325486314387908e-05, "loss": 0.4643, "step": 5832 }, { "epoch": 0.47, "grad_norm": 0.8817325273916904, "learning_rate": 1.1322877023626647e-05, "loss": 0.4926, "step": 5833 }, { "epoch": 0.47, "grad_norm": 0.8390037601845575, "learning_rate": 1.1320267641191183e-05, "loss": 0.5356, "step": 5834 }, { "epoch": 0.47, "grad_norm": 0.9465123705146559, "learning_rate": 1.131765816726234e-05, "loss": 0.5277, "step": 5835 }, { "epoch": 0.47, "grad_norm": 0.8709013251621589, "learning_rate": 1.1315048602020956e-05, "loss": 0.5292, "step": 5836 }, { "epoch": 0.47, "grad_norm": 0.9327073017044094, "learning_rate": 1.1312438945647873e-05, "loss": 0.5589, "step": 5837 }, { "epoch": 0.47, "grad_norm": 0.913930006801104, "learning_rate": 1.1309829198323929e-05, "loss": 0.5097, "step": 5838 }, { "epoch": 0.47, "grad_norm": 0.9536570039795854, "learning_rate": 1.1307219360229991e-05, "loss": 0.5517, "step": 5839 }, { "epoch": 0.47, "grad_norm": 0.8559047225531874, "learning_rate": 1.1304609431546905e-05, "loss": 0.4756, "step": 5840 }, { "epoch": 0.47, "grad_norm": 0.8773984137628708, "learning_rate": 1.1301999412455545e-05, "loss": 0.5227, "step": 5841 }, { "epoch": 0.47, "grad_norm": 1.056976234813652, "learning_rate": 1.129938930313678e-05, "loss": 0.5863, "step": 5842 }, { "epoch": 0.47, "grad_norm": 0.8246643505599478, "learning_rate": 1.129677910377149e-05, "loss": 0.4886, "step": 5843 }, { "epoch": 0.47, "grad_norm": 0.949542374607715, "learning_rate": 1.1294168814540554e-05, "loss": 0.5693, "step": 5844 }, { "epoch": 0.48, "grad_norm": 1.0296550313854473, "learning_rate": 1.1291558435624871e-05, "loss": 0.5254, "step": 5845 }, { "epoch": 0.48, "grad_norm": 0.8437049883098626, "learning_rate": 1.1288947967205335e-05, "loss": 0.5596, "step": 5846 }, { "epoch": 0.48, "grad_norm": 0.8585532394054722, "learning_rate": 1.1286337409462844e-05, "loss": 0.5473, "step": 5847 }, { "epoch": 0.48, "grad_norm": 0.9820576693821286, "learning_rate": 1.1283726762578316e-05, "loss": 0.6178, "step": 5848 }, { "epoch": 0.48, "grad_norm": 0.9108796836809981, "learning_rate": 1.128111602673266e-05, "loss": 0.5125, "step": 5849 }, { "epoch": 0.48, "grad_norm": 0.8617572776326249, "learning_rate": 1.1278505202106797e-05, "loss": 0.5172, "step": 5850 }, { "epoch": 0.48, "grad_norm": 0.886056947138033, "learning_rate": 1.1275894288881664e-05, "loss": 0.498, "step": 5851 }, { "epoch": 0.48, "grad_norm": 0.9296123160821662, "learning_rate": 1.1273283287238184e-05, "loss": 0.5488, "step": 5852 }, { "epoch": 0.48, "grad_norm": 0.9310583665236684, "learning_rate": 1.12706721973573e-05, "loss": 0.5318, "step": 5853 }, { "epoch": 0.48, "grad_norm": 0.8348396049829269, "learning_rate": 1.1268061019419965e-05, "loss": 0.5751, "step": 5854 }, { "epoch": 0.48, "grad_norm": 0.8750748024173053, "learning_rate": 1.1265449753607122e-05, "loss": 0.5463, "step": 5855 }, { "epoch": 0.48, "grad_norm": 0.8049671125117622, "learning_rate": 1.1262838400099733e-05, "loss": 0.4689, "step": 5856 }, { "epoch": 0.48, "grad_norm": 0.8821628993628409, "learning_rate": 1.1260226959078766e-05, "loss": 0.4792, "step": 5857 }, { "epoch": 0.48, "grad_norm": 0.8499335731309763, "learning_rate": 1.1257615430725188e-05, "loss": 0.5666, "step": 5858 }, { "epoch": 0.48, "grad_norm": 0.8016705903840738, "learning_rate": 1.1255003815219973e-05, "loss": 0.4629, "step": 5859 }, { "epoch": 0.48, "grad_norm": 0.8575923821384105, "learning_rate": 1.1252392112744113e-05, "loss": 0.5538, "step": 5860 }, { "epoch": 0.48, "grad_norm": 0.9091042763411217, "learning_rate": 1.1249780323478585e-05, "loss": 0.5528, "step": 5861 }, { "epoch": 0.48, "grad_norm": 0.879419120033352, "learning_rate": 1.124716844760439e-05, "loss": 0.5088, "step": 5862 }, { "epoch": 0.48, "grad_norm": 0.9238669229492489, "learning_rate": 1.1244556485302532e-05, "loss": 0.5262, "step": 5863 }, { "epoch": 0.48, "grad_norm": 0.9622603224438493, "learning_rate": 1.1241944436754008e-05, "loss": 0.6327, "step": 5864 }, { "epoch": 0.48, "grad_norm": 0.8709572099514622, "learning_rate": 1.1239332302139839e-05, "loss": 0.6088, "step": 5865 }, { "epoch": 0.48, "grad_norm": 0.8728800808030194, "learning_rate": 1.1236720081641042e-05, "loss": 0.5195, "step": 5866 }, { "epoch": 0.48, "grad_norm": 0.9157664222436604, "learning_rate": 1.1234107775438637e-05, "loss": 0.521, "step": 5867 }, { "epoch": 0.48, "grad_norm": 0.9421227582596474, "learning_rate": 1.1231495383713657e-05, "loss": 0.5419, "step": 5868 }, { "epoch": 0.48, "grad_norm": 0.8996081993865935, "learning_rate": 1.1228882906647142e-05, "loss": 0.4896, "step": 5869 }, { "epoch": 0.48, "grad_norm": 0.8783857047148886, "learning_rate": 1.1226270344420131e-05, "loss": 0.516, "step": 5870 }, { "epoch": 0.48, "grad_norm": 0.7932167932457265, "learning_rate": 1.1223657697213672e-05, "loss": 0.4892, "step": 5871 }, { "epoch": 0.48, "grad_norm": 0.9248614707298407, "learning_rate": 1.1221044965208821e-05, "loss": 0.5565, "step": 5872 }, { "epoch": 0.48, "grad_norm": 0.8308674630147707, "learning_rate": 1.1218432148586638e-05, "loss": 0.5883, "step": 5873 }, { "epoch": 0.48, "grad_norm": 0.8704808487120602, "learning_rate": 1.1215819247528186e-05, "loss": 0.5364, "step": 5874 }, { "epoch": 0.48, "grad_norm": 0.9057993331161298, "learning_rate": 1.121320626221454e-05, "loss": 0.5342, "step": 5875 }, { "epoch": 0.48, "grad_norm": 0.9622876164726756, "learning_rate": 1.1210593192826776e-05, "loss": 0.6037, "step": 5876 }, { "epoch": 0.48, "grad_norm": 0.8488925867142036, "learning_rate": 1.1207980039545976e-05, "loss": 0.5314, "step": 5877 }, { "epoch": 0.48, "grad_norm": 0.8730947745792921, "learning_rate": 1.1205366802553231e-05, "loss": 0.5412, "step": 5878 }, { "epoch": 0.48, "grad_norm": 0.9506959222264564, "learning_rate": 1.1202753482029639e-05, "loss": 0.6111, "step": 5879 }, { "epoch": 0.48, "grad_norm": 0.8871307714612382, "learning_rate": 1.1200140078156293e-05, "loss": 0.5414, "step": 5880 }, { "epoch": 0.48, "grad_norm": 0.9470858331126085, "learning_rate": 1.1197526591114306e-05, "loss": 0.5387, "step": 5881 }, { "epoch": 0.48, "grad_norm": 0.968646981582715, "learning_rate": 1.119491302108479e-05, "loss": 0.6078, "step": 5882 }, { "epoch": 0.48, "grad_norm": 0.9271943209176021, "learning_rate": 1.1192299368248858e-05, "loss": 0.569, "step": 5883 }, { "epoch": 0.48, "grad_norm": 0.899014706576815, "learning_rate": 1.1189685632787638e-05, "loss": 0.5318, "step": 5884 }, { "epoch": 0.48, "grad_norm": 0.8912231576345668, "learning_rate": 1.1187071814882262e-05, "loss": 0.5458, "step": 5885 }, { "epoch": 0.48, "grad_norm": 0.9244677150779027, "learning_rate": 1.118445791471386e-05, "loss": 0.5491, "step": 5886 }, { "epoch": 0.48, "grad_norm": 0.7735443209972106, "learning_rate": 1.1181843932463577e-05, "loss": 0.4898, "step": 5887 }, { "epoch": 0.48, "grad_norm": 0.8876592892316715, "learning_rate": 1.1179229868312555e-05, "loss": 0.4807, "step": 5888 }, { "epoch": 0.48, "grad_norm": 0.8108220261166168, "learning_rate": 1.117661572244195e-05, "loss": 0.4873, "step": 5889 }, { "epoch": 0.48, "grad_norm": 0.9695562013180646, "learning_rate": 1.117400149503292e-05, "loss": 0.6291, "step": 5890 }, { "epoch": 0.48, "grad_norm": 1.0105840919169105, "learning_rate": 1.1171387186266628e-05, "loss": 0.6225, "step": 5891 }, { "epoch": 0.48, "grad_norm": 0.8261616963980778, "learning_rate": 1.1168772796324241e-05, "loss": 0.4972, "step": 5892 }, { "epoch": 0.48, "grad_norm": 0.938537542798799, "learning_rate": 1.1166158325386938e-05, "loss": 0.5204, "step": 5893 }, { "epoch": 0.48, "grad_norm": 0.8614582593712691, "learning_rate": 1.1163543773635896e-05, "loss": 0.5719, "step": 5894 }, { "epoch": 0.48, "grad_norm": 0.9374411897404901, "learning_rate": 1.1160929141252303e-05, "loss": 0.491, "step": 5895 }, { "epoch": 0.48, "grad_norm": 0.9359501939977077, "learning_rate": 1.115831442841735e-05, "loss": 0.4821, "step": 5896 }, { "epoch": 0.48, "grad_norm": 0.9385210570409233, "learning_rate": 1.1155699635312235e-05, "loss": 0.5466, "step": 5897 }, { "epoch": 0.48, "grad_norm": 0.8988786435675817, "learning_rate": 1.1153084762118163e-05, "loss": 0.5296, "step": 5898 }, { "epoch": 0.48, "grad_norm": 0.9405919058361396, "learning_rate": 1.1150469809016336e-05, "loss": 0.5517, "step": 5899 }, { "epoch": 0.48, "grad_norm": 0.9164295167852962, "learning_rate": 1.1147854776187973e-05, "loss": 0.6041, "step": 5900 }, { "epoch": 0.48, "grad_norm": 1.0104231088332736, "learning_rate": 1.1145239663814291e-05, "loss": 0.5689, "step": 5901 }, { "epoch": 0.48, "grad_norm": 0.9624897296538513, "learning_rate": 1.1142624472076518e-05, "loss": 0.643, "step": 5902 }, { "epoch": 0.48, "grad_norm": 0.9035371445616356, "learning_rate": 1.1140009201155881e-05, "loss": 0.5335, "step": 5903 }, { "epoch": 0.48, "grad_norm": 0.8552416264952798, "learning_rate": 1.1137393851233618e-05, "loss": 0.5205, "step": 5904 }, { "epoch": 0.48, "grad_norm": 0.8690331556336678, "learning_rate": 1.1134778422490971e-05, "loss": 0.5088, "step": 5905 }, { "epoch": 0.48, "grad_norm": 0.8007198775976306, "learning_rate": 1.1132162915109186e-05, "loss": 0.5099, "step": 5906 }, { "epoch": 0.48, "grad_norm": 0.8398136874931055, "learning_rate": 1.1129547329269512e-05, "loss": 0.4875, "step": 5907 }, { "epoch": 0.48, "grad_norm": 0.968349450845184, "learning_rate": 1.1126931665153213e-05, "loss": 0.6041, "step": 5908 }, { "epoch": 0.48, "grad_norm": 0.8786642078065504, "learning_rate": 1.1124315922941549e-05, "loss": 0.4978, "step": 5909 }, { "epoch": 0.48, "grad_norm": 0.9614973758016057, "learning_rate": 1.1121700102815787e-05, "loss": 0.5669, "step": 5910 }, { "epoch": 0.48, "grad_norm": 0.9050042577946916, "learning_rate": 1.1119084204957204e-05, "loss": 0.5286, "step": 5911 }, { "epoch": 0.48, "grad_norm": 0.8748269695827618, "learning_rate": 1.1116468229547079e-05, "loss": 0.4821, "step": 5912 }, { "epoch": 0.48, "grad_norm": 0.9758458472696362, "learning_rate": 1.1113852176766695e-05, "loss": 0.5561, "step": 5913 }, { "epoch": 0.48, "grad_norm": 0.968997489808725, "learning_rate": 1.1111236046797342e-05, "loss": 0.5863, "step": 5914 }, { "epoch": 0.48, "grad_norm": 0.8520026964412684, "learning_rate": 1.110861983982032e-05, "loss": 0.5155, "step": 5915 }, { "epoch": 0.48, "grad_norm": 0.9705553198685964, "learning_rate": 1.1106003556016924e-05, "loss": 0.5326, "step": 5916 }, { "epoch": 0.48, "grad_norm": 0.942824468226576, "learning_rate": 1.1103387195568463e-05, "loss": 0.5439, "step": 5917 }, { "epoch": 0.48, "grad_norm": 1.002773840554435, "learning_rate": 1.110077075865625e-05, "loss": 0.6232, "step": 5918 }, { "epoch": 0.48, "grad_norm": 0.9123206687630057, "learning_rate": 1.1098154245461597e-05, "loss": 0.5355, "step": 5919 }, { "epoch": 0.48, "grad_norm": 0.8978286343392776, "learning_rate": 1.109553765616583e-05, "loss": 0.5299, "step": 5920 }, { "epoch": 0.48, "grad_norm": 0.9972585079973012, "learning_rate": 1.1092920990950276e-05, "loss": 0.5836, "step": 5921 }, { "epoch": 0.48, "grad_norm": 0.8419429273808501, "learning_rate": 1.1090304249996264e-05, "loss": 0.4881, "step": 5922 }, { "epoch": 0.48, "grad_norm": 0.9619008015768903, "learning_rate": 1.1087687433485135e-05, "loss": 0.4746, "step": 5923 }, { "epoch": 0.48, "grad_norm": 0.9965086736539188, "learning_rate": 1.1085070541598235e-05, "loss": 0.5737, "step": 5924 }, { "epoch": 0.48, "grad_norm": 0.8223959891987548, "learning_rate": 1.1082453574516907e-05, "loss": 0.5213, "step": 5925 }, { "epoch": 0.48, "grad_norm": 0.9194358573261691, "learning_rate": 1.1079836532422505e-05, "loss": 0.5685, "step": 5926 }, { "epoch": 0.48, "grad_norm": 0.8433036241482647, "learning_rate": 1.1077219415496391e-05, "loss": 0.5119, "step": 5927 }, { "epoch": 0.48, "grad_norm": 0.8369149997767066, "learning_rate": 1.1074602223919925e-05, "loss": 0.5315, "step": 5928 }, { "epoch": 0.48, "grad_norm": 0.962608668637937, "learning_rate": 1.107198495787448e-05, "loss": 0.5197, "step": 5929 }, { "epoch": 0.48, "grad_norm": 0.9299123924527506, "learning_rate": 1.106936761754143e-05, "loss": 0.4922, "step": 5930 }, { "epoch": 0.48, "grad_norm": 0.9346185712137653, "learning_rate": 1.1066750203102148e-05, "loss": 0.5297, "step": 5931 }, { "epoch": 0.48, "grad_norm": 0.8796465411194002, "learning_rate": 1.1064132714738024e-05, "loss": 0.5336, "step": 5932 }, { "epoch": 0.48, "grad_norm": 0.9216539178960147, "learning_rate": 1.1061515152630448e-05, "loss": 0.6052, "step": 5933 }, { "epoch": 0.48, "grad_norm": 0.9549392231390016, "learning_rate": 1.1058897516960817e-05, "loss": 0.6129, "step": 5934 }, { "epoch": 0.48, "grad_norm": 0.9376318659921885, "learning_rate": 1.1056279807910522e-05, "loss": 0.6062, "step": 5935 }, { "epoch": 0.48, "grad_norm": 0.9601200523129949, "learning_rate": 1.1053662025660973e-05, "loss": 0.4865, "step": 5936 }, { "epoch": 0.48, "grad_norm": 0.9388283502974828, "learning_rate": 1.1051044170393586e-05, "loss": 0.5551, "step": 5937 }, { "epoch": 0.48, "grad_norm": 0.9367217738098178, "learning_rate": 1.1048426242289767e-05, "loss": 0.5412, "step": 5938 }, { "epoch": 0.48, "grad_norm": 0.9095242772845795, "learning_rate": 1.1045808241530937e-05, "loss": 0.5014, "step": 5939 }, { "epoch": 0.48, "grad_norm": 0.9201435611696015, "learning_rate": 1.1043190168298527e-05, "loss": 0.5581, "step": 5940 }, { "epoch": 0.48, "grad_norm": 0.942430264175165, "learning_rate": 1.104057202277396e-05, "loss": 0.5376, "step": 5941 }, { "epoch": 0.48, "grad_norm": 0.9284963591597374, "learning_rate": 1.1037953805138679e-05, "loss": 0.5984, "step": 5942 }, { "epoch": 0.48, "grad_norm": 0.9806126690798201, "learning_rate": 1.103533551557412e-05, "loss": 0.5994, "step": 5943 }, { "epoch": 0.48, "grad_norm": 0.9852923229857014, "learning_rate": 1.1032717154261725e-05, "loss": 0.5086, "step": 5944 }, { "epoch": 0.48, "grad_norm": 0.8857293632409518, "learning_rate": 1.1030098721382947e-05, "loss": 0.5406, "step": 5945 }, { "epoch": 0.48, "grad_norm": 0.9089245246997715, "learning_rate": 1.1027480217119245e-05, "loss": 0.5156, "step": 5946 }, { "epoch": 0.48, "grad_norm": 0.8422760311162406, "learning_rate": 1.102486164165207e-05, "loss": 0.5645, "step": 5947 }, { "epoch": 0.48, "grad_norm": 0.8867799252180668, "learning_rate": 1.1022242995162895e-05, "loss": 0.5605, "step": 5948 }, { "epoch": 0.48, "grad_norm": 0.8775600612421792, "learning_rate": 1.1019624277833188e-05, "loss": 0.6068, "step": 5949 }, { "epoch": 0.48, "grad_norm": 0.919899703948138, "learning_rate": 1.1017005489844424e-05, "loss": 0.4996, "step": 5950 }, { "epoch": 0.48, "grad_norm": 0.9793631542392197, "learning_rate": 1.1014386631378079e-05, "loss": 0.5301, "step": 5951 }, { "epoch": 0.48, "grad_norm": 0.8502436939737742, "learning_rate": 1.101176770261564e-05, "loss": 0.5321, "step": 5952 }, { "epoch": 0.48, "grad_norm": 0.9908848621988976, "learning_rate": 1.1009148703738599e-05, "loss": 0.5246, "step": 5953 }, { "epoch": 0.48, "grad_norm": 0.8478840248580101, "learning_rate": 1.1006529634928446e-05, "loss": 0.503, "step": 5954 }, { "epoch": 0.48, "grad_norm": 0.9149829104564555, "learning_rate": 1.1003910496366683e-05, "loss": 0.5766, "step": 5955 }, { "epoch": 0.48, "grad_norm": 1.3735137099920995, "learning_rate": 1.1001291288234812e-05, "loss": 0.5335, "step": 5956 }, { "epoch": 0.48, "grad_norm": 1.0109614737297976, "learning_rate": 1.0998672010714344e-05, "loss": 0.5876, "step": 5957 }, { "epoch": 0.48, "grad_norm": 0.9525877879244522, "learning_rate": 1.0996052663986791e-05, "loss": 0.5306, "step": 5958 }, { "epoch": 0.48, "grad_norm": 0.872468204745674, "learning_rate": 1.0993433248233672e-05, "loss": 0.5275, "step": 5959 }, { "epoch": 0.48, "grad_norm": 0.9800315769595653, "learning_rate": 1.0990813763636511e-05, "loss": 0.5752, "step": 5960 }, { "epoch": 0.48, "grad_norm": 0.9484721067744775, "learning_rate": 1.0988194210376834e-05, "loss": 0.5565, "step": 5961 }, { "epoch": 0.48, "grad_norm": 0.970274479323285, "learning_rate": 1.0985574588636174e-05, "loss": 0.5795, "step": 5962 }, { "epoch": 0.48, "grad_norm": 0.8814469628462033, "learning_rate": 1.0982954898596072e-05, "loss": 0.4921, "step": 5963 }, { "epoch": 0.48, "grad_norm": 0.9531102160371265, "learning_rate": 1.0980335140438066e-05, "loss": 0.5659, "step": 5964 }, { "epoch": 0.48, "grad_norm": 0.8076295124586604, "learning_rate": 1.0977715314343702e-05, "loss": 0.5268, "step": 5965 }, { "epoch": 0.48, "grad_norm": 0.9189308585416595, "learning_rate": 1.0975095420494537e-05, "loss": 0.529, "step": 5966 }, { "epoch": 0.48, "grad_norm": 0.8835362326913786, "learning_rate": 1.0972475459072124e-05, "loss": 0.5095, "step": 5967 }, { "epoch": 0.49, "grad_norm": 0.9200688922262339, "learning_rate": 1.0969855430258022e-05, "loss": 0.5085, "step": 5968 }, { "epoch": 0.49, "grad_norm": 1.0103589244278846, "learning_rate": 1.0967235334233802e-05, "loss": 0.6143, "step": 5969 }, { "epoch": 0.49, "grad_norm": 0.8747556366743678, "learning_rate": 1.096461517118103e-05, "loss": 0.5439, "step": 5970 }, { "epoch": 0.49, "grad_norm": 0.8383655597267304, "learning_rate": 1.096199494128128e-05, "loss": 0.5071, "step": 5971 }, { "epoch": 0.49, "grad_norm": 0.9352976550371722, "learning_rate": 1.0959374644716137e-05, "loss": 0.5272, "step": 5972 }, { "epoch": 0.49, "grad_norm": 0.9736793571068132, "learning_rate": 1.0956754281667182e-05, "loss": 0.5717, "step": 5973 }, { "epoch": 0.49, "grad_norm": 0.8320012054898668, "learning_rate": 1.0954133852316003e-05, "loss": 0.4946, "step": 5974 }, { "epoch": 0.49, "grad_norm": 0.9549832251089269, "learning_rate": 1.0951513356844192e-05, "loss": 0.5193, "step": 5975 }, { "epoch": 0.49, "grad_norm": 0.7905276524529273, "learning_rate": 1.0948892795433353e-05, "loss": 0.4973, "step": 5976 }, { "epoch": 0.49, "grad_norm": 0.9168115233955988, "learning_rate": 1.0946272168265081e-05, "loss": 0.5515, "step": 5977 }, { "epoch": 0.49, "grad_norm": 0.9139760132751583, "learning_rate": 1.094365147552099e-05, "loss": 0.5441, "step": 5978 }, { "epoch": 0.49, "grad_norm": 0.9405480838687121, "learning_rate": 1.094103071738269e-05, "loss": 0.5967, "step": 5979 }, { "epoch": 0.49, "grad_norm": 0.8594264268924838, "learning_rate": 1.0938409894031793e-05, "loss": 0.5401, "step": 5980 }, { "epoch": 0.49, "grad_norm": 0.8756876468757003, "learning_rate": 1.0935789005649924e-05, "loss": 0.5171, "step": 5981 }, { "epoch": 0.49, "grad_norm": 0.8974310052441253, "learning_rate": 1.0933168052418708e-05, "loss": 0.5579, "step": 5982 }, { "epoch": 0.49, "grad_norm": 0.9251336556455075, "learning_rate": 1.0930547034519772e-05, "loss": 0.5695, "step": 5983 }, { "epoch": 0.49, "grad_norm": 0.9314886487604079, "learning_rate": 1.0927925952134753e-05, "loss": 0.5214, "step": 5984 }, { "epoch": 0.49, "grad_norm": 0.869125763237957, "learning_rate": 1.092530480544529e-05, "loss": 0.501, "step": 5985 }, { "epoch": 0.49, "grad_norm": 0.915804992331489, "learning_rate": 1.092268359463302e-05, "loss": 0.5217, "step": 5986 }, { "epoch": 0.49, "grad_norm": 0.8962623903964275, "learning_rate": 1.0920062319879599e-05, "loss": 0.5452, "step": 5987 }, { "epoch": 0.49, "grad_norm": 0.8486741066180117, "learning_rate": 1.0917440981366677e-05, "loss": 0.5229, "step": 5988 }, { "epoch": 0.49, "grad_norm": 0.9043838061637083, "learning_rate": 1.0914819579275903e-05, "loss": 0.5257, "step": 5989 }, { "epoch": 0.49, "grad_norm": 0.8980008090414996, "learning_rate": 1.0912198113788947e-05, "loss": 0.539, "step": 5990 }, { "epoch": 0.49, "grad_norm": 0.8991497597361532, "learning_rate": 1.0909576585087472e-05, "loss": 0.5007, "step": 5991 }, { "epoch": 0.49, "grad_norm": 0.8883674978924093, "learning_rate": 1.0906954993353145e-05, "loss": 0.5107, "step": 5992 }, { "epoch": 0.49, "grad_norm": 0.8998564171217721, "learning_rate": 1.0904333338767641e-05, "loss": 0.5625, "step": 5993 }, { "epoch": 0.49, "grad_norm": 0.8363802061981552, "learning_rate": 1.090171162151264e-05, "loss": 0.5212, "step": 5994 }, { "epoch": 0.49, "grad_norm": 0.9017747043223552, "learning_rate": 1.0899089841769824e-05, "loss": 0.5528, "step": 5995 }, { "epoch": 0.49, "grad_norm": 0.9231025436302872, "learning_rate": 1.0896467999720876e-05, "loss": 0.5525, "step": 5996 }, { "epoch": 0.49, "grad_norm": 0.8912850378764711, "learning_rate": 1.0893846095547493e-05, "loss": 0.5891, "step": 5997 }, { "epoch": 0.49, "grad_norm": 0.9500971063396939, "learning_rate": 1.0891224129431368e-05, "loss": 0.5333, "step": 5998 }, { "epoch": 0.49, "grad_norm": 0.8674435355298813, "learning_rate": 1.0888602101554202e-05, "loss": 0.4728, "step": 5999 }, { "epoch": 0.49, "grad_norm": 0.8477710168689605, "learning_rate": 1.0885980012097698e-05, "loss": 0.4872, "step": 6000 }, { "epoch": 0.49, "grad_norm": 0.9681405308198463, "learning_rate": 1.0883357861243567e-05, "loss": 0.5292, "step": 6001 }, { "epoch": 0.49, "grad_norm": 0.8159825384348709, "learning_rate": 1.0880735649173518e-05, "loss": 0.5195, "step": 6002 }, { "epoch": 0.49, "grad_norm": 0.8594649461848198, "learning_rate": 1.0878113376069268e-05, "loss": 0.5064, "step": 6003 }, { "epoch": 0.49, "grad_norm": 0.8755507296784738, "learning_rate": 1.0875491042112543e-05, "loss": 0.4985, "step": 6004 }, { "epoch": 0.49, "grad_norm": 1.1053674630111627, "learning_rate": 1.0872868647485064e-05, "loss": 0.5157, "step": 6005 }, { "epoch": 0.49, "grad_norm": 0.8650612305952349, "learning_rate": 1.087024619236856e-05, "loss": 0.5791, "step": 6006 }, { "epoch": 0.49, "grad_norm": 0.9342489013084267, "learning_rate": 1.0867623676944771e-05, "loss": 0.5481, "step": 6007 }, { "epoch": 0.49, "grad_norm": 0.8953188110291666, "learning_rate": 1.0865001101395429e-05, "loss": 0.4963, "step": 6008 }, { "epoch": 0.49, "grad_norm": 0.9266589925253965, "learning_rate": 1.0862378465902276e-05, "loss": 0.5699, "step": 6009 }, { "epoch": 0.49, "grad_norm": 0.876106966122857, "learning_rate": 1.0859755770647063e-05, "loss": 0.5398, "step": 6010 }, { "epoch": 0.49, "grad_norm": 0.9158713362331197, "learning_rate": 1.0857133015811537e-05, "loss": 0.5676, "step": 6011 }, { "epoch": 0.49, "grad_norm": 0.8439366191962501, "learning_rate": 1.0854510201577451e-05, "loss": 0.4834, "step": 6012 }, { "epoch": 0.49, "grad_norm": 0.9129278894147207, "learning_rate": 1.0851887328126569e-05, "loss": 0.4796, "step": 6013 }, { "epoch": 0.49, "grad_norm": 0.9651114390249649, "learning_rate": 1.084926439564065e-05, "loss": 0.519, "step": 6014 }, { "epoch": 0.49, "grad_norm": 0.9133574311556374, "learning_rate": 1.084664140430146e-05, "loss": 0.569, "step": 6015 }, { "epoch": 0.49, "grad_norm": 0.9137297031652001, "learning_rate": 1.0844018354290776e-05, "loss": 0.5623, "step": 6016 }, { "epoch": 0.49, "grad_norm": 0.8509472903986172, "learning_rate": 1.0841395245790363e-05, "loss": 0.5037, "step": 6017 }, { "epoch": 0.49, "grad_norm": 0.9168976366504523, "learning_rate": 1.0838772078982008e-05, "loss": 0.5698, "step": 6018 }, { "epoch": 0.49, "grad_norm": 0.902456982349151, "learning_rate": 1.0836148854047494e-05, "loss": 0.5128, "step": 6019 }, { "epoch": 0.49, "grad_norm": 0.922315991787359, "learning_rate": 1.0833525571168603e-05, "loss": 0.4852, "step": 6020 }, { "epoch": 0.49, "grad_norm": 0.9847818345055375, "learning_rate": 1.0830902230527129e-05, "loss": 0.5451, "step": 6021 }, { "epoch": 0.49, "grad_norm": 0.9403531190902872, "learning_rate": 1.082827883230487e-05, "loss": 0.5087, "step": 6022 }, { "epoch": 0.49, "grad_norm": 0.9536798293761942, "learning_rate": 1.0825655376683621e-05, "loss": 0.5691, "step": 6023 }, { "epoch": 0.49, "grad_norm": 0.8687923617162312, "learning_rate": 1.0823031863845189e-05, "loss": 0.5028, "step": 6024 }, { "epoch": 0.49, "grad_norm": 0.9344505134076202, "learning_rate": 1.082040829397138e-05, "loss": 0.5811, "step": 6025 }, { "epoch": 0.49, "grad_norm": 1.0892474648248411, "learning_rate": 1.0817784667243998e-05, "loss": 0.5767, "step": 6026 }, { "epoch": 0.49, "grad_norm": 0.9631439527676332, "learning_rate": 1.0815160983844865e-05, "loss": 0.5175, "step": 6027 }, { "epoch": 0.49, "grad_norm": 0.9986721453630223, "learning_rate": 1.0812537243955804e-05, "loss": 0.5998, "step": 6028 }, { "epoch": 0.49, "grad_norm": 0.9578778424630721, "learning_rate": 1.0809913447758628e-05, "loss": 0.5775, "step": 6029 }, { "epoch": 0.49, "grad_norm": 0.9161777185614014, "learning_rate": 1.080728959543517e-05, "loss": 0.5776, "step": 6030 }, { "epoch": 0.49, "grad_norm": 0.9106523123320278, "learning_rate": 1.0804665687167262e-05, "loss": 0.5458, "step": 6031 }, { "epoch": 0.49, "grad_norm": 0.8102666418535739, "learning_rate": 1.0802041723136731e-05, "loss": 0.5212, "step": 6032 }, { "epoch": 0.49, "grad_norm": 0.9620209665623649, "learning_rate": 1.079941770352542e-05, "loss": 0.5381, "step": 6033 }, { "epoch": 0.49, "grad_norm": 0.9765426156604666, "learning_rate": 1.0796793628515176e-05, "loss": 0.5639, "step": 6034 }, { "epoch": 0.49, "grad_norm": 0.9410479105661026, "learning_rate": 1.0794169498287837e-05, "loss": 0.5848, "step": 6035 }, { "epoch": 0.49, "grad_norm": 0.9258676956045075, "learning_rate": 1.0791545313025255e-05, "loss": 0.5677, "step": 6036 }, { "epoch": 0.49, "grad_norm": 0.9773392276289621, "learning_rate": 1.078892107290929e-05, "loss": 0.634, "step": 6037 }, { "epoch": 0.49, "grad_norm": 0.9256631034571904, "learning_rate": 1.0786296778121787e-05, "loss": 0.5795, "step": 6038 }, { "epoch": 0.49, "grad_norm": 0.933137084813143, "learning_rate": 1.078367242884462e-05, "loss": 0.5872, "step": 6039 }, { "epoch": 0.49, "grad_norm": 0.9953572621075247, "learning_rate": 1.0781048025259648e-05, "loss": 0.6192, "step": 6040 }, { "epoch": 0.49, "grad_norm": 1.0050752288603593, "learning_rate": 1.0778423567548739e-05, "loss": 0.5837, "step": 6041 }, { "epoch": 0.49, "grad_norm": 0.8950958764206185, "learning_rate": 1.0775799055893768e-05, "loss": 0.5146, "step": 6042 }, { "epoch": 0.49, "grad_norm": 0.9012573457259359, "learning_rate": 1.0773174490476613e-05, "loss": 0.5663, "step": 6043 }, { "epoch": 0.49, "grad_norm": 0.9568547546834592, "learning_rate": 1.0770549871479149e-05, "loss": 0.5659, "step": 6044 }, { "epoch": 0.49, "grad_norm": 0.921074163124563, "learning_rate": 1.0767925199083262e-05, "loss": 0.5878, "step": 6045 }, { "epoch": 0.49, "grad_norm": 0.8790758009068457, "learning_rate": 1.0765300473470841e-05, "loss": 0.4825, "step": 6046 }, { "epoch": 0.49, "grad_norm": 0.9692177897991965, "learning_rate": 1.0762675694823777e-05, "loss": 0.5294, "step": 6047 }, { "epoch": 0.49, "grad_norm": 0.7912159684386474, "learning_rate": 1.0760050863323961e-05, "loss": 0.4587, "step": 6048 }, { "epoch": 0.49, "grad_norm": 0.902387535550519, "learning_rate": 1.0757425979153297e-05, "loss": 0.5185, "step": 6049 }, { "epoch": 0.49, "grad_norm": 0.90900599928075, "learning_rate": 1.0754801042493683e-05, "loss": 0.5249, "step": 6050 }, { "epoch": 0.49, "grad_norm": 0.9150820410769867, "learning_rate": 1.0752176053527025e-05, "loss": 0.5712, "step": 6051 }, { "epoch": 0.49, "grad_norm": 0.9762071829758882, "learning_rate": 1.0749551012435237e-05, "loss": 0.573, "step": 6052 }, { "epoch": 0.49, "grad_norm": 0.8573612056669551, "learning_rate": 1.0746925919400226e-05, "loss": 0.4773, "step": 6053 }, { "epoch": 0.49, "grad_norm": 0.911707052115374, "learning_rate": 1.0744300774603914e-05, "loss": 0.5026, "step": 6054 }, { "epoch": 0.49, "grad_norm": 0.9979792020943502, "learning_rate": 1.0741675578228216e-05, "loss": 0.6148, "step": 6055 }, { "epoch": 0.49, "grad_norm": 0.9553815644356218, "learning_rate": 1.073905033045506e-05, "loss": 0.5726, "step": 6056 }, { "epoch": 0.49, "grad_norm": 0.9136612647564725, "learning_rate": 1.0736425031466369e-05, "loss": 0.5387, "step": 6057 }, { "epoch": 0.49, "grad_norm": 0.973117546702641, "learning_rate": 1.0733799681444077e-05, "loss": 0.5942, "step": 6058 }, { "epoch": 0.49, "grad_norm": 0.9243498996981262, "learning_rate": 1.073117428057012e-05, "loss": 0.5464, "step": 6059 }, { "epoch": 0.49, "grad_norm": 0.9654957940954922, "learning_rate": 1.0728548829026433e-05, "loss": 0.5123, "step": 6060 }, { "epoch": 0.49, "grad_norm": 0.8545999154255652, "learning_rate": 1.0725923326994958e-05, "loss": 0.502, "step": 6061 }, { "epoch": 0.49, "grad_norm": 0.9459939415916986, "learning_rate": 1.0723297774657642e-05, "loss": 0.6115, "step": 6062 }, { "epoch": 0.49, "grad_norm": 0.8484583387652496, "learning_rate": 1.0720672172196432e-05, "loss": 0.5328, "step": 6063 }, { "epoch": 0.49, "grad_norm": 0.827531492547263, "learning_rate": 1.0718046519793276e-05, "loss": 0.4541, "step": 6064 }, { "epoch": 0.49, "grad_norm": 0.8574412032251646, "learning_rate": 1.0715420817630137e-05, "loss": 0.4967, "step": 6065 }, { "epoch": 0.49, "grad_norm": 0.8550222014048962, "learning_rate": 1.0712795065888968e-05, "loss": 0.4633, "step": 6066 }, { "epoch": 0.49, "grad_norm": 0.9021045314787502, "learning_rate": 1.0710169264751733e-05, "loss": 0.5815, "step": 6067 }, { "epoch": 0.49, "grad_norm": 0.9894222824218805, "learning_rate": 1.0707543414400398e-05, "loss": 0.528, "step": 6068 }, { "epoch": 0.49, "grad_norm": 0.9728184096575895, "learning_rate": 1.0704917515016933e-05, "loss": 0.5035, "step": 6069 }, { "epoch": 0.49, "grad_norm": 0.9641583065696216, "learning_rate": 1.0702291566783307e-05, "loss": 0.5558, "step": 6070 }, { "epoch": 0.49, "grad_norm": 0.8391781018752039, "learning_rate": 1.0699665569881503e-05, "loss": 0.5121, "step": 6071 }, { "epoch": 0.49, "grad_norm": 0.8621647843592861, "learning_rate": 1.0697039524493492e-05, "loss": 0.5277, "step": 6072 }, { "epoch": 0.49, "grad_norm": 0.7567533685287966, "learning_rate": 1.069441343080126e-05, "loss": 0.4858, "step": 6073 }, { "epoch": 0.49, "grad_norm": 0.8648137761316245, "learning_rate": 1.0691787288986795e-05, "loss": 0.532, "step": 6074 }, { "epoch": 0.49, "grad_norm": 0.9042486379215297, "learning_rate": 1.0689161099232084e-05, "loss": 0.526, "step": 6075 }, { "epoch": 0.49, "grad_norm": 0.906851550856305, "learning_rate": 1.0686534861719118e-05, "loss": 0.5236, "step": 6076 }, { "epoch": 0.49, "grad_norm": 0.9131446706717494, "learning_rate": 1.06839085766299e-05, "loss": 0.4741, "step": 6077 }, { "epoch": 0.49, "grad_norm": 0.8710047907608442, "learning_rate": 1.068128224414642e-05, "loss": 0.5528, "step": 6078 }, { "epoch": 0.49, "grad_norm": 0.9268905719223248, "learning_rate": 1.0678655864450684e-05, "loss": 0.5913, "step": 6079 }, { "epoch": 0.49, "grad_norm": 0.9435191680227256, "learning_rate": 1.0676029437724703e-05, "loss": 0.5458, "step": 6080 }, { "epoch": 0.49, "grad_norm": 0.8724092493981516, "learning_rate": 1.0673402964150479e-05, "loss": 0.5784, "step": 6081 }, { "epoch": 0.49, "grad_norm": 0.8601665421495636, "learning_rate": 1.0670776443910024e-05, "loss": 0.5473, "step": 6082 }, { "epoch": 0.49, "grad_norm": 0.9680710017013635, "learning_rate": 1.0668149877185361e-05, "loss": 0.6032, "step": 6083 }, { "epoch": 0.49, "grad_norm": 0.9492639664421051, "learning_rate": 1.0665523264158501e-05, "loss": 0.5389, "step": 6084 }, { "epoch": 0.49, "grad_norm": 0.8455152863436715, "learning_rate": 1.0662896605011472e-05, "loss": 0.566, "step": 6085 }, { "epoch": 0.49, "grad_norm": 0.85243477804133, "learning_rate": 1.0660269899926296e-05, "loss": 0.5244, "step": 6086 }, { "epoch": 0.49, "grad_norm": 0.9435897995784096, "learning_rate": 1.0657643149084999e-05, "loss": 0.5268, "step": 6087 }, { "epoch": 0.49, "grad_norm": 0.9004172884401473, "learning_rate": 1.0655016352669616e-05, "loss": 0.5125, "step": 6088 }, { "epoch": 0.49, "grad_norm": 0.8696831235448563, "learning_rate": 1.0652389510862182e-05, "loss": 0.4969, "step": 6089 }, { "epoch": 0.49, "grad_norm": 0.8757254452087241, "learning_rate": 1.0649762623844733e-05, "loss": 0.5422, "step": 6090 }, { "epoch": 0.5, "grad_norm": 0.8533339193252824, "learning_rate": 1.064713569179931e-05, "loss": 0.49, "step": 6091 }, { "epoch": 0.5, "grad_norm": 0.929141233600302, "learning_rate": 1.064450871490796e-05, "loss": 0.5793, "step": 6092 }, { "epoch": 0.5, "grad_norm": 0.9853060175197331, "learning_rate": 1.0641881693352724e-05, "loss": 0.5836, "step": 6093 }, { "epoch": 0.5, "grad_norm": 0.8969091374661617, "learning_rate": 1.0639254627315658e-05, "loss": 0.572, "step": 6094 }, { "epoch": 0.5, "grad_norm": 0.9445842218624012, "learning_rate": 1.0636627516978815e-05, "loss": 0.5491, "step": 6095 }, { "epoch": 0.5, "grad_norm": 0.8521417788675616, "learning_rate": 1.0634000362524247e-05, "loss": 0.4567, "step": 6096 }, { "epoch": 0.5, "grad_norm": 0.8243800789666074, "learning_rate": 1.0631373164134015e-05, "loss": 0.5237, "step": 6097 }, { "epoch": 0.5, "grad_norm": 0.8972677822447758, "learning_rate": 1.0628745921990184e-05, "loss": 0.5347, "step": 6098 }, { "epoch": 0.5, "grad_norm": 0.9096471053000708, "learning_rate": 1.062611863627482e-05, "loss": 0.5355, "step": 6099 }, { "epoch": 0.5, "grad_norm": 0.8451782292679187, "learning_rate": 1.062349130716999e-05, "loss": 0.4855, "step": 6100 }, { "epoch": 0.5, "grad_norm": 0.8535857129649261, "learning_rate": 1.0620863934857764e-05, "loss": 0.5551, "step": 6101 }, { "epoch": 0.5, "grad_norm": 0.816157477591498, "learning_rate": 1.0618236519520219e-05, "loss": 0.501, "step": 6102 }, { "epoch": 0.5, "grad_norm": 1.0310068024642354, "learning_rate": 1.0615609061339431e-05, "loss": 0.6114, "step": 6103 }, { "epoch": 0.5, "grad_norm": 0.9423701264828378, "learning_rate": 1.061298156049748e-05, "loss": 0.5538, "step": 6104 }, { "epoch": 0.5, "grad_norm": 0.8732552391809214, "learning_rate": 1.061035401717645e-05, "loss": 0.5382, "step": 6105 }, { "epoch": 0.5, "grad_norm": 0.8797775091115982, "learning_rate": 1.0607726431558431e-05, "loss": 0.5089, "step": 6106 }, { "epoch": 0.5, "grad_norm": 0.9400596223756281, "learning_rate": 1.060509880382551e-05, "loss": 0.5566, "step": 6107 }, { "epoch": 0.5, "grad_norm": 0.8467700720411315, "learning_rate": 1.0602471134159773e-05, "loss": 0.5317, "step": 6108 }, { "epoch": 0.5, "grad_norm": 0.822881476771488, "learning_rate": 1.0599843422743328e-05, "loss": 0.505, "step": 6109 }, { "epoch": 0.5, "grad_norm": 0.945363657168463, "learning_rate": 1.059721566975826e-05, "loss": 0.5576, "step": 6110 }, { "epoch": 0.5, "grad_norm": 0.8550973491019075, "learning_rate": 1.0594587875386677e-05, "loss": 0.4685, "step": 6111 }, { "epoch": 0.5, "grad_norm": 0.8978924311350094, "learning_rate": 1.0591960039810684e-05, "loss": 0.5041, "step": 6112 }, { "epoch": 0.5, "grad_norm": 0.9659817672441282, "learning_rate": 1.0589332163212384e-05, "loss": 0.5497, "step": 6113 }, { "epoch": 0.5, "grad_norm": 0.9426550282970327, "learning_rate": 1.0586704245773886e-05, "loss": 0.5407, "step": 6114 }, { "epoch": 0.5, "grad_norm": 0.9234953615378141, "learning_rate": 1.0584076287677307e-05, "loss": 0.6009, "step": 6115 }, { "epoch": 0.5, "grad_norm": 0.8702469617913521, "learning_rate": 1.0581448289104759e-05, "loss": 0.493, "step": 6116 }, { "epoch": 0.5, "grad_norm": 0.9994597487559939, "learning_rate": 1.057882025023836e-05, "loss": 0.5883, "step": 6117 }, { "epoch": 0.5, "grad_norm": 0.918411318404071, "learning_rate": 1.0576192171260228e-05, "loss": 0.5317, "step": 6118 }, { "epoch": 0.5, "grad_norm": 0.9373647802629542, "learning_rate": 1.0573564052352496e-05, "loss": 0.5399, "step": 6119 }, { "epoch": 0.5, "grad_norm": 0.8050241018619839, "learning_rate": 1.0570935893697278e-05, "loss": 0.4442, "step": 6120 }, { "epoch": 0.5, "grad_norm": 0.9022679662284697, "learning_rate": 1.0568307695476712e-05, "loss": 0.5033, "step": 6121 }, { "epoch": 0.5, "grad_norm": 0.8846901204793555, "learning_rate": 1.0565679457872928e-05, "loss": 0.5182, "step": 6122 }, { "epoch": 0.5, "grad_norm": 1.0059532477165203, "learning_rate": 1.0563051181068056e-05, "loss": 0.5464, "step": 6123 }, { "epoch": 0.5, "grad_norm": 0.9327245880379895, "learning_rate": 1.0560422865244237e-05, "loss": 0.5133, "step": 6124 }, { "epoch": 0.5, "grad_norm": 0.8496071103162126, "learning_rate": 1.0557794510583611e-05, "loss": 0.4938, "step": 6125 }, { "epoch": 0.5, "grad_norm": 0.8298240451208877, "learning_rate": 1.0555166117268322e-05, "loss": 0.5385, "step": 6126 }, { "epoch": 0.5, "grad_norm": 0.934187978424719, "learning_rate": 1.0552537685480512e-05, "loss": 0.5143, "step": 6127 }, { "epoch": 0.5, "grad_norm": 0.8674548058152174, "learning_rate": 1.054990921540233e-05, "loss": 0.5794, "step": 6128 }, { "epoch": 0.5, "grad_norm": 1.0370693329998053, "learning_rate": 1.054728070721593e-05, "loss": 0.5977, "step": 6129 }, { "epoch": 0.5, "grad_norm": 0.8819244109215885, "learning_rate": 1.0544652161103459e-05, "loss": 0.5054, "step": 6130 }, { "epoch": 0.5, "grad_norm": 0.8960578144446292, "learning_rate": 1.0542023577247076e-05, "loss": 0.545, "step": 6131 }, { "epoch": 0.5, "grad_norm": 0.886580731545749, "learning_rate": 1.0539394955828944e-05, "loss": 0.5429, "step": 6132 }, { "epoch": 0.5, "grad_norm": 0.9374761540391363, "learning_rate": 1.0536766297031216e-05, "loss": 0.6374, "step": 6133 }, { "epoch": 0.5, "grad_norm": 0.9103004242142594, "learning_rate": 1.053413760103606e-05, "loss": 0.5159, "step": 6134 }, { "epoch": 0.5, "grad_norm": 0.8968055272655576, "learning_rate": 1.0531508868025647e-05, "loss": 0.5283, "step": 6135 }, { "epoch": 0.5, "grad_norm": 0.970055594503045, "learning_rate": 1.0528880098182136e-05, "loss": 0.5435, "step": 6136 }, { "epoch": 0.5, "grad_norm": 0.9389369412572518, "learning_rate": 1.0526251291687703e-05, "loss": 0.5747, "step": 6137 }, { "epoch": 0.5, "grad_norm": 0.8078187197158875, "learning_rate": 1.0523622448724524e-05, "loss": 0.5405, "step": 6138 }, { "epoch": 0.5, "grad_norm": 0.9190980092900863, "learning_rate": 1.0520993569474773e-05, "loss": 0.5321, "step": 6139 }, { "epoch": 0.5, "grad_norm": 0.958215322494185, "learning_rate": 1.051836465412063e-05, "loss": 0.5699, "step": 6140 }, { "epoch": 0.5, "grad_norm": 0.8721689328952731, "learning_rate": 1.051573570284428e-05, "loss": 0.6109, "step": 6141 }, { "epoch": 0.5, "grad_norm": 1.0429424389942195, "learning_rate": 1.0513106715827897e-05, "loss": 0.5949, "step": 6142 }, { "epoch": 0.5, "grad_norm": 0.8826173376580875, "learning_rate": 1.0510477693253676e-05, "loss": 0.4822, "step": 6143 }, { "epoch": 0.5, "grad_norm": 0.9558074179782857, "learning_rate": 1.0507848635303805e-05, "loss": 0.5409, "step": 6144 }, { "epoch": 0.5, "grad_norm": 1.0047521941779742, "learning_rate": 1.0505219542160474e-05, "loss": 0.5804, "step": 6145 }, { "epoch": 0.5, "grad_norm": 1.0319590843517332, "learning_rate": 1.0502590414005875e-05, "loss": 0.5394, "step": 6146 }, { "epoch": 0.5, "grad_norm": 0.8462689043839046, "learning_rate": 1.0499961251022208e-05, "loss": 0.5235, "step": 6147 }, { "epoch": 0.5, "grad_norm": 0.9645087832836281, "learning_rate": 1.049733205339167e-05, "loss": 0.5744, "step": 6148 }, { "epoch": 0.5, "grad_norm": 0.9931263205618819, "learning_rate": 1.0494702821296458e-05, "loss": 0.5615, "step": 6149 }, { "epoch": 0.5, "grad_norm": 0.8977359164608016, "learning_rate": 1.0492073554918782e-05, "loss": 0.5124, "step": 6150 }, { "epoch": 0.5, "grad_norm": 0.9580917800214114, "learning_rate": 1.0489444254440846e-05, "loss": 0.556, "step": 6151 }, { "epoch": 0.5, "grad_norm": 0.9868313720168621, "learning_rate": 1.0486814920044857e-05, "loss": 0.6069, "step": 6152 }, { "epoch": 0.5, "grad_norm": 0.9786912759940932, "learning_rate": 1.0484185551913027e-05, "loss": 0.5963, "step": 6153 }, { "epoch": 0.5, "grad_norm": 0.8802270262465605, "learning_rate": 1.0481556150227562e-05, "loss": 0.4737, "step": 6154 }, { "epoch": 0.5, "grad_norm": 0.9904658437694336, "learning_rate": 1.0478926715170687e-05, "loss": 0.5483, "step": 6155 }, { "epoch": 0.5, "grad_norm": 0.922169756232924, "learning_rate": 1.0476297246924619e-05, "loss": 0.5358, "step": 6156 }, { "epoch": 0.5, "grad_norm": 0.9532812373990939, "learning_rate": 1.047366774567157e-05, "loss": 0.4799, "step": 6157 }, { "epoch": 0.5, "grad_norm": 0.9210650459076102, "learning_rate": 1.0471038211593764e-05, "loss": 0.5301, "step": 6158 }, { "epoch": 0.5, "grad_norm": 0.7225895551992169, "learning_rate": 1.0468408644873433e-05, "loss": 0.4377, "step": 6159 }, { "epoch": 0.5, "grad_norm": 0.9545247474276838, "learning_rate": 1.0465779045692796e-05, "loss": 0.564, "step": 6160 }, { "epoch": 0.5, "grad_norm": 0.9428918239243667, "learning_rate": 1.0463149414234084e-05, "loss": 0.5187, "step": 6161 }, { "epoch": 0.5, "grad_norm": 0.9060269955149981, "learning_rate": 1.046051975067953e-05, "loss": 0.5088, "step": 6162 }, { "epoch": 0.5, "grad_norm": 0.9264511973693403, "learning_rate": 1.0457890055211364e-05, "loss": 0.5766, "step": 6163 }, { "epoch": 0.5, "grad_norm": 0.899604789095249, "learning_rate": 1.0455260328011822e-05, "loss": 0.516, "step": 6164 }, { "epoch": 0.5, "grad_norm": 0.9773994581290497, "learning_rate": 1.0452630569263147e-05, "loss": 0.6503, "step": 6165 }, { "epoch": 0.5, "grad_norm": 0.9720121111533556, "learning_rate": 1.0450000779147573e-05, "loss": 0.5527, "step": 6166 }, { "epoch": 0.5, "grad_norm": 0.9737789980852884, "learning_rate": 1.0447370957847343e-05, "loss": 0.5923, "step": 6167 }, { "epoch": 0.5, "grad_norm": 0.8662034958602722, "learning_rate": 1.0444741105544705e-05, "loss": 0.5748, "step": 6168 }, { "epoch": 0.5, "grad_norm": 0.9015025314707872, "learning_rate": 1.04421112224219e-05, "loss": 0.5418, "step": 6169 }, { "epoch": 0.5, "grad_norm": 0.8663857088672641, "learning_rate": 1.0439481308661181e-05, "loss": 0.515, "step": 6170 }, { "epoch": 0.5, "grad_norm": 0.9607331905556366, "learning_rate": 1.0436851364444798e-05, "loss": 0.5785, "step": 6171 }, { "epoch": 0.5, "grad_norm": 0.8698730300041885, "learning_rate": 1.0434221389955002e-05, "loss": 0.4768, "step": 6172 }, { "epoch": 0.5, "grad_norm": 0.9302907035593343, "learning_rate": 1.0431591385374047e-05, "loss": 0.5422, "step": 6173 }, { "epoch": 0.5, "grad_norm": 0.8590944033288993, "learning_rate": 1.0428961350884194e-05, "loss": 0.5405, "step": 6174 }, { "epoch": 0.5, "grad_norm": 0.8604175708088144, "learning_rate": 1.0426331286667701e-05, "loss": 0.5365, "step": 6175 }, { "epoch": 0.5, "grad_norm": 0.8882444903167186, "learning_rate": 1.0423701192906825e-05, "loss": 0.5045, "step": 6176 }, { "epoch": 0.5, "grad_norm": 0.8505136672359528, "learning_rate": 1.0421071069783834e-05, "loss": 0.4987, "step": 6177 }, { "epoch": 0.5, "grad_norm": 0.8571941770141829, "learning_rate": 1.0418440917480992e-05, "loss": 0.5007, "step": 6178 }, { "epoch": 0.5, "grad_norm": 0.9102254227045845, "learning_rate": 1.0415810736180563e-05, "loss": 0.5232, "step": 6179 }, { "epoch": 0.5, "grad_norm": 0.7864199735738805, "learning_rate": 1.0413180526064824e-05, "loss": 0.4712, "step": 6180 }, { "epoch": 0.5, "grad_norm": 0.8072441676513875, "learning_rate": 1.0410550287316035e-05, "loss": 0.5144, "step": 6181 }, { "epoch": 0.5, "grad_norm": 0.850410157333761, "learning_rate": 1.0407920020116477e-05, "loss": 0.5598, "step": 6182 }, { "epoch": 0.5, "grad_norm": 0.9830887682458233, "learning_rate": 1.0405289724648425e-05, "loss": 0.5918, "step": 6183 }, { "epoch": 0.5, "grad_norm": 0.9114690900097432, "learning_rate": 1.0402659401094154e-05, "loss": 0.525, "step": 6184 }, { "epoch": 0.5, "grad_norm": 0.8534813523391873, "learning_rate": 1.0400029049635942e-05, "loss": 0.4923, "step": 6185 }, { "epoch": 0.5, "grad_norm": 0.877842763272256, "learning_rate": 1.039739867045607e-05, "loss": 0.5255, "step": 6186 }, { "epoch": 0.5, "grad_norm": 1.0236133388921236, "learning_rate": 1.039476826373683e-05, "loss": 0.5916, "step": 6187 }, { "epoch": 0.5, "grad_norm": 0.8590065023812046, "learning_rate": 1.0392137829660494e-05, "loss": 0.4965, "step": 6188 }, { "epoch": 0.5, "grad_norm": 0.8639027295230077, "learning_rate": 1.0389507368409356e-05, "loss": 0.5561, "step": 6189 }, { "epoch": 0.5, "grad_norm": 0.8836484623587807, "learning_rate": 1.0386876880165701e-05, "loss": 0.5385, "step": 6190 }, { "epoch": 0.5, "grad_norm": 0.8632103809318213, "learning_rate": 1.0384246365111823e-05, "loss": 0.5109, "step": 6191 }, { "epoch": 0.5, "grad_norm": 0.9279412725843376, "learning_rate": 1.0381615823430012e-05, "loss": 0.4993, "step": 6192 }, { "epoch": 0.5, "grad_norm": 0.8684010140359392, "learning_rate": 1.0378985255302565e-05, "loss": 0.5207, "step": 6193 }, { "epoch": 0.5, "grad_norm": 0.8880388535422927, "learning_rate": 1.0376354660911772e-05, "loss": 0.5221, "step": 6194 }, { "epoch": 0.5, "grad_norm": 1.0301038902267186, "learning_rate": 1.0373724040439936e-05, "loss": 0.5964, "step": 6195 }, { "epoch": 0.5, "grad_norm": 0.7284968100578956, "learning_rate": 1.0371093394069359e-05, "loss": 0.4624, "step": 6196 }, { "epoch": 0.5, "grad_norm": 0.859546061851474, "learning_rate": 1.0368462721982336e-05, "loss": 0.4492, "step": 6197 }, { "epoch": 0.5, "grad_norm": 0.8540284446590735, "learning_rate": 1.0365832024361173e-05, "loss": 0.5058, "step": 6198 }, { "epoch": 0.5, "grad_norm": 0.9311871787223283, "learning_rate": 1.0363201301388177e-05, "loss": 0.5237, "step": 6199 }, { "epoch": 0.5, "grad_norm": 0.8783458652186021, "learning_rate": 1.036057055324565e-05, "loss": 0.5155, "step": 6200 }, { "epoch": 0.5, "grad_norm": 0.9133086616533669, "learning_rate": 1.0357939780115906e-05, "loss": 0.5134, "step": 6201 }, { "epoch": 0.5, "grad_norm": 0.93527568205224, "learning_rate": 1.0355308982181254e-05, "loss": 0.5286, "step": 6202 }, { "epoch": 0.5, "grad_norm": 0.9748174070640031, "learning_rate": 1.0352678159624e-05, "loss": 0.4804, "step": 6203 }, { "epoch": 0.5, "grad_norm": 0.8304324868783131, "learning_rate": 1.0350047312626465e-05, "loss": 0.5307, "step": 6204 }, { "epoch": 0.5, "grad_norm": 0.737183332887448, "learning_rate": 1.0347416441370963e-05, "loss": 0.4458, "step": 6205 }, { "epoch": 0.5, "grad_norm": 0.8193594305614337, "learning_rate": 1.0344785546039808e-05, "loss": 0.4981, "step": 6206 }, { "epoch": 0.5, "grad_norm": 0.9325176197238431, "learning_rate": 1.0342154626815321e-05, "loss": 0.5829, "step": 6207 }, { "epoch": 0.5, "grad_norm": 0.9374309225936001, "learning_rate": 1.0339523683879824e-05, "loss": 0.5537, "step": 6208 }, { "epoch": 0.5, "grad_norm": 0.8479987941359138, "learning_rate": 1.0336892717415635e-05, "loss": 0.5356, "step": 6209 }, { "epoch": 0.5, "grad_norm": 0.7980171217781268, "learning_rate": 1.0334261727605076e-05, "loss": 0.5199, "step": 6210 }, { "epoch": 0.5, "grad_norm": 0.8531021371446437, "learning_rate": 1.0331630714630481e-05, "loss": 0.5068, "step": 6211 }, { "epoch": 0.5, "grad_norm": 0.8625990920480998, "learning_rate": 1.032899967867417e-05, "loss": 0.4964, "step": 6212 }, { "epoch": 0.5, "grad_norm": 0.9672841283305325, "learning_rate": 1.0326368619918471e-05, "loss": 0.5782, "step": 6213 }, { "epoch": 0.51, "grad_norm": 1.0735602156729935, "learning_rate": 1.032373753854572e-05, "loss": 0.5248, "step": 6214 }, { "epoch": 0.51, "grad_norm": 0.9113137478830905, "learning_rate": 1.0321106434738242e-05, "loss": 0.569, "step": 6215 }, { "epoch": 0.51, "grad_norm": 0.811414298943247, "learning_rate": 1.0318475308678374e-05, "loss": 0.5266, "step": 6216 }, { "epoch": 0.51, "grad_norm": 0.9225312761086015, "learning_rate": 1.031584416054845e-05, "loss": 0.5044, "step": 6217 }, { "epoch": 0.51, "grad_norm": 0.8248517090127305, "learning_rate": 1.0313212990530804e-05, "loss": 0.4785, "step": 6218 }, { "epoch": 0.51, "grad_norm": 0.9024948796769673, "learning_rate": 1.0310581798807776e-05, "loss": 0.5658, "step": 6219 }, { "epoch": 0.51, "grad_norm": 0.9641619822749372, "learning_rate": 1.0307950585561705e-05, "loss": 0.5675, "step": 6220 }, { "epoch": 0.51, "grad_norm": 0.8721024784016015, "learning_rate": 1.0305319350974932e-05, "loss": 0.5326, "step": 6221 }, { "epoch": 0.51, "grad_norm": 0.9505797840996292, "learning_rate": 1.0302688095229798e-05, "loss": 0.5678, "step": 6222 }, { "epoch": 0.51, "grad_norm": 0.8774987959531362, "learning_rate": 1.030005681850865e-05, "loss": 0.5067, "step": 6223 }, { "epoch": 0.51, "grad_norm": 0.9024372804219885, "learning_rate": 1.0297425520993829e-05, "loss": 0.5454, "step": 6224 }, { "epoch": 0.51, "grad_norm": 0.9296782390888202, "learning_rate": 1.0294794202867681e-05, "loss": 0.5238, "step": 6225 }, { "epoch": 0.51, "grad_norm": 0.8898639473811754, "learning_rate": 1.029216286431256e-05, "loss": 0.5284, "step": 6226 }, { "epoch": 0.51, "grad_norm": 0.9559196004726771, "learning_rate": 1.028953150551081e-05, "loss": 0.606, "step": 6227 }, { "epoch": 0.51, "grad_norm": 0.9758923851832058, "learning_rate": 1.0286900126644783e-05, "loss": 0.6068, "step": 6228 }, { "epoch": 0.51, "grad_norm": 0.8429290545153291, "learning_rate": 1.0284268727896833e-05, "loss": 0.5196, "step": 6229 }, { "epoch": 0.51, "grad_norm": 0.9241137844044234, "learning_rate": 1.028163730944931e-05, "loss": 0.5021, "step": 6230 }, { "epoch": 0.51, "grad_norm": 0.8844502814224067, "learning_rate": 1.0279005871484572e-05, "loss": 0.5981, "step": 6231 }, { "epoch": 0.51, "grad_norm": 0.7940233993428926, "learning_rate": 1.0276374414184977e-05, "loss": 0.4836, "step": 6232 }, { "epoch": 0.51, "grad_norm": 0.8540121985206928, "learning_rate": 1.0273742937732877e-05, "loss": 0.5212, "step": 6233 }, { "epoch": 0.51, "grad_norm": 0.8411679107015873, "learning_rate": 1.0271111442310638e-05, "loss": 0.4643, "step": 6234 }, { "epoch": 0.51, "grad_norm": 0.843190016884267, "learning_rate": 1.0268479928100615e-05, "loss": 0.4889, "step": 6235 }, { "epoch": 0.51, "grad_norm": 0.9298687033475067, "learning_rate": 1.026584839528517e-05, "loss": 0.4891, "step": 6236 }, { "epoch": 0.51, "grad_norm": 0.8314982900576823, "learning_rate": 1.0263216844046666e-05, "loss": 0.5256, "step": 6237 }, { "epoch": 0.51, "grad_norm": 0.9635576321688502, "learning_rate": 1.026058527456747e-05, "loss": 0.5927, "step": 6238 }, { "epoch": 0.51, "grad_norm": 0.9399777954113789, "learning_rate": 1.0257953687029945e-05, "loss": 0.5366, "step": 6239 }, { "epoch": 0.51, "grad_norm": 0.9062288398497633, "learning_rate": 1.0255322081616456e-05, "loss": 0.4792, "step": 6240 }, { "epoch": 0.51, "grad_norm": 0.9000858459834027, "learning_rate": 1.025269045850938e-05, "loss": 0.4799, "step": 6241 }, { "epoch": 0.51, "grad_norm": 0.9559802820060481, "learning_rate": 1.0250058817891074e-05, "loss": 0.5548, "step": 6242 }, { "epoch": 0.51, "grad_norm": 0.9062852807311451, "learning_rate": 1.0247427159943912e-05, "loss": 0.4895, "step": 6243 }, { "epoch": 0.51, "grad_norm": 0.9464049844786903, "learning_rate": 1.0244795484850272e-05, "loss": 0.5534, "step": 6244 }, { "epoch": 0.51, "grad_norm": 0.8847921545422844, "learning_rate": 1.024216379279252e-05, "loss": 0.5079, "step": 6245 }, { "epoch": 0.51, "grad_norm": 0.8978136301515469, "learning_rate": 1.0239532083953032e-05, "loss": 0.5608, "step": 6246 }, { "epoch": 0.51, "grad_norm": 0.9463582730856492, "learning_rate": 1.0236900358514181e-05, "loss": 0.5277, "step": 6247 }, { "epoch": 0.51, "grad_norm": 0.8067583675753313, "learning_rate": 1.023426861665835e-05, "loss": 0.4961, "step": 6248 }, { "epoch": 0.51, "grad_norm": 0.8883255946762623, "learning_rate": 1.0231636858567909e-05, "loss": 0.5654, "step": 6249 }, { "epoch": 0.51, "grad_norm": 0.9190681551860245, "learning_rate": 1.022900508442524e-05, "loss": 0.562, "step": 6250 }, { "epoch": 0.51, "grad_norm": 0.9494150603533084, "learning_rate": 1.0226373294412718e-05, "loss": 0.5676, "step": 6251 }, { "epoch": 0.51, "grad_norm": 0.9061517922527588, "learning_rate": 1.0223741488712732e-05, "loss": 0.5428, "step": 6252 }, { "epoch": 0.51, "grad_norm": 0.9275782262305359, "learning_rate": 1.0221109667507656e-05, "loss": 0.5184, "step": 6253 }, { "epoch": 0.51, "grad_norm": 0.9189579629915223, "learning_rate": 1.0218477830979878e-05, "loss": 0.5439, "step": 6254 }, { "epoch": 0.51, "grad_norm": 0.9182833448414004, "learning_rate": 1.0215845979311783e-05, "loss": 0.5327, "step": 6255 }, { "epoch": 0.51, "grad_norm": 0.8902344636216859, "learning_rate": 1.0213214112685747e-05, "loss": 0.4828, "step": 6256 }, { "epoch": 0.51, "grad_norm": 0.897277648623057, "learning_rate": 1.0210582231284165e-05, "loss": 0.5015, "step": 6257 }, { "epoch": 0.51, "grad_norm": 0.9182502828960334, "learning_rate": 1.0207950335289423e-05, "loss": 0.552, "step": 6258 }, { "epoch": 0.51, "grad_norm": 0.885910996981317, "learning_rate": 1.0205318424883906e-05, "loss": 0.4874, "step": 6259 }, { "epoch": 0.51, "grad_norm": 0.922634853593885, "learning_rate": 1.0202686500250003e-05, "loss": 0.529, "step": 6260 }, { "epoch": 0.51, "grad_norm": 0.8720759553847209, "learning_rate": 1.0200054561570108e-05, "loss": 0.5301, "step": 6261 }, { "epoch": 0.51, "grad_norm": 0.9239527432654803, "learning_rate": 1.0197422609026606e-05, "loss": 0.5539, "step": 6262 }, { "epoch": 0.51, "grad_norm": 0.8890671690015547, "learning_rate": 1.0194790642801893e-05, "loss": 0.5476, "step": 6263 }, { "epoch": 0.51, "grad_norm": 0.8569505481042301, "learning_rate": 1.0192158663078362e-05, "loss": 0.5216, "step": 6264 }, { "epoch": 0.51, "grad_norm": 0.8738179518705772, "learning_rate": 1.0189526670038407e-05, "loss": 0.5254, "step": 6265 }, { "epoch": 0.51, "grad_norm": 0.8517879706535724, "learning_rate": 1.0186894663864421e-05, "loss": 0.5613, "step": 6266 }, { "epoch": 0.51, "grad_norm": 0.9467237412597023, "learning_rate": 1.01842626447388e-05, "loss": 0.565, "step": 6267 }, { "epoch": 0.51, "grad_norm": 0.9372933446510155, "learning_rate": 1.0181630612843943e-05, "loss": 0.5869, "step": 6268 }, { "epoch": 0.51, "grad_norm": 0.8379813367737269, "learning_rate": 1.0178998568362243e-05, "loss": 0.4953, "step": 6269 }, { "epoch": 0.51, "grad_norm": 0.8807616083485671, "learning_rate": 1.0176366511476102e-05, "loss": 0.5605, "step": 6270 }, { "epoch": 0.51, "grad_norm": 0.909667415705031, "learning_rate": 1.0173734442367919e-05, "loss": 0.5494, "step": 6271 }, { "epoch": 0.51, "grad_norm": 0.9180665855259821, "learning_rate": 1.0171102361220093e-05, "loss": 0.5776, "step": 6272 }, { "epoch": 0.51, "grad_norm": 0.8866368103793839, "learning_rate": 1.0168470268215025e-05, "loss": 0.5162, "step": 6273 }, { "epoch": 0.51, "grad_norm": 0.9331207432565604, "learning_rate": 1.0165838163535115e-05, "loss": 0.5799, "step": 6274 }, { "epoch": 0.51, "grad_norm": 0.9299756848302255, "learning_rate": 1.0163206047362773e-05, "loss": 0.5631, "step": 6275 }, { "epoch": 0.51, "grad_norm": 0.9153501532529058, "learning_rate": 1.016057391988039e-05, "loss": 0.5746, "step": 6276 }, { "epoch": 0.51, "grad_norm": 0.9106064921432052, "learning_rate": 1.015794178127038e-05, "loss": 0.4931, "step": 6277 }, { "epoch": 0.51, "grad_norm": 0.8583263808738999, "learning_rate": 1.0155309631715145e-05, "loss": 0.5349, "step": 6278 }, { "epoch": 0.51, "grad_norm": 0.9011920327335722, "learning_rate": 1.015267747139709e-05, "loss": 0.5075, "step": 6279 }, { "epoch": 0.51, "grad_norm": 0.8793178650201761, "learning_rate": 1.0150045300498618e-05, "loss": 0.5156, "step": 6280 }, { "epoch": 0.51, "grad_norm": 0.8115799752291708, "learning_rate": 1.0147413119202145e-05, "loss": 0.4846, "step": 6281 }, { "epoch": 0.51, "grad_norm": 0.9177489139524344, "learning_rate": 1.0144780927690072e-05, "loss": 0.5457, "step": 6282 }, { "epoch": 0.51, "grad_norm": 0.8929122588141614, "learning_rate": 1.0142148726144807e-05, "loss": 0.5309, "step": 6283 }, { "epoch": 0.51, "grad_norm": 0.804018811198706, "learning_rate": 1.0139516514748767e-05, "loss": 0.4608, "step": 6284 }, { "epoch": 0.51, "grad_norm": 0.9347609164966086, "learning_rate": 1.013688429368435e-05, "loss": 0.4832, "step": 6285 }, { "epoch": 0.51, "grad_norm": 0.9217079376541866, "learning_rate": 1.0134252063133976e-05, "loss": 0.5749, "step": 6286 }, { "epoch": 0.51, "grad_norm": 0.8506565309327903, "learning_rate": 1.0131619823280053e-05, "loss": 0.5394, "step": 6287 }, { "epoch": 0.51, "grad_norm": 0.8415600271925963, "learning_rate": 1.0128987574304991e-05, "loss": 0.5208, "step": 6288 }, { "epoch": 0.51, "grad_norm": 0.8758726806842629, "learning_rate": 1.0126355316391206e-05, "loss": 0.5892, "step": 6289 }, { "epoch": 0.51, "grad_norm": 0.9039543673993381, "learning_rate": 1.012372304972111e-05, "loss": 0.572, "step": 6290 }, { "epoch": 0.51, "grad_norm": 0.8837831665094975, "learning_rate": 1.0121090774477116e-05, "loss": 0.5164, "step": 6291 }, { "epoch": 0.51, "grad_norm": 0.7969372582312041, "learning_rate": 1.0118458490841639e-05, "loss": 0.4687, "step": 6292 }, { "epoch": 0.51, "grad_norm": 0.9144472654901419, "learning_rate": 1.0115826198997094e-05, "loss": 0.4895, "step": 6293 }, { "epoch": 0.51, "grad_norm": 0.8546462905815698, "learning_rate": 1.0113193899125895e-05, "loss": 0.4768, "step": 6294 }, { "epoch": 0.51, "grad_norm": 0.8571567901864389, "learning_rate": 1.0110561591410456e-05, "loss": 0.5441, "step": 6295 }, { "epoch": 0.51, "grad_norm": 0.8062069436489295, "learning_rate": 1.0107929276033204e-05, "loss": 0.4905, "step": 6296 }, { "epoch": 0.51, "grad_norm": 0.8690171443782525, "learning_rate": 1.0105296953176544e-05, "loss": 0.4848, "step": 6297 }, { "epoch": 0.51, "grad_norm": 0.8929739364675837, "learning_rate": 1.01026646230229e-05, "loss": 0.5132, "step": 6298 }, { "epoch": 0.51, "grad_norm": 0.8985021936347624, "learning_rate": 1.010003228575469e-05, "loss": 0.5021, "step": 6299 }, { "epoch": 0.51, "grad_norm": 0.8734985699165543, "learning_rate": 1.009739994155433e-05, "loss": 0.4902, "step": 6300 }, { "epoch": 0.51, "grad_norm": 0.9145764779595406, "learning_rate": 1.0094767590604238e-05, "loss": 0.5366, "step": 6301 }, { "epoch": 0.51, "grad_norm": 0.938111655768843, "learning_rate": 1.009213523308684e-05, "loss": 0.5352, "step": 6302 }, { "epoch": 0.51, "grad_norm": 0.9187062301830011, "learning_rate": 1.0089502869184549e-05, "loss": 0.5064, "step": 6303 }, { "epoch": 0.51, "grad_norm": 0.8909106006968533, "learning_rate": 1.0086870499079791e-05, "loss": 0.526, "step": 6304 }, { "epoch": 0.51, "grad_norm": 0.9891112088673406, "learning_rate": 1.0084238122954984e-05, "loss": 0.4361, "step": 6305 }, { "epoch": 0.51, "grad_norm": 0.9986087397097535, "learning_rate": 1.0081605740992548e-05, "loss": 0.5481, "step": 6306 }, { "epoch": 0.51, "grad_norm": 0.9478586520651313, "learning_rate": 1.0078973353374908e-05, "loss": 0.5567, "step": 6307 }, { "epoch": 0.51, "grad_norm": 0.889552514423573, "learning_rate": 1.0076340960284483e-05, "loss": 0.5208, "step": 6308 }, { "epoch": 0.51, "grad_norm": 0.8643499230620482, "learning_rate": 1.0073708561903702e-05, "loss": 0.5414, "step": 6309 }, { "epoch": 0.51, "grad_norm": 0.976399247172449, "learning_rate": 1.0071076158414977e-05, "loss": 0.6492, "step": 6310 }, { "epoch": 0.51, "grad_norm": 0.9571144105482331, "learning_rate": 1.006844375000074e-05, "loss": 0.6006, "step": 6311 }, { "epoch": 0.51, "grad_norm": 0.8677112699372111, "learning_rate": 1.0065811336843412e-05, "loss": 0.5269, "step": 6312 }, { "epoch": 0.51, "grad_norm": 0.8705189403238094, "learning_rate": 1.0063178919125416e-05, "loss": 0.5816, "step": 6313 }, { "epoch": 0.51, "grad_norm": 0.936745039273025, "learning_rate": 1.0060546497029178e-05, "loss": 0.519, "step": 6314 }, { "epoch": 0.51, "grad_norm": 1.2267196954416242, "learning_rate": 1.0057914070737123e-05, "loss": 0.6564, "step": 6315 }, { "epoch": 0.51, "grad_norm": 0.8408749871775812, "learning_rate": 1.0055281640431669e-05, "loss": 0.5499, "step": 6316 }, { "epoch": 0.51, "grad_norm": 0.9485173287734637, "learning_rate": 1.005264920629525e-05, "loss": 0.5244, "step": 6317 }, { "epoch": 0.51, "grad_norm": 0.8744806225444277, "learning_rate": 1.0050016768510288e-05, "loss": 0.4819, "step": 6318 }, { "epoch": 0.51, "grad_norm": 0.9837730314596066, "learning_rate": 1.0047384327259207e-05, "loss": 0.5324, "step": 6319 }, { "epoch": 0.51, "grad_norm": 0.9897169442511425, "learning_rate": 1.0044751882724436e-05, "loss": 0.5116, "step": 6320 }, { "epoch": 0.51, "grad_norm": 0.8355025962859695, "learning_rate": 1.0042119435088397e-05, "loss": 0.5085, "step": 6321 }, { "epoch": 0.51, "grad_norm": 0.8822374522103449, "learning_rate": 1.003948698453352e-05, "loss": 0.5093, "step": 6322 }, { "epoch": 0.51, "grad_norm": 0.8868032716019236, "learning_rate": 1.0036854531242228e-05, "loss": 0.5216, "step": 6323 }, { "epoch": 0.51, "grad_norm": 1.0298087875025894, "learning_rate": 1.0034222075396954e-05, "loss": 0.5794, "step": 6324 }, { "epoch": 0.51, "grad_norm": 0.859260370043238, "learning_rate": 1.0031589617180115e-05, "loss": 0.4686, "step": 6325 }, { "epoch": 0.51, "grad_norm": 0.8158325936763585, "learning_rate": 1.0028957156774146e-05, "loss": 0.4922, "step": 6326 }, { "epoch": 0.51, "grad_norm": 0.9242530488088277, "learning_rate": 1.0026324694361474e-05, "loss": 0.5227, "step": 6327 }, { "epoch": 0.51, "grad_norm": 1.0280646267210969, "learning_rate": 1.002369223012452e-05, "loss": 0.5422, "step": 6328 }, { "epoch": 0.51, "grad_norm": 0.848472981714665, "learning_rate": 1.0021059764245718e-05, "loss": 0.514, "step": 6329 }, { "epoch": 0.51, "grad_norm": 0.8569811344917727, "learning_rate": 1.0018427296907494e-05, "loss": 0.5174, "step": 6330 }, { "epoch": 0.51, "grad_norm": 1.042013096707966, "learning_rate": 1.001579482829227e-05, "loss": 0.5577, "step": 6331 }, { "epoch": 0.51, "grad_norm": 1.0091968409285097, "learning_rate": 1.0013162358582483e-05, "loss": 0.5897, "step": 6332 }, { "epoch": 0.51, "grad_norm": 0.8723519274260123, "learning_rate": 1.0010529887960554e-05, "loss": 0.4901, "step": 6333 }, { "epoch": 0.51, "grad_norm": 0.9537740574635022, "learning_rate": 1.0007897416608914e-05, "loss": 0.5724, "step": 6334 }, { "epoch": 0.51, "grad_norm": 0.9789588220022455, "learning_rate": 1.0005264944709989e-05, "loss": 0.5557, "step": 6335 }, { "epoch": 0.51, "grad_norm": 0.9737723535686139, "learning_rate": 1.000263247244621e-05, "loss": 0.5868, "step": 6336 }, { "epoch": 0.52, "grad_norm": 0.9126233242598474, "learning_rate": 1e-05, "loss": 0.5436, "step": 6337 }, { "epoch": 0.52, "grad_norm": 0.9065250250113098, "learning_rate": 9.997367527553795e-06, "loss": 0.5473, "step": 6338 }, { "epoch": 0.52, "grad_norm": 0.7824478859233489, "learning_rate": 9.994735055290011e-06, "loss": 0.4108, "step": 6339 }, { "epoch": 0.52, "grad_norm": 0.9394386325341539, "learning_rate": 9.992102583391089e-06, "loss": 0.5323, "step": 6340 }, { "epoch": 0.52, "grad_norm": 0.9901071026908101, "learning_rate": 9.98947011203945e-06, "loss": 0.5428, "step": 6341 }, { "epoch": 0.52, "grad_norm": 0.8936489106322111, "learning_rate": 9.986837641417519e-06, "loss": 0.5059, "step": 6342 }, { "epoch": 0.52, "grad_norm": 0.8990739204643596, "learning_rate": 9.984205171707731e-06, "loss": 0.5022, "step": 6343 }, { "epoch": 0.52, "grad_norm": 0.9451398467900175, "learning_rate": 9.981572703092513e-06, "loss": 0.5639, "step": 6344 }, { "epoch": 0.52, "grad_norm": 0.9201894358093854, "learning_rate": 9.978940235754283e-06, "loss": 0.5315, "step": 6345 }, { "epoch": 0.52, "grad_norm": 0.9482135020604635, "learning_rate": 9.976307769875483e-06, "loss": 0.5399, "step": 6346 }, { "epoch": 0.52, "grad_norm": 0.9473339865184615, "learning_rate": 9.973675305638531e-06, "loss": 0.5061, "step": 6347 }, { "epoch": 0.52, "grad_norm": 0.9599685256552569, "learning_rate": 9.971042843225856e-06, "loss": 0.5604, "step": 6348 }, { "epoch": 0.52, "grad_norm": 0.8559541252886981, "learning_rate": 9.968410382819888e-06, "loss": 0.5574, "step": 6349 }, { "epoch": 0.52, "grad_norm": 0.8868070586988529, "learning_rate": 9.965777924603053e-06, "loss": 0.5052, "step": 6350 }, { "epoch": 0.52, "grad_norm": 0.9358125147616042, "learning_rate": 9.963145468757773e-06, "loss": 0.5475, "step": 6351 }, { "epoch": 0.52, "grad_norm": 0.9305033132415802, "learning_rate": 9.960513015466484e-06, "loss": 0.5354, "step": 6352 }, { "epoch": 0.52, "grad_norm": 0.9508925128715102, "learning_rate": 9.957880564911608e-06, "loss": 0.5395, "step": 6353 }, { "epoch": 0.52, "grad_norm": 0.8046306445228953, "learning_rate": 9.955248117275566e-06, "loss": 0.4776, "step": 6354 }, { "epoch": 0.52, "grad_norm": 0.8810642297736707, "learning_rate": 9.952615672740795e-06, "loss": 0.465, "step": 6355 }, { "epoch": 0.52, "grad_norm": 0.8271357839574274, "learning_rate": 9.949983231489717e-06, "loss": 0.5456, "step": 6356 }, { "epoch": 0.52, "grad_norm": 0.9932580719434206, "learning_rate": 9.947350793704751e-06, "loss": 0.5465, "step": 6357 }, { "epoch": 0.52, "grad_norm": 0.8911737003658028, "learning_rate": 9.944718359568333e-06, "loss": 0.4734, "step": 6358 }, { "epoch": 0.52, "grad_norm": 0.9414596037214545, "learning_rate": 9.942085929262884e-06, "loss": 0.5159, "step": 6359 }, { "epoch": 0.52, "grad_norm": 0.9720304123433685, "learning_rate": 9.939453502970824e-06, "loss": 0.5785, "step": 6360 }, { "epoch": 0.52, "grad_norm": 0.9146124769744249, "learning_rate": 9.936821080874587e-06, "loss": 0.5701, "step": 6361 }, { "epoch": 0.52, "grad_norm": 0.9294401348339756, "learning_rate": 9.934188663156592e-06, "loss": 0.5041, "step": 6362 }, { "epoch": 0.52, "grad_norm": 0.896786916401712, "learning_rate": 9.931556249999262e-06, "loss": 0.4878, "step": 6363 }, { "epoch": 0.52, "grad_norm": 0.9214512261006776, "learning_rate": 9.928923841585025e-06, "loss": 0.5577, "step": 6364 }, { "epoch": 0.52, "grad_norm": 0.952653505975468, "learning_rate": 9.926291438096305e-06, "loss": 0.5745, "step": 6365 }, { "epoch": 0.52, "grad_norm": 0.894555231533999, "learning_rate": 9.923659039715517e-06, "loss": 0.538, "step": 6366 }, { "epoch": 0.52, "grad_norm": 0.9779553837225547, "learning_rate": 9.921026646625094e-06, "loss": 0.5475, "step": 6367 }, { "epoch": 0.52, "grad_norm": 0.9825468080035322, "learning_rate": 9.918394259007458e-06, "loss": 0.5964, "step": 6368 }, { "epoch": 0.52, "grad_norm": 0.9816450220972703, "learning_rate": 9.91576187704502e-06, "loss": 0.5483, "step": 6369 }, { "epoch": 0.52, "grad_norm": 0.9936843969441377, "learning_rate": 9.913129500920214e-06, "loss": 0.5398, "step": 6370 }, { "epoch": 0.52, "grad_norm": 0.8657007233111466, "learning_rate": 9.910497130815454e-06, "loss": 0.4922, "step": 6371 }, { "epoch": 0.52, "grad_norm": 0.8313299803368648, "learning_rate": 9.907864766913162e-06, "loss": 0.5318, "step": 6372 }, { "epoch": 0.52, "grad_norm": 0.9539489317761674, "learning_rate": 9.905232409395764e-06, "loss": 0.5689, "step": 6373 }, { "epoch": 0.52, "grad_norm": 0.8829599504299749, "learning_rate": 9.902600058445676e-06, "loss": 0.5331, "step": 6374 }, { "epoch": 0.52, "grad_norm": 1.0323815487828643, "learning_rate": 9.899967714245313e-06, "loss": 0.6064, "step": 6375 }, { "epoch": 0.52, "grad_norm": 0.8874248857833198, "learning_rate": 9.897335376977104e-06, "loss": 0.5538, "step": 6376 }, { "epoch": 0.52, "grad_norm": 0.9590342091526441, "learning_rate": 9.894703046823461e-06, "loss": 0.571, "step": 6377 }, { "epoch": 0.52, "grad_norm": 0.9608727054645129, "learning_rate": 9.8920707239668e-06, "loss": 0.5436, "step": 6378 }, { "epoch": 0.52, "grad_norm": 0.9303455966981541, "learning_rate": 9.889438408589545e-06, "loss": 0.4842, "step": 6379 }, { "epoch": 0.52, "grad_norm": 0.9910314393804825, "learning_rate": 9.88680610087411e-06, "loss": 0.528, "step": 6380 }, { "epoch": 0.52, "grad_norm": 0.9227435341657937, "learning_rate": 9.884173801002909e-06, "loss": 0.5307, "step": 6381 }, { "epoch": 0.52, "grad_norm": 0.8368401603641216, "learning_rate": 9.881541509158366e-06, "loss": 0.4937, "step": 6382 }, { "epoch": 0.52, "grad_norm": 0.8543858991076968, "learning_rate": 9.878909225522889e-06, "loss": 0.4891, "step": 6383 }, { "epoch": 0.52, "grad_norm": 0.9324945143635585, "learning_rate": 9.876276950278893e-06, "loss": 0.5354, "step": 6384 }, { "epoch": 0.52, "grad_norm": 0.8203979580170194, "learning_rate": 9.873644683608798e-06, "loss": 0.4777, "step": 6385 }, { "epoch": 0.52, "grad_norm": 0.8715151945909194, "learning_rate": 9.87101242569501e-06, "loss": 0.5123, "step": 6386 }, { "epoch": 0.52, "grad_norm": 0.954708756603573, "learning_rate": 9.86838017671995e-06, "loss": 0.5242, "step": 6387 }, { "epoch": 0.52, "grad_norm": 0.9317365832888832, "learning_rate": 9.865747936866027e-06, "loss": 0.5395, "step": 6388 }, { "epoch": 0.52, "grad_norm": 0.8770081131433685, "learning_rate": 9.863115706315652e-06, "loss": 0.5174, "step": 6389 }, { "epoch": 0.52, "grad_norm": 0.8598260750153, "learning_rate": 9.860483485251238e-06, "loss": 0.4591, "step": 6390 }, { "epoch": 0.52, "grad_norm": 0.9755980531605132, "learning_rate": 9.857851273855195e-06, "loss": 0.5801, "step": 6391 }, { "epoch": 0.52, "grad_norm": 1.0087368311921079, "learning_rate": 9.855219072309931e-06, "loss": 0.498, "step": 6392 }, { "epoch": 0.52, "grad_norm": 0.9230860988633534, "learning_rate": 9.852586880797857e-06, "loss": 0.5284, "step": 6393 }, { "epoch": 0.52, "grad_norm": 0.9058880053248296, "learning_rate": 9.849954699501383e-06, "loss": 0.5242, "step": 6394 }, { "epoch": 0.52, "grad_norm": 0.9677734450221844, "learning_rate": 9.847322528602913e-06, "loss": 0.5892, "step": 6395 }, { "epoch": 0.52, "grad_norm": 0.9367373557695239, "learning_rate": 9.844690368284857e-06, "loss": 0.56, "step": 6396 }, { "epoch": 0.52, "grad_norm": 0.878603950726151, "learning_rate": 9.842058218729623e-06, "loss": 0.4691, "step": 6397 }, { "epoch": 0.52, "grad_norm": 0.8616635511939935, "learning_rate": 9.839426080119612e-06, "loss": 0.5389, "step": 6398 }, { "epoch": 0.52, "grad_norm": 0.9986338396517531, "learning_rate": 9.836793952637232e-06, "loss": 0.552, "step": 6399 }, { "epoch": 0.52, "grad_norm": 0.9511148684059743, "learning_rate": 9.834161836464888e-06, "loss": 0.5818, "step": 6400 }, { "epoch": 0.52, "grad_norm": 1.0105896126209348, "learning_rate": 9.831529731784975e-06, "loss": 0.5089, "step": 6401 }, { "epoch": 0.52, "grad_norm": 0.8717280136431198, "learning_rate": 9.828897638779909e-06, "loss": 0.5305, "step": 6402 }, { "epoch": 0.52, "grad_norm": 0.9162669287847024, "learning_rate": 9.826265557632083e-06, "loss": 0.5128, "step": 6403 }, { "epoch": 0.52, "grad_norm": 0.8920982133737825, "learning_rate": 9.823633488523898e-06, "loss": 0.5622, "step": 6404 }, { "epoch": 0.52, "grad_norm": 0.9054418294786118, "learning_rate": 9.821001431637759e-06, "loss": 0.5298, "step": 6405 }, { "epoch": 0.52, "grad_norm": 0.8805665336617836, "learning_rate": 9.81836938715606e-06, "loss": 0.5627, "step": 6406 }, { "epoch": 0.52, "grad_norm": 0.9678033064998812, "learning_rate": 9.815737355261201e-06, "loss": 0.5519, "step": 6407 }, { "epoch": 0.52, "grad_norm": 0.8733129307408266, "learning_rate": 9.813105336135582e-06, "loss": 0.5598, "step": 6408 }, { "epoch": 0.52, "grad_norm": 0.9086165997165361, "learning_rate": 9.810473329961595e-06, "loss": 0.524, "step": 6409 }, { "epoch": 0.52, "grad_norm": 0.9492530342714538, "learning_rate": 9.807841336921639e-06, "loss": 0.4998, "step": 6410 }, { "epoch": 0.52, "grad_norm": 0.8393818681658488, "learning_rate": 9.80520935719811e-06, "loss": 0.544, "step": 6411 }, { "epoch": 0.52, "grad_norm": 0.9381276538883282, "learning_rate": 9.802577390973397e-06, "loss": 0.5205, "step": 6412 }, { "epoch": 0.52, "grad_norm": 0.8978597929624021, "learning_rate": 9.799945438429895e-06, "loss": 0.5191, "step": 6413 }, { "epoch": 0.52, "grad_norm": 0.9130042053056334, "learning_rate": 9.79731349975e-06, "loss": 0.52, "step": 6414 }, { "epoch": 0.52, "grad_norm": 0.909046664883525, "learning_rate": 9.794681575116097e-06, "loss": 0.5285, "step": 6415 }, { "epoch": 0.52, "grad_norm": 0.9534951232598357, "learning_rate": 9.792049664710579e-06, "loss": 0.5247, "step": 6416 }, { "epoch": 0.52, "grad_norm": 0.93802065691835, "learning_rate": 9.789417768715837e-06, "loss": 0.4866, "step": 6417 }, { "epoch": 0.52, "grad_norm": 0.7893135352036715, "learning_rate": 9.786785887314255e-06, "loss": 0.412, "step": 6418 }, { "epoch": 0.52, "grad_norm": 0.9111844420461667, "learning_rate": 9.784154020688222e-06, "loss": 0.5623, "step": 6419 }, { "epoch": 0.52, "grad_norm": 0.9935656482433806, "learning_rate": 9.781522169020125e-06, "loss": 0.5779, "step": 6420 }, { "epoch": 0.52, "grad_norm": 0.8351481116157773, "learning_rate": 9.778890332492346e-06, "loss": 0.4643, "step": 6421 }, { "epoch": 0.52, "grad_norm": 0.9588974823514563, "learning_rate": 9.776258511287271e-06, "loss": 0.5176, "step": 6422 }, { "epoch": 0.52, "grad_norm": 0.828264983941755, "learning_rate": 9.773626705587283e-06, "loss": 0.4941, "step": 6423 }, { "epoch": 0.52, "grad_norm": 0.9097675795507923, "learning_rate": 9.770994915574766e-06, "loss": 0.5653, "step": 6424 }, { "epoch": 0.52, "grad_norm": 0.8263116395093373, "learning_rate": 9.768363141432095e-06, "loss": 0.5402, "step": 6425 }, { "epoch": 0.52, "grad_norm": 0.8329323971338786, "learning_rate": 9.765731383341654e-06, "loss": 0.4887, "step": 6426 }, { "epoch": 0.52, "grad_norm": 0.9319485658181705, "learning_rate": 9.76309964148582e-06, "loss": 0.5301, "step": 6427 }, { "epoch": 0.52, "grad_norm": 0.9109510386316052, "learning_rate": 9.760467916046971e-06, "loss": 0.5702, "step": 6428 }, { "epoch": 0.52, "grad_norm": 0.9192063800704567, "learning_rate": 9.757836207207483e-06, "loss": 0.5517, "step": 6429 }, { "epoch": 0.52, "grad_norm": 0.8938122304610931, "learning_rate": 9.755204515149731e-06, "loss": 0.5129, "step": 6430 }, { "epoch": 0.52, "grad_norm": 0.8885073683990292, "learning_rate": 9.75257284005609e-06, "loss": 0.4888, "step": 6431 }, { "epoch": 0.52, "grad_norm": 0.9694255751986594, "learning_rate": 9.74994118210893e-06, "loss": 0.5145, "step": 6432 }, { "epoch": 0.52, "grad_norm": 0.9905287843588529, "learning_rate": 9.747309541490627e-06, "loss": 0.5592, "step": 6433 }, { "epoch": 0.52, "grad_norm": 0.9468510667397407, "learning_rate": 9.744677918383546e-06, "loss": 0.4493, "step": 6434 }, { "epoch": 0.52, "grad_norm": 0.8159532262872364, "learning_rate": 9.742046312970058e-06, "loss": 0.4615, "step": 6435 }, { "epoch": 0.52, "grad_norm": 0.9604668106872334, "learning_rate": 9.739414725432535e-06, "loss": 0.5214, "step": 6436 }, { "epoch": 0.52, "grad_norm": 0.9556439331531814, "learning_rate": 9.736783155953338e-06, "loss": 0.5053, "step": 6437 }, { "epoch": 0.52, "grad_norm": 0.916486932919353, "learning_rate": 9.734151604714834e-06, "loss": 0.5413, "step": 6438 }, { "epoch": 0.52, "grad_norm": 0.8681676499631316, "learning_rate": 9.73152007189939e-06, "loss": 0.4963, "step": 6439 }, { "epoch": 0.52, "grad_norm": 0.8529159346600007, "learning_rate": 9.728888557689364e-06, "loss": 0.5299, "step": 6440 }, { "epoch": 0.52, "grad_norm": 0.8944587393736042, "learning_rate": 9.726257062267124e-06, "loss": 0.523, "step": 6441 }, { "epoch": 0.52, "grad_norm": 0.8535702354962332, "learning_rate": 9.723625585815028e-06, "loss": 0.5471, "step": 6442 }, { "epoch": 0.52, "grad_norm": 0.9894346512705576, "learning_rate": 9.720994128515428e-06, "loss": 0.5799, "step": 6443 }, { "epoch": 0.52, "grad_norm": 0.8912292132663773, "learning_rate": 9.718362690550693e-06, "loss": 0.5419, "step": 6444 }, { "epoch": 0.52, "grad_norm": 0.8431590559324045, "learning_rate": 9.715731272103172e-06, "loss": 0.5395, "step": 6445 }, { "epoch": 0.52, "grad_norm": 0.8296842798941493, "learning_rate": 9.713099873355219e-06, "loss": 0.5027, "step": 6446 }, { "epoch": 0.52, "grad_norm": 1.049682332775139, "learning_rate": 9.710468494489194e-06, "loss": 0.5357, "step": 6447 }, { "epoch": 0.52, "grad_norm": 0.9346713890089724, "learning_rate": 9.707837135687444e-06, "loss": 0.5126, "step": 6448 }, { "epoch": 0.52, "grad_norm": 0.9759963987028532, "learning_rate": 9.705205797132319e-06, "loss": 0.5742, "step": 6449 }, { "epoch": 0.52, "grad_norm": 0.878814498064152, "learning_rate": 9.702574479006174e-06, "loss": 0.5266, "step": 6450 }, { "epoch": 0.52, "grad_norm": 0.8991362672949392, "learning_rate": 9.699943181491355e-06, "loss": 0.4653, "step": 6451 }, { "epoch": 0.52, "grad_norm": 0.8413370445817436, "learning_rate": 9.697311904770202e-06, "loss": 0.469, "step": 6452 }, { "epoch": 0.52, "grad_norm": 0.8689595579603493, "learning_rate": 9.69468064902507e-06, "loss": 0.5227, "step": 6453 }, { "epoch": 0.52, "grad_norm": 0.9697152793274977, "learning_rate": 9.692049414438298e-06, "loss": 0.5844, "step": 6454 }, { "epoch": 0.52, "grad_norm": 0.8949486024463249, "learning_rate": 9.689418201192226e-06, "loss": 0.5068, "step": 6455 }, { "epoch": 0.52, "grad_norm": 0.9773141105296431, "learning_rate": 9.6867870094692e-06, "loss": 0.5538, "step": 6456 }, { "epoch": 0.52, "grad_norm": 0.857328825009141, "learning_rate": 9.684155839451555e-06, "loss": 0.4758, "step": 6457 }, { "epoch": 0.52, "grad_norm": 0.9429415025858808, "learning_rate": 9.681524691321628e-06, "loss": 0.6401, "step": 6458 }, { "epoch": 0.52, "grad_norm": 1.0145411858316762, "learning_rate": 9.678893565261761e-06, "loss": 0.5969, "step": 6459 }, { "epoch": 0.53, "grad_norm": 0.9661312013835944, "learning_rate": 9.676262461454285e-06, "loss": 0.5137, "step": 6460 }, { "epoch": 0.53, "grad_norm": 0.9337415348932296, "learning_rate": 9.67363138008153e-06, "loss": 0.5644, "step": 6461 }, { "epoch": 0.53, "grad_norm": 0.8901844897600891, "learning_rate": 9.671000321325832e-06, "loss": 0.6057, "step": 6462 }, { "epoch": 0.53, "grad_norm": 0.9016225379646999, "learning_rate": 9.668369285369524e-06, "loss": 0.498, "step": 6463 }, { "epoch": 0.53, "grad_norm": 0.9297387666303769, "learning_rate": 9.665738272394924e-06, "loss": 0.5294, "step": 6464 }, { "epoch": 0.53, "grad_norm": 0.9734669398659537, "learning_rate": 9.66310728258437e-06, "loss": 0.5146, "step": 6465 }, { "epoch": 0.53, "grad_norm": 0.9545143524091929, "learning_rate": 9.660476316120181e-06, "loss": 0.5714, "step": 6466 }, { "epoch": 0.53, "grad_norm": 0.8922757284332623, "learning_rate": 9.65784537318468e-06, "loss": 0.542, "step": 6467 }, { "epoch": 0.53, "grad_norm": 0.8573370579926729, "learning_rate": 9.655214453960195e-06, "loss": 0.4861, "step": 6468 }, { "epoch": 0.53, "grad_norm": 0.8977961285324865, "learning_rate": 9.652583558629042e-06, "loss": 0.5144, "step": 6469 }, { "epoch": 0.53, "grad_norm": 0.9579689428849033, "learning_rate": 9.649952687373535e-06, "loss": 0.5472, "step": 6470 }, { "epoch": 0.53, "grad_norm": 0.8805879235281294, "learning_rate": 9.647321840376001e-06, "loss": 0.5174, "step": 6471 }, { "epoch": 0.53, "grad_norm": 1.051855667302005, "learning_rate": 9.644691017818752e-06, "loss": 0.5926, "step": 6472 }, { "epoch": 0.53, "grad_norm": 0.8989815009031404, "learning_rate": 9.642060219884096e-06, "loss": 0.4875, "step": 6473 }, { "epoch": 0.53, "grad_norm": 0.9211094147866692, "learning_rate": 9.639429446754352e-06, "loss": 0.5151, "step": 6474 }, { "epoch": 0.53, "grad_norm": 0.9399000526890481, "learning_rate": 9.636798698611828e-06, "loss": 0.554, "step": 6475 }, { "epoch": 0.53, "grad_norm": 0.8830150022783372, "learning_rate": 9.634167975638828e-06, "loss": 0.5276, "step": 6476 }, { "epoch": 0.53, "grad_norm": 0.858616695781131, "learning_rate": 9.631537278017667e-06, "loss": 0.4538, "step": 6477 }, { "epoch": 0.53, "grad_norm": 0.9342039254589489, "learning_rate": 9.628906605930647e-06, "loss": 0.5417, "step": 6478 }, { "epoch": 0.53, "grad_norm": 0.8611122539892918, "learning_rate": 9.626275959560064e-06, "loss": 0.536, "step": 6479 }, { "epoch": 0.53, "grad_norm": 0.857609366638501, "learning_rate": 9.62364533908823e-06, "loss": 0.489, "step": 6480 }, { "epoch": 0.53, "grad_norm": 0.825074690220345, "learning_rate": 9.621014744697442e-06, "loss": 0.4701, "step": 6481 }, { "epoch": 0.53, "grad_norm": 0.934085548351372, "learning_rate": 9.61838417656999e-06, "loss": 0.4784, "step": 6482 }, { "epoch": 0.53, "grad_norm": 0.9527486975769726, "learning_rate": 9.615753634888179e-06, "loss": 0.5408, "step": 6483 }, { "epoch": 0.53, "grad_norm": 0.84672859944973, "learning_rate": 9.613123119834304e-06, "loss": 0.5066, "step": 6484 }, { "epoch": 0.53, "grad_norm": 0.8997458292305717, "learning_rate": 9.610492631590646e-06, "loss": 0.5416, "step": 6485 }, { "epoch": 0.53, "grad_norm": 0.9626727248227898, "learning_rate": 9.60786217033951e-06, "loss": 0.5657, "step": 6486 }, { "epoch": 0.53, "grad_norm": 0.764459498528756, "learning_rate": 9.605231736263176e-06, "loss": 0.4537, "step": 6487 }, { "epoch": 0.53, "grad_norm": 0.9296132740721262, "learning_rate": 9.602601329543928e-06, "loss": 0.4775, "step": 6488 }, { "epoch": 0.53, "grad_norm": 0.8278080916393366, "learning_rate": 9.599970950364061e-06, "loss": 0.4814, "step": 6489 }, { "epoch": 0.53, "grad_norm": 0.9615444489092069, "learning_rate": 9.597340598905851e-06, "loss": 0.591, "step": 6490 }, { "epoch": 0.53, "grad_norm": 0.9166588125726522, "learning_rate": 9.594710275351577e-06, "loss": 0.5656, "step": 6491 }, { "epoch": 0.53, "grad_norm": 0.9398183643638113, "learning_rate": 9.592079979883526e-06, "loss": 0.5495, "step": 6492 }, { "epoch": 0.53, "grad_norm": 0.9101662724536649, "learning_rate": 9.58944971268397e-06, "loss": 0.575, "step": 6493 }, { "epoch": 0.53, "grad_norm": 0.9027671688552364, "learning_rate": 9.586819473935181e-06, "loss": 0.5345, "step": 6494 }, { "epoch": 0.53, "grad_norm": 0.9741851024488997, "learning_rate": 9.58418926381944e-06, "loss": 0.6057, "step": 6495 }, { "epoch": 0.53, "grad_norm": 0.8528086376805016, "learning_rate": 9.581559082519015e-06, "loss": 0.4635, "step": 6496 }, { "epoch": 0.53, "grad_norm": 0.9158825951760049, "learning_rate": 9.578928930216167e-06, "loss": 0.5513, "step": 6497 }, { "epoch": 0.53, "grad_norm": 0.9498083322382603, "learning_rate": 9.576298807093177e-06, "loss": 0.5602, "step": 6498 }, { "epoch": 0.53, "grad_norm": 0.8378768819215866, "learning_rate": 9.573668713332305e-06, "loss": 0.4934, "step": 6499 }, { "epoch": 0.53, "grad_norm": 0.849447929846769, "learning_rate": 9.571038649115807e-06, "loss": 0.5089, "step": 6500 }, { "epoch": 0.53, "grad_norm": 0.9094774639747515, "learning_rate": 9.568408614625956e-06, "loss": 0.5415, "step": 6501 }, { "epoch": 0.53, "grad_norm": 0.8683994871193035, "learning_rate": 9.565778610045003e-06, "loss": 0.4996, "step": 6502 }, { "epoch": 0.53, "grad_norm": 0.8416512392944917, "learning_rate": 9.563148635555205e-06, "loss": 0.4769, "step": 6503 }, { "epoch": 0.53, "grad_norm": 0.8417091367157625, "learning_rate": 9.560518691338822e-06, "loss": 0.5091, "step": 6504 }, { "epoch": 0.53, "grad_norm": 0.9307177969399728, "learning_rate": 9.557888777578105e-06, "loss": 0.5736, "step": 6505 }, { "epoch": 0.53, "grad_norm": 0.9148668617918116, "learning_rate": 9.555258894455298e-06, "loss": 0.4671, "step": 6506 }, { "epoch": 0.53, "grad_norm": 0.933610510661884, "learning_rate": 9.55262904215266e-06, "loss": 0.5215, "step": 6507 }, { "epoch": 0.53, "grad_norm": 0.8493681051755907, "learning_rate": 9.549999220852432e-06, "loss": 0.4692, "step": 6508 }, { "epoch": 0.53, "grad_norm": 0.9008032770859512, "learning_rate": 9.547369430736857e-06, "loss": 0.5602, "step": 6509 }, { "epoch": 0.53, "grad_norm": 0.8313031359121507, "learning_rate": 9.54473967198818e-06, "loss": 0.4551, "step": 6510 }, { "epoch": 0.53, "grad_norm": 2.0024548477030657, "learning_rate": 9.542109944788643e-06, "loss": 0.4755, "step": 6511 }, { "epoch": 0.53, "grad_norm": 0.9424448090376205, "learning_rate": 9.539480249320473e-06, "loss": 0.5853, "step": 6512 }, { "epoch": 0.53, "grad_norm": 0.8858742800185914, "learning_rate": 9.53685058576592e-06, "loss": 0.5339, "step": 6513 }, { "epoch": 0.53, "grad_norm": 0.9352935076731403, "learning_rate": 9.53422095430721e-06, "loss": 0.5547, "step": 6514 }, { "epoch": 0.53, "grad_norm": 1.0227461291170266, "learning_rate": 9.53159135512657e-06, "loss": 0.5612, "step": 6515 }, { "epoch": 0.53, "grad_norm": 0.9646149787645724, "learning_rate": 9.528961788406237e-06, "loss": 0.5668, "step": 6516 }, { "epoch": 0.53, "grad_norm": 0.9089296205705552, "learning_rate": 9.526332254328437e-06, "loss": 0.5376, "step": 6517 }, { "epoch": 0.53, "grad_norm": 0.8838124970501346, "learning_rate": 9.523702753075386e-06, "loss": 0.5065, "step": 6518 }, { "epoch": 0.53, "grad_norm": 0.9206866517683869, "learning_rate": 9.521073284829315e-06, "loss": 0.4834, "step": 6519 }, { "epoch": 0.53, "grad_norm": 0.912784923782488, "learning_rate": 9.518443849772441e-06, "loss": 0.512, "step": 6520 }, { "epoch": 0.53, "grad_norm": 0.9058344101233732, "learning_rate": 9.515814448086978e-06, "loss": 0.5461, "step": 6521 }, { "epoch": 0.53, "grad_norm": 0.8793164453057128, "learning_rate": 9.513185079955148e-06, "loss": 0.4845, "step": 6522 }, { "epoch": 0.53, "grad_norm": 0.8984635639989134, "learning_rate": 9.51055574555916e-06, "loss": 0.5754, "step": 6523 }, { "epoch": 0.53, "grad_norm": 0.9293547040387503, "learning_rate": 9.50792644508122e-06, "loss": 0.5819, "step": 6524 }, { "epoch": 0.53, "grad_norm": 0.784032342081321, "learning_rate": 9.505297178703546e-06, "loss": 0.5305, "step": 6525 }, { "epoch": 0.53, "grad_norm": 0.883478994718842, "learning_rate": 9.502667946608332e-06, "loss": 0.556, "step": 6526 }, { "epoch": 0.53, "grad_norm": 0.8416266967465563, "learning_rate": 9.500038748977794e-06, "loss": 0.5466, "step": 6527 }, { "epoch": 0.53, "grad_norm": 1.0312877433341645, "learning_rate": 9.497409585994128e-06, "loss": 0.5922, "step": 6528 }, { "epoch": 0.53, "grad_norm": 0.9451565220427494, "learning_rate": 9.494780457839527e-06, "loss": 0.4781, "step": 6529 }, { "epoch": 0.53, "grad_norm": 0.9527771032047712, "learning_rate": 9.492151364696196e-06, "loss": 0.5277, "step": 6530 }, { "epoch": 0.53, "grad_norm": 0.8612304851836948, "learning_rate": 9.489522306746327e-06, "loss": 0.487, "step": 6531 }, { "epoch": 0.53, "grad_norm": 0.8498362511354313, "learning_rate": 9.486893284172103e-06, "loss": 0.5537, "step": 6532 }, { "epoch": 0.53, "grad_norm": 0.9141501671369846, "learning_rate": 9.484264297155724e-06, "loss": 0.521, "step": 6533 }, { "epoch": 0.53, "grad_norm": 0.8471454028075913, "learning_rate": 9.481635345879373e-06, "loss": 0.5362, "step": 6534 }, { "epoch": 0.53, "grad_norm": 0.8502542534921609, "learning_rate": 9.479006430525227e-06, "loss": 0.5177, "step": 6535 }, { "epoch": 0.53, "grad_norm": 0.8558504290560964, "learning_rate": 9.476377551275478e-06, "loss": 0.5179, "step": 6536 }, { "epoch": 0.53, "grad_norm": 0.9149921933622068, "learning_rate": 9.4737487083123e-06, "loss": 0.4804, "step": 6537 }, { "epoch": 0.53, "grad_norm": 0.8624426856664629, "learning_rate": 9.471119901817866e-06, "loss": 0.5192, "step": 6538 }, { "epoch": 0.53, "grad_norm": 0.9475697368614927, "learning_rate": 9.468491131974358e-06, "loss": 0.5907, "step": 6539 }, { "epoch": 0.53, "grad_norm": 0.8597657353172048, "learning_rate": 9.465862398963943e-06, "loss": 0.4512, "step": 6540 }, { "epoch": 0.53, "grad_norm": 0.791808646314481, "learning_rate": 9.463233702968784e-06, "loss": 0.4951, "step": 6541 }, { "epoch": 0.53, "grad_norm": 0.9190383831550103, "learning_rate": 9.46060504417106e-06, "loss": 0.5751, "step": 6542 }, { "epoch": 0.53, "grad_norm": 0.9219461465709242, "learning_rate": 9.457976422752925e-06, "loss": 0.5326, "step": 6543 }, { "epoch": 0.53, "grad_norm": 0.8887657703238295, "learning_rate": 9.455347838896541e-06, "loss": 0.4661, "step": 6544 }, { "epoch": 0.53, "grad_norm": 0.8733624243772576, "learning_rate": 9.452719292784074e-06, "loss": 0.5464, "step": 6545 }, { "epoch": 0.53, "grad_norm": 0.9112115008584419, "learning_rate": 9.450090784597673e-06, "loss": 0.5647, "step": 6546 }, { "epoch": 0.53, "grad_norm": 1.7311921806377226, "learning_rate": 9.44746231451949e-06, "loss": 0.5059, "step": 6547 }, { "epoch": 0.53, "grad_norm": 0.8463909442796727, "learning_rate": 9.444833882731681e-06, "loss": 0.5476, "step": 6548 }, { "epoch": 0.53, "grad_norm": 0.9461525435875031, "learning_rate": 9.442205489416392e-06, "loss": 0.5637, "step": 6549 }, { "epoch": 0.53, "grad_norm": 0.8698853022532326, "learning_rate": 9.439577134755763e-06, "loss": 0.476, "step": 6550 }, { "epoch": 0.53, "grad_norm": 0.8221458505598757, "learning_rate": 9.436948818931947e-06, "loss": 0.4741, "step": 6551 }, { "epoch": 0.53, "grad_norm": 0.8672724748753476, "learning_rate": 9.434320542127075e-06, "loss": 0.5206, "step": 6552 }, { "epoch": 0.53, "grad_norm": 0.9586388160464261, "learning_rate": 9.43169230452329e-06, "loss": 0.5331, "step": 6553 }, { "epoch": 0.53, "grad_norm": 0.9699403966085434, "learning_rate": 9.429064106302724e-06, "loss": 0.5506, "step": 6554 }, { "epoch": 0.53, "grad_norm": 0.9896460820121302, "learning_rate": 9.426435947647508e-06, "loss": 0.5081, "step": 6555 }, { "epoch": 0.53, "grad_norm": 0.8609140681024577, "learning_rate": 9.42380782873977e-06, "loss": 0.4972, "step": 6556 }, { "epoch": 0.53, "grad_norm": 0.9419865663383613, "learning_rate": 9.421179749761643e-06, "loss": 0.5552, "step": 6557 }, { "epoch": 0.53, "grad_norm": 0.8792446463968906, "learning_rate": 9.418551710895243e-06, "loss": 0.4424, "step": 6558 }, { "epoch": 0.53, "grad_norm": 0.9401380367951486, "learning_rate": 9.415923712322693e-06, "loss": 0.4913, "step": 6559 }, { "epoch": 0.53, "grad_norm": 0.9564624625255295, "learning_rate": 9.413295754226115e-06, "loss": 0.4873, "step": 6560 }, { "epoch": 0.53, "grad_norm": 0.9038480801805764, "learning_rate": 9.410667836787619e-06, "loss": 0.5728, "step": 6561 }, { "epoch": 0.53, "grad_norm": 0.8070238910164431, "learning_rate": 9.408039960189317e-06, "loss": 0.4674, "step": 6562 }, { "epoch": 0.53, "grad_norm": 0.9804935597251521, "learning_rate": 9.405412124613325e-06, "loss": 0.5187, "step": 6563 }, { "epoch": 0.53, "grad_norm": 0.8857730008887388, "learning_rate": 9.402784330241743e-06, "loss": 0.5002, "step": 6564 }, { "epoch": 0.53, "grad_norm": 0.8739355689761309, "learning_rate": 9.400156577256675e-06, "loss": 0.5278, "step": 6565 }, { "epoch": 0.53, "grad_norm": 0.9579620085111423, "learning_rate": 9.397528865840229e-06, "loss": 0.5324, "step": 6566 }, { "epoch": 0.53, "grad_norm": 0.8396241787978719, "learning_rate": 9.394901196174496e-06, "loss": 0.5279, "step": 6567 }, { "epoch": 0.53, "grad_norm": 0.9116099647277938, "learning_rate": 9.39227356844157e-06, "loss": 0.4807, "step": 6568 }, { "epoch": 0.53, "grad_norm": 0.9085824599768635, "learning_rate": 9.389645982823552e-06, "loss": 0.5559, "step": 6569 }, { "epoch": 0.53, "grad_norm": 0.946189269407832, "learning_rate": 9.387018439502524e-06, "loss": 0.5012, "step": 6570 }, { "epoch": 0.53, "grad_norm": 0.8718190611058443, "learning_rate": 9.384390938660572e-06, "loss": 0.5061, "step": 6571 }, { "epoch": 0.53, "grad_norm": 0.8547334865321193, "learning_rate": 9.381763480479784e-06, "loss": 0.5166, "step": 6572 }, { "epoch": 0.53, "grad_norm": 1.0652378374846714, "learning_rate": 9.379136065142241e-06, "loss": 0.5806, "step": 6573 }, { "epoch": 0.53, "grad_norm": 0.8143736521167203, "learning_rate": 9.376508692830012e-06, "loss": 0.5379, "step": 6574 }, { "epoch": 0.53, "grad_norm": 0.9218503896969388, "learning_rate": 9.373881363725182e-06, "loss": 0.5158, "step": 6575 }, { "epoch": 0.53, "grad_norm": 0.9985452463714473, "learning_rate": 9.371254078009819e-06, "loss": 0.5931, "step": 6576 }, { "epoch": 0.53, "grad_norm": 0.9356941082925173, "learning_rate": 9.368626835865987e-06, "loss": 0.4867, "step": 6577 }, { "epoch": 0.53, "grad_norm": 0.9597854538853388, "learning_rate": 9.365999637475756e-06, "loss": 0.609, "step": 6578 }, { "epoch": 0.53, "grad_norm": 0.8403719685382467, "learning_rate": 9.363372483021191e-06, "loss": 0.4493, "step": 6579 }, { "epoch": 0.53, "grad_norm": 0.989244981681324, "learning_rate": 9.360745372684346e-06, "loss": 0.6439, "step": 6580 }, { "epoch": 0.53, "grad_norm": 0.9416956127576758, "learning_rate": 9.358118306647278e-06, "loss": 0.5829, "step": 6581 }, { "epoch": 0.53, "grad_norm": 0.9294458423197391, "learning_rate": 9.355491285092045e-06, "loss": 0.597, "step": 6582 }, { "epoch": 0.54, "grad_norm": 0.8599793380974954, "learning_rate": 9.352864308200693e-06, "loss": 0.5003, "step": 6583 }, { "epoch": 0.54, "grad_norm": 0.8562934086264922, "learning_rate": 9.350237376155269e-06, "loss": 0.4935, "step": 6584 }, { "epoch": 0.54, "grad_norm": 0.9499034838164225, "learning_rate": 9.347610489137821e-06, "loss": 0.5657, "step": 6585 }, { "epoch": 0.54, "grad_norm": 0.9886652459642102, "learning_rate": 9.344983647330386e-06, "loss": 0.4546, "step": 6586 }, { "epoch": 0.54, "grad_norm": 0.8408184507746581, "learning_rate": 9.342356850915003e-06, "loss": 0.4653, "step": 6587 }, { "epoch": 0.54, "grad_norm": 0.9170817507583063, "learning_rate": 9.339730100073709e-06, "loss": 0.5481, "step": 6588 }, { "epoch": 0.54, "grad_norm": 0.8354998641142983, "learning_rate": 9.33710339498853e-06, "loss": 0.4542, "step": 6589 }, { "epoch": 0.54, "grad_norm": 0.8346026834004256, "learning_rate": 9.3344767358415e-06, "loss": 0.4872, "step": 6590 }, { "epoch": 0.54, "grad_norm": 0.9166805271898558, "learning_rate": 9.331850122814644e-06, "loss": 0.5649, "step": 6591 }, { "epoch": 0.54, "grad_norm": 0.9016451210187113, "learning_rate": 9.329223556089976e-06, "loss": 0.5336, "step": 6592 }, { "epoch": 0.54, "grad_norm": 0.8541527908291205, "learning_rate": 9.326597035849524e-06, "loss": 0.514, "step": 6593 }, { "epoch": 0.54, "grad_norm": 0.8920893747093328, "learning_rate": 9.323970562275302e-06, "loss": 0.5288, "step": 6594 }, { "epoch": 0.54, "grad_norm": 0.887453410935998, "learning_rate": 9.321344135549316e-06, "loss": 0.5056, "step": 6595 }, { "epoch": 0.54, "grad_norm": 0.8794368276693035, "learning_rate": 9.318717755853583e-06, "loss": 0.4963, "step": 6596 }, { "epoch": 0.54, "grad_norm": 0.8652655941315313, "learning_rate": 9.316091423370105e-06, "loss": 0.5329, "step": 6597 }, { "epoch": 0.54, "grad_norm": 0.9208644302407073, "learning_rate": 9.313465138280882e-06, "loss": 0.5108, "step": 6598 }, { "epoch": 0.54, "grad_norm": 0.963722543617233, "learning_rate": 9.31083890076792e-06, "loss": 0.575, "step": 6599 }, { "epoch": 0.54, "grad_norm": 0.9541286094340493, "learning_rate": 9.30821271101321e-06, "loss": 0.5455, "step": 6600 }, { "epoch": 0.54, "grad_norm": 1.0065486795180587, "learning_rate": 9.305586569198742e-06, "loss": 0.5707, "step": 6601 }, { "epoch": 0.54, "grad_norm": 0.9259839776751105, "learning_rate": 9.30296047550651e-06, "loss": 0.4889, "step": 6602 }, { "epoch": 0.54, "grad_norm": 0.9726764156210098, "learning_rate": 9.300334430118504e-06, "loss": 0.5617, "step": 6603 }, { "epoch": 0.54, "grad_norm": 0.884838643280822, "learning_rate": 9.297708433216693e-06, "loss": 0.503, "step": 6604 }, { "epoch": 0.54, "grad_norm": 0.8280384803844053, "learning_rate": 9.29508248498307e-06, "loss": 0.4996, "step": 6605 }, { "epoch": 0.54, "grad_norm": 0.8906426897381897, "learning_rate": 9.292456585599607e-06, "loss": 0.5115, "step": 6606 }, { "epoch": 0.54, "grad_norm": 0.996972130518505, "learning_rate": 9.289830735248269e-06, "loss": 0.5234, "step": 6607 }, { "epoch": 0.54, "grad_norm": 0.8291707987913881, "learning_rate": 9.287204934111035e-06, "loss": 0.5578, "step": 6608 }, { "epoch": 0.54, "grad_norm": 0.915568083458182, "learning_rate": 9.284579182369868e-06, "loss": 0.5419, "step": 6609 }, { "epoch": 0.54, "grad_norm": 0.9954268850193402, "learning_rate": 9.281953480206725e-06, "loss": 0.522, "step": 6610 }, { "epoch": 0.54, "grad_norm": 0.9047193985797033, "learning_rate": 9.279327827803573e-06, "loss": 0.4784, "step": 6611 }, { "epoch": 0.54, "grad_norm": 0.8701895692193299, "learning_rate": 9.276702225342363e-06, "loss": 0.4848, "step": 6612 }, { "epoch": 0.54, "grad_norm": 0.9950557491152223, "learning_rate": 9.274076673005042e-06, "loss": 0.5792, "step": 6613 }, { "epoch": 0.54, "grad_norm": 0.9822586130445328, "learning_rate": 9.271451170973568e-06, "loss": 0.4991, "step": 6614 }, { "epoch": 0.54, "grad_norm": 0.9107882853370662, "learning_rate": 9.268825719429884e-06, "loss": 0.5307, "step": 6615 }, { "epoch": 0.54, "grad_norm": 0.9603488113009204, "learning_rate": 9.266200318555923e-06, "loss": 0.5128, "step": 6616 }, { "epoch": 0.54, "grad_norm": 0.9931853248124778, "learning_rate": 9.263574968533635e-06, "loss": 0.5775, "step": 6617 }, { "epoch": 0.54, "grad_norm": 0.9562773602607122, "learning_rate": 9.260949669544946e-06, "loss": 0.4898, "step": 6618 }, { "epoch": 0.54, "grad_norm": 0.9262129155659268, "learning_rate": 9.258324421771785e-06, "loss": 0.5486, "step": 6619 }, { "epoch": 0.54, "grad_norm": 0.9512134810062653, "learning_rate": 9.255699225396091e-06, "loss": 0.5639, "step": 6620 }, { "epoch": 0.54, "grad_norm": 0.8827005731153343, "learning_rate": 9.25307408059978e-06, "loss": 0.5139, "step": 6621 }, { "epoch": 0.54, "grad_norm": 0.8965142343662935, "learning_rate": 9.250448987564765e-06, "loss": 0.5929, "step": 6622 }, { "epoch": 0.54, "grad_norm": 0.9594934835087743, "learning_rate": 9.247823946472978e-06, "loss": 0.5294, "step": 6623 }, { "epoch": 0.54, "grad_norm": 0.9948381063542425, "learning_rate": 9.245198957506324e-06, "loss": 0.5583, "step": 6624 }, { "epoch": 0.54, "grad_norm": 0.9747739230563899, "learning_rate": 9.242574020846706e-06, "loss": 0.596, "step": 6625 }, { "epoch": 0.54, "grad_norm": 0.9866782142190802, "learning_rate": 9.239949136676042e-06, "loss": 0.5165, "step": 6626 }, { "epoch": 0.54, "grad_norm": 0.836617718413994, "learning_rate": 9.23732430517623e-06, "loss": 0.4931, "step": 6627 }, { "epoch": 0.54, "grad_norm": 0.9969240754051573, "learning_rate": 9.23469952652916e-06, "loss": 0.5709, "step": 6628 }, { "epoch": 0.54, "grad_norm": 1.084925424323057, "learning_rate": 9.232074800916741e-06, "loss": 0.5608, "step": 6629 }, { "epoch": 0.54, "grad_norm": 0.8014165391119704, "learning_rate": 9.229450128520856e-06, "loss": 0.4672, "step": 6630 }, { "epoch": 0.54, "grad_norm": 0.9275634877793757, "learning_rate": 9.22682550952339e-06, "loss": 0.5796, "step": 6631 }, { "epoch": 0.54, "grad_norm": 0.9943343152879219, "learning_rate": 9.224200944106234e-06, "loss": 0.5677, "step": 6632 }, { "epoch": 0.54, "grad_norm": 0.875222035331371, "learning_rate": 9.221576432451266e-06, "loss": 0.5456, "step": 6633 }, { "epoch": 0.54, "grad_norm": 0.7647523024721653, "learning_rate": 9.218951974740354e-06, "loss": 0.4952, "step": 6634 }, { "epoch": 0.54, "grad_norm": 0.8870884362180773, "learning_rate": 9.216327571155384e-06, "loss": 0.4923, "step": 6635 }, { "epoch": 0.54, "grad_norm": 0.8953762772911025, "learning_rate": 9.213703221878217e-06, "loss": 0.5387, "step": 6636 }, { "epoch": 0.54, "grad_norm": 0.7947655687162931, "learning_rate": 9.211078927090714e-06, "loss": 0.536, "step": 6637 }, { "epoch": 0.54, "grad_norm": 0.9651351409257324, "learning_rate": 9.208454686974748e-06, "loss": 0.4714, "step": 6638 }, { "epoch": 0.54, "grad_norm": 0.9095238031554407, "learning_rate": 9.205830501712168e-06, "loss": 0.5529, "step": 6639 }, { "epoch": 0.54, "grad_norm": 0.8954728476730376, "learning_rate": 9.203206371484827e-06, "loss": 0.4868, "step": 6640 }, { "epoch": 0.54, "grad_norm": 0.9236020847975327, "learning_rate": 9.200582296474581e-06, "loss": 0.5553, "step": 6641 }, { "epoch": 0.54, "grad_norm": 0.973573370807308, "learning_rate": 9.197958276863274e-06, "loss": 0.6337, "step": 6642 }, { "epoch": 0.54, "grad_norm": 0.8218184879634992, "learning_rate": 9.195334312832742e-06, "loss": 0.5422, "step": 6643 }, { "epoch": 0.54, "grad_norm": 0.8703081984309136, "learning_rate": 9.192710404564833e-06, "loss": 0.5375, "step": 6644 }, { "epoch": 0.54, "grad_norm": 0.868266999358897, "learning_rate": 9.190086552241375e-06, "loss": 0.5186, "step": 6645 }, { "epoch": 0.54, "grad_norm": 0.9387964986316296, "learning_rate": 9.187462756044198e-06, "loss": 0.5584, "step": 6646 }, { "epoch": 0.54, "grad_norm": 0.9318884814426076, "learning_rate": 9.184839016155136e-06, "loss": 0.5393, "step": 6647 }, { "epoch": 0.54, "grad_norm": 0.894118676292748, "learning_rate": 9.182215332756003e-06, "loss": 0.5073, "step": 6648 }, { "epoch": 0.54, "grad_norm": 0.9782985369401567, "learning_rate": 9.179591706028626e-06, "loss": 0.5251, "step": 6649 }, { "epoch": 0.54, "grad_norm": 0.9914198352405411, "learning_rate": 9.176968136154815e-06, "loss": 0.5461, "step": 6650 }, { "epoch": 0.54, "grad_norm": 0.8639055680751122, "learning_rate": 9.174344623316377e-06, "loss": 0.5449, "step": 6651 }, { "epoch": 0.54, "grad_norm": 0.9484262400017193, "learning_rate": 9.171721167695132e-06, "loss": 0.4951, "step": 6652 }, { "epoch": 0.54, "grad_norm": 0.897854575462643, "learning_rate": 9.169097769472873e-06, "loss": 0.5389, "step": 6653 }, { "epoch": 0.54, "grad_norm": 0.9094567346832353, "learning_rate": 9.166474428831399e-06, "loss": 0.5056, "step": 6654 }, { "epoch": 0.54, "grad_norm": 0.8990709731930566, "learning_rate": 9.16385114595251e-06, "loss": 0.4614, "step": 6655 }, { "epoch": 0.54, "grad_norm": 0.8562388383715867, "learning_rate": 9.161227921017996e-06, "loss": 0.5157, "step": 6656 }, { "epoch": 0.54, "grad_norm": 0.8713141936842353, "learning_rate": 9.158604754209637e-06, "loss": 0.4757, "step": 6657 }, { "epoch": 0.54, "grad_norm": 0.8443143174087063, "learning_rate": 9.15598164570923e-06, "loss": 0.5069, "step": 6658 }, { "epoch": 0.54, "grad_norm": 0.92136601553196, "learning_rate": 9.153358595698542e-06, "loss": 0.5045, "step": 6659 }, { "epoch": 0.54, "grad_norm": 0.827326202667017, "learning_rate": 9.15073560435935e-06, "loss": 0.5152, "step": 6660 }, { "epoch": 0.54, "grad_norm": 0.9489168131505338, "learning_rate": 9.148112671873433e-06, "loss": 0.5234, "step": 6661 }, { "epoch": 0.54, "grad_norm": 0.9148988968297469, "learning_rate": 9.14548979842255e-06, "loss": 0.5703, "step": 6662 }, { "epoch": 0.54, "grad_norm": 0.9248357145748127, "learning_rate": 9.142866984188465e-06, "loss": 0.5427, "step": 6663 }, { "epoch": 0.54, "grad_norm": 0.9191461067370301, "learning_rate": 9.140244229352939e-06, "loss": 0.5771, "step": 6664 }, { "epoch": 0.54, "grad_norm": 0.8586547352456922, "learning_rate": 9.137621534097727e-06, "loss": 0.5078, "step": 6665 }, { "epoch": 0.54, "grad_norm": 0.9672594179057945, "learning_rate": 9.134998898604573e-06, "loss": 0.5568, "step": 6666 }, { "epoch": 0.54, "grad_norm": 0.9385908631281702, "learning_rate": 9.13237632305523e-06, "loss": 0.5224, "step": 6667 }, { "epoch": 0.54, "grad_norm": 0.8288168870807058, "learning_rate": 9.129753807631441e-06, "loss": 0.4726, "step": 6668 }, { "epoch": 0.54, "grad_norm": 0.822299693879274, "learning_rate": 9.127131352514936e-06, "loss": 0.4879, "step": 6669 }, { "epoch": 0.54, "grad_norm": 0.9248585728305219, "learning_rate": 9.124508957887458e-06, "loss": 0.5317, "step": 6670 }, { "epoch": 0.54, "grad_norm": 0.8571133889927011, "learning_rate": 9.121886623930735e-06, "loss": 0.55, "step": 6671 }, { "epoch": 0.54, "grad_norm": 0.9419444582984893, "learning_rate": 9.119264350826484e-06, "loss": 0.538, "step": 6672 }, { "epoch": 0.54, "grad_norm": 0.9999021530257328, "learning_rate": 9.116642138756436e-06, "loss": 0.5301, "step": 6673 }, { "epoch": 0.54, "grad_norm": 0.9350978949571519, "learning_rate": 9.114019987902305e-06, "loss": 0.5313, "step": 6674 }, { "epoch": 0.54, "grad_norm": 0.8767162700759692, "learning_rate": 9.111397898445798e-06, "loss": 0.4861, "step": 6675 }, { "epoch": 0.54, "grad_norm": 0.811548256992899, "learning_rate": 9.108775870568633e-06, "loss": 0.4757, "step": 6676 }, { "epoch": 0.54, "grad_norm": 0.8172579337528821, "learning_rate": 9.10615390445251e-06, "loss": 0.4537, "step": 6677 }, { "epoch": 0.54, "grad_norm": 0.8618640332615486, "learning_rate": 9.103532000279126e-06, "loss": 0.473, "step": 6678 }, { "epoch": 0.54, "grad_norm": 0.8147714207686093, "learning_rate": 9.100910158230181e-06, "loss": 0.4793, "step": 6679 }, { "epoch": 0.54, "grad_norm": 0.8892794701271312, "learning_rate": 9.098288378487365e-06, "loss": 0.4554, "step": 6680 }, { "epoch": 0.54, "grad_norm": 0.9288749782454486, "learning_rate": 9.095666661232359e-06, "loss": 0.5453, "step": 6681 }, { "epoch": 0.54, "grad_norm": 0.9337429815153883, "learning_rate": 9.093045006646858e-06, "loss": 0.5492, "step": 6682 }, { "epoch": 0.54, "grad_norm": 0.8679010196336439, "learning_rate": 9.090423414912533e-06, "loss": 0.5516, "step": 6683 }, { "epoch": 0.54, "grad_norm": 1.0241778177487386, "learning_rate": 9.087801886211054e-06, "loss": 0.5558, "step": 6684 }, { "epoch": 0.54, "grad_norm": 0.9616016252490783, "learning_rate": 9.085180420724098e-06, "loss": 0.5434, "step": 6685 }, { "epoch": 0.54, "grad_norm": 1.0637639240727763, "learning_rate": 9.08255901863333e-06, "loss": 0.5941, "step": 6686 }, { "epoch": 0.54, "grad_norm": 0.8857949184792532, "learning_rate": 9.079937680120403e-06, "loss": 0.513, "step": 6687 }, { "epoch": 0.54, "grad_norm": 0.8920117605122396, "learning_rate": 9.07731640536698e-06, "loss": 0.5177, "step": 6688 }, { "epoch": 0.54, "grad_norm": 0.9771864597037313, "learning_rate": 9.074695194554716e-06, "loss": 0.5003, "step": 6689 }, { "epoch": 0.54, "grad_norm": 0.8469320043425077, "learning_rate": 9.072074047865249e-06, "loss": 0.5068, "step": 6690 }, { "epoch": 0.54, "grad_norm": 0.9443738087006439, "learning_rate": 9.06945296548023e-06, "loss": 0.5157, "step": 6691 }, { "epoch": 0.54, "grad_norm": 0.9300489829476154, "learning_rate": 9.066831947581297e-06, "loss": 0.5403, "step": 6692 }, { "epoch": 0.54, "grad_norm": 1.053327155730105, "learning_rate": 9.064210994350077e-06, "loss": 0.6007, "step": 6693 }, { "epoch": 0.54, "grad_norm": 1.4474967110123402, "learning_rate": 9.061590105968208e-06, "loss": 0.575, "step": 6694 }, { "epoch": 0.54, "grad_norm": 0.8526689997310141, "learning_rate": 9.058969282617314e-06, "loss": 0.5209, "step": 6695 }, { "epoch": 0.54, "grad_norm": 0.8097094716494864, "learning_rate": 9.056348524479011e-06, "loss": 0.4076, "step": 6696 }, { "epoch": 0.54, "grad_norm": 0.8654787700636426, "learning_rate": 9.05372783173492e-06, "loss": 0.4703, "step": 6697 }, { "epoch": 0.54, "grad_norm": 0.8186544312924194, "learning_rate": 9.051107204566652e-06, "loss": 0.4329, "step": 6698 }, { "epoch": 0.54, "grad_norm": 0.8674267898300603, "learning_rate": 9.04848664315581e-06, "loss": 0.5081, "step": 6699 }, { "epoch": 0.54, "grad_norm": 0.925496440387974, "learning_rate": 9.045866147684002e-06, "loss": 0.6014, "step": 6700 }, { "epoch": 0.54, "grad_norm": 0.974342361846761, "learning_rate": 9.043245718332821e-06, "loss": 0.5809, "step": 6701 }, { "epoch": 0.54, "grad_norm": 0.8923396073658344, "learning_rate": 9.040625355283865e-06, "loss": 0.5784, "step": 6702 }, { "epoch": 0.54, "grad_norm": 0.861738870749732, "learning_rate": 9.038005058718722e-06, "loss": 0.5445, "step": 6703 }, { "epoch": 0.54, "grad_norm": 0.8438303457500101, "learning_rate": 9.035384828818974e-06, "loss": 0.5452, "step": 6704 }, { "epoch": 0.54, "grad_norm": 0.8829006208766645, "learning_rate": 9.0327646657662e-06, "loss": 0.5218, "step": 6705 }, { "epoch": 0.55, "grad_norm": 0.8418172476199486, "learning_rate": 9.03014456974198e-06, "loss": 0.4835, "step": 6706 }, { "epoch": 0.55, "grad_norm": 1.008223396610821, "learning_rate": 9.027524540927878e-06, "loss": 0.5586, "step": 6707 }, { "epoch": 0.55, "grad_norm": 0.8712725958245666, "learning_rate": 9.024904579505465e-06, "loss": 0.4877, "step": 6708 }, { "epoch": 0.55, "grad_norm": 0.9502419365005647, "learning_rate": 9.0222846856563e-06, "loss": 0.5817, "step": 6709 }, { "epoch": 0.55, "grad_norm": 0.951721220447067, "learning_rate": 9.019664859561938e-06, "loss": 0.5443, "step": 6710 }, { "epoch": 0.55, "grad_norm": 0.8979820465371918, "learning_rate": 9.01704510140393e-06, "loss": 0.5145, "step": 6711 }, { "epoch": 0.55, "grad_norm": 0.9145141115880605, "learning_rate": 9.014425411363827e-06, "loss": 0.5198, "step": 6712 }, { "epoch": 0.55, "grad_norm": 0.891068921265497, "learning_rate": 9.011805789623168e-06, "loss": 0.5452, "step": 6713 }, { "epoch": 0.55, "grad_norm": 0.8933266887513471, "learning_rate": 9.00918623636349e-06, "loss": 0.5038, "step": 6714 }, { "epoch": 0.55, "grad_norm": 0.8983927826301715, "learning_rate": 9.00656675176633e-06, "loss": 0.5516, "step": 6715 }, { "epoch": 0.55, "grad_norm": 1.145812936653427, "learning_rate": 9.003947336013212e-06, "loss": 0.5339, "step": 6716 }, { "epoch": 0.55, "grad_norm": 0.800954853444465, "learning_rate": 9.001327989285658e-06, "loss": 0.4313, "step": 6717 }, { "epoch": 0.55, "grad_norm": 0.9599757274661858, "learning_rate": 8.99870871176519e-06, "loss": 0.5393, "step": 6718 }, { "epoch": 0.55, "grad_norm": 0.8637559886276752, "learning_rate": 8.99608950363332e-06, "loss": 0.4987, "step": 6719 }, { "epoch": 0.55, "grad_norm": 0.9177154870428762, "learning_rate": 8.993470365071557e-06, "loss": 0.4984, "step": 6720 }, { "epoch": 0.55, "grad_norm": 0.8671382758403665, "learning_rate": 8.990851296261403e-06, "loss": 0.4806, "step": 6721 }, { "epoch": 0.55, "grad_norm": 0.8485265776210158, "learning_rate": 8.988232297384363e-06, "loss": 0.5311, "step": 6722 }, { "epoch": 0.55, "grad_norm": 0.8996283623951162, "learning_rate": 8.985613368621923e-06, "loss": 0.479, "step": 6723 }, { "epoch": 0.55, "grad_norm": 0.8339680584043668, "learning_rate": 8.98299451015558e-06, "loss": 0.4592, "step": 6724 }, { "epoch": 0.55, "grad_norm": 0.911429171245189, "learning_rate": 8.980375722166816e-06, "loss": 0.5446, "step": 6725 }, { "epoch": 0.55, "grad_norm": 0.9296998668878459, "learning_rate": 8.977757004837107e-06, "loss": 0.526, "step": 6726 }, { "epoch": 0.55, "grad_norm": 0.8934152401740832, "learning_rate": 8.975138358347931e-06, "loss": 0.5168, "step": 6727 }, { "epoch": 0.55, "grad_norm": 0.8699340458300051, "learning_rate": 8.97251978288076e-06, "loss": 0.5115, "step": 6728 }, { "epoch": 0.55, "grad_norm": 0.8042443273524694, "learning_rate": 8.969901278617056e-06, "loss": 0.493, "step": 6729 }, { "epoch": 0.55, "grad_norm": 0.9139150214408609, "learning_rate": 8.967282845738278e-06, "loss": 0.5782, "step": 6730 }, { "epoch": 0.55, "grad_norm": 0.9087431719596738, "learning_rate": 8.964664484425887e-06, "loss": 0.4987, "step": 6731 }, { "epoch": 0.55, "grad_norm": 0.9863678036432829, "learning_rate": 8.962046194861324e-06, "loss": 0.5613, "step": 6732 }, { "epoch": 0.55, "grad_norm": 0.8027492964740189, "learning_rate": 8.959427977226041e-06, "loss": 0.5256, "step": 6733 }, { "epoch": 0.55, "grad_norm": 0.9706281749582049, "learning_rate": 8.956809831701478e-06, "loss": 0.5961, "step": 6734 }, { "epoch": 0.55, "grad_norm": 0.837992334486424, "learning_rate": 8.954191758469065e-06, "loss": 0.5812, "step": 6735 }, { "epoch": 0.55, "grad_norm": 0.9209016693375767, "learning_rate": 8.951573757710237e-06, "loss": 0.5021, "step": 6736 }, { "epoch": 0.55, "grad_norm": 0.8954100318518413, "learning_rate": 8.948955829606419e-06, "loss": 0.5376, "step": 6737 }, { "epoch": 0.55, "grad_norm": 0.8414160352133793, "learning_rate": 8.946337974339025e-06, "loss": 0.4799, "step": 6738 }, { "epoch": 0.55, "grad_norm": 0.9261969375567992, "learning_rate": 8.94372019208948e-06, "loss": 0.5435, "step": 6739 }, { "epoch": 0.55, "grad_norm": 0.8862438885604225, "learning_rate": 8.941102483039188e-06, "loss": 0.4679, "step": 6740 }, { "epoch": 0.55, "grad_norm": 0.998787432571941, "learning_rate": 8.938484847369552e-06, "loss": 0.529, "step": 6741 }, { "epoch": 0.55, "grad_norm": 0.933973434550398, "learning_rate": 8.935867285261977e-06, "loss": 0.5466, "step": 6742 }, { "epoch": 0.55, "grad_norm": 0.8179520104205413, "learning_rate": 8.933249796897857e-06, "loss": 0.5482, "step": 6743 }, { "epoch": 0.55, "grad_norm": 1.0059179488657666, "learning_rate": 8.930632382458574e-06, "loss": 0.5231, "step": 6744 }, { "epoch": 0.55, "grad_norm": 0.8559062638771029, "learning_rate": 8.928015042125523e-06, "loss": 0.5056, "step": 6745 }, { "epoch": 0.55, "grad_norm": 0.9097764966615546, "learning_rate": 8.92539777608008e-06, "loss": 0.5607, "step": 6746 }, { "epoch": 0.55, "grad_norm": 0.9397337848518229, "learning_rate": 8.92278058450361e-06, "loss": 0.5203, "step": 6747 }, { "epoch": 0.55, "grad_norm": 0.9255883182156407, "learning_rate": 8.920163467577498e-06, "loss": 0.5321, "step": 6748 }, { "epoch": 0.55, "grad_norm": 1.0030233998599511, "learning_rate": 8.9175464254831e-06, "loss": 0.5644, "step": 6749 }, { "epoch": 0.55, "grad_norm": 0.8286218798653401, "learning_rate": 8.914929458401767e-06, "loss": 0.5238, "step": 6750 }, { "epoch": 0.55, "grad_norm": 0.8400491207198404, "learning_rate": 8.912312566514867e-06, "loss": 0.5077, "step": 6751 }, { "epoch": 0.55, "grad_norm": 1.0080073977252997, "learning_rate": 8.909695750003741e-06, "loss": 0.5366, "step": 6752 }, { "epoch": 0.55, "grad_norm": 0.9119849927606701, "learning_rate": 8.907079009049728e-06, "loss": 0.4794, "step": 6753 }, { "epoch": 0.55, "grad_norm": 0.9238807903194287, "learning_rate": 8.904462343834174e-06, "loss": 0.4932, "step": 6754 }, { "epoch": 0.55, "grad_norm": 0.8953783979565785, "learning_rate": 8.901845754538408e-06, "loss": 0.5132, "step": 6755 }, { "epoch": 0.55, "grad_norm": 0.8881835413458181, "learning_rate": 8.899229241343753e-06, "loss": 0.5388, "step": 6756 }, { "epoch": 0.55, "grad_norm": 0.8059493226002098, "learning_rate": 8.89661280443154e-06, "loss": 0.4756, "step": 6757 }, { "epoch": 0.55, "grad_norm": 0.8589226325513761, "learning_rate": 8.89399644398308e-06, "loss": 0.5012, "step": 6758 }, { "epoch": 0.55, "grad_norm": 0.8661157764886962, "learning_rate": 8.891380160179683e-06, "loss": 0.5472, "step": 6759 }, { "epoch": 0.55, "grad_norm": 0.8802847133080646, "learning_rate": 8.88876395320266e-06, "loss": 0.5278, "step": 6760 }, { "epoch": 0.55, "grad_norm": 0.839132635416687, "learning_rate": 8.88614782323331e-06, "loss": 0.5119, "step": 6761 }, { "epoch": 0.55, "grad_norm": 0.8662673702579419, "learning_rate": 8.883531770452924e-06, "loss": 0.5376, "step": 6762 }, { "epoch": 0.55, "grad_norm": 0.8629463522649556, "learning_rate": 8.880915795042798e-06, "loss": 0.4509, "step": 6763 }, { "epoch": 0.55, "grad_norm": 0.9198570897646432, "learning_rate": 8.878299897184218e-06, "loss": 0.5309, "step": 6764 }, { "epoch": 0.55, "grad_norm": 0.9682401080681828, "learning_rate": 8.875684077058453e-06, "loss": 0.5539, "step": 6765 }, { "epoch": 0.55, "grad_norm": 0.9131512395292984, "learning_rate": 8.87306833484679e-06, "loss": 0.5804, "step": 6766 }, { "epoch": 0.55, "grad_norm": 0.9043883538180063, "learning_rate": 8.870452670730491e-06, "loss": 0.5376, "step": 6767 }, { "epoch": 0.55, "grad_norm": 0.8842007847462771, "learning_rate": 8.867837084890817e-06, "loss": 0.5603, "step": 6768 }, { "epoch": 0.55, "grad_norm": 0.8917436434495278, "learning_rate": 8.865221577509034e-06, "loss": 0.5391, "step": 6769 }, { "epoch": 0.55, "grad_norm": 0.8969528174452776, "learning_rate": 8.862606148766386e-06, "loss": 0.5521, "step": 6770 }, { "epoch": 0.55, "grad_norm": 0.8549046038065036, "learning_rate": 8.85999079884412e-06, "loss": 0.5104, "step": 6771 }, { "epoch": 0.55, "grad_norm": 0.8370046492826245, "learning_rate": 8.857375527923487e-06, "loss": 0.4694, "step": 6772 }, { "epoch": 0.55, "grad_norm": 0.9201091933672658, "learning_rate": 8.854760336185709e-06, "loss": 0.5549, "step": 6773 }, { "epoch": 0.55, "grad_norm": 0.9192420832976912, "learning_rate": 8.85214522381203e-06, "loss": 0.5098, "step": 6774 }, { "epoch": 0.55, "grad_norm": 0.8566796001040332, "learning_rate": 8.849530190983669e-06, "loss": 0.4605, "step": 6775 }, { "epoch": 0.55, "grad_norm": 0.8527635064927266, "learning_rate": 8.846915237881838e-06, "loss": 0.5575, "step": 6776 }, { "epoch": 0.55, "grad_norm": 0.8049186625770314, "learning_rate": 8.844300364687766e-06, "loss": 0.4864, "step": 6777 }, { "epoch": 0.55, "grad_norm": 0.874725545010611, "learning_rate": 8.841685571582652e-06, "loss": 0.5308, "step": 6778 }, { "epoch": 0.55, "grad_norm": 0.9224938208424457, "learning_rate": 8.839070858747697e-06, "loss": 0.5409, "step": 6779 }, { "epoch": 0.55, "grad_norm": 0.8814961698465333, "learning_rate": 8.836456226364106e-06, "loss": 0.5138, "step": 6780 }, { "epoch": 0.55, "grad_norm": 0.9087348939049014, "learning_rate": 8.833841674613066e-06, "loss": 0.548, "step": 6781 }, { "epoch": 0.55, "grad_norm": 0.9069806988718064, "learning_rate": 8.831227203675759e-06, "loss": 0.5685, "step": 6782 }, { "epoch": 0.55, "grad_norm": 0.9541666664847532, "learning_rate": 8.828612813733375e-06, "loss": 0.5086, "step": 6783 }, { "epoch": 0.55, "grad_norm": 0.8871536751057871, "learning_rate": 8.825998504967083e-06, "loss": 0.5631, "step": 6784 }, { "epoch": 0.55, "grad_norm": 0.8795996020980996, "learning_rate": 8.823384277558049e-06, "loss": 0.5313, "step": 6785 }, { "epoch": 0.55, "grad_norm": 0.9247932552615178, "learning_rate": 8.820770131687447e-06, "loss": 0.5294, "step": 6786 }, { "epoch": 0.55, "grad_norm": 0.8857003101974371, "learning_rate": 8.818156067536428e-06, "loss": 0.5431, "step": 6787 }, { "epoch": 0.55, "grad_norm": 0.9214931834118468, "learning_rate": 8.81554208528614e-06, "loss": 0.5022, "step": 6788 }, { "epoch": 0.55, "grad_norm": 0.9891148743193596, "learning_rate": 8.81292818511774e-06, "loss": 0.5028, "step": 6789 }, { "epoch": 0.55, "grad_norm": 0.8078361977439683, "learning_rate": 8.810314367212363e-06, "loss": 0.4507, "step": 6790 }, { "epoch": 0.55, "grad_norm": 0.8869455383110579, "learning_rate": 8.807700631751142e-06, "loss": 0.5118, "step": 6791 }, { "epoch": 0.55, "grad_norm": 0.9101666591005333, "learning_rate": 8.805086978915215e-06, "loss": 0.5276, "step": 6792 }, { "epoch": 0.55, "grad_norm": 0.8699626001339014, "learning_rate": 8.802473408885698e-06, "loss": 0.5151, "step": 6793 }, { "epoch": 0.55, "grad_norm": 0.8984143344812208, "learning_rate": 8.799859921843708e-06, "loss": 0.5043, "step": 6794 }, { "epoch": 0.55, "grad_norm": 0.8903818770283506, "learning_rate": 8.797246517970365e-06, "loss": 0.4752, "step": 6795 }, { "epoch": 0.55, "grad_norm": 0.8499060192340044, "learning_rate": 8.79463319744677e-06, "loss": 0.5005, "step": 6796 }, { "epoch": 0.55, "grad_norm": 0.8716575656378632, "learning_rate": 8.792019960454025e-06, "loss": 0.5114, "step": 6797 }, { "epoch": 0.55, "grad_norm": 0.9138479375865836, "learning_rate": 8.789406807173226e-06, "loss": 0.5285, "step": 6798 }, { "epoch": 0.55, "grad_norm": 0.9210455881917426, "learning_rate": 8.786793737785465e-06, "loss": 0.5623, "step": 6799 }, { "epoch": 0.55, "grad_norm": 1.059953054962172, "learning_rate": 8.784180752471814e-06, "loss": 0.6021, "step": 6800 }, { "epoch": 0.55, "grad_norm": 0.9384117653547553, "learning_rate": 8.781567851413363e-06, "loss": 0.4719, "step": 6801 }, { "epoch": 0.55, "grad_norm": 0.9469162552609387, "learning_rate": 8.77895503479118e-06, "loss": 0.544, "step": 6802 }, { "epoch": 0.55, "grad_norm": 0.918780292574788, "learning_rate": 8.776342302786327e-06, "loss": 0.5458, "step": 6803 }, { "epoch": 0.55, "grad_norm": 0.8163978896404098, "learning_rate": 8.773729655579872e-06, "loss": 0.4747, "step": 6804 }, { "epoch": 0.55, "grad_norm": 0.8581993144879657, "learning_rate": 8.771117093352861e-06, "loss": 0.4709, "step": 6805 }, { "epoch": 0.55, "grad_norm": 0.7416031093198225, "learning_rate": 8.768504616286343e-06, "loss": 0.4403, "step": 6806 }, { "epoch": 0.55, "grad_norm": 0.9415202265596095, "learning_rate": 8.765892224561367e-06, "loss": 0.4747, "step": 6807 }, { "epoch": 0.55, "grad_norm": 0.8392199576195705, "learning_rate": 8.763279918358965e-06, "loss": 0.4821, "step": 6808 }, { "epoch": 0.55, "grad_norm": 0.9156867689783389, "learning_rate": 8.760667697860163e-06, "loss": 0.5173, "step": 6809 }, { "epoch": 0.55, "grad_norm": 0.8526700674513049, "learning_rate": 8.758055563245994e-06, "loss": 0.4999, "step": 6810 }, { "epoch": 0.55, "grad_norm": 0.9897941428051377, "learning_rate": 8.755443514697475e-06, "loss": 0.5442, "step": 6811 }, { "epoch": 0.55, "grad_norm": 0.9994090842074854, "learning_rate": 8.75283155239561e-06, "loss": 0.5672, "step": 6812 }, { "epoch": 0.55, "grad_norm": 0.9643497009203205, "learning_rate": 8.750219676521417e-06, "loss": 0.5924, "step": 6813 }, { "epoch": 0.55, "grad_norm": 0.9052958275216263, "learning_rate": 8.747607887255892e-06, "loss": 0.4995, "step": 6814 }, { "epoch": 0.55, "grad_norm": 0.9110981702917157, "learning_rate": 8.744996184780027e-06, "loss": 0.5191, "step": 6815 }, { "epoch": 0.55, "grad_norm": 0.9418260441700346, "learning_rate": 8.742384569274815e-06, "loss": 0.5403, "step": 6816 }, { "epoch": 0.55, "grad_norm": 0.9570874072452588, "learning_rate": 8.739773040921237e-06, "loss": 0.5515, "step": 6817 }, { "epoch": 0.55, "grad_norm": 0.8969656147692997, "learning_rate": 8.737161599900267e-06, "loss": 0.5075, "step": 6818 }, { "epoch": 0.55, "grad_norm": 0.8998191286869733, "learning_rate": 8.734550246392881e-06, "loss": 0.5951, "step": 6819 }, { "epoch": 0.55, "grad_norm": 0.8362595923787626, "learning_rate": 8.73193898058004e-06, "loss": 0.4867, "step": 6820 }, { "epoch": 0.55, "grad_norm": 0.8519217677907318, "learning_rate": 8.7293278026427e-06, "loss": 0.545, "step": 6821 }, { "epoch": 0.55, "grad_norm": 0.9234059581598991, "learning_rate": 8.726716712761821e-06, "loss": 0.5394, "step": 6822 }, { "epoch": 0.55, "grad_norm": 0.8558200276209291, "learning_rate": 8.724105711118342e-06, "loss": 0.5496, "step": 6823 }, { "epoch": 0.55, "grad_norm": 0.8377667534966279, "learning_rate": 8.721494797893201e-06, "loss": 0.5268, "step": 6824 }, { "epoch": 0.55, "grad_norm": 0.8517243807535965, "learning_rate": 8.718883973267344e-06, "loss": 0.5228, "step": 6825 }, { "epoch": 0.55, "grad_norm": 0.8573727687125215, "learning_rate": 8.716273237421688e-06, "loss": 0.4513, "step": 6826 }, { "epoch": 0.55, "grad_norm": 0.8949363228005951, "learning_rate": 8.713662590537155e-06, "loss": 0.5142, "step": 6827 }, { "epoch": 0.55, "grad_norm": 0.9823612055364075, "learning_rate": 8.711052032794668e-06, "loss": 0.6011, "step": 6828 }, { "epoch": 0.56, "grad_norm": 0.9043280010894629, "learning_rate": 8.708441564375132e-06, "loss": 0.5257, "step": 6829 }, { "epoch": 0.56, "grad_norm": 0.8884025036511776, "learning_rate": 8.705831185459446e-06, "loss": 0.5371, "step": 6830 }, { "epoch": 0.56, "grad_norm": 0.8851961931117828, "learning_rate": 8.703220896228515e-06, "loss": 0.5296, "step": 6831 }, { "epoch": 0.56, "grad_norm": 0.9496141722730437, "learning_rate": 8.700610696863225e-06, "loss": 0.5642, "step": 6832 }, { "epoch": 0.56, "grad_norm": 0.8771895138899715, "learning_rate": 8.698000587544457e-06, "loss": 0.5325, "step": 6833 }, { "epoch": 0.56, "grad_norm": 0.8952605449602975, "learning_rate": 8.695390568453099e-06, "loss": 0.5094, "step": 6834 }, { "epoch": 0.56, "grad_norm": 0.9544184892925124, "learning_rate": 8.692780639770015e-06, "loss": 0.5366, "step": 6835 }, { "epoch": 0.56, "grad_norm": 0.9309194420917352, "learning_rate": 8.69017080167607e-06, "loss": 0.541, "step": 6836 }, { "epoch": 0.56, "grad_norm": 0.8723271018988956, "learning_rate": 8.687561054352132e-06, "loss": 0.4398, "step": 6837 }, { "epoch": 0.56, "grad_norm": 0.8901099533404632, "learning_rate": 8.684951397979049e-06, "loss": 0.4952, "step": 6838 }, { "epoch": 0.56, "grad_norm": 0.9158614565675948, "learning_rate": 8.68234183273766e-06, "loss": 0.498, "step": 6839 }, { "epoch": 0.56, "grad_norm": 0.9034404937210446, "learning_rate": 8.679732358808822e-06, "loss": 0.5241, "step": 6840 }, { "epoch": 0.56, "grad_norm": 0.91821176168655, "learning_rate": 8.677122976373356e-06, "loss": 0.5759, "step": 6841 }, { "epoch": 0.56, "grad_norm": 0.9047712664252271, "learning_rate": 8.674513685612093e-06, "loss": 0.5376, "step": 6842 }, { "epoch": 0.56, "grad_norm": 0.9452668129583185, "learning_rate": 8.67190448670586e-06, "loss": 0.5454, "step": 6843 }, { "epoch": 0.56, "grad_norm": 0.9045047109963618, "learning_rate": 8.669295379835467e-06, "loss": 0.4366, "step": 6844 }, { "epoch": 0.56, "grad_norm": 0.848893201507573, "learning_rate": 8.66668636518172e-06, "loss": 0.5043, "step": 6845 }, { "epoch": 0.56, "grad_norm": 0.9522331654771756, "learning_rate": 8.66407744292543e-06, "loss": 0.5705, "step": 6846 }, { "epoch": 0.56, "grad_norm": 0.8362035815919093, "learning_rate": 8.661468613247387e-06, "loss": 0.528, "step": 6847 }, { "epoch": 0.56, "grad_norm": 0.8439852570234441, "learning_rate": 8.658859876328379e-06, "loss": 0.5723, "step": 6848 }, { "epoch": 0.56, "grad_norm": 0.8729192359407367, "learning_rate": 8.656251232349196e-06, "loss": 0.5041, "step": 6849 }, { "epoch": 0.56, "grad_norm": 0.9365431033561334, "learning_rate": 8.653642681490608e-06, "loss": 0.541, "step": 6850 }, { "epoch": 0.56, "grad_norm": 0.9090828954084572, "learning_rate": 8.651034223933387e-06, "loss": 0.5001, "step": 6851 }, { "epoch": 0.56, "grad_norm": 0.9040470072628123, "learning_rate": 8.648425859858302e-06, "loss": 0.5238, "step": 6852 }, { "epoch": 0.56, "grad_norm": 0.8724924522448079, "learning_rate": 8.645817589446104e-06, "loss": 0.5102, "step": 6853 }, { "epoch": 0.56, "grad_norm": 0.9094503790788749, "learning_rate": 8.643209412877545e-06, "loss": 0.4854, "step": 6854 }, { "epoch": 0.56, "grad_norm": 0.8456235151710461, "learning_rate": 8.640601330333372e-06, "loss": 0.4561, "step": 6855 }, { "epoch": 0.56, "grad_norm": 0.847154330493836, "learning_rate": 8.637993341994318e-06, "loss": 0.4553, "step": 6856 }, { "epoch": 0.56, "grad_norm": 0.9621209098974626, "learning_rate": 8.63538544804112e-06, "loss": 0.6182, "step": 6857 }, { "epoch": 0.56, "grad_norm": 0.8996509203921742, "learning_rate": 8.6327776486545e-06, "loss": 0.557, "step": 6858 }, { "epoch": 0.56, "grad_norm": 0.9084173699307313, "learning_rate": 8.630169944015175e-06, "loss": 0.5676, "step": 6859 }, { "epoch": 0.56, "grad_norm": 0.9108279490107368, "learning_rate": 8.627562334303856e-06, "loss": 0.5245, "step": 6860 }, { "epoch": 0.56, "grad_norm": 0.951437155317072, "learning_rate": 8.624954819701254e-06, "loss": 0.5329, "step": 6861 }, { "epoch": 0.56, "grad_norm": 0.8850829837968626, "learning_rate": 8.62234740038806e-06, "loss": 0.5477, "step": 6862 }, { "epoch": 0.56, "grad_norm": 0.9464888994757814, "learning_rate": 8.61974007654497e-06, "loss": 0.5163, "step": 6863 }, { "epoch": 0.56, "grad_norm": 0.8720404929484166, "learning_rate": 8.617132848352672e-06, "loss": 0.5445, "step": 6864 }, { "epoch": 0.56, "grad_norm": 0.8728133761361274, "learning_rate": 8.614525715991838e-06, "loss": 0.4891, "step": 6865 }, { "epoch": 0.56, "grad_norm": 0.8127871687528759, "learning_rate": 8.61191867964314e-06, "loss": 0.5541, "step": 6866 }, { "epoch": 0.56, "grad_norm": 0.9500036866001732, "learning_rate": 8.60931173948725e-06, "loss": 0.5357, "step": 6867 }, { "epoch": 0.56, "grad_norm": 0.9133395294343943, "learning_rate": 8.606704895704824e-06, "loss": 0.5075, "step": 6868 }, { "epoch": 0.56, "grad_norm": 0.8935830929853792, "learning_rate": 8.60409814847651e-06, "loss": 0.5594, "step": 6869 }, { "epoch": 0.56, "grad_norm": 0.8317842659008998, "learning_rate": 8.601491497982956e-06, "loss": 0.4818, "step": 6870 }, { "epoch": 0.56, "grad_norm": 0.8450371891233477, "learning_rate": 8.598884944404803e-06, "loss": 0.5497, "step": 6871 }, { "epoch": 0.56, "grad_norm": 0.8318739755346117, "learning_rate": 8.59627848792268e-06, "loss": 0.4475, "step": 6872 }, { "epoch": 0.56, "grad_norm": 0.8811775776785117, "learning_rate": 8.593672128717211e-06, "loss": 0.549, "step": 6873 }, { "epoch": 0.56, "grad_norm": 0.9664851965942245, "learning_rate": 8.591065866969019e-06, "loss": 0.5226, "step": 6874 }, { "epoch": 0.56, "grad_norm": 0.896869716587723, "learning_rate": 8.588459702858709e-06, "loss": 0.5001, "step": 6875 }, { "epoch": 0.56, "grad_norm": 0.8946735161087283, "learning_rate": 8.585853636566891e-06, "loss": 0.5486, "step": 6876 }, { "epoch": 0.56, "grad_norm": 0.9655935262527788, "learning_rate": 8.583247668274163e-06, "loss": 0.5092, "step": 6877 }, { "epoch": 0.56, "grad_norm": 0.8410789878708989, "learning_rate": 8.580641798161114e-06, "loss": 0.544, "step": 6878 }, { "epoch": 0.56, "grad_norm": 0.8277452055258614, "learning_rate": 8.578036026408329e-06, "loss": 0.4597, "step": 6879 }, { "epoch": 0.56, "grad_norm": 0.815244774195561, "learning_rate": 8.575430353196388e-06, "loss": 0.444, "step": 6880 }, { "epoch": 0.56, "grad_norm": 0.9093735584737653, "learning_rate": 8.572824778705858e-06, "loss": 0.5305, "step": 6881 }, { "epoch": 0.56, "grad_norm": 0.824061186847819, "learning_rate": 8.570219303117305e-06, "loss": 0.5214, "step": 6882 }, { "epoch": 0.56, "grad_norm": 0.8606650396604164, "learning_rate": 8.567613926611287e-06, "loss": 0.5188, "step": 6883 }, { "epoch": 0.56, "grad_norm": 0.8166098225852793, "learning_rate": 8.565008649368353e-06, "loss": 0.4376, "step": 6884 }, { "epoch": 0.56, "grad_norm": 0.8347231452093975, "learning_rate": 8.562403471569045e-06, "loss": 0.547, "step": 6885 }, { "epoch": 0.56, "grad_norm": 0.8097988214796327, "learning_rate": 8.559798393393905e-06, "loss": 0.5447, "step": 6886 }, { "epoch": 0.56, "grad_norm": 0.9244774758950597, "learning_rate": 8.557193415023453e-06, "loss": 0.5247, "step": 6887 }, { "epoch": 0.56, "grad_norm": 0.9112277686677142, "learning_rate": 8.554588536638223e-06, "loss": 0.5273, "step": 6888 }, { "epoch": 0.56, "grad_norm": 0.9646248263987668, "learning_rate": 8.551983758418726e-06, "loss": 0.5838, "step": 6889 }, { "epoch": 0.56, "grad_norm": 0.9010237227640583, "learning_rate": 8.549379080545465e-06, "loss": 0.5373, "step": 6890 }, { "epoch": 0.56, "grad_norm": 0.8840857549032455, "learning_rate": 8.546774503198952e-06, "loss": 0.5212, "step": 6891 }, { "epoch": 0.56, "grad_norm": 0.8703836152904741, "learning_rate": 8.544170026559675e-06, "loss": 0.515, "step": 6892 }, { "epoch": 0.56, "grad_norm": 0.9110363190568741, "learning_rate": 8.541565650808121e-06, "loss": 0.536, "step": 6893 }, { "epoch": 0.56, "grad_norm": 0.8667711655341527, "learning_rate": 8.538961376124778e-06, "loss": 0.4941, "step": 6894 }, { "epoch": 0.56, "grad_norm": 0.8986411060576323, "learning_rate": 8.536357202690115e-06, "loss": 0.5344, "step": 6895 }, { "epoch": 0.56, "grad_norm": 0.8943813528753555, "learning_rate": 8.533753130684596e-06, "loss": 0.5101, "step": 6896 }, { "epoch": 0.56, "grad_norm": 0.7868350464166751, "learning_rate": 8.531149160288689e-06, "loss": 0.4988, "step": 6897 }, { "epoch": 0.56, "grad_norm": 0.8812378069066342, "learning_rate": 8.528545291682839e-06, "loss": 0.4645, "step": 6898 }, { "epoch": 0.56, "grad_norm": 0.8133762449585381, "learning_rate": 8.525941525047497e-06, "loss": 0.4826, "step": 6899 }, { "epoch": 0.56, "grad_norm": 0.9905503171322216, "learning_rate": 8.5233378605631e-06, "loss": 0.5461, "step": 6900 }, { "epoch": 0.56, "grad_norm": 0.9258311041322348, "learning_rate": 8.520734298410078e-06, "loss": 0.5587, "step": 6901 }, { "epoch": 0.56, "grad_norm": 0.8495388917943362, "learning_rate": 8.51813083876886e-06, "loss": 0.5298, "step": 6902 }, { "epoch": 0.56, "grad_norm": 0.9334580528775865, "learning_rate": 8.51552748181986e-06, "loss": 0.4874, "step": 6903 }, { "epoch": 0.56, "grad_norm": 0.8939242695409655, "learning_rate": 8.512924227743482e-06, "loss": 0.5229, "step": 6904 }, { "epoch": 0.56, "grad_norm": 0.8807139695796883, "learning_rate": 8.510321076720143e-06, "loss": 0.5246, "step": 6905 }, { "epoch": 0.56, "grad_norm": 0.8559006349127389, "learning_rate": 8.507718028930232e-06, "loss": 0.4901, "step": 6906 }, { "epoch": 0.56, "grad_norm": 1.0000213916080734, "learning_rate": 8.505115084554133e-06, "loss": 0.5512, "step": 6907 }, { "epoch": 0.56, "grad_norm": 0.8732842596234691, "learning_rate": 8.502512243772238e-06, "loss": 0.5052, "step": 6908 }, { "epoch": 0.56, "grad_norm": 0.883491030341863, "learning_rate": 8.499909506764914e-06, "loss": 0.4797, "step": 6909 }, { "epoch": 0.56, "grad_norm": 0.8306560563324287, "learning_rate": 8.497306873712529e-06, "loss": 0.5345, "step": 6910 }, { "epoch": 0.56, "grad_norm": 0.9538067482071597, "learning_rate": 8.494704344795447e-06, "loss": 0.5573, "step": 6911 }, { "epoch": 0.56, "grad_norm": 0.8159645915961217, "learning_rate": 8.49210192019402e-06, "loss": 0.4658, "step": 6912 }, { "epoch": 0.56, "grad_norm": 0.8417783364151031, "learning_rate": 8.489499600088587e-06, "loss": 0.4694, "step": 6913 }, { "epoch": 0.56, "grad_norm": 0.9480484593522762, "learning_rate": 8.486897384659496e-06, "loss": 0.5814, "step": 6914 }, { "epoch": 0.56, "grad_norm": 0.9328104862498665, "learning_rate": 8.484295274087077e-06, "loss": 0.5421, "step": 6915 }, { "epoch": 0.56, "grad_norm": 0.9775408046991969, "learning_rate": 8.481693268551645e-06, "loss": 0.5591, "step": 6916 }, { "epoch": 0.56, "grad_norm": 0.9216249536740916, "learning_rate": 8.479091368233527e-06, "loss": 0.5456, "step": 6917 }, { "epoch": 0.56, "grad_norm": 0.8600506186216511, "learning_rate": 8.476489573313026e-06, "loss": 0.4522, "step": 6918 }, { "epoch": 0.56, "grad_norm": 0.9424247242351319, "learning_rate": 8.473887883970444e-06, "loss": 0.5218, "step": 6919 }, { "epoch": 0.56, "grad_norm": 0.9545641299517474, "learning_rate": 8.471286300386084e-06, "loss": 0.5171, "step": 6920 }, { "epoch": 0.56, "grad_norm": 0.9165517930847911, "learning_rate": 8.468684822740226e-06, "loss": 0.554, "step": 6921 }, { "epoch": 0.56, "grad_norm": 0.8727313173381859, "learning_rate": 8.466083451213145e-06, "loss": 0.4691, "step": 6922 }, { "epoch": 0.56, "grad_norm": 0.8684377015353146, "learning_rate": 8.463482185985127e-06, "loss": 0.5302, "step": 6923 }, { "epoch": 0.56, "grad_norm": 0.85865382887651, "learning_rate": 8.46088102723643e-06, "loss": 0.5485, "step": 6924 }, { "epoch": 0.56, "grad_norm": 0.9178264914623333, "learning_rate": 8.458279975147308e-06, "loss": 0.5013, "step": 6925 }, { "epoch": 0.56, "grad_norm": 0.8364225548664053, "learning_rate": 8.45567902989802e-06, "loss": 0.4822, "step": 6926 }, { "epoch": 0.56, "grad_norm": 0.8802465512047095, "learning_rate": 8.453078191668806e-06, "loss": 0.4854, "step": 6927 }, { "epoch": 0.56, "grad_norm": 0.9392464025402688, "learning_rate": 8.450477460639898e-06, "loss": 0.5663, "step": 6928 }, { "epoch": 0.56, "grad_norm": 0.9006510750755666, "learning_rate": 8.447876836991531e-06, "loss": 0.5447, "step": 6929 }, { "epoch": 0.56, "grad_norm": 0.8320821905591601, "learning_rate": 8.445276320903922e-06, "loss": 0.4939, "step": 6930 }, { "epoch": 0.56, "grad_norm": 0.8839694642333762, "learning_rate": 8.442675912557281e-06, "loss": 0.506, "step": 6931 }, { "epoch": 0.56, "grad_norm": 0.9395330291731121, "learning_rate": 8.440075612131823e-06, "loss": 0.5072, "step": 6932 }, { "epoch": 0.56, "grad_norm": 0.8635009440795924, "learning_rate": 8.437475419807742e-06, "loss": 0.5385, "step": 6933 }, { "epoch": 0.56, "grad_norm": 0.9849878980592961, "learning_rate": 8.434875335765222e-06, "loss": 0.6095, "step": 6934 }, { "epoch": 0.56, "grad_norm": 0.9255841893242462, "learning_rate": 8.432275360184458e-06, "loss": 0.5174, "step": 6935 }, { "epoch": 0.56, "grad_norm": 0.9090364558178271, "learning_rate": 8.42967549324562e-06, "loss": 0.5509, "step": 6936 }, { "epoch": 0.56, "grad_norm": 1.0063817065902216, "learning_rate": 8.427075735128874e-06, "loss": 0.5363, "step": 6937 }, { "epoch": 0.56, "grad_norm": 0.9496204700450661, "learning_rate": 8.42447608601439e-06, "loss": 0.4875, "step": 6938 }, { "epoch": 0.56, "grad_norm": 0.7609053421255205, "learning_rate": 8.421876546082315e-06, "loss": 0.4664, "step": 6939 }, { "epoch": 0.56, "grad_norm": 0.915322118497608, "learning_rate": 8.419277115512791e-06, "loss": 0.5448, "step": 6940 }, { "epoch": 0.56, "grad_norm": 0.9743354645399194, "learning_rate": 8.416677794485965e-06, "loss": 0.483, "step": 6941 }, { "epoch": 0.56, "grad_norm": 0.8938767929650255, "learning_rate": 8.414078583181963e-06, "loss": 0.4847, "step": 6942 }, { "epoch": 0.56, "grad_norm": 0.8624235874313937, "learning_rate": 8.411479481780904e-06, "loss": 0.4361, "step": 6943 }, { "epoch": 0.56, "grad_norm": 0.8460237552503656, "learning_rate": 8.408880490462914e-06, "loss": 0.5013, "step": 6944 }, { "epoch": 0.56, "grad_norm": 0.9505478379663955, "learning_rate": 8.406281609408094e-06, "loss": 0.5043, "step": 6945 }, { "epoch": 0.56, "grad_norm": 1.0443733816143181, "learning_rate": 8.403682838796539e-06, "loss": 0.6496, "step": 6946 }, { "epoch": 0.56, "grad_norm": 0.8187946528356171, "learning_rate": 8.401084178808353e-06, "loss": 0.529, "step": 6947 }, { "epoch": 0.56, "grad_norm": 1.0118247077047577, "learning_rate": 8.398485629623613e-06, "loss": 0.6163, "step": 6948 }, { "epoch": 0.56, "grad_norm": 0.9438440213096889, "learning_rate": 8.395887191422397e-06, "loss": 0.5318, "step": 6949 }, { "epoch": 0.56, "grad_norm": 0.8133948811582256, "learning_rate": 8.393288864384778e-06, "loss": 0.491, "step": 6950 }, { "epoch": 0.56, "grad_norm": 0.8383840381164659, "learning_rate": 8.390690648690818e-06, "loss": 0.4626, "step": 6951 }, { "epoch": 0.57, "grad_norm": 0.9563077923567581, "learning_rate": 8.388092544520563e-06, "loss": 0.4867, "step": 6952 }, { "epoch": 0.57, "grad_norm": 0.9220258557852953, "learning_rate": 8.385494552054069e-06, "loss": 0.4902, "step": 6953 }, { "epoch": 0.57, "grad_norm": 0.8819023030230523, "learning_rate": 8.38289667147137e-06, "loss": 0.5122, "step": 6954 }, { "epoch": 0.57, "grad_norm": 0.9896701536921275, "learning_rate": 8.380298902952493e-06, "loss": 0.4423, "step": 6955 }, { "epoch": 0.57, "grad_norm": 0.8947119579675652, "learning_rate": 8.37770124667747e-06, "loss": 0.4862, "step": 6956 }, { "epoch": 0.57, "grad_norm": 0.884022859472568, "learning_rate": 8.375103702826313e-06, "loss": 0.5322, "step": 6957 }, { "epoch": 0.57, "grad_norm": 0.8643526429122288, "learning_rate": 8.372506271579022e-06, "loss": 0.4419, "step": 6958 }, { "epoch": 0.57, "grad_norm": 0.836333281072999, "learning_rate": 8.369908953115609e-06, "loss": 0.4877, "step": 6959 }, { "epoch": 0.57, "grad_norm": 0.8772669558464992, "learning_rate": 8.367311747616057e-06, "loss": 0.5038, "step": 6960 }, { "epoch": 0.57, "grad_norm": 0.902382786223706, "learning_rate": 8.36471465526035e-06, "loss": 0.5299, "step": 6961 }, { "epoch": 0.57, "grad_norm": 0.7776272809054607, "learning_rate": 8.362117676228471e-06, "loss": 0.4358, "step": 6962 }, { "epoch": 0.57, "grad_norm": 0.8457410241726842, "learning_rate": 8.359520810700384e-06, "loss": 0.5294, "step": 6963 }, { "epoch": 0.57, "grad_norm": 0.8787627448587005, "learning_rate": 8.356924058856046e-06, "loss": 0.5382, "step": 6964 }, { "epoch": 0.57, "grad_norm": 0.8641307163943069, "learning_rate": 8.354327420875416e-06, "loss": 0.5088, "step": 6965 }, { "epoch": 0.57, "grad_norm": 0.9338969195084026, "learning_rate": 8.351730896938438e-06, "loss": 0.5405, "step": 6966 }, { "epoch": 0.57, "grad_norm": 0.9571853366920003, "learning_rate": 8.349134487225041e-06, "loss": 0.5288, "step": 6967 }, { "epoch": 0.57, "grad_norm": 0.9484888179387783, "learning_rate": 8.346538191915166e-06, "loss": 0.5038, "step": 6968 }, { "epoch": 0.57, "grad_norm": 0.9092391023046498, "learning_rate": 8.343942011188726e-06, "loss": 0.5155, "step": 6969 }, { "epoch": 0.57, "grad_norm": 0.9628755470864837, "learning_rate": 8.341345945225632e-06, "loss": 0.5141, "step": 6970 }, { "epoch": 0.57, "grad_norm": 0.8925177180147768, "learning_rate": 8.338749994205797e-06, "loss": 0.5277, "step": 6971 }, { "epoch": 0.57, "grad_norm": 0.8013471751031944, "learning_rate": 8.336154158309114e-06, "loss": 0.4862, "step": 6972 }, { "epoch": 0.57, "grad_norm": 0.9451259573313375, "learning_rate": 8.333558437715468e-06, "loss": 0.6033, "step": 6973 }, { "epoch": 0.57, "grad_norm": 0.9367395661597856, "learning_rate": 8.330962832604747e-06, "loss": 0.5249, "step": 6974 }, { "epoch": 0.57, "grad_norm": 0.9432032175641137, "learning_rate": 8.328367343156823e-06, "loss": 0.5391, "step": 6975 }, { "epoch": 0.57, "grad_norm": 0.8479723797015235, "learning_rate": 8.325771969551553e-06, "loss": 0.5147, "step": 6976 }, { "epoch": 0.57, "grad_norm": 0.893248762027732, "learning_rate": 8.323176711968807e-06, "loss": 0.5, "step": 6977 }, { "epoch": 0.57, "grad_norm": 0.8770357987840728, "learning_rate": 8.320581570588426e-06, "loss": 0.5198, "step": 6978 }, { "epoch": 0.57, "grad_norm": 0.9602752488044517, "learning_rate": 8.31798654559025e-06, "loss": 0.4744, "step": 6979 }, { "epoch": 0.57, "grad_norm": 0.9535097302063359, "learning_rate": 8.315391637154116e-06, "loss": 0.4729, "step": 6980 }, { "epoch": 0.57, "grad_norm": 0.9705726891103388, "learning_rate": 8.31279684545985e-06, "loss": 0.5509, "step": 6981 }, { "epoch": 0.57, "grad_norm": 0.8861562625840504, "learning_rate": 8.31020217068726e-06, "loss": 0.5101, "step": 6982 }, { "epoch": 0.57, "grad_norm": 0.9006660475549908, "learning_rate": 8.307607613016166e-06, "loss": 0.505, "step": 6983 }, { "epoch": 0.57, "grad_norm": 0.9894974040605742, "learning_rate": 8.305013172626363e-06, "loss": 0.4981, "step": 6984 }, { "epoch": 0.57, "grad_norm": 0.8933836425811029, "learning_rate": 8.30241884969764e-06, "loss": 0.5277, "step": 6985 }, { "epoch": 0.57, "grad_norm": 0.8515700939920732, "learning_rate": 8.299824644409787e-06, "loss": 0.521, "step": 6986 }, { "epoch": 0.57, "grad_norm": 0.941090038946153, "learning_rate": 8.29723055694258e-06, "loss": 0.5387, "step": 6987 }, { "epoch": 0.57, "grad_norm": 0.896364200664156, "learning_rate": 8.294636587475781e-06, "loss": 0.609, "step": 6988 }, { "epoch": 0.57, "grad_norm": 1.03393686900745, "learning_rate": 8.292042736189156e-06, "loss": 0.5685, "step": 6989 }, { "epoch": 0.57, "grad_norm": 0.9231576064543081, "learning_rate": 8.289449003262457e-06, "loss": 0.5319, "step": 6990 }, { "epoch": 0.57, "grad_norm": 0.9946567275629944, "learning_rate": 8.286855388875418e-06, "loss": 0.5518, "step": 6991 }, { "epoch": 0.57, "grad_norm": 0.886001255462769, "learning_rate": 8.284261893207788e-06, "loss": 0.5029, "step": 6992 }, { "epoch": 0.57, "grad_norm": 0.9284596693800119, "learning_rate": 8.281668516439286e-06, "loss": 0.5258, "step": 6993 }, { "epoch": 0.57, "grad_norm": 0.8278539413461194, "learning_rate": 8.279075258749627e-06, "loss": 0.4514, "step": 6994 }, { "epoch": 0.57, "grad_norm": 0.9555577959427803, "learning_rate": 8.276482120318532e-06, "loss": 0.6038, "step": 6995 }, { "epoch": 0.57, "grad_norm": 0.8387302642220866, "learning_rate": 8.273889101325693e-06, "loss": 0.4807, "step": 6996 }, { "epoch": 0.57, "grad_norm": 0.8962507721279136, "learning_rate": 8.271296201950809e-06, "loss": 0.5234, "step": 6997 }, { "epoch": 0.57, "grad_norm": 0.8712383728560765, "learning_rate": 8.268703422373564e-06, "loss": 0.4448, "step": 6998 }, { "epoch": 0.57, "grad_norm": 1.0123153230611763, "learning_rate": 8.266110762773638e-06, "loss": 0.643, "step": 6999 }, { "epoch": 0.57, "grad_norm": 0.935456479745211, "learning_rate": 8.263518223330698e-06, "loss": 0.4819, "step": 7000 }, { "epoch": 0.57, "grad_norm": 0.8840558016114327, "learning_rate": 8.260925804224406e-06, "loss": 0.5238, "step": 7001 }, { "epoch": 0.57, "grad_norm": 0.9027702652673076, "learning_rate": 8.25833350563441e-06, "loss": 0.4374, "step": 7002 }, { "epoch": 0.57, "grad_norm": 0.9705104517200848, "learning_rate": 8.25574132774036e-06, "loss": 0.5247, "step": 7003 }, { "epoch": 0.57, "grad_norm": 0.93427552403838, "learning_rate": 8.253149270721889e-06, "loss": 0.5646, "step": 7004 }, { "epoch": 0.57, "grad_norm": 0.9734270631354048, "learning_rate": 8.250557334758623e-06, "loss": 0.5581, "step": 7005 }, { "epoch": 0.57, "grad_norm": 0.8741399160481841, "learning_rate": 8.24796552003018e-06, "loss": 0.5323, "step": 7006 }, { "epoch": 0.57, "grad_norm": 0.9273933255218394, "learning_rate": 8.245373826716176e-06, "loss": 0.4716, "step": 7007 }, { "epoch": 0.57, "grad_norm": 0.8434637395989865, "learning_rate": 8.242782254996207e-06, "loss": 0.5463, "step": 7008 }, { "epoch": 0.57, "grad_norm": 0.9336333731635436, "learning_rate": 8.240190805049868e-06, "loss": 0.548, "step": 7009 }, { "epoch": 0.57, "grad_norm": 0.8929443364357782, "learning_rate": 8.23759947705675e-06, "loss": 0.5514, "step": 7010 }, { "epoch": 0.57, "grad_norm": 0.9201455293723961, "learning_rate": 8.235008271196421e-06, "loss": 0.559, "step": 7011 }, { "epoch": 0.57, "grad_norm": 0.9102701414183574, "learning_rate": 8.232417187648454e-06, "loss": 0.509, "step": 7012 }, { "epoch": 0.57, "grad_norm": 0.9264031657521252, "learning_rate": 8.22982622659241e-06, "loss": 0.5409, "step": 7013 }, { "epoch": 0.57, "grad_norm": 1.0630764564563944, "learning_rate": 8.227235388207835e-06, "loss": 0.4508, "step": 7014 }, { "epoch": 0.57, "grad_norm": 0.8822532387155293, "learning_rate": 8.224644672674276e-06, "loss": 0.5393, "step": 7015 }, { "epoch": 0.57, "grad_norm": 0.9174219199288024, "learning_rate": 8.222054080171267e-06, "loss": 0.5642, "step": 7016 }, { "epoch": 0.57, "grad_norm": 0.864390451108849, "learning_rate": 8.219463610878336e-06, "loss": 0.4691, "step": 7017 }, { "epoch": 0.57, "grad_norm": 0.8587854032206618, "learning_rate": 8.216873264974993e-06, "loss": 0.4795, "step": 7018 }, { "epoch": 0.57, "grad_norm": 0.9363362878742622, "learning_rate": 8.214283042640752e-06, "loss": 0.5474, "step": 7019 }, { "epoch": 0.57, "grad_norm": 0.9144074402642741, "learning_rate": 8.211692944055116e-06, "loss": 0.5136, "step": 7020 }, { "epoch": 0.57, "grad_norm": 1.032086763222279, "learning_rate": 8.20910296939757e-06, "loss": 0.4673, "step": 7021 }, { "epoch": 0.57, "grad_norm": 0.917707524800078, "learning_rate": 8.206513118847598e-06, "loss": 0.5868, "step": 7022 }, { "epoch": 0.57, "grad_norm": 0.922625636653506, "learning_rate": 8.203923392584676e-06, "loss": 0.4888, "step": 7023 }, { "epoch": 0.57, "grad_norm": 0.8764509529934924, "learning_rate": 8.201333790788275e-06, "loss": 0.5328, "step": 7024 }, { "epoch": 0.57, "grad_norm": 0.859695349317226, "learning_rate": 8.198744313637842e-06, "loss": 0.5474, "step": 7025 }, { "epoch": 0.57, "grad_norm": 1.0344486716589807, "learning_rate": 8.19615496131283e-06, "loss": 0.5053, "step": 7026 }, { "epoch": 0.57, "grad_norm": 0.8837125709121174, "learning_rate": 8.193565733992684e-06, "loss": 0.5018, "step": 7027 }, { "epoch": 0.57, "grad_norm": 0.8819851186881112, "learning_rate": 8.190976631856827e-06, "loss": 0.5101, "step": 7028 }, { "epoch": 0.57, "grad_norm": 0.798697172712139, "learning_rate": 8.188387655084684e-06, "loss": 0.4678, "step": 7029 }, { "epoch": 0.57, "grad_norm": 0.8911852719708158, "learning_rate": 8.185798803855671e-06, "loss": 0.5498, "step": 7030 }, { "epoch": 0.57, "grad_norm": 0.9231967644330206, "learning_rate": 8.183210078349191e-06, "loss": 0.5421, "step": 7031 }, { "epoch": 0.57, "grad_norm": 0.8819993157677812, "learning_rate": 8.180621478744641e-06, "loss": 0.5196, "step": 7032 }, { "epoch": 0.57, "grad_norm": 0.9075262050015743, "learning_rate": 8.178033005221412e-06, "loss": 0.4782, "step": 7033 }, { "epoch": 0.57, "grad_norm": 0.8737630080614599, "learning_rate": 8.175444657958875e-06, "loss": 0.487, "step": 7034 }, { "epoch": 0.57, "grad_norm": 0.9154320954425816, "learning_rate": 8.172856437136407e-06, "loss": 0.5353, "step": 7035 }, { "epoch": 0.57, "grad_norm": 0.9871118216349929, "learning_rate": 8.170268342933365e-06, "loss": 0.5067, "step": 7036 }, { "epoch": 0.57, "grad_norm": 0.8265507952904249, "learning_rate": 8.167680375529108e-06, "loss": 0.531, "step": 7037 }, { "epoch": 0.57, "grad_norm": 0.9496077446671609, "learning_rate": 8.165092535102972e-06, "loss": 0.5114, "step": 7038 }, { "epoch": 0.57, "grad_norm": 0.8783081821659814, "learning_rate": 8.162504821834296e-06, "loss": 0.4619, "step": 7039 }, { "epoch": 0.57, "grad_norm": 0.8717227626487613, "learning_rate": 8.159917235902409e-06, "loss": 0.5163, "step": 7040 }, { "epoch": 0.57, "grad_norm": 0.8932338295571399, "learning_rate": 8.157329777486623e-06, "loss": 0.5319, "step": 7041 }, { "epoch": 0.57, "grad_norm": 0.9199211873496858, "learning_rate": 8.154742446766249e-06, "loss": 0.498, "step": 7042 }, { "epoch": 0.57, "grad_norm": 0.891227190168023, "learning_rate": 8.15215524392059e-06, "loss": 0.4943, "step": 7043 }, { "epoch": 0.57, "grad_norm": 0.9412199201992494, "learning_rate": 8.14956816912893e-06, "loss": 0.5129, "step": 7044 }, { "epoch": 0.57, "grad_norm": 0.8556676533700766, "learning_rate": 8.146981222570553e-06, "loss": 0.5195, "step": 7045 }, { "epoch": 0.57, "grad_norm": 0.9710161830339895, "learning_rate": 8.144394404424739e-06, "loss": 0.5701, "step": 7046 }, { "epoch": 0.57, "grad_norm": 0.9812013727215241, "learning_rate": 8.141807714870743e-06, "loss": 0.5025, "step": 7047 }, { "epoch": 0.57, "grad_norm": 1.6689600561603462, "learning_rate": 8.139221154087825e-06, "loss": 0.4416, "step": 7048 }, { "epoch": 0.57, "grad_norm": 0.8059420540732499, "learning_rate": 8.136634722255232e-06, "loss": 0.4449, "step": 7049 }, { "epoch": 0.57, "grad_norm": 0.9333593303331403, "learning_rate": 8.134048419552197e-06, "loss": 0.568, "step": 7050 }, { "epoch": 0.57, "grad_norm": 0.8472084290656892, "learning_rate": 8.131462246157953e-06, "loss": 0.5321, "step": 7051 }, { "epoch": 0.57, "grad_norm": 0.9126154074678705, "learning_rate": 8.128876202251719e-06, "loss": 0.5109, "step": 7052 }, { "epoch": 0.57, "grad_norm": 0.8763009264392355, "learning_rate": 8.126290288012701e-06, "loss": 0.5216, "step": 7053 }, { "epoch": 0.57, "grad_norm": 0.8264540746375304, "learning_rate": 8.123704503620107e-06, "loss": 0.485, "step": 7054 }, { "epoch": 0.57, "grad_norm": 0.8936189165840086, "learning_rate": 8.121118849253127e-06, "loss": 0.5393, "step": 7055 }, { "epoch": 0.57, "grad_norm": 0.8849486877355847, "learning_rate": 8.11853332509094e-06, "loss": 0.5256, "step": 7056 }, { "epoch": 0.57, "grad_norm": 0.874095324515099, "learning_rate": 8.11594793131273e-06, "loss": 0.5495, "step": 7057 }, { "epoch": 0.57, "grad_norm": 0.8441694949243681, "learning_rate": 8.113362668097658e-06, "loss": 0.4437, "step": 7058 }, { "epoch": 0.57, "grad_norm": 0.902798856936396, "learning_rate": 8.110777535624875e-06, "loss": 0.5449, "step": 7059 }, { "epoch": 0.57, "grad_norm": 0.8255384853373445, "learning_rate": 8.108192534073534e-06, "loss": 0.5014, "step": 7060 }, { "epoch": 0.57, "grad_norm": 0.8517134480254044, "learning_rate": 8.105607663622775e-06, "loss": 0.5029, "step": 7061 }, { "epoch": 0.57, "grad_norm": 0.8067488124368722, "learning_rate": 8.10302292445172e-06, "loss": 0.452, "step": 7062 }, { "epoch": 0.57, "grad_norm": 0.9532445683932202, "learning_rate": 8.100438316739499e-06, "loss": 0.5633, "step": 7063 }, { "epoch": 0.57, "grad_norm": 0.8412583157335193, "learning_rate": 8.097853840665217e-06, "loss": 0.4861, "step": 7064 }, { "epoch": 0.57, "grad_norm": 0.9635981222701807, "learning_rate": 8.095269496407972e-06, "loss": 0.5369, "step": 7065 }, { "epoch": 0.57, "grad_norm": 0.9723101800664249, "learning_rate": 8.092685284146865e-06, "loss": 0.5529, "step": 7066 }, { "epoch": 0.57, "grad_norm": 0.882965623304785, "learning_rate": 8.090101204060977e-06, "loss": 0.548, "step": 7067 }, { "epoch": 0.57, "grad_norm": 0.8898000120431133, "learning_rate": 8.087517256329376e-06, "loss": 0.5369, "step": 7068 }, { "epoch": 0.57, "grad_norm": 0.9130973916070677, "learning_rate": 8.08493344113114e-06, "loss": 0.557, "step": 7069 }, { "epoch": 0.57, "grad_norm": 0.8234264567707733, "learning_rate": 8.082349758645316e-06, "loss": 0.525, "step": 7070 }, { "epoch": 0.57, "grad_norm": 0.7937641653928239, "learning_rate": 8.079766209050947e-06, "loss": 0.4841, "step": 7071 }, { "epoch": 0.57, "grad_norm": 0.875824564412514, "learning_rate": 8.077182792527082e-06, "loss": 0.4847, "step": 7072 }, { "epoch": 0.57, "grad_norm": 0.9159490950540292, "learning_rate": 8.074599509252745e-06, "loss": 0.5131, "step": 7073 }, { "epoch": 0.57, "grad_norm": 0.8557200095988629, "learning_rate": 8.072016359406949e-06, "loss": 0.511, "step": 7074 }, { "epoch": 0.58, "grad_norm": 0.860909775478447, "learning_rate": 8.069433343168713e-06, "loss": 0.4982, "step": 7075 }, { "epoch": 0.58, "grad_norm": 0.9084085645848757, "learning_rate": 8.066850460717035e-06, "loss": 0.4887, "step": 7076 }, { "epoch": 0.58, "grad_norm": 0.9552739657622884, "learning_rate": 8.0642677122309e-06, "loss": 0.5326, "step": 7077 }, { "epoch": 0.58, "grad_norm": 0.8301615599182511, "learning_rate": 8.0616850978893e-06, "loss": 0.4942, "step": 7078 }, { "epoch": 0.58, "grad_norm": 0.8730009479479265, "learning_rate": 8.059102617871203e-06, "loss": 0.4616, "step": 7079 }, { "epoch": 0.58, "grad_norm": 0.9619992822577931, "learning_rate": 8.056520272355571e-06, "loss": 0.492, "step": 7080 }, { "epoch": 0.58, "grad_norm": 0.9328794065066321, "learning_rate": 8.053938061521363e-06, "loss": 0.5819, "step": 7081 }, { "epoch": 0.58, "grad_norm": 0.8940979839317937, "learning_rate": 8.05135598554752e-06, "loss": 0.5396, "step": 7082 }, { "epoch": 0.58, "grad_norm": 0.9114092619314533, "learning_rate": 8.048774044612977e-06, "loss": 0.4878, "step": 7083 }, { "epoch": 0.58, "grad_norm": 0.9038805767427703, "learning_rate": 8.046192238896665e-06, "loss": 0.5246, "step": 7084 }, { "epoch": 0.58, "grad_norm": 0.9281520876936268, "learning_rate": 8.043610568577497e-06, "loss": 0.5246, "step": 7085 }, { "epoch": 0.58, "grad_norm": 0.8755547684924566, "learning_rate": 8.041029033834378e-06, "loss": 0.5387, "step": 7086 }, { "epoch": 0.58, "grad_norm": 1.0413527560484352, "learning_rate": 8.038447634846214e-06, "loss": 0.4924, "step": 7087 }, { "epoch": 0.58, "grad_norm": 0.9365148241845588, "learning_rate": 8.035866371791889e-06, "loss": 0.5573, "step": 7088 }, { "epoch": 0.58, "grad_norm": 0.8583013464946238, "learning_rate": 8.033285244850276e-06, "loss": 0.4931, "step": 7089 }, { "epoch": 0.58, "grad_norm": 0.9798987553897932, "learning_rate": 8.030704254200256e-06, "loss": 0.5488, "step": 7090 }, { "epoch": 0.58, "grad_norm": 0.8481462707609014, "learning_rate": 8.028123400020686e-06, "loss": 0.5436, "step": 7091 }, { "epoch": 0.58, "grad_norm": 0.8908057979277204, "learning_rate": 8.02554268249041e-06, "loss": 0.5143, "step": 7092 }, { "epoch": 0.58, "grad_norm": 0.9223070040189832, "learning_rate": 8.02296210178828e-06, "loss": 0.5308, "step": 7093 }, { "epoch": 0.58, "grad_norm": 0.8816963595479083, "learning_rate": 8.02038165809312e-06, "loss": 0.5327, "step": 7094 }, { "epoch": 0.58, "grad_norm": 0.8929250749843264, "learning_rate": 8.017801351583753e-06, "loss": 0.4613, "step": 7095 }, { "epoch": 0.58, "grad_norm": 0.7983790901220064, "learning_rate": 8.015221182439e-06, "loss": 0.4302, "step": 7096 }, { "epoch": 0.58, "grad_norm": 0.8384414175374417, "learning_rate": 8.012641150837656e-06, "loss": 0.431, "step": 7097 }, { "epoch": 0.58, "grad_norm": 0.8930745141481806, "learning_rate": 8.010061256958515e-06, "loss": 0.517, "step": 7098 }, { "epoch": 0.58, "grad_norm": 0.9267017949110046, "learning_rate": 8.007481500980366e-06, "loss": 0.4581, "step": 7099 }, { "epoch": 0.58, "grad_norm": 0.8493740892753433, "learning_rate": 8.004901883081983e-06, "loss": 0.4934, "step": 7100 }, { "epoch": 0.58, "grad_norm": 0.885590654760843, "learning_rate": 8.002322403442125e-06, "loss": 0.5524, "step": 7101 }, { "epoch": 0.58, "grad_norm": 0.9324619489542131, "learning_rate": 7.999743062239557e-06, "loss": 0.5681, "step": 7102 }, { "epoch": 0.58, "grad_norm": 0.9608122058291328, "learning_rate": 7.99716385965302e-06, "loss": 0.5449, "step": 7103 }, { "epoch": 0.58, "grad_norm": 0.8549468729838979, "learning_rate": 7.994584795861248e-06, "loss": 0.5272, "step": 7104 }, { "epoch": 0.58, "grad_norm": 0.9065242963654259, "learning_rate": 7.992005871042975e-06, "loss": 0.5188, "step": 7105 }, { "epoch": 0.58, "grad_norm": 0.8642241252773025, "learning_rate": 7.989427085376914e-06, "loss": 0.4868, "step": 7106 }, { "epoch": 0.58, "grad_norm": 0.8191305164843538, "learning_rate": 7.986848439041767e-06, "loss": 0.5231, "step": 7107 }, { "epoch": 0.58, "grad_norm": 0.9345831959630203, "learning_rate": 7.984269932216241e-06, "loss": 0.5122, "step": 7108 }, { "epoch": 0.58, "grad_norm": 2.0591280413854145, "learning_rate": 7.981691565079024e-06, "loss": 0.5526, "step": 7109 }, { "epoch": 0.58, "grad_norm": 1.0004936234543034, "learning_rate": 7.979113337808786e-06, "loss": 0.4942, "step": 7110 }, { "epoch": 0.58, "grad_norm": 0.816574338598874, "learning_rate": 7.976535250584204e-06, "loss": 0.5098, "step": 7111 }, { "epoch": 0.58, "grad_norm": 0.8664437742943795, "learning_rate": 7.973957303583936e-06, "loss": 0.5368, "step": 7112 }, { "epoch": 0.58, "grad_norm": 0.91862171457887, "learning_rate": 7.971379496986625e-06, "loss": 0.5826, "step": 7113 }, { "epoch": 0.58, "grad_norm": 0.9417244042887671, "learning_rate": 7.968801830970917e-06, "loss": 0.5015, "step": 7114 }, { "epoch": 0.58, "grad_norm": 0.9634260358824619, "learning_rate": 7.966224305715443e-06, "loss": 0.5266, "step": 7115 }, { "epoch": 0.58, "grad_norm": 0.8893242016477338, "learning_rate": 7.963646921398818e-06, "loss": 0.4669, "step": 7116 }, { "epoch": 0.58, "grad_norm": 0.9680052559082771, "learning_rate": 7.961069678199658e-06, "loss": 0.5176, "step": 7117 }, { "epoch": 0.58, "grad_norm": 0.8865734175274613, "learning_rate": 7.95849257629656e-06, "loss": 0.5046, "step": 7118 }, { "epoch": 0.58, "grad_norm": 0.9284345145026058, "learning_rate": 7.95591561586811e-06, "loss": 0.5279, "step": 7119 }, { "epoch": 0.58, "grad_norm": 0.8619392433080499, "learning_rate": 7.953338797092902e-06, "loss": 0.4835, "step": 7120 }, { "epoch": 0.58, "grad_norm": 1.0180767658901928, "learning_rate": 7.950762120149499e-06, "loss": 0.5636, "step": 7121 }, { "epoch": 0.58, "grad_norm": 0.8686763777204012, "learning_rate": 7.94818558521646e-06, "loss": 0.5189, "step": 7122 }, { "epoch": 0.58, "grad_norm": 0.93918029784971, "learning_rate": 7.94560919247234e-06, "loss": 0.4968, "step": 7123 }, { "epoch": 0.58, "grad_norm": 0.9442326161843043, "learning_rate": 7.943032942095685e-06, "loss": 0.5392, "step": 7124 }, { "epoch": 0.58, "grad_norm": 0.9935891972254188, "learning_rate": 7.940456834265017e-06, "loss": 0.5106, "step": 7125 }, { "epoch": 0.58, "grad_norm": 0.8666080937052979, "learning_rate": 7.937880869158868e-06, "loss": 0.4962, "step": 7126 }, { "epoch": 0.58, "grad_norm": 0.849261195585096, "learning_rate": 7.935305046955746e-06, "loss": 0.5083, "step": 7127 }, { "epoch": 0.58, "grad_norm": 0.8464975330500006, "learning_rate": 7.93272936783415e-06, "loss": 0.5107, "step": 7128 }, { "epoch": 0.58, "grad_norm": 0.9556219695413329, "learning_rate": 7.930153831972575e-06, "loss": 0.5619, "step": 7129 }, { "epoch": 0.58, "grad_norm": 0.9412515314641338, "learning_rate": 7.927578439549506e-06, "loss": 0.5491, "step": 7130 }, { "epoch": 0.58, "grad_norm": 0.8079216989915801, "learning_rate": 7.92500319074341e-06, "loss": 0.5057, "step": 7131 }, { "epoch": 0.58, "grad_norm": 0.9025189985626063, "learning_rate": 7.922428085732755e-06, "loss": 0.5127, "step": 7132 }, { "epoch": 0.58, "grad_norm": 0.9482220895231857, "learning_rate": 7.919853124695993e-06, "loss": 0.4783, "step": 7133 }, { "epoch": 0.58, "grad_norm": 0.8388943639401142, "learning_rate": 7.917278307811557e-06, "loss": 0.4902, "step": 7134 }, { "epoch": 0.58, "grad_norm": 0.8968964756970629, "learning_rate": 7.914703635257892e-06, "loss": 0.4887, "step": 7135 }, { "epoch": 0.58, "grad_norm": 0.8833263700487893, "learning_rate": 7.912129107213417e-06, "loss": 0.5087, "step": 7136 }, { "epoch": 0.58, "grad_norm": 0.8963174813537905, "learning_rate": 7.909554723856537e-06, "loss": 0.5032, "step": 7137 }, { "epoch": 0.58, "grad_norm": 0.9074153912009865, "learning_rate": 7.906980485365665e-06, "loss": 0.5232, "step": 7138 }, { "epoch": 0.58, "grad_norm": 0.8283767399063273, "learning_rate": 7.90440639191919e-06, "loss": 0.4602, "step": 7139 }, { "epoch": 0.58, "grad_norm": 0.8903407901604158, "learning_rate": 7.901832443695487e-06, "loss": 0.5257, "step": 7140 }, { "epoch": 0.58, "grad_norm": 0.9091972597395084, "learning_rate": 7.899258640872942e-06, "loss": 0.5404, "step": 7141 }, { "epoch": 0.58, "grad_norm": 1.0976786999602899, "learning_rate": 7.896684983629907e-06, "loss": 0.5162, "step": 7142 }, { "epoch": 0.58, "grad_norm": 0.8003995289691711, "learning_rate": 7.894111472144733e-06, "loss": 0.4649, "step": 7143 }, { "epoch": 0.58, "grad_norm": 0.8420642146592251, "learning_rate": 7.89153810659577e-06, "loss": 0.5088, "step": 7144 }, { "epoch": 0.58, "grad_norm": 1.0426511293723346, "learning_rate": 7.888964887161348e-06, "loss": 0.576, "step": 7145 }, { "epoch": 0.58, "grad_norm": 0.9027421728823426, "learning_rate": 7.886391814019782e-06, "loss": 0.5263, "step": 7146 }, { "epoch": 0.58, "grad_norm": 0.9367759837162823, "learning_rate": 7.883818887349391e-06, "loss": 0.5385, "step": 7147 }, { "epoch": 0.58, "grad_norm": 0.8860146610850669, "learning_rate": 7.881246107328472e-06, "loss": 0.4902, "step": 7148 }, { "epoch": 0.58, "grad_norm": 0.9387255568652029, "learning_rate": 7.878673474135321e-06, "loss": 0.5192, "step": 7149 }, { "epoch": 0.58, "grad_norm": 0.9864023313146227, "learning_rate": 7.876100987948217e-06, "loss": 0.588, "step": 7150 }, { "epoch": 0.58, "grad_norm": 0.994880359320626, "learning_rate": 7.873528648945428e-06, "loss": 0.5507, "step": 7151 }, { "epoch": 0.58, "grad_norm": 0.8755218895848427, "learning_rate": 7.87095645730522e-06, "loss": 0.5207, "step": 7152 }, { "epoch": 0.58, "grad_norm": 0.8424039240765442, "learning_rate": 7.868384413205842e-06, "loss": 0.5149, "step": 7153 }, { "epoch": 0.58, "grad_norm": 0.9298680000054024, "learning_rate": 7.865812516825528e-06, "loss": 0.4513, "step": 7154 }, { "epoch": 0.58, "grad_norm": 0.9378542402912848, "learning_rate": 7.863240768342518e-06, "loss": 0.56, "step": 7155 }, { "epoch": 0.58, "grad_norm": 0.941553399591606, "learning_rate": 7.860669167935028e-06, "loss": 0.5182, "step": 7156 }, { "epoch": 0.58, "grad_norm": 0.9906634051079232, "learning_rate": 7.858097715781264e-06, "loss": 0.5827, "step": 7157 }, { "epoch": 0.58, "grad_norm": 0.8681871023458261, "learning_rate": 7.85552641205943e-06, "loss": 0.5365, "step": 7158 }, { "epoch": 0.58, "grad_norm": 0.8604262563644228, "learning_rate": 7.852955256947717e-06, "loss": 0.4941, "step": 7159 }, { "epoch": 0.58, "grad_norm": 0.9035245788633994, "learning_rate": 7.850384250624293e-06, "loss": 0.4967, "step": 7160 }, { "epoch": 0.58, "grad_norm": 0.9448437606155281, "learning_rate": 7.847813393267338e-06, "loss": 0.5439, "step": 7161 }, { "epoch": 0.58, "grad_norm": 0.8518107595401718, "learning_rate": 7.845242685055008e-06, "loss": 0.4615, "step": 7162 }, { "epoch": 0.58, "grad_norm": 0.9074697637525523, "learning_rate": 7.842672126165443e-06, "loss": 0.545, "step": 7163 }, { "epoch": 0.58, "grad_norm": 0.8956320738778516, "learning_rate": 7.84010171677679e-06, "loss": 0.5295, "step": 7164 }, { "epoch": 0.58, "grad_norm": 0.9930685920080908, "learning_rate": 7.837531457067171e-06, "loss": 0.5653, "step": 7165 }, { "epoch": 0.58, "grad_norm": 0.9672444191250434, "learning_rate": 7.834961347214704e-06, "loss": 0.496, "step": 7166 }, { "epoch": 0.58, "grad_norm": 0.9470240702819805, "learning_rate": 7.832391387397495e-06, "loss": 0.5262, "step": 7167 }, { "epoch": 0.58, "grad_norm": 0.890014574596134, "learning_rate": 7.829821577793642e-06, "loss": 0.5246, "step": 7168 }, { "epoch": 0.58, "grad_norm": 0.9901783082879074, "learning_rate": 7.827251918581225e-06, "loss": 0.6147, "step": 7169 }, { "epoch": 0.58, "grad_norm": 0.8810342910057687, "learning_rate": 7.824682409938328e-06, "loss": 0.5329, "step": 7170 }, { "epoch": 0.58, "grad_norm": 0.9177398262818202, "learning_rate": 7.822113052043007e-06, "loss": 0.5194, "step": 7171 }, { "epoch": 0.58, "grad_norm": 0.9514360356116424, "learning_rate": 7.819543845073319e-06, "loss": 0.5121, "step": 7172 }, { "epoch": 0.58, "grad_norm": 0.9172146908279984, "learning_rate": 7.816974789207311e-06, "loss": 0.4944, "step": 7173 }, { "epoch": 0.58, "grad_norm": 0.8682178687211466, "learning_rate": 7.814405884623012e-06, "loss": 0.5039, "step": 7174 }, { "epoch": 0.58, "grad_norm": 0.8708333791973569, "learning_rate": 7.811837131498448e-06, "loss": 0.5385, "step": 7175 }, { "epoch": 0.58, "grad_norm": 0.8474069428549672, "learning_rate": 7.80926853001163e-06, "loss": 0.4809, "step": 7176 }, { "epoch": 0.58, "grad_norm": 0.9659314208908955, "learning_rate": 7.806700080340558e-06, "loss": 0.5581, "step": 7177 }, { "epoch": 0.58, "grad_norm": 1.0070230808243348, "learning_rate": 7.804131782663224e-06, "loss": 0.5707, "step": 7178 }, { "epoch": 0.58, "grad_norm": 0.9345471374137605, "learning_rate": 7.801563637157614e-06, "loss": 0.5631, "step": 7179 }, { "epoch": 0.58, "grad_norm": 0.8770225209942285, "learning_rate": 7.79899564400169e-06, "loss": 0.5538, "step": 7180 }, { "epoch": 0.58, "grad_norm": 0.9830268920995888, "learning_rate": 7.796427803373416e-06, "loss": 0.5664, "step": 7181 }, { "epoch": 0.58, "grad_norm": 0.9248689772553825, "learning_rate": 7.793860115450744e-06, "loss": 0.5054, "step": 7182 }, { "epoch": 0.58, "grad_norm": 0.9004370145255842, "learning_rate": 7.791292580411606e-06, "loss": 0.4964, "step": 7183 }, { "epoch": 0.58, "grad_norm": 0.8716515992647562, "learning_rate": 7.788725198433933e-06, "loss": 0.4919, "step": 7184 }, { "epoch": 0.58, "grad_norm": 0.8169934980297398, "learning_rate": 7.786157969695643e-06, "loss": 0.4821, "step": 7185 }, { "epoch": 0.58, "grad_norm": 0.8647980879138384, "learning_rate": 7.783590894374642e-06, "loss": 0.5286, "step": 7186 }, { "epoch": 0.58, "grad_norm": 0.8614129816503232, "learning_rate": 7.781023972648826e-06, "loss": 0.4908, "step": 7187 }, { "epoch": 0.58, "grad_norm": 0.9324921763996864, "learning_rate": 7.778457204696082e-06, "loss": 0.5547, "step": 7188 }, { "epoch": 0.58, "grad_norm": 0.9111081523908714, "learning_rate": 7.775890590694283e-06, "loss": 0.4669, "step": 7189 }, { "epoch": 0.58, "grad_norm": 0.9610565102640668, "learning_rate": 7.77332413082129e-06, "loss": 0.5295, "step": 7190 }, { "epoch": 0.58, "grad_norm": 0.9293235516497907, "learning_rate": 7.770757825254962e-06, "loss": 0.5041, "step": 7191 }, { "epoch": 0.58, "grad_norm": 0.9121050505057278, "learning_rate": 7.768191674173142e-06, "loss": 0.5348, "step": 7192 }, { "epoch": 0.58, "grad_norm": 0.7854722354449046, "learning_rate": 7.765625677753656e-06, "loss": 0.4922, "step": 7193 }, { "epoch": 0.58, "grad_norm": 0.9010995980188043, "learning_rate": 7.763059836174329e-06, "loss": 0.5002, "step": 7194 }, { "epoch": 0.58, "grad_norm": 0.8549641547968623, "learning_rate": 7.760494149612971e-06, "loss": 0.4898, "step": 7195 }, { "epoch": 0.58, "grad_norm": 0.8698049730542157, "learning_rate": 7.757928618247384e-06, "loss": 0.4968, "step": 7196 }, { "epoch": 0.58, "grad_norm": 0.9109125156970205, "learning_rate": 7.755363242255352e-06, "loss": 0.5399, "step": 7197 }, { "epoch": 0.59, "grad_norm": 0.9878217858400418, "learning_rate": 7.752798021814659e-06, "loss": 0.56, "step": 7198 }, { "epoch": 0.59, "grad_norm": 0.9468859034587981, "learning_rate": 7.750232957103068e-06, "loss": 0.4919, "step": 7199 }, { "epoch": 0.59, "grad_norm": 0.9303700707294289, "learning_rate": 7.747668048298338e-06, "loss": 0.572, "step": 7200 }, { "epoch": 0.59, "grad_norm": 0.9098675738960599, "learning_rate": 7.745103295578216e-06, "loss": 0.5466, "step": 7201 }, { "epoch": 0.59, "grad_norm": 0.9343802851406701, "learning_rate": 7.74253869912043e-06, "loss": 0.5386, "step": 7202 }, { "epoch": 0.59, "grad_norm": 0.9476829141758246, "learning_rate": 7.739974259102716e-06, "loss": 0.5189, "step": 7203 }, { "epoch": 0.59, "grad_norm": 0.7783194872808666, "learning_rate": 7.73740997570278e-06, "loss": 0.4538, "step": 7204 }, { "epoch": 0.59, "grad_norm": 1.1009354505492426, "learning_rate": 7.734845849098324e-06, "loss": 0.5546, "step": 7205 }, { "epoch": 0.59, "grad_norm": 0.9266294852249874, "learning_rate": 7.732281879467043e-06, "loss": 0.5376, "step": 7206 }, { "epoch": 0.59, "grad_norm": 0.8711521298481469, "learning_rate": 7.729718066986617e-06, "loss": 0.496, "step": 7207 }, { "epoch": 0.59, "grad_norm": 0.8892162259369395, "learning_rate": 7.727154411834712e-06, "loss": 0.483, "step": 7208 }, { "epoch": 0.59, "grad_norm": 0.9028078827519032, "learning_rate": 7.724590914188994e-06, "loss": 0.5044, "step": 7209 }, { "epoch": 0.59, "grad_norm": 0.9815763452473252, "learning_rate": 7.722027574227107e-06, "loss": 0.5594, "step": 7210 }, { "epoch": 0.59, "grad_norm": 0.8967294073556648, "learning_rate": 7.719464392126684e-06, "loss": 0.5169, "step": 7211 }, { "epoch": 0.59, "grad_norm": 0.9065002695688444, "learning_rate": 7.71690136806536e-06, "loss": 0.5255, "step": 7212 }, { "epoch": 0.59, "grad_norm": 0.9222558351013072, "learning_rate": 7.714338502220746e-06, "loss": 0.5538, "step": 7213 }, { "epoch": 0.59, "grad_norm": 0.8671006891727058, "learning_rate": 7.711775794770443e-06, "loss": 0.5086, "step": 7214 }, { "epoch": 0.59, "grad_norm": 1.0232282025989965, "learning_rate": 7.709213245892051e-06, "loss": 0.548, "step": 7215 }, { "epoch": 0.59, "grad_norm": 0.9529100879092692, "learning_rate": 7.70665085576315e-06, "loss": 0.5342, "step": 7216 }, { "epoch": 0.59, "grad_norm": 0.9734892020915824, "learning_rate": 7.704088624561306e-06, "loss": 0.5127, "step": 7217 }, { "epoch": 0.59, "grad_norm": 0.9270597288383303, "learning_rate": 7.701526552464087e-06, "loss": 0.5173, "step": 7218 }, { "epoch": 0.59, "grad_norm": 0.8720870889009098, "learning_rate": 7.698964639649041e-06, "loss": 0.4809, "step": 7219 }, { "epoch": 0.59, "grad_norm": 0.923839288776839, "learning_rate": 7.6964028862937e-06, "loss": 0.5046, "step": 7220 }, { "epoch": 0.59, "grad_norm": 1.02966338890196, "learning_rate": 7.6938412925756e-06, "loss": 0.5881, "step": 7221 }, { "epoch": 0.59, "grad_norm": 0.9594420898093694, "learning_rate": 7.691279858672252e-06, "loss": 0.5048, "step": 7222 }, { "epoch": 0.59, "grad_norm": 0.9115683816055996, "learning_rate": 7.688718584761158e-06, "loss": 0.4904, "step": 7223 }, { "epoch": 0.59, "grad_norm": 0.9054139733472941, "learning_rate": 7.68615747101982e-06, "loss": 0.5241, "step": 7224 }, { "epoch": 0.59, "grad_norm": 0.7437088104314823, "learning_rate": 7.683596517625716e-06, "loss": 0.4348, "step": 7225 }, { "epoch": 0.59, "grad_norm": 0.8783371843681202, "learning_rate": 7.681035724756317e-06, "loss": 0.5548, "step": 7226 }, { "epoch": 0.59, "grad_norm": 0.9830512757805387, "learning_rate": 7.678475092589088e-06, "loss": 0.5338, "step": 7227 }, { "epoch": 0.59, "grad_norm": 0.8493114744220931, "learning_rate": 7.675914621301476e-06, "loss": 0.4998, "step": 7228 }, { "epoch": 0.59, "grad_norm": 0.9992033936519503, "learning_rate": 7.673354311070914e-06, "loss": 0.5458, "step": 7229 }, { "epoch": 0.59, "grad_norm": 0.8176998664379581, "learning_rate": 7.67079416207484e-06, "loss": 0.4448, "step": 7230 }, { "epoch": 0.59, "grad_norm": 0.8844593418859039, "learning_rate": 7.668234174490664e-06, "loss": 0.508, "step": 7231 }, { "epoch": 0.59, "grad_norm": 0.8417371432578538, "learning_rate": 7.665674348495788e-06, "loss": 0.4771, "step": 7232 }, { "epoch": 0.59, "grad_norm": 0.9956057436389267, "learning_rate": 7.663114684267612e-06, "loss": 0.5216, "step": 7233 }, { "epoch": 0.59, "grad_norm": 0.9428429486313148, "learning_rate": 7.660555181983517e-06, "loss": 0.4689, "step": 7234 }, { "epoch": 0.59, "grad_norm": 0.9375806767617715, "learning_rate": 7.657995841820869e-06, "loss": 0.5296, "step": 7235 }, { "epoch": 0.59, "grad_norm": 0.8450786498494435, "learning_rate": 7.655436663957035e-06, "loss": 0.4782, "step": 7236 }, { "epoch": 0.59, "grad_norm": 0.9589117881742945, "learning_rate": 7.65287764856936e-06, "loss": 0.5631, "step": 7237 }, { "epoch": 0.59, "grad_norm": 0.9600384970168856, "learning_rate": 7.650318795835179e-06, "loss": 0.5248, "step": 7238 }, { "epoch": 0.59, "grad_norm": 0.9766270180046419, "learning_rate": 7.647760105931825e-06, "loss": 0.5195, "step": 7239 }, { "epoch": 0.59, "grad_norm": 1.0108489371038865, "learning_rate": 7.64520157903661e-06, "loss": 0.5331, "step": 7240 }, { "epoch": 0.59, "grad_norm": 0.9379986049299248, "learning_rate": 7.64264321532683e-06, "loss": 0.4585, "step": 7241 }, { "epoch": 0.59, "grad_norm": 0.9131980397882025, "learning_rate": 7.640085014979792e-06, "loss": 0.5732, "step": 7242 }, { "epoch": 0.59, "grad_norm": 0.9529997529146212, "learning_rate": 7.637526978172767e-06, "loss": 0.5263, "step": 7243 }, { "epoch": 0.59, "grad_norm": 0.8851984451396883, "learning_rate": 7.634969105083023e-06, "loss": 0.5099, "step": 7244 }, { "epoch": 0.59, "grad_norm": 0.9883433976165381, "learning_rate": 7.632411395887826e-06, "loss": 0.553, "step": 7245 }, { "epoch": 0.59, "grad_norm": 0.9934441008382426, "learning_rate": 7.62985385076442e-06, "loss": 0.543, "step": 7246 }, { "epoch": 0.59, "grad_norm": 0.9492112256678701, "learning_rate": 7.6272964698900356e-06, "loss": 0.5126, "step": 7247 }, { "epoch": 0.59, "grad_norm": 0.8564922898626104, "learning_rate": 7.624739253441905e-06, "loss": 0.5169, "step": 7248 }, { "epoch": 0.59, "grad_norm": 1.0115163408161787, "learning_rate": 7.622182201597238e-06, "loss": 0.5686, "step": 7249 }, { "epoch": 0.59, "grad_norm": 0.9392761542496402, "learning_rate": 7.619625314533231e-06, "loss": 0.5365, "step": 7250 }, { "epoch": 0.59, "grad_norm": 0.8739873176329055, "learning_rate": 7.6170685924270815e-06, "loss": 0.5418, "step": 7251 }, { "epoch": 0.59, "grad_norm": 0.8558631122985492, "learning_rate": 7.6145120354559666e-06, "loss": 0.5435, "step": 7252 }, { "epoch": 0.59, "grad_norm": 0.9014633753616382, "learning_rate": 7.611955643797046e-06, "loss": 0.5492, "step": 7253 }, { "epoch": 0.59, "grad_norm": 1.0011582929357539, "learning_rate": 7.609399417627486e-06, "loss": 0.6088, "step": 7254 }, { "epoch": 0.59, "grad_norm": 0.875694028910975, "learning_rate": 7.606843357124426e-06, "loss": 0.534, "step": 7255 }, { "epoch": 0.59, "grad_norm": 0.8900901373620113, "learning_rate": 7.604287462464995e-06, "loss": 0.5299, "step": 7256 }, { "epoch": 0.59, "grad_norm": 0.8280069475864995, "learning_rate": 7.60173173382632e-06, "loss": 0.4966, "step": 7257 }, { "epoch": 0.59, "grad_norm": 0.880686332922401, "learning_rate": 7.599176171385509e-06, "loss": 0.5073, "step": 7258 }, { "epoch": 0.59, "grad_norm": 0.8840388220644066, "learning_rate": 7.5966207753196574e-06, "loss": 0.5394, "step": 7259 }, { "epoch": 0.59, "grad_norm": 0.8961429378494427, "learning_rate": 7.5940655458058575e-06, "loss": 0.4807, "step": 7260 }, { "epoch": 0.59, "grad_norm": 0.9821194016091872, "learning_rate": 7.59151048302118e-06, "loss": 0.5458, "step": 7261 }, { "epoch": 0.59, "grad_norm": 0.9752484428211466, "learning_rate": 7.588955587142688e-06, "loss": 0.516, "step": 7262 }, { "epoch": 0.59, "grad_norm": 0.9843824414210166, "learning_rate": 7.586400858347438e-06, "loss": 0.5626, "step": 7263 }, { "epoch": 0.59, "grad_norm": 0.8707842938023541, "learning_rate": 7.583846296812467e-06, "loss": 0.4788, "step": 7264 }, { "epoch": 0.59, "grad_norm": 0.8926431378262193, "learning_rate": 7.581291902714801e-06, "loss": 0.4816, "step": 7265 }, { "epoch": 0.59, "grad_norm": 0.8237824939681814, "learning_rate": 7.578737676231466e-06, "loss": 0.508, "step": 7266 }, { "epoch": 0.59, "grad_norm": 0.9437865429552902, "learning_rate": 7.576183617539461e-06, "loss": 0.5317, "step": 7267 }, { "epoch": 0.59, "grad_norm": 0.9322937825461017, "learning_rate": 7.573629726815778e-06, "loss": 0.5568, "step": 7268 }, { "epoch": 0.59, "grad_norm": 0.8211316207432359, "learning_rate": 7.5710760042374056e-06, "loss": 0.4647, "step": 7269 }, { "epoch": 0.59, "grad_norm": 0.8637140877102417, "learning_rate": 7.568522449981308e-06, "loss": 0.4874, "step": 7270 }, { "epoch": 0.59, "grad_norm": 0.8855791028344088, "learning_rate": 7.565969064224453e-06, "loss": 0.5189, "step": 7271 }, { "epoch": 0.59, "grad_norm": 1.014135661037069, "learning_rate": 7.563415847143782e-06, "loss": 0.5775, "step": 7272 }, { "epoch": 0.59, "grad_norm": 0.9497931596687522, "learning_rate": 7.560862798916229e-06, "loss": 0.5442, "step": 7273 }, { "epoch": 0.59, "grad_norm": 0.9961094952611982, "learning_rate": 7.558309919718723e-06, "loss": 0.5498, "step": 7274 }, { "epoch": 0.59, "grad_norm": 0.8910273913576999, "learning_rate": 7.555757209728174e-06, "loss": 0.5109, "step": 7275 }, { "epoch": 0.59, "grad_norm": 0.8401278429671708, "learning_rate": 7.553204669121478e-06, "loss": 0.5287, "step": 7276 }, { "epoch": 0.59, "grad_norm": 1.0258306091345222, "learning_rate": 7.550652298075532e-06, "loss": 0.5642, "step": 7277 }, { "epoch": 0.59, "grad_norm": 1.0388928356690446, "learning_rate": 7.54810009676721e-06, "loss": 0.5273, "step": 7278 }, { "epoch": 0.59, "grad_norm": 0.894137035048821, "learning_rate": 7.545548065373372e-06, "loss": 0.5418, "step": 7279 }, { "epoch": 0.59, "grad_norm": 0.8520592226061066, "learning_rate": 7.54299620407088e-06, "loss": 0.5039, "step": 7280 }, { "epoch": 0.59, "grad_norm": 0.9289674724415184, "learning_rate": 7.540444513036572e-06, "loss": 0.5908, "step": 7281 }, { "epoch": 0.59, "grad_norm": 0.8688189308375063, "learning_rate": 7.5378929924472735e-06, "loss": 0.5631, "step": 7282 }, { "epoch": 0.59, "grad_norm": 0.9200999552756037, "learning_rate": 7.535341642479811e-06, "loss": 0.5082, "step": 7283 }, { "epoch": 0.59, "grad_norm": 0.9210146899679073, "learning_rate": 7.532790463310986e-06, "loss": 0.5396, "step": 7284 }, { "epoch": 0.59, "grad_norm": 0.9137799113440104, "learning_rate": 7.530239455117589e-06, "loss": 0.5432, "step": 7285 }, { "epoch": 0.59, "grad_norm": 0.9363505809898554, "learning_rate": 7.527688618076413e-06, "loss": 0.5339, "step": 7286 }, { "epoch": 0.59, "grad_norm": 0.9763535749511509, "learning_rate": 7.525137952364222e-06, "loss": 0.5225, "step": 7287 }, { "epoch": 0.59, "grad_norm": 0.8645863285917511, "learning_rate": 7.522587458157771e-06, "loss": 0.518, "step": 7288 }, { "epoch": 0.59, "grad_norm": 0.8407841151958855, "learning_rate": 7.520037135633817e-06, "loss": 0.4463, "step": 7289 }, { "epoch": 0.59, "grad_norm": 0.9285178402786097, "learning_rate": 7.517486984969088e-06, "loss": 0.5605, "step": 7290 }, { "epoch": 0.59, "grad_norm": 0.8945561346406458, "learning_rate": 7.514937006340306e-06, "loss": 0.5643, "step": 7291 }, { "epoch": 0.59, "grad_norm": 0.8963075832585806, "learning_rate": 7.512387199924189e-06, "loss": 0.5199, "step": 7292 }, { "epoch": 0.59, "grad_norm": 0.9563988735599971, "learning_rate": 7.509837565897432e-06, "loss": 0.5658, "step": 7293 }, { "epoch": 0.59, "grad_norm": 0.9423292581170268, "learning_rate": 7.507288104436719e-06, "loss": 0.5352, "step": 7294 }, { "epoch": 0.59, "grad_norm": 0.992898315830082, "learning_rate": 7.504738815718734e-06, "loss": 0.5415, "step": 7295 }, { "epoch": 0.59, "grad_norm": 0.9020731368058453, "learning_rate": 7.502189699920136e-06, "loss": 0.5312, "step": 7296 }, { "epoch": 0.59, "grad_norm": 0.9363841774035592, "learning_rate": 7.499640757217572e-06, "loss": 0.5127, "step": 7297 }, { "epoch": 0.59, "grad_norm": 0.8631830942622, "learning_rate": 7.497091987787689e-06, "loss": 0.4652, "step": 7298 }, { "epoch": 0.59, "grad_norm": 0.9029696039606694, "learning_rate": 7.494543391807112e-06, "loss": 0.469, "step": 7299 }, { "epoch": 0.59, "grad_norm": 1.0395322395699418, "learning_rate": 7.4919949694524506e-06, "loss": 0.5787, "step": 7300 }, { "epoch": 0.59, "grad_norm": 0.9576457627425172, "learning_rate": 7.489446720900319e-06, "loss": 0.541, "step": 7301 }, { "epoch": 0.59, "grad_norm": 0.9116151576314977, "learning_rate": 7.486898646327301e-06, "loss": 0.514, "step": 7302 }, { "epoch": 0.59, "grad_norm": 0.9491659654888721, "learning_rate": 7.484350745909974e-06, "loss": 0.497, "step": 7303 }, { "epoch": 0.59, "grad_norm": 0.9257407304664244, "learning_rate": 7.481803019824914e-06, "loss": 0.5212, "step": 7304 }, { "epoch": 0.59, "grad_norm": 0.830204046262912, "learning_rate": 7.47925546824867e-06, "loss": 0.5119, "step": 7305 }, { "epoch": 0.59, "grad_norm": 0.9099262225306468, "learning_rate": 7.476708091357783e-06, "loss": 0.5123, "step": 7306 }, { "epoch": 0.59, "grad_norm": 0.9658265435756943, "learning_rate": 7.47416088932879e-06, "loss": 0.5334, "step": 7307 }, { "epoch": 0.59, "grad_norm": 0.9570047689135044, "learning_rate": 7.471613862338207e-06, "loss": 0.5118, "step": 7308 }, { "epoch": 0.59, "grad_norm": 0.8938157106055836, "learning_rate": 7.469067010562538e-06, "loss": 0.5066, "step": 7309 }, { "epoch": 0.59, "grad_norm": 0.9216259117988101, "learning_rate": 7.466520334178284e-06, "loss": 0.4809, "step": 7310 }, { "epoch": 0.59, "grad_norm": 0.9052948098015442, "learning_rate": 7.463973833361923e-06, "loss": 0.5192, "step": 7311 }, { "epoch": 0.59, "grad_norm": 0.9010973814962012, "learning_rate": 7.461427508289922e-06, "loss": 0.4918, "step": 7312 }, { "epoch": 0.59, "grad_norm": 0.8954927965727761, "learning_rate": 7.458881359138746e-06, "loss": 0.4777, "step": 7313 }, { "epoch": 0.59, "grad_norm": 0.9163133645892921, "learning_rate": 7.4563353860848375e-06, "loss": 0.5013, "step": 7314 }, { "epoch": 0.59, "grad_norm": 0.9584330383366784, "learning_rate": 7.453789589304629e-06, "loss": 0.5731, "step": 7315 }, { "epoch": 0.59, "grad_norm": 0.8238821401967524, "learning_rate": 7.451243968974547e-06, "loss": 0.4768, "step": 7316 }, { "epoch": 0.59, "grad_norm": 0.9103939120287774, "learning_rate": 7.448698525270995e-06, "loss": 0.54, "step": 7317 }, { "epoch": 0.59, "grad_norm": 0.8802397289849867, "learning_rate": 7.446153258370372e-06, "loss": 0.489, "step": 7318 }, { "epoch": 0.59, "grad_norm": 0.9323534952947754, "learning_rate": 7.443608168449063e-06, "loss": 0.5387, "step": 7319 }, { "epoch": 0.59, "grad_norm": 0.9041005321484384, "learning_rate": 7.44106325568344e-06, "loss": 0.5025, "step": 7320 }, { "epoch": 0.6, "grad_norm": 0.9351837814636498, "learning_rate": 7.438518520249865e-06, "loss": 0.5344, "step": 7321 }, { "epoch": 0.6, "grad_norm": 0.9896753959713007, "learning_rate": 7.435973962324685e-06, "loss": 0.509, "step": 7322 }, { "epoch": 0.6, "grad_norm": 0.9217534565109436, "learning_rate": 7.433429582084233e-06, "loss": 0.527, "step": 7323 }, { "epoch": 0.6, "grad_norm": 0.8924443816109849, "learning_rate": 7.4308853797048355e-06, "loss": 0.493, "step": 7324 }, { "epoch": 0.6, "grad_norm": 0.790774098801519, "learning_rate": 7.428341355362803e-06, "loss": 0.5064, "step": 7325 }, { "epoch": 0.6, "grad_norm": 0.895239464915871, "learning_rate": 7.425797509234433e-06, "loss": 0.5247, "step": 7326 }, { "epoch": 0.6, "grad_norm": 0.8337451552078113, "learning_rate": 7.423253841496011e-06, "loss": 0.4385, "step": 7327 }, { "epoch": 0.6, "grad_norm": 0.9388895050134273, "learning_rate": 7.420710352323814e-06, "loss": 0.5099, "step": 7328 }, { "epoch": 0.6, "grad_norm": 0.8729235534099536, "learning_rate": 7.418167041894101e-06, "loss": 0.5312, "step": 7329 }, { "epoch": 0.6, "grad_norm": 0.8780973606573884, "learning_rate": 7.415623910383121e-06, "loss": 0.5222, "step": 7330 }, { "epoch": 0.6, "grad_norm": 0.8900258292874804, "learning_rate": 7.413080957967114e-06, "loss": 0.5311, "step": 7331 }, { "epoch": 0.6, "grad_norm": 0.8735285883101643, "learning_rate": 7.4105381848223005e-06, "loss": 0.4678, "step": 7332 }, { "epoch": 0.6, "grad_norm": 0.9108584288618684, "learning_rate": 7.407995591124892e-06, "loss": 0.5694, "step": 7333 }, { "epoch": 0.6, "grad_norm": 0.8409007811031063, "learning_rate": 7.405453177051092e-06, "loss": 0.5019, "step": 7334 }, { "epoch": 0.6, "grad_norm": 0.9763197336086359, "learning_rate": 7.4029109427770875e-06, "loss": 0.5829, "step": 7335 }, { "epoch": 0.6, "grad_norm": 0.8358779487127997, "learning_rate": 7.400368888479048e-06, "loss": 0.4701, "step": 7336 }, { "epoch": 0.6, "grad_norm": 0.9256093445064898, "learning_rate": 7.3978270143331386e-06, "loss": 0.4672, "step": 7337 }, { "epoch": 0.6, "grad_norm": 0.8703907207903545, "learning_rate": 7.395285320515513e-06, "loss": 0.4854, "step": 7338 }, { "epoch": 0.6, "grad_norm": 0.8341471873689289, "learning_rate": 7.392743807202301e-06, "loss": 0.4957, "step": 7339 }, { "epoch": 0.6, "grad_norm": 0.9060027701674533, "learning_rate": 7.39020247456963e-06, "loss": 0.5042, "step": 7340 }, { "epoch": 0.6, "grad_norm": 0.8764931059475367, "learning_rate": 7.3876613227936145e-06, "loss": 0.4678, "step": 7341 }, { "epoch": 0.6, "grad_norm": 1.0120630716408041, "learning_rate": 7.38512035205035e-06, "loss": 0.5496, "step": 7342 }, { "epoch": 0.6, "grad_norm": 0.8493623592295444, "learning_rate": 7.382579562515926e-06, "loss": 0.4452, "step": 7343 }, { "epoch": 0.6, "grad_norm": 0.7699481233724054, "learning_rate": 7.38003895436642e-06, "loss": 0.4266, "step": 7344 }, { "epoch": 0.6, "grad_norm": 0.8349390281702176, "learning_rate": 7.377498527777887e-06, "loss": 0.5024, "step": 7345 }, { "epoch": 0.6, "grad_norm": 0.8471489722827498, "learning_rate": 7.374958282926381e-06, "loss": 0.5316, "step": 7346 }, { "epoch": 0.6, "grad_norm": 0.9221474469176514, "learning_rate": 7.372418219987938e-06, "loss": 0.5334, "step": 7347 }, { "epoch": 0.6, "grad_norm": 0.8966209510688218, "learning_rate": 7.369878339138581e-06, "loss": 0.5412, "step": 7348 }, { "epoch": 0.6, "grad_norm": 0.9422004357531978, "learning_rate": 7.367338640554322e-06, "loss": 0.5665, "step": 7349 }, { "epoch": 0.6, "grad_norm": 0.9077745442266718, "learning_rate": 7.364799124411162e-06, "loss": 0.4843, "step": 7350 }, { "epoch": 0.6, "grad_norm": 0.8639359821414474, "learning_rate": 7.36225979088508e-06, "loss": 0.5359, "step": 7351 }, { "epoch": 0.6, "grad_norm": 0.9410610289961132, "learning_rate": 7.359720640152061e-06, "loss": 0.5008, "step": 7352 }, { "epoch": 0.6, "grad_norm": 0.7901229733572144, "learning_rate": 7.357181672388059e-06, "loss": 0.5045, "step": 7353 }, { "epoch": 0.6, "grad_norm": 0.9425112865880872, "learning_rate": 7.354642887769018e-06, "loss": 0.5639, "step": 7354 }, { "epoch": 0.6, "grad_norm": 0.8782766252811707, "learning_rate": 7.3521042864708825e-06, "loss": 0.4998, "step": 7355 }, { "epoch": 0.6, "grad_norm": 0.9183737268629781, "learning_rate": 7.349565868669573e-06, "loss": 0.5099, "step": 7356 }, { "epoch": 0.6, "grad_norm": 0.9348792780089465, "learning_rate": 7.347027634540993e-06, "loss": 0.5533, "step": 7357 }, { "epoch": 0.6, "grad_norm": 0.9344656577875735, "learning_rate": 7.344489584261047e-06, "loss": 0.5298, "step": 7358 }, { "epoch": 0.6, "grad_norm": 0.8642903378823802, "learning_rate": 7.34195171800562e-06, "loss": 0.4594, "step": 7359 }, { "epoch": 0.6, "grad_norm": 0.981632696955239, "learning_rate": 7.339414035950576e-06, "loss": 0.5647, "step": 7360 }, { "epoch": 0.6, "grad_norm": 0.9097634668291024, "learning_rate": 7.3368765382717835e-06, "loss": 0.5358, "step": 7361 }, { "epoch": 0.6, "grad_norm": 0.8220863835173063, "learning_rate": 7.334339225145084e-06, "loss": 0.5158, "step": 7362 }, { "epoch": 0.6, "grad_norm": 0.8641456239026523, "learning_rate": 7.331802096746309e-06, "loss": 0.484, "step": 7363 }, { "epoch": 0.6, "grad_norm": 0.8618023808624063, "learning_rate": 7.329265153251285e-06, "loss": 0.4845, "step": 7364 }, { "epoch": 0.6, "grad_norm": 0.8504294393521689, "learning_rate": 7.326728394835818e-06, "loss": 0.4895, "step": 7365 }, { "epoch": 0.6, "grad_norm": 0.8267207028527193, "learning_rate": 7.324191821675697e-06, "loss": 0.4765, "step": 7366 }, { "epoch": 0.6, "grad_norm": 0.8853464997118534, "learning_rate": 7.321655433946714e-06, "loss": 0.4995, "step": 7367 }, { "epoch": 0.6, "grad_norm": 0.8718350723134894, "learning_rate": 7.319119231824633e-06, "loss": 0.5163, "step": 7368 }, { "epoch": 0.6, "grad_norm": 0.9512318446047324, "learning_rate": 7.316583215485208e-06, "loss": 0.5818, "step": 7369 }, { "epoch": 0.6, "grad_norm": 0.8815002766107952, "learning_rate": 7.314047385104189e-06, "loss": 0.5589, "step": 7370 }, { "epoch": 0.6, "grad_norm": 0.8286685998102985, "learning_rate": 7.311511740857304e-06, "loss": 0.4836, "step": 7371 }, { "epoch": 0.6, "grad_norm": 0.9162307895735335, "learning_rate": 7.308976282920268e-06, "loss": 0.5594, "step": 7372 }, { "epoch": 0.6, "grad_norm": 1.0199676957259531, "learning_rate": 7.306441011468792e-06, "loss": 0.5708, "step": 7373 }, { "epoch": 0.6, "grad_norm": 0.8913322632852547, "learning_rate": 7.303905926678565e-06, "loss": 0.5483, "step": 7374 }, { "epoch": 0.6, "grad_norm": 0.8926702359397981, "learning_rate": 7.301371028725261e-06, "loss": 0.6005, "step": 7375 }, { "epoch": 0.6, "grad_norm": 0.8996125260292345, "learning_rate": 7.298836317784556e-06, "loss": 0.5087, "step": 7376 }, { "epoch": 0.6, "grad_norm": 0.8145218099302595, "learning_rate": 7.296301794032097e-06, "loss": 0.5221, "step": 7377 }, { "epoch": 0.6, "grad_norm": 0.9077749787424425, "learning_rate": 7.293767457643523e-06, "loss": 0.4831, "step": 7378 }, { "epoch": 0.6, "grad_norm": 1.011921708629939, "learning_rate": 7.291233308794467e-06, "loss": 0.5276, "step": 7379 }, { "epoch": 0.6, "grad_norm": 0.8568360633757683, "learning_rate": 7.28869934766054e-06, "loss": 0.4962, "step": 7380 }, { "epoch": 0.6, "grad_norm": 0.8952349900165589, "learning_rate": 7.286165574417339e-06, "loss": 0.5249, "step": 7381 }, { "epoch": 0.6, "grad_norm": 0.944935733661351, "learning_rate": 7.283631989240461e-06, "loss": 0.5662, "step": 7382 }, { "epoch": 0.6, "grad_norm": 0.9099120859981927, "learning_rate": 7.281098592305475e-06, "loss": 0.4988, "step": 7383 }, { "epoch": 0.6, "grad_norm": 0.8401807741972126, "learning_rate": 7.2785653837879435e-06, "loss": 0.4836, "step": 7384 }, { "epoch": 0.6, "grad_norm": 0.9144781640878747, "learning_rate": 7.276032363863419e-06, "loss": 0.551, "step": 7385 }, { "epoch": 0.6, "grad_norm": 0.8775209203998569, "learning_rate": 7.273499532707438e-06, "loss": 0.5175, "step": 7386 }, { "epoch": 0.6, "grad_norm": 0.9255115313293777, "learning_rate": 7.270966890495515e-06, "loss": 0.5155, "step": 7387 }, { "epoch": 0.6, "grad_norm": 0.7901310404074847, "learning_rate": 7.268434437403169e-06, "loss": 0.3836, "step": 7388 }, { "epoch": 0.6, "grad_norm": 0.911734446879483, "learning_rate": 7.2659021736058966e-06, "loss": 0.5051, "step": 7389 }, { "epoch": 0.6, "grad_norm": 0.8213031260282923, "learning_rate": 7.263370099279173e-06, "loss": 0.4969, "step": 7390 }, { "epoch": 0.6, "grad_norm": 0.8756849290520115, "learning_rate": 7.260838214598475e-06, "loss": 0.5081, "step": 7391 }, { "epoch": 0.6, "grad_norm": 0.9154177602786717, "learning_rate": 7.258306519739263e-06, "loss": 0.5225, "step": 7392 }, { "epoch": 0.6, "grad_norm": 0.8133074922872229, "learning_rate": 7.2557750148769724e-06, "loss": 0.4734, "step": 7393 }, { "epoch": 0.6, "grad_norm": 0.9995526440978737, "learning_rate": 7.253243700187043e-06, "loss": 0.5527, "step": 7394 }, { "epoch": 0.6, "grad_norm": 0.9495255233213361, "learning_rate": 7.250712575844885e-06, "loss": 0.5328, "step": 7395 }, { "epoch": 0.6, "grad_norm": 0.8704164475758905, "learning_rate": 7.248181642025911e-06, "loss": 0.5269, "step": 7396 }, { "epoch": 0.6, "grad_norm": 0.9126078437554483, "learning_rate": 7.245650898905507e-06, "loss": 0.5635, "step": 7397 }, { "epoch": 0.6, "grad_norm": 0.852380419372046, "learning_rate": 7.243120346659049e-06, "loss": 0.4722, "step": 7398 }, { "epoch": 0.6, "grad_norm": 0.9106044444571901, "learning_rate": 7.240589985461911e-06, "loss": 0.4851, "step": 7399 }, { "epoch": 0.6, "grad_norm": 0.850767141256381, "learning_rate": 7.238059815489439e-06, "loss": 0.4833, "step": 7400 }, { "epoch": 0.6, "grad_norm": 0.9655676492891794, "learning_rate": 7.235529836916968e-06, "loss": 0.4686, "step": 7401 }, { "epoch": 0.6, "grad_norm": 0.9151038100861985, "learning_rate": 7.233000049919829e-06, "loss": 0.5022, "step": 7402 }, { "epoch": 0.6, "grad_norm": 0.8971978008944801, "learning_rate": 7.230470454673335e-06, "loss": 0.5701, "step": 7403 }, { "epoch": 0.6, "grad_norm": 0.9944557059493082, "learning_rate": 7.227941051352777e-06, "loss": 0.5367, "step": 7404 }, { "epoch": 0.6, "grad_norm": 0.9504825452251051, "learning_rate": 7.225411840133449e-06, "loss": 0.5686, "step": 7405 }, { "epoch": 0.6, "grad_norm": 1.0065511172114605, "learning_rate": 7.22288282119062e-06, "loss": 0.5309, "step": 7406 }, { "epoch": 0.6, "grad_norm": 0.9250351141978376, "learning_rate": 7.2203539946995435e-06, "loss": 0.4719, "step": 7407 }, { "epoch": 0.6, "grad_norm": 0.8965608857066834, "learning_rate": 7.217825360835475e-06, "loss": 0.5639, "step": 7408 }, { "epoch": 0.6, "grad_norm": 0.877393505298854, "learning_rate": 7.21529691977364e-06, "loss": 0.4718, "step": 7409 }, { "epoch": 0.6, "grad_norm": 0.9556307511449684, "learning_rate": 7.212768671689255e-06, "loss": 0.4729, "step": 7410 }, { "epoch": 0.6, "grad_norm": 0.9377000629117607, "learning_rate": 7.210240616757531e-06, "loss": 0.4944, "step": 7411 }, { "epoch": 0.6, "grad_norm": 0.8263931357979006, "learning_rate": 7.20771275515366e-06, "loss": 0.4764, "step": 7412 }, { "epoch": 0.6, "grad_norm": 0.9111012936356778, "learning_rate": 7.205185087052813e-06, "loss": 0.5189, "step": 7413 }, { "epoch": 0.6, "grad_norm": 1.3651652953137372, "learning_rate": 7.202657612630165e-06, "loss": 0.5264, "step": 7414 }, { "epoch": 0.6, "grad_norm": 0.9200714563305669, "learning_rate": 7.200130332060864e-06, "loss": 0.5478, "step": 7415 }, { "epoch": 0.6, "grad_norm": 0.9002547184998756, "learning_rate": 7.197603245520042e-06, "loss": 0.5124, "step": 7416 }, { "epoch": 0.6, "grad_norm": 1.013949035151007, "learning_rate": 7.195076353182834e-06, "loss": 0.4858, "step": 7417 }, { "epoch": 0.6, "grad_norm": 0.824976407945111, "learning_rate": 7.192549655224346e-06, "loss": 0.4967, "step": 7418 }, { "epoch": 0.6, "grad_norm": 0.8838276237041307, "learning_rate": 7.190023151819674e-06, "loss": 0.5368, "step": 7419 }, { "epoch": 0.6, "grad_norm": 0.8606988929514742, "learning_rate": 7.187496843143908e-06, "loss": 0.4799, "step": 7420 }, { "epoch": 0.6, "grad_norm": 0.8872973984439431, "learning_rate": 7.184970729372118e-06, "loss": 0.4783, "step": 7421 }, { "epoch": 0.6, "grad_norm": 0.9381768183168121, "learning_rate": 7.182444810679354e-06, "loss": 0.5491, "step": 7422 }, { "epoch": 0.6, "grad_norm": 0.9379227502504542, "learning_rate": 7.17991908724067e-06, "loss": 0.556, "step": 7423 }, { "epoch": 0.6, "grad_norm": 0.9137485386338625, "learning_rate": 7.17739355923109e-06, "loss": 0.51, "step": 7424 }, { "epoch": 0.6, "grad_norm": 1.2644241726683547, "learning_rate": 7.174868226825631e-06, "loss": 0.5416, "step": 7425 }, { "epoch": 0.6, "grad_norm": 0.8958335672404645, "learning_rate": 7.172343090199301e-06, "loss": 0.5207, "step": 7426 }, { "epoch": 0.6, "grad_norm": 0.8590210296481723, "learning_rate": 7.169818149527087e-06, "loss": 0.5337, "step": 7427 }, { "epoch": 0.6, "grad_norm": 0.8384171957758327, "learning_rate": 7.167293404983962e-06, "loss": 0.4613, "step": 7428 }, { "epoch": 0.6, "grad_norm": 0.954130281660937, "learning_rate": 7.164768856744893e-06, "loss": 0.5646, "step": 7429 }, { "epoch": 0.6, "grad_norm": 1.0030607106468707, "learning_rate": 7.1622445049848286e-06, "loss": 0.5386, "step": 7430 }, { "epoch": 0.6, "grad_norm": 0.8139381902091727, "learning_rate": 7.159720349878698e-06, "loss": 0.4671, "step": 7431 }, { "epoch": 0.6, "grad_norm": 0.8966599226454, "learning_rate": 7.157196391601433e-06, "loss": 0.5602, "step": 7432 }, { "epoch": 0.6, "grad_norm": 0.9522741156653227, "learning_rate": 7.154672630327937e-06, "loss": 0.5023, "step": 7433 }, { "epoch": 0.6, "grad_norm": 0.9261395171564893, "learning_rate": 7.152149066233098e-06, "loss": 0.5739, "step": 7434 }, { "epoch": 0.6, "grad_norm": 0.9771922952299095, "learning_rate": 7.149625699491809e-06, "loss": 0.5441, "step": 7435 }, { "epoch": 0.6, "grad_norm": 0.8530678821927267, "learning_rate": 7.147102530278929e-06, "loss": 0.4926, "step": 7436 }, { "epoch": 0.6, "grad_norm": 0.926699880774957, "learning_rate": 7.14457955876931e-06, "loss": 0.5174, "step": 7437 }, { "epoch": 0.6, "grad_norm": 0.9079063732597161, "learning_rate": 7.142056785137799e-06, "loss": 0.475, "step": 7438 }, { "epoch": 0.6, "grad_norm": 1.0016805486137383, "learning_rate": 7.139534209559217e-06, "loss": 0.6123, "step": 7439 }, { "epoch": 0.6, "grad_norm": 0.926003799746655, "learning_rate": 7.137011832208374e-06, "loss": 0.5129, "step": 7440 }, { "epoch": 0.6, "grad_norm": 0.8901597233303454, "learning_rate": 7.134489653260075e-06, "loss": 0.4922, "step": 7441 }, { "epoch": 0.6, "grad_norm": 0.9039581597904545, "learning_rate": 7.131967672889101e-06, "loss": 0.494, "step": 7442 }, { "epoch": 0.6, "grad_norm": 0.7942695446448926, "learning_rate": 7.129445891270219e-06, "loss": 0.4672, "step": 7443 }, { "epoch": 0.61, "grad_norm": 0.8833734306634364, "learning_rate": 7.126924308578196e-06, "loss": 0.5106, "step": 7444 }, { "epoch": 0.61, "grad_norm": 0.8939098610259423, "learning_rate": 7.124402924987767e-06, "loss": 0.5146, "step": 7445 }, { "epoch": 0.61, "grad_norm": 0.8382061476262879, "learning_rate": 7.121881740673664e-06, "loss": 0.4823, "step": 7446 }, { "epoch": 0.61, "grad_norm": 0.7778752928864608, "learning_rate": 7.119360755810607e-06, "loss": 0.4548, "step": 7447 }, { "epoch": 0.61, "grad_norm": 0.8011529749679672, "learning_rate": 7.116839970573292e-06, "loss": 0.4825, "step": 7448 }, { "epoch": 0.61, "grad_norm": 0.9036417498542302, "learning_rate": 7.114319385136408e-06, "loss": 0.5491, "step": 7449 }, { "epoch": 0.61, "grad_norm": 0.8761401707218572, "learning_rate": 7.111798999674635e-06, "loss": 0.4814, "step": 7450 }, { "epoch": 0.61, "grad_norm": 0.8523161458806872, "learning_rate": 7.109278814362629e-06, "loss": 0.543, "step": 7451 }, { "epoch": 0.61, "grad_norm": 0.9528197182425885, "learning_rate": 7.106758829375033e-06, "loss": 0.5567, "step": 7452 }, { "epoch": 0.61, "grad_norm": 0.8899638209596322, "learning_rate": 7.104239044886487e-06, "loss": 0.5432, "step": 7453 }, { "epoch": 0.61, "grad_norm": 0.870345394746601, "learning_rate": 7.101719461071608e-06, "loss": 0.4923, "step": 7454 }, { "epoch": 0.61, "grad_norm": 0.9432026321304644, "learning_rate": 7.099200078104995e-06, "loss": 0.4855, "step": 7455 }, { "epoch": 0.61, "grad_norm": 0.909353097814008, "learning_rate": 7.0966808961612475e-06, "loss": 0.5445, "step": 7456 }, { "epoch": 0.61, "grad_norm": 0.8222116703513183, "learning_rate": 7.094161915414939e-06, "loss": 0.5132, "step": 7457 }, { "epoch": 0.61, "grad_norm": 0.8998287656530938, "learning_rate": 7.091643136040629e-06, "loss": 0.5051, "step": 7458 }, { "epoch": 0.61, "grad_norm": 0.909655368823903, "learning_rate": 7.089124558212872e-06, "loss": 0.5384, "step": 7459 }, { "epoch": 0.61, "grad_norm": 0.9122072478770556, "learning_rate": 7.0866061821062025e-06, "loss": 0.4496, "step": 7460 }, { "epoch": 0.61, "grad_norm": 0.9958858222651449, "learning_rate": 7.084088007895136e-06, "loss": 0.5486, "step": 7461 }, { "epoch": 0.61, "grad_norm": 0.8786152819765978, "learning_rate": 7.081570035754189e-06, "loss": 0.4973, "step": 7462 }, { "epoch": 0.61, "grad_norm": 0.895279929473395, "learning_rate": 7.079052265857847e-06, "loss": 0.5163, "step": 7463 }, { "epoch": 0.61, "grad_norm": 0.9016037453401397, "learning_rate": 7.0765346983805925e-06, "loss": 0.5418, "step": 7464 }, { "epoch": 0.61, "grad_norm": 0.9594257408814107, "learning_rate": 7.074017333496892e-06, "loss": 0.4872, "step": 7465 }, { "epoch": 0.61, "grad_norm": 0.9198839250326495, "learning_rate": 7.071500171381193e-06, "loss": 0.5639, "step": 7466 }, { "epoch": 0.61, "grad_norm": 0.9113176502875616, "learning_rate": 7.068983212207934e-06, "loss": 0.5025, "step": 7467 }, { "epoch": 0.61, "grad_norm": 0.8920345474434166, "learning_rate": 7.066466456151541e-06, "loss": 0.5027, "step": 7468 }, { "epoch": 0.61, "grad_norm": 0.926683305043765, "learning_rate": 7.063949903386419e-06, "loss": 0.478, "step": 7469 }, { "epoch": 0.61, "grad_norm": 0.8747561455025346, "learning_rate": 7.061433554086964e-06, "loss": 0.5093, "step": 7470 }, { "epoch": 0.61, "grad_norm": 0.8233419034552449, "learning_rate": 7.058917408427559e-06, "loss": 0.5061, "step": 7471 }, { "epoch": 0.61, "grad_norm": 0.968703122551061, "learning_rate": 7.056401466582567e-06, "loss": 0.5174, "step": 7472 }, { "epoch": 0.61, "grad_norm": 0.98020612835273, "learning_rate": 7.053885728726343e-06, "loss": 0.4846, "step": 7473 }, { "epoch": 0.61, "grad_norm": 0.9245074098344465, "learning_rate": 7.051370195033227e-06, "loss": 0.5086, "step": 7474 }, { "epoch": 0.61, "grad_norm": 0.8498280153892569, "learning_rate": 7.048854865677538e-06, "loss": 0.4974, "step": 7475 }, { "epoch": 0.61, "grad_norm": 0.9584978657105867, "learning_rate": 7.04633974083359e-06, "loss": 0.553, "step": 7476 }, { "epoch": 0.61, "grad_norm": 0.9813074133240363, "learning_rate": 7.04382482067568e-06, "loss": 0.5338, "step": 7477 }, { "epoch": 0.61, "grad_norm": 0.8597723255573146, "learning_rate": 7.041310105378085e-06, "loss": 0.5018, "step": 7478 }, { "epoch": 0.61, "grad_norm": 0.9700494913363266, "learning_rate": 7.038795595115076e-06, "loss": 0.5172, "step": 7479 }, { "epoch": 0.61, "grad_norm": 0.9476074585160819, "learning_rate": 7.036281290060907e-06, "loss": 0.5126, "step": 7480 }, { "epoch": 0.61, "grad_norm": 0.9883722573627236, "learning_rate": 7.033767190389814e-06, "loss": 0.5522, "step": 7481 }, { "epoch": 0.61, "grad_norm": 0.8750535173574526, "learning_rate": 7.031253296276024e-06, "loss": 0.4966, "step": 7482 }, { "epoch": 0.61, "grad_norm": 0.8808199103635561, "learning_rate": 7.028739607893746e-06, "loss": 0.4345, "step": 7483 }, { "epoch": 0.61, "grad_norm": 0.9292995866899433, "learning_rate": 7.026226125417182e-06, "loss": 0.5172, "step": 7484 }, { "epoch": 0.61, "grad_norm": 0.8682760191868, "learning_rate": 7.023712849020506e-06, "loss": 0.5119, "step": 7485 }, { "epoch": 0.61, "grad_norm": 0.9130486211387956, "learning_rate": 7.021199778877891e-06, "loss": 0.508, "step": 7486 }, { "epoch": 0.61, "grad_norm": 0.8989745028238334, "learning_rate": 7.01868691516349e-06, "loss": 0.5389, "step": 7487 }, { "epoch": 0.61, "grad_norm": 0.8711288089074521, "learning_rate": 7.016174258051441e-06, "loss": 0.5308, "step": 7488 }, { "epoch": 0.61, "grad_norm": 0.8887074836595515, "learning_rate": 7.013661807715866e-06, "loss": 0.5039, "step": 7489 }, { "epoch": 0.61, "grad_norm": 0.915092893637171, "learning_rate": 7.0111495643308836e-06, "loss": 0.5086, "step": 7490 }, { "epoch": 0.61, "grad_norm": 0.9629412883487775, "learning_rate": 7.008637528070583e-06, "loss": 0.5252, "step": 7491 }, { "epoch": 0.61, "grad_norm": 0.8843063699819966, "learning_rate": 7.006125699109048e-06, "loss": 0.4488, "step": 7492 }, { "epoch": 0.61, "grad_norm": 0.9509376929761509, "learning_rate": 7.003614077620348e-06, "loss": 0.525, "step": 7493 }, { "epoch": 0.61, "grad_norm": 0.8383261728774117, "learning_rate": 7.001102663778533e-06, "loss": 0.5241, "step": 7494 }, { "epoch": 0.61, "grad_norm": 0.8631840695371249, "learning_rate": 6.998591457757643e-06, "loss": 0.4911, "step": 7495 }, { "epoch": 0.61, "grad_norm": 0.8323660725814972, "learning_rate": 6.9960804597317045e-06, "loss": 0.421, "step": 7496 }, { "epoch": 0.61, "grad_norm": 0.7882803380832858, "learning_rate": 6.993569669874724e-06, "loss": 0.4342, "step": 7497 }, { "epoch": 0.61, "grad_norm": 0.9324318208303033, "learning_rate": 6.9910590883607e-06, "loss": 0.477, "step": 7498 }, { "epoch": 0.61, "grad_norm": 0.8485127165181465, "learning_rate": 6.9885487153636125e-06, "loss": 0.4926, "step": 7499 }, { "epoch": 0.61, "grad_norm": 0.9585992901038118, "learning_rate": 6.986038551057426e-06, "loss": 0.4797, "step": 7500 }, { "epoch": 0.61, "grad_norm": 0.9353390797402146, "learning_rate": 6.983528595616096e-06, "loss": 0.505, "step": 7501 }, { "epoch": 0.61, "grad_norm": 0.8813237756519612, "learning_rate": 6.98101884921356e-06, "loss": 0.4647, "step": 7502 }, { "epoch": 0.61, "grad_norm": 0.8437093853065601, "learning_rate": 6.978509312023736e-06, "loss": 0.4573, "step": 7503 }, { "epoch": 0.61, "grad_norm": 0.8652761631005595, "learning_rate": 6.975999984220541e-06, "loss": 0.4656, "step": 7504 }, { "epoch": 0.61, "grad_norm": 0.9284862732710436, "learning_rate": 6.9734908659778636e-06, "loss": 0.5179, "step": 7505 }, { "epoch": 0.61, "grad_norm": 0.8519841381992954, "learning_rate": 6.97098195746958e-06, "loss": 0.4691, "step": 7506 }, { "epoch": 0.61, "grad_norm": 0.9432823008748794, "learning_rate": 6.968473258869566e-06, "loss": 0.5224, "step": 7507 }, { "epoch": 0.61, "grad_norm": 0.9314459449139519, "learning_rate": 6.965964770351665e-06, "loss": 0.5125, "step": 7508 }, { "epoch": 0.61, "grad_norm": 0.9017714642380549, "learning_rate": 6.963456492089711e-06, "loss": 0.5316, "step": 7509 }, { "epoch": 0.61, "grad_norm": 0.9322043966660608, "learning_rate": 6.960948424257532e-06, "loss": 0.4958, "step": 7510 }, { "epoch": 0.61, "grad_norm": 1.0163089194858204, "learning_rate": 6.9584405670289326e-06, "loss": 0.5121, "step": 7511 }, { "epoch": 0.61, "grad_norm": 0.9067873188043927, "learning_rate": 6.955932920577699e-06, "loss": 0.576, "step": 7512 }, { "epoch": 0.61, "grad_norm": 0.8910120630030297, "learning_rate": 6.953425485077618e-06, "loss": 0.5537, "step": 7513 }, { "epoch": 0.61, "grad_norm": 0.9099219769405817, "learning_rate": 6.950918260702449e-06, "loss": 0.5537, "step": 7514 }, { "epoch": 0.61, "grad_norm": 0.9177750168038247, "learning_rate": 6.948411247625937e-06, "loss": 0.5237, "step": 7515 }, { "epoch": 0.61, "grad_norm": 0.8678464941195639, "learning_rate": 6.9459044460218205e-06, "loss": 0.5501, "step": 7516 }, { "epoch": 0.61, "grad_norm": 0.8816036797700124, "learning_rate": 6.943397856063818e-06, "loss": 0.5234, "step": 7517 }, { "epoch": 0.61, "grad_norm": 0.9188057188168129, "learning_rate": 6.9408914779256285e-06, "loss": 0.5016, "step": 7518 }, { "epoch": 0.61, "grad_norm": 0.8878831599490864, "learning_rate": 6.938385311780951e-06, "loss": 0.4565, "step": 7519 }, { "epoch": 0.61, "grad_norm": 0.9888578876329767, "learning_rate": 6.935879357803453e-06, "loss": 0.5835, "step": 7520 }, { "epoch": 0.61, "grad_norm": 0.993637491505397, "learning_rate": 6.933373616166799e-06, "loss": 0.5403, "step": 7521 }, { "epoch": 0.61, "grad_norm": 0.8068368590318049, "learning_rate": 6.930868087044634e-06, "loss": 0.4891, "step": 7522 }, { "epoch": 0.61, "grad_norm": 0.8173237797717122, "learning_rate": 6.9283627706105836e-06, "loss": 0.4959, "step": 7523 }, { "epoch": 0.61, "grad_norm": 0.8868903526963838, "learning_rate": 6.925857667038274e-06, "loss": 0.5011, "step": 7524 }, { "epoch": 0.61, "grad_norm": 0.882363978650632, "learning_rate": 6.923352776501302e-06, "loss": 0.5676, "step": 7525 }, { "epoch": 0.61, "grad_norm": 0.8759337087339624, "learning_rate": 6.920848099173247e-06, "loss": 0.5372, "step": 7526 }, { "epoch": 0.61, "grad_norm": 0.9091568143961809, "learning_rate": 6.918343635227694e-06, "loss": 0.5224, "step": 7527 }, { "epoch": 0.61, "grad_norm": 0.9492937811993646, "learning_rate": 6.915839384838192e-06, "loss": 0.5301, "step": 7528 }, { "epoch": 0.61, "grad_norm": 0.9466146485903585, "learning_rate": 6.913335348178283e-06, "loss": 0.4828, "step": 7529 }, { "epoch": 0.61, "grad_norm": 0.8734794349166198, "learning_rate": 6.910831525421499e-06, "loss": 0.5246, "step": 7530 }, { "epoch": 0.61, "grad_norm": 0.9151217895547622, "learning_rate": 6.90832791674135e-06, "loss": 0.5339, "step": 7531 }, { "epoch": 0.61, "grad_norm": 0.8742496126873698, "learning_rate": 6.905824522311331e-06, "loss": 0.4919, "step": 7532 }, { "epoch": 0.61, "grad_norm": 0.9385429607671967, "learning_rate": 6.90332134230493e-06, "loss": 0.5338, "step": 7533 }, { "epoch": 0.61, "grad_norm": 0.9194053424538637, "learning_rate": 6.900818376895615e-06, "loss": 0.5629, "step": 7534 }, { "epoch": 0.61, "grad_norm": 0.9612350119647755, "learning_rate": 6.898315626256833e-06, "loss": 0.5202, "step": 7535 }, { "epoch": 0.61, "grad_norm": 0.9415097629294374, "learning_rate": 6.895813090562031e-06, "loss": 0.5008, "step": 7536 }, { "epoch": 0.61, "grad_norm": 0.8088669541501505, "learning_rate": 6.893310769984629e-06, "loss": 0.5369, "step": 7537 }, { "epoch": 0.61, "grad_norm": 0.826545654887671, "learning_rate": 6.890808664698031e-06, "loss": 0.468, "step": 7538 }, { "epoch": 0.61, "grad_norm": 0.8972849914206232, "learning_rate": 6.888306774875638e-06, "loss": 0.4912, "step": 7539 }, { "epoch": 0.61, "grad_norm": 0.8815680247579892, "learning_rate": 6.885805100690825e-06, "loss": 0.5126, "step": 7540 }, { "epoch": 0.61, "grad_norm": 0.9884025324541118, "learning_rate": 6.883303642316954e-06, "loss": 0.5923, "step": 7541 }, { "epoch": 0.61, "grad_norm": 0.8249574853963665, "learning_rate": 6.8808023999273784e-06, "loss": 0.5241, "step": 7542 }, { "epoch": 0.61, "grad_norm": 0.8637034169688512, "learning_rate": 6.878301373695431e-06, "loss": 0.488, "step": 7543 }, { "epoch": 0.61, "grad_norm": 0.930755552156985, "learning_rate": 6.8758005637944245e-06, "loss": 0.5284, "step": 7544 }, { "epoch": 0.61, "grad_norm": 0.9909722910723238, "learning_rate": 6.873299970397672e-06, "loss": 0.5452, "step": 7545 }, { "epoch": 0.61, "grad_norm": 0.9544814719075744, "learning_rate": 6.870799593678459e-06, "loss": 0.5359, "step": 7546 }, { "epoch": 0.61, "grad_norm": 0.8329864765301852, "learning_rate": 6.868299433810053e-06, "loss": 0.4407, "step": 7547 }, { "epoch": 0.61, "grad_norm": 0.9008973754613913, "learning_rate": 6.8657994909657235e-06, "loss": 0.5463, "step": 7548 }, { "epoch": 0.61, "grad_norm": 0.8973104532423516, "learning_rate": 6.86329976531871e-06, "loss": 0.5159, "step": 7549 }, { "epoch": 0.61, "grad_norm": 0.8675670232532094, "learning_rate": 6.860800257042235e-06, "loss": 0.4741, "step": 7550 }, { "epoch": 0.61, "grad_norm": 0.9491734938139588, "learning_rate": 6.8583009663095215e-06, "loss": 0.5078, "step": 7551 }, { "epoch": 0.61, "grad_norm": 1.036963655326414, "learning_rate": 6.855801893293765e-06, "loss": 0.6484, "step": 7552 }, { "epoch": 0.61, "grad_norm": 0.9071145539618047, "learning_rate": 6.853303038168144e-06, "loss": 0.5096, "step": 7553 }, { "epoch": 0.61, "grad_norm": 1.0233484930312005, "learning_rate": 6.8508044011058375e-06, "loss": 0.5484, "step": 7554 }, { "epoch": 0.61, "grad_norm": 0.941587849993408, "learning_rate": 6.84830598227999e-06, "loss": 0.5463, "step": 7555 }, { "epoch": 0.61, "grad_norm": 0.9071870766619743, "learning_rate": 6.845807781863739e-06, "loss": 0.5312, "step": 7556 }, { "epoch": 0.61, "grad_norm": 0.8802657720651078, "learning_rate": 6.8433098000302155e-06, "loss": 0.5114, "step": 7557 }, { "epoch": 0.61, "grad_norm": 0.8716345359948411, "learning_rate": 6.840812036952522e-06, "loss": 0.43, "step": 7558 }, { "epoch": 0.61, "grad_norm": 0.9806567088859193, "learning_rate": 6.83831449280375e-06, "loss": 0.4978, "step": 7559 }, { "epoch": 0.61, "grad_norm": 0.927207704508905, "learning_rate": 6.8358171677569814e-06, "loss": 0.5402, "step": 7560 }, { "epoch": 0.61, "grad_norm": 0.9100483563196377, "learning_rate": 6.833320061985278e-06, "loss": 0.5295, "step": 7561 }, { "epoch": 0.61, "grad_norm": 0.9003269433900096, "learning_rate": 6.830823175661681e-06, "loss": 0.5654, "step": 7562 }, { "epoch": 0.61, "grad_norm": 0.8768709859948077, "learning_rate": 6.828326508959229e-06, "loss": 0.4975, "step": 7563 }, { "epoch": 0.61, "grad_norm": 0.9148195812629236, "learning_rate": 6.825830062050939e-06, "loss": 0.5429, "step": 7564 }, { "epoch": 0.61, "grad_norm": 0.8353913853922079, "learning_rate": 6.823333835109805e-06, "loss": 0.5226, "step": 7565 }, { "epoch": 0.61, "grad_norm": 0.9198354109819284, "learning_rate": 6.820837828308823e-06, "loss": 0.519, "step": 7566 }, { "epoch": 0.62, "grad_norm": 0.9656566943225391, "learning_rate": 6.818342041820959e-06, "loss": 0.5811, "step": 7567 }, { "epoch": 0.62, "grad_norm": 0.9093452706591472, "learning_rate": 6.815846475819166e-06, "loss": 0.5197, "step": 7568 }, { "epoch": 0.62, "grad_norm": 0.9813198921313696, "learning_rate": 6.813351130476391e-06, "loss": 0.4577, "step": 7569 }, { "epoch": 0.62, "grad_norm": 1.0110714074684641, "learning_rate": 6.810856005965558e-06, "loss": 0.5877, "step": 7570 }, { "epoch": 0.62, "grad_norm": 0.9041099671510405, "learning_rate": 6.808361102459568e-06, "loss": 0.5148, "step": 7571 }, { "epoch": 0.62, "grad_norm": 0.8462419013566584, "learning_rate": 6.80586642013133e-06, "loss": 0.4424, "step": 7572 }, { "epoch": 0.62, "grad_norm": 0.954344805260286, "learning_rate": 6.803371959153714e-06, "loss": 0.5495, "step": 7573 }, { "epoch": 0.62, "grad_norm": 0.8968157028231019, "learning_rate": 6.800877719699581e-06, "loss": 0.5173, "step": 7574 }, { "epoch": 0.62, "grad_norm": 0.8759713185900999, "learning_rate": 6.798383701941791e-06, "loss": 0.5261, "step": 7575 }, { "epoch": 0.62, "grad_norm": 0.9532005915781344, "learning_rate": 6.795889906053168e-06, "loss": 0.5458, "step": 7576 }, { "epoch": 0.62, "grad_norm": 0.958076507362562, "learning_rate": 6.79339633220653e-06, "loss": 0.4898, "step": 7577 }, { "epoch": 0.62, "grad_norm": 0.9654309823547377, "learning_rate": 6.7909029805746855e-06, "loss": 0.4927, "step": 7578 }, { "epoch": 0.62, "grad_norm": 0.8362198322373752, "learning_rate": 6.788409851330419e-06, "loss": 0.4931, "step": 7579 }, { "epoch": 0.62, "grad_norm": 0.8885475948416547, "learning_rate": 6.7859169446464955e-06, "loss": 0.464, "step": 7580 }, { "epoch": 0.62, "grad_norm": 0.8621341356587938, "learning_rate": 6.783424260695681e-06, "loss": 0.5048, "step": 7581 }, { "epoch": 0.62, "grad_norm": 0.9836189609295446, "learning_rate": 6.780931799650714e-06, "loss": 0.5756, "step": 7582 }, { "epoch": 0.62, "grad_norm": 0.9043346316869034, "learning_rate": 6.778439561684311e-06, "loss": 0.5557, "step": 7583 }, { "epoch": 0.62, "grad_norm": 0.9260466410600587, "learning_rate": 6.775947546969195e-06, "loss": 0.5034, "step": 7584 }, { "epoch": 0.62, "grad_norm": 0.9557467442673623, "learning_rate": 6.773455755678054e-06, "loss": 0.5385, "step": 7585 }, { "epoch": 0.62, "grad_norm": 0.8211146342579827, "learning_rate": 6.770964187983563e-06, "loss": 0.4589, "step": 7586 }, { "epoch": 0.62, "grad_norm": 0.9688902980547851, "learning_rate": 6.7684728440583934e-06, "loss": 0.5368, "step": 7587 }, { "epoch": 0.62, "grad_norm": 0.9309423999874568, "learning_rate": 6.7659817240751906e-06, "loss": 0.5372, "step": 7588 }, { "epoch": 0.62, "grad_norm": 0.8931467289113518, "learning_rate": 6.76349082820658e-06, "loss": 0.4692, "step": 7589 }, { "epoch": 0.62, "grad_norm": 0.8857663662886142, "learning_rate": 6.7610001566251885e-06, "loss": 0.4539, "step": 7590 }, { "epoch": 0.62, "grad_norm": 0.8738241729867862, "learning_rate": 6.758509709503614e-06, "loss": 0.5054, "step": 7591 }, { "epoch": 0.62, "grad_norm": 0.85924221496039, "learning_rate": 6.756019487014437e-06, "loss": 0.4874, "step": 7592 }, { "epoch": 0.62, "grad_norm": 0.8547022449248107, "learning_rate": 6.753529489330235e-06, "loss": 0.5146, "step": 7593 }, { "epoch": 0.62, "grad_norm": 0.7745590974676932, "learning_rate": 6.751039716623562e-06, "loss": 0.4518, "step": 7594 }, { "epoch": 0.62, "grad_norm": 0.9500988557366958, "learning_rate": 6.7485501690669495e-06, "loss": 0.5643, "step": 7595 }, { "epoch": 0.62, "grad_norm": 0.9158231254949393, "learning_rate": 6.74606084683293e-06, "loss": 0.4598, "step": 7596 }, { "epoch": 0.62, "grad_norm": 0.9059197379989498, "learning_rate": 6.743571750094009e-06, "loss": 0.534, "step": 7597 }, { "epoch": 0.62, "grad_norm": 0.9930882474280531, "learning_rate": 6.741082879022671e-06, "loss": 0.6028, "step": 7598 }, { "epoch": 0.62, "grad_norm": 0.9398046854732968, "learning_rate": 6.738594233791405e-06, "loss": 0.5218, "step": 7599 }, { "epoch": 0.62, "grad_norm": 0.9324822671783136, "learning_rate": 6.7361058145726645e-06, "loss": 0.5388, "step": 7600 }, { "epoch": 0.62, "grad_norm": 0.9652578493993295, "learning_rate": 6.733617621538893e-06, "loss": 0.6204, "step": 7601 }, { "epoch": 0.62, "grad_norm": 0.9326608597566178, "learning_rate": 6.731129654862526e-06, "loss": 0.5349, "step": 7602 }, { "epoch": 0.62, "grad_norm": 0.874818064011275, "learning_rate": 6.7286419147159745e-06, "loss": 0.5201, "step": 7603 }, { "epoch": 0.62, "grad_norm": 0.8026460981939355, "learning_rate": 6.726154401271633e-06, "loss": 0.452, "step": 7604 }, { "epoch": 0.62, "grad_norm": 0.9147355097876252, "learning_rate": 6.723667114701892e-06, "loss": 0.512, "step": 7605 }, { "epoch": 0.62, "grad_norm": 0.8944621682572889, "learning_rate": 6.721180055179113e-06, "loss": 0.4557, "step": 7606 }, { "epoch": 0.62, "grad_norm": 1.0302155219494231, "learning_rate": 6.718693222875644e-06, "loss": 0.4877, "step": 7607 }, { "epoch": 0.62, "grad_norm": 0.8921531524431237, "learning_rate": 6.7162066179638286e-06, "loss": 0.5354, "step": 7608 }, { "epoch": 0.62, "grad_norm": 0.8902878385006948, "learning_rate": 6.713720240615982e-06, "loss": 0.4891, "step": 7609 }, { "epoch": 0.62, "grad_norm": 0.8666985623979152, "learning_rate": 6.711234091004404e-06, "loss": 0.4978, "step": 7610 }, { "epoch": 0.62, "grad_norm": 0.8367718524021635, "learning_rate": 6.708748169301389e-06, "loss": 0.5615, "step": 7611 }, { "epoch": 0.62, "grad_norm": 0.9411993131581817, "learning_rate": 6.706262475679205e-06, "loss": 0.5926, "step": 7612 }, { "epoch": 0.62, "grad_norm": 0.9335725128206275, "learning_rate": 6.703777010310111e-06, "loss": 0.5449, "step": 7613 }, { "epoch": 0.62, "grad_norm": 0.9219406710574435, "learning_rate": 6.701291773366347e-06, "loss": 0.5564, "step": 7614 }, { "epoch": 0.62, "grad_norm": 0.9183979061357251, "learning_rate": 6.698806765020136e-06, "loss": 0.5195, "step": 7615 }, { "epoch": 0.62, "grad_norm": 0.9554532988160243, "learning_rate": 6.696321985443688e-06, "loss": 0.5166, "step": 7616 }, { "epoch": 0.62, "grad_norm": 0.8800060318983304, "learning_rate": 6.693837434809199e-06, "loss": 0.4841, "step": 7617 }, { "epoch": 0.62, "grad_norm": 0.9066257671231671, "learning_rate": 6.691353113288839e-06, "loss": 0.4821, "step": 7618 }, { "epoch": 0.62, "grad_norm": 0.8696221704487445, "learning_rate": 6.688869021054773e-06, "loss": 0.4696, "step": 7619 }, { "epoch": 0.62, "grad_norm": 0.810305875465549, "learning_rate": 6.686385158279151e-06, "loss": 0.4334, "step": 7620 }, { "epoch": 0.62, "grad_norm": 0.9236041534274468, "learning_rate": 6.683901525134096e-06, "loss": 0.5369, "step": 7621 }, { "epoch": 0.62, "grad_norm": 0.9437786609905792, "learning_rate": 6.681418121791725e-06, "loss": 0.525, "step": 7622 }, { "epoch": 0.62, "grad_norm": 0.9462694496156745, "learning_rate": 6.678934948424134e-06, "loss": 0.4898, "step": 7623 }, { "epoch": 0.62, "grad_norm": 0.9412840607698836, "learning_rate": 6.6764520052034054e-06, "loss": 0.5509, "step": 7624 }, { "epoch": 0.62, "grad_norm": 0.8845126699011401, "learning_rate": 6.673969292301604e-06, "loss": 0.4877, "step": 7625 }, { "epoch": 0.62, "grad_norm": 0.8569357062286083, "learning_rate": 6.6714868098907825e-06, "loss": 0.5046, "step": 7626 }, { "epoch": 0.62, "grad_norm": 0.9405002337042057, "learning_rate": 6.6690045581429705e-06, "loss": 0.5373, "step": 7627 }, { "epoch": 0.62, "grad_norm": 0.8880358306525056, "learning_rate": 6.666522537230189e-06, "loss": 0.5047, "step": 7628 }, { "epoch": 0.62, "grad_norm": 0.9598858005780319, "learning_rate": 6.664040747324437e-06, "loss": 0.5166, "step": 7629 }, { "epoch": 0.62, "grad_norm": 0.8742479010742386, "learning_rate": 6.661559188597706e-06, "loss": 0.5402, "step": 7630 }, { "epoch": 0.62, "grad_norm": 0.894172077733656, "learning_rate": 6.659077861221959e-06, "loss": 0.4528, "step": 7631 }, { "epoch": 0.62, "grad_norm": 0.8451101514996602, "learning_rate": 6.656596765369153e-06, "loss": 0.4539, "step": 7632 }, { "epoch": 0.62, "grad_norm": 0.9342223311005059, "learning_rate": 6.654115901211229e-06, "loss": 0.5558, "step": 7633 }, { "epoch": 0.62, "grad_norm": 0.9352876875867336, "learning_rate": 6.651635268920101e-06, "loss": 0.5217, "step": 7634 }, { "epoch": 0.62, "grad_norm": 0.9387669753786152, "learning_rate": 6.64915486866768e-06, "loss": 0.4883, "step": 7635 }, { "epoch": 0.62, "grad_norm": 0.7909913569761613, "learning_rate": 6.646674700625857e-06, "loss": 0.4587, "step": 7636 }, { "epoch": 0.62, "grad_norm": 0.940002808704583, "learning_rate": 6.644194764966499e-06, "loss": 0.5573, "step": 7637 }, { "epoch": 0.62, "grad_norm": 0.8798200273870914, "learning_rate": 6.641715061861469e-06, "loss": 0.5314, "step": 7638 }, { "epoch": 0.62, "grad_norm": 0.9118097465638171, "learning_rate": 6.639235591482608e-06, "loss": 0.4951, "step": 7639 }, { "epoch": 0.62, "grad_norm": 0.9265393540958253, "learning_rate": 6.636756354001737e-06, "loss": 0.5746, "step": 7640 }, { "epoch": 0.62, "grad_norm": 0.8996667634118599, "learning_rate": 6.6342773495906675e-06, "loss": 0.5058, "step": 7641 }, { "epoch": 0.62, "grad_norm": 0.9338778709815375, "learning_rate": 6.631798578421195e-06, "loss": 0.5082, "step": 7642 }, { "epoch": 0.62, "grad_norm": 0.8415158595072395, "learning_rate": 6.62932004066509e-06, "loss": 0.467, "step": 7643 }, { "epoch": 0.62, "grad_norm": 0.9406207341615731, "learning_rate": 6.626841736494119e-06, "loss": 0.551, "step": 7644 }, { "epoch": 0.62, "grad_norm": 0.9956040687979807, "learning_rate": 6.624363666080021e-06, "loss": 0.5501, "step": 7645 }, { "epoch": 0.62, "grad_norm": 0.984348172818565, "learning_rate": 6.62188582959453e-06, "loss": 0.5825, "step": 7646 }, { "epoch": 0.62, "grad_norm": 0.8693191073068617, "learning_rate": 6.619408227209352e-06, "loss": 0.5317, "step": 7647 }, { "epoch": 0.62, "grad_norm": 0.9489546286652094, "learning_rate": 6.616930859096185e-06, "loss": 0.5389, "step": 7648 }, { "epoch": 0.62, "grad_norm": 0.9362386586362307, "learning_rate": 6.61445372542671e-06, "loss": 0.4964, "step": 7649 }, { "epoch": 0.62, "grad_norm": 0.8275225693781739, "learning_rate": 6.61197682637259e-06, "loss": 0.5144, "step": 7650 }, { "epoch": 0.62, "grad_norm": 0.8801001351782364, "learning_rate": 6.609500162105469e-06, "loss": 0.5059, "step": 7651 }, { "epoch": 0.62, "grad_norm": 0.8904305060344985, "learning_rate": 6.60702373279698e-06, "loss": 0.5316, "step": 7652 }, { "epoch": 0.62, "grad_norm": 0.8565058285424657, "learning_rate": 6.6045475386187376e-06, "loss": 0.4461, "step": 7653 }, { "epoch": 0.62, "grad_norm": 0.9101220847706999, "learning_rate": 6.602071579742337e-06, "loss": 0.5361, "step": 7654 }, { "epoch": 0.62, "grad_norm": 0.9587323995861344, "learning_rate": 6.599595856339363e-06, "loss": 0.4869, "step": 7655 }, { "epoch": 0.62, "grad_norm": 0.976018429112491, "learning_rate": 6.597120368581382e-06, "loss": 0.518, "step": 7656 }, { "epoch": 0.62, "grad_norm": 0.7956454072227547, "learning_rate": 6.594645116639939e-06, "loss": 0.4691, "step": 7657 }, { "epoch": 0.62, "grad_norm": 0.9794199560151343, "learning_rate": 6.592170100686568e-06, "loss": 0.5548, "step": 7658 }, { "epoch": 0.62, "grad_norm": 0.9088047741398635, "learning_rate": 6.5896953208927886e-06, "loss": 0.5546, "step": 7659 }, { "epoch": 0.62, "grad_norm": 0.8844084202928726, "learning_rate": 6.587220777430097e-06, "loss": 0.5246, "step": 7660 }, { "epoch": 0.62, "grad_norm": 0.9737946962118639, "learning_rate": 6.584746470469978e-06, "loss": 0.4907, "step": 7661 }, { "epoch": 0.62, "grad_norm": 0.8894751424608862, "learning_rate": 6.582272400183901e-06, "loss": 0.5086, "step": 7662 }, { "epoch": 0.62, "grad_norm": 0.9085894752314807, "learning_rate": 6.579798566743314e-06, "loss": 0.4848, "step": 7663 }, { "epoch": 0.62, "grad_norm": 0.8982538680909244, "learning_rate": 6.577324970319652e-06, "loss": 0.5328, "step": 7664 }, { "epoch": 0.62, "grad_norm": 0.8584185228163657, "learning_rate": 6.574851611084335e-06, "loss": 0.5389, "step": 7665 }, { "epoch": 0.62, "grad_norm": 0.9102759729627776, "learning_rate": 6.572378489208762e-06, "loss": 0.5237, "step": 7666 }, { "epoch": 0.62, "grad_norm": 0.8677201197600535, "learning_rate": 6.569905604864319e-06, "loss": 0.4592, "step": 7667 }, { "epoch": 0.62, "grad_norm": 1.2005174417117501, "learning_rate": 6.567432958222379e-06, "loss": 0.537, "step": 7668 }, { "epoch": 0.62, "grad_norm": 0.8504096257899675, "learning_rate": 6.564960549454285e-06, "loss": 0.5129, "step": 7669 }, { "epoch": 0.62, "grad_norm": 0.9084205797953231, "learning_rate": 6.562488378731381e-06, "loss": 0.5082, "step": 7670 }, { "epoch": 0.62, "grad_norm": 0.961813736450623, "learning_rate": 6.560016446224983e-06, "loss": 0.5459, "step": 7671 }, { "epoch": 0.62, "grad_norm": 0.8893401843439549, "learning_rate": 6.557544752106392e-06, "loss": 0.5062, "step": 7672 }, { "epoch": 0.62, "grad_norm": 0.8730833336776954, "learning_rate": 6.5550732965468985e-06, "loss": 0.4991, "step": 7673 }, { "epoch": 0.62, "grad_norm": 0.96633050431716, "learning_rate": 6.552602079717772e-06, "loss": 0.5265, "step": 7674 }, { "epoch": 0.62, "grad_norm": 0.9610131291886921, "learning_rate": 6.550131101790258e-06, "loss": 0.5188, "step": 7675 }, { "epoch": 0.62, "grad_norm": 0.9082109466001308, "learning_rate": 6.547660362935603e-06, "loss": 0.542, "step": 7676 }, { "epoch": 0.62, "grad_norm": 0.9664922066319224, "learning_rate": 6.545189863325023e-06, "loss": 0.5302, "step": 7677 }, { "epoch": 0.62, "grad_norm": 0.8572812606217288, "learning_rate": 6.542719603129716e-06, "loss": 0.5131, "step": 7678 }, { "epoch": 0.62, "grad_norm": 1.0805563703213321, "learning_rate": 6.540249582520879e-06, "loss": 0.5196, "step": 7679 }, { "epoch": 0.62, "grad_norm": 0.9355592376625326, "learning_rate": 6.537779801669677e-06, "loss": 0.51, "step": 7680 }, { "epoch": 0.62, "grad_norm": 1.0094796867058717, "learning_rate": 6.535310260747259e-06, "loss": 0.5075, "step": 7681 }, { "epoch": 0.62, "grad_norm": 0.9183654901634732, "learning_rate": 6.5328409599247715e-06, "loss": 0.4783, "step": 7682 }, { "epoch": 0.62, "grad_norm": 0.918630109918347, "learning_rate": 6.530371899373329e-06, "loss": 0.5179, "step": 7683 }, { "epoch": 0.62, "grad_norm": 0.8697336708812505, "learning_rate": 6.527903079264033e-06, "loss": 0.4734, "step": 7684 }, { "epoch": 0.62, "grad_norm": 1.0264393557260938, "learning_rate": 6.525434499767978e-06, "loss": 0.5394, "step": 7685 }, { "epoch": 0.62, "grad_norm": 0.8638538677983133, "learning_rate": 6.52296616105623e-06, "loss": 0.4703, "step": 7686 }, { "epoch": 0.62, "grad_norm": 0.9552971643499772, "learning_rate": 6.5204980632998394e-06, "loss": 0.5097, "step": 7687 }, { "epoch": 0.62, "grad_norm": 0.8846975559947114, "learning_rate": 6.5180302066698495e-06, "loss": 0.5423, "step": 7688 }, { "epoch": 0.62, "grad_norm": 0.8952890587377771, "learning_rate": 6.515562591337279e-06, "loss": 0.5103, "step": 7689 }, { "epoch": 0.62, "grad_norm": 0.9432984337915332, "learning_rate": 6.513095217473127e-06, "loss": 0.5504, "step": 7690 }, { "epoch": 0.63, "grad_norm": 0.9411424831312857, "learning_rate": 6.510628085248385e-06, "loss": 0.4815, "step": 7691 }, { "epoch": 0.63, "grad_norm": 0.93391599034221, "learning_rate": 6.508161194834024e-06, "loss": 0.5194, "step": 7692 }, { "epoch": 0.63, "grad_norm": 0.8209924490621792, "learning_rate": 6.505694546400989e-06, "loss": 0.4406, "step": 7693 }, { "epoch": 0.63, "grad_norm": 0.9632496572429818, "learning_rate": 6.503228140120228e-06, "loss": 0.5211, "step": 7694 }, { "epoch": 0.63, "grad_norm": 0.9636864179124074, "learning_rate": 6.500761976162655e-06, "loss": 0.544, "step": 7695 }, { "epoch": 0.63, "grad_norm": 0.9628458753847853, "learning_rate": 6.498296054699169e-06, "loss": 0.4921, "step": 7696 }, { "epoch": 0.63, "grad_norm": 0.8605164541806277, "learning_rate": 6.495830375900665e-06, "loss": 0.5045, "step": 7697 }, { "epoch": 0.63, "grad_norm": 0.8357418989520136, "learning_rate": 6.493364939938007e-06, "loss": 0.5184, "step": 7698 }, { "epoch": 0.63, "grad_norm": 0.8775988577494073, "learning_rate": 6.490899746982045e-06, "loss": 0.5773, "step": 7699 }, { "epoch": 0.63, "grad_norm": 0.8507036855449437, "learning_rate": 6.48843479720362e-06, "loss": 0.5532, "step": 7700 }, { "epoch": 0.63, "grad_norm": 0.885247279447743, "learning_rate": 6.48597009077355e-06, "loss": 0.4917, "step": 7701 }, { "epoch": 0.63, "grad_norm": 0.8213151157416119, "learning_rate": 6.483505627862632e-06, "loss": 0.4146, "step": 7702 }, { "epoch": 0.63, "grad_norm": 0.9521487130066696, "learning_rate": 6.481041408641659e-06, "loss": 0.4853, "step": 7703 }, { "epoch": 0.63, "grad_norm": 0.9047442954374255, "learning_rate": 6.478577433281394e-06, "loss": 0.501, "step": 7704 }, { "epoch": 0.63, "grad_norm": 0.9123953950706234, "learning_rate": 6.476113701952587e-06, "loss": 0.505, "step": 7705 }, { "epoch": 0.63, "grad_norm": 0.8918355614489641, "learning_rate": 6.473650214825979e-06, "loss": 0.5052, "step": 7706 }, { "epoch": 0.63, "grad_norm": 0.8460692223732004, "learning_rate": 6.4711869720722804e-06, "loss": 0.5007, "step": 7707 }, { "epoch": 0.63, "grad_norm": 0.9514163634580783, "learning_rate": 6.468723973862194e-06, "loss": 0.4888, "step": 7708 }, { "epoch": 0.63, "grad_norm": 0.8407870858699927, "learning_rate": 6.466261220366406e-06, "loss": 0.5123, "step": 7709 }, { "epoch": 0.63, "grad_norm": 1.0438409207420924, "learning_rate": 6.463798711755582e-06, "loss": 0.5079, "step": 7710 }, { "epoch": 0.63, "grad_norm": 0.8393621192093546, "learning_rate": 6.461336448200366e-06, "loss": 0.5016, "step": 7711 }, { "epoch": 0.63, "grad_norm": 0.8504075861887, "learning_rate": 6.458874429871399e-06, "loss": 0.5265, "step": 7712 }, { "epoch": 0.63, "grad_norm": 0.8860413988426402, "learning_rate": 6.456412656939293e-06, "loss": 0.5535, "step": 7713 }, { "epoch": 0.63, "grad_norm": 1.0156180242304687, "learning_rate": 6.453951129574644e-06, "loss": 0.5815, "step": 7714 }, { "epoch": 0.63, "grad_norm": 1.0120525970975776, "learning_rate": 6.451489847948039e-06, "loss": 0.5458, "step": 7715 }, { "epoch": 0.63, "grad_norm": 0.9642455156600728, "learning_rate": 6.44902881223004e-06, "loss": 0.5594, "step": 7716 }, { "epoch": 0.63, "grad_norm": 0.8316070911321247, "learning_rate": 6.446568022591192e-06, "loss": 0.4743, "step": 7717 }, { "epoch": 0.63, "grad_norm": 0.9448781886763189, "learning_rate": 6.4441074792020305e-06, "loss": 0.5924, "step": 7718 }, { "epoch": 0.63, "grad_norm": 0.8313716764000463, "learning_rate": 6.4416471822330684e-06, "loss": 0.4905, "step": 7719 }, { "epoch": 0.63, "grad_norm": 0.9063248870520652, "learning_rate": 6.439187131854796e-06, "loss": 0.5645, "step": 7720 }, { "epoch": 0.63, "grad_norm": 0.9106218132909022, "learning_rate": 6.436727328237699e-06, "loss": 0.5104, "step": 7721 }, { "epoch": 0.63, "grad_norm": 0.9383859538169318, "learning_rate": 6.43426777155224e-06, "loss": 0.5514, "step": 7722 }, { "epoch": 0.63, "grad_norm": 0.8098969358101791, "learning_rate": 6.431808461968856e-06, "loss": 0.4979, "step": 7723 }, { "epoch": 0.63, "grad_norm": 0.9013036702084408, "learning_rate": 6.429349399657985e-06, "loss": 0.4835, "step": 7724 }, { "epoch": 0.63, "grad_norm": 0.939866858909316, "learning_rate": 6.4268905847900335e-06, "loss": 0.505, "step": 7725 }, { "epoch": 0.63, "grad_norm": 0.84843342037553, "learning_rate": 6.424432017535391e-06, "loss": 0.4922, "step": 7726 }, { "epoch": 0.63, "grad_norm": 0.9328549244243041, "learning_rate": 6.421973698064443e-06, "loss": 0.5338, "step": 7727 }, { "epoch": 0.63, "grad_norm": 0.9802417980606807, "learning_rate": 6.419515626547543e-06, "loss": 0.5675, "step": 7728 }, { "epoch": 0.63, "grad_norm": 0.8478739144926289, "learning_rate": 6.41705780315503e-06, "loss": 0.4788, "step": 7729 }, { "epoch": 0.63, "grad_norm": 0.8969066595806814, "learning_rate": 6.414600228057237e-06, "loss": 0.5265, "step": 7730 }, { "epoch": 0.63, "grad_norm": 0.961301932687817, "learning_rate": 6.41214290142447e-06, "loss": 0.5468, "step": 7731 }, { "epoch": 0.63, "grad_norm": 0.9124040441320103, "learning_rate": 6.409685823427012e-06, "loss": 0.5271, "step": 7732 }, { "epoch": 0.63, "grad_norm": 0.9247331515893926, "learning_rate": 6.407228994235146e-06, "loss": 0.4851, "step": 7733 }, { "epoch": 0.63, "grad_norm": 0.8764915994820545, "learning_rate": 6.404772414019124e-06, "loss": 0.5494, "step": 7734 }, { "epoch": 0.63, "grad_norm": 0.8456362246353507, "learning_rate": 6.40231608294918e-06, "loss": 0.4967, "step": 7735 }, { "epoch": 0.63, "grad_norm": 0.8166377742541707, "learning_rate": 6.399860001195546e-06, "loss": 0.4253, "step": 7736 }, { "epoch": 0.63, "grad_norm": 0.8533838510419213, "learning_rate": 6.397404168928418e-06, "loss": 0.5479, "step": 7737 }, { "epoch": 0.63, "grad_norm": 0.85629970661516, "learning_rate": 6.394948586317984e-06, "loss": 0.4668, "step": 7738 }, { "epoch": 0.63, "grad_norm": 0.9226611146431447, "learning_rate": 6.392493253534418e-06, "loss": 0.4656, "step": 7739 }, { "epoch": 0.63, "grad_norm": 0.9290966735283805, "learning_rate": 6.39003817074787e-06, "loss": 0.5628, "step": 7740 }, { "epoch": 0.63, "grad_norm": 0.9762580395364672, "learning_rate": 6.387583338128471e-06, "loss": 0.5663, "step": 7741 }, { "epoch": 0.63, "grad_norm": 0.9107305782629377, "learning_rate": 6.385128755846346e-06, "loss": 0.5754, "step": 7742 }, { "epoch": 0.63, "grad_norm": 0.8881321158916198, "learning_rate": 6.382674424071593e-06, "loss": 0.4683, "step": 7743 }, { "epoch": 0.63, "grad_norm": 0.8133542061284578, "learning_rate": 6.3802203429742884e-06, "loss": 0.4527, "step": 7744 }, { "epoch": 0.63, "grad_norm": 0.8277818013192928, "learning_rate": 6.377766512724508e-06, "loss": 0.4517, "step": 7745 }, { "epoch": 0.63, "grad_norm": 0.8749312544643651, "learning_rate": 6.375312933492295e-06, "loss": 0.493, "step": 7746 }, { "epoch": 0.63, "grad_norm": 0.8712980229852738, "learning_rate": 6.372859605447677e-06, "loss": 0.4727, "step": 7747 }, { "epoch": 0.63, "grad_norm": 0.8990200778286532, "learning_rate": 6.370406528760675e-06, "loss": 0.4291, "step": 7748 }, { "epoch": 0.63, "grad_norm": 0.9048743821787182, "learning_rate": 6.367953703601282e-06, "loss": 0.5218, "step": 7749 }, { "epoch": 0.63, "grad_norm": 0.8505330860159265, "learning_rate": 6.36550113013947e-06, "loss": 0.4798, "step": 7750 }, { "epoch": 0.63, "grad_norm": 0.9402198572741421, "learning_rate": 6.3630488085452105e-06, "loss": 0.5336, "step": 7751 }, { "epoch": 0.63, "grad_norm": 0.9729586284896738, "learning_rate": 6.360596738988443e-06, "loss": 0.5871, "step": 7752 }, { "epoch": 0.63, "grad_norm": 0.8969176170147588, "learning_rate": 6.358144921639089e-06, "loss": 0.549, "step": 7753 }, { "epoch": 0.63, "grad_norm": 0.8885249861439682, "learning_rate": 6.3556933566670656e-06, "loss": 0.5263, "step": 7754 }, { "epoch": 0.63, "grad_norm": 0.9578092824346056, "learning_rate": 6.353242044242261e-06, "loss": 0.5295, "step": 7755 }, { "epoch": 0.63, "grad_norm": 0.8080942797341487, "learning_rate": 6.350790984534543e-06, "loss": 0.4604, "step": 7756 }, { "epoch": 0.63, "grad_norm": 0.9077725823304328, "learning_rate": 6.348340177713776e-06, "loss": 0.4722, "step": 7757 }, { "epoch": 0.63, "grad_norm": 0.9109428713389269, "learning_rate": 6.3458896239497965e-06, "loss": 0.5195, "step": 7758 }, { "epoch": 0.63, "grad_norm": 0.9463265119734536, "learning_rate": 6.343439323412422e-06, "loss": 0.4988, "step": 7759 }, { "epoch": 0.63, "grad_norm": 0.9059028499902411, "learning_rate": 6.340989276271462e-06, "loss": 0.4988, "step": 7760 }, { "epoch": 0.63, "grad_norm": 0.8568743425472681, "learning_rate": 6.3385394826966975e-06, "loss": 0.5055, "step": 7761 }, { "epoch": 0.63, "grad_norm": 0.942925223145349, "learning_rate": 6.336089942857899e-06, "loss": 0.519, "step": 7762 }, { "epoch": 0.63, "grad_norm": 0.9032602036621082, "learning_rate": 6.33364065692482e-06, "loss": 0.4703, "step": 7763 }, { "epoch": 0.63, "grad_norm": 0.8792975094902231, "learning_rate": 6.33119162506719e-06, "loss": 0.4999, "step": 7764 }, { "epoch": 0.63, "grad_norm": 0.9501182269012853, "learning_rate": 6.3287428474547256e-06, "loss": 0.5199, "step": 7765 }, { "epoch": 0.63, "grad_norm": 0.9448877264739883, "learning_rate": 6.326294324257127e-06, "loss": 0.4954, "step": 7766 }, { "epoch": 0.63, "grad_norm": 0.8565240003077423, "learning_rate": 6.32384605564407e-06, "loss": 0.4208, "step": 7767 }, { "epoch": 0.63, "grad_norm": 0.8486055449790287, "learning_rate": 6.321398041785225e-06, "loss": 0.5115, "step": 7768 }, { "epoch": 0.63, "grad_norm": 0.9117738135586135, "learning_rate": 6.318950282850231e-06, "loss": 0.5145, "step": 7769 }, { "epoch": 0.63, "grad_norm": 0.8913146224618086, "learning_rate": 6.3165027790087156e-06, "loss": 0.4775, "step": 7770 }, { "epoch": 0.63, "grad_norm": 0.7365979070890599, "learning_rate": 6.3140555304302915e-06, "loss": 0.3933, "step": 7771 }, { "epoch": 0.63, "grad_norm": 0.9768804673095871, "learning_rate": 6.311608537284553e-06, "loss": 0.4881, "step": 7772 }, { "epoch": 0.63, "grad_norm": 1.016542017230885, "learning_rate": 6.309161799741064e-06, "loss": 0.526, "step": 7773 }, { "epoch": 0.63, "grad_norm": 0.8925076055403189, "learning_rate": 6.306715317969394e-06, "loss": 0.4922, "step": 7774 }, { "epoch": 0.63, "grad_norm": 0.8267041800491832, "learning_rate": 6.304269092139077e-06, "loss": 0.4777, "step": 7775 }, { "epoch": 0.63, "grad_norm": 1.0650334644803787, "learning_rate": 6.3018231224196305e-06, "loss": 0.5939, "step": 7776 }, { "epoch": 0.63, "grad_norm": 0.8943182460099235, "learning_rate": 6.299377408980563e-06, "loss": 0.5152, "step": 7777 }, { "epoch": 0.63, "grad_norm": 0.888006560881075, "learning_rate": 6.296931951991358e-06, "loss": 0.4804, "step": 7778 }, { "epoch": 0.63, "grad_norm": 0.9545552190672068, "learning_rate": 6.2944867516214845e-06, "loss": 0.5142, "step": 7779 }, { "epoch": 0.63, "grad_norm": 0.9929717798897725, "learning_rate": 6.292041808040393e-06, "loss": 0.5967, "step": 7780 }, { "epoch": 0.63, "grad_norm": 0.8714173763064299, "learning_rate": 6.289597121417514e-06, "loss": 0.4446, "step": 7781 }, { "epoch": 0.63, "grad_norm": 0.9078441721534038, "learning_rate": 6.287152691922264e-06, "loss": 0.5224, "step": 7782 }, { "epoch": 0.63, "grad_norm": 0.996162174356343, "learning_rate": 6.284708519724041e-06, "loss": 0.5057, "step": 7783 }, { "epoch": 0.63, "grad_norm": 0.8774138005700622, "learning_rate": 6.2822646049922185e-06, "loss": 0.4563, "step": 7784 }, { "epoch": 0.63, "grad_norm": 0.8856112477861796, "learning_rate": 6.279820947896163e-06, "loss": 0.5064, "step": 7785 }, { "epoch": 0.63, "grad_norm": 0.9295493307030581, "learning_rate": 6.277377548605217e-06, "loss": 0.5247, "step": 7786 }, { "epoch": 0.63, "grad_norm": 0.8765121315853294, "learning_rate": 6.274934407288704e-06, "loss": 0.4566, "step": 7787 }, { "epoch": 0.63, "grad_norm": 0.9399025860311087, "learning_rate": 6.2724915241159315e-06, "loss": 0.5375, "step": 7788 }, { "epoch": 0.63, "grad_norm": 0.8557716593867082, "learning_rate": 6.2700488992561925e-06, "loss": 0.4419, "step": 7789 }, { "epoch": 0.63, "grad_norm": 0.9658120983337019, "learning_rate": 6.267606532878754e-06, "loss": 0.5243, "step": 7790 }, { "epoch": 0.63, "grad_norm": 0.958679780460244, "learning_rate": 6.265164425152872e-06, "loss": 0.5179, "step": 7791 }, { "epoch": 0.63, "grad_norm": 0.9658173133446795, "learning_rate": 6.262722576247785e-06, "loss": 0.5464, "step": 7792 }, { "epoch": 0.63, "grad_norm": 0.8716878684699769, "learning_rate": 6.260280986332707e-06, "loss": 0.4832, "step": 7793 }, { "epoch": 0.63, "grad_norm": 0.9349477383809048, "learning_rate": 6.257839655576839e-06, "loss": 0.4898, "step": 7794 }, { "epoch": 0.63, "grad_norm": 0.9180139743849978, "learning_rate": 6.255398584149366e-06, "loss": 0.55, "step": 7795 }, { "epoch": 0.63, "grad_norm": 0.9231079645761651, "learning_rate": 6.252957772219446e-06, "loss": 0.5134, "step": 7796 }, { "epoch": 0.63, "grad_norm": 0.9223827123007751, "learning_rate": 6.25051721995623e-06, "loss": 0.5552, "step": 7797 }, { "epoch": 0.63, "grad_norm": 1.0099215371331756, "learning_rate": 6.248076927528845e-06, "loss": 0.5718, "step": 7798 }, { "epoch": 0.63, "grad_norm": 0.8269613087050963, "learning_rate": 6.245636895106403e-06, "loss": 0.4471, "step": 7799 }, { "epoch": 0.63, "grad_norm": 0.8713953691636022, "learning_rate": 6.243197122857991e-06, "loss": 0.5009, "step": 7800 }, { "epoch": 0.63, "grad_norm": 1.1097324395029895, "learning_rate": 6.240757610952688e-06, "loss": 0.4947, "step": 7801 }, { "epoch": 0.63, "grad_norm": 0.9103097554000824, "learning_rate": 6.238318359559548e-06, "loss": 0.4672, "step": 7802 }, { "epoch": 0.63, "grad_norm": 0.9089885001686926, "learning_rate": 6.2358793688476085e-06, "loss": 0.4802, "step": 7803 }, { "epoch": 0.63, "grad_norm": 0.9365668706441963, "learning_rate": 6.233440638985889e-06, "loss": 0.509, "step": 7804 }, { "epoch": 0.63, "grad_norm": 1.0305363078095187, "learning_rate": 6.231002170143395e-06, "loss": 0.5831, "step": 7805 }, { "epoch": 0.63, "grad_norm": 0.8677823936257921, "learning_rate": 6.228563962489106e-06, "loss": 0.5054, "step": 7806 }, { "epoch": 0.63, "grad_norm": 0.8956157333665741, "learning_rate": 6.226126016191989e-06, "loss": 0.4789, "step": 7807 }, { "epoch": 0.63, "grad_norm": 0.8960203160039782, "learning_rate": 6.223688331420992e-06, "loss": 0.4975, "step": 7808 }, { "epoch": 0.63, "grad_norm": 0.809149890697131, "learning_rate": 6.221250908345043e-06, "loss": 0.4941, "step": 7809 }, { "epoch": 0.63, "grad_norm": 0.943687691157147, "learning_rate": 6.218813747133054e-06, "loss": 0.4947, "step": 7810 }, { "epoch": 0.63, "grad_norm": 0.8036480414231428, "learning_rate": 6.2163768479539224e-06, "loss": 0.4487, "step": 7811 }, { "epoch": 0.63, "grad_norm": 0.9941996127619828, "learning_rate": 6.2139402109765145e-06, "loss": 0.4801, "step": 7812 }, { "epoch": 0.63, "grad_norm": 0.9008355927023047, "learning_rate": 6.211503836369695e-06, "loss": 0.4602, "step": 7813 }, { "epoch": 0.64, "grad_norm": 0.9154622606923055, "learning_rate": 6.209067724302298e-06, "loss": 0.5009, "step": 7814 }, { "epoch": 0.64, "grad_norm": 0.8444425530184236, "learning_rate": 6.206631874943142e-06, "loss": 0.4654, "step": 7815 }, { "epoch": 0.64, "grad_norm": 0.8723556650932853, "learning_rate": 6.204196288461037e-06, "loss": 0.4781, "step": 7816 }, { "epoch": 0.64, "grad_norm": 0.868682917707013, "learning_rate": 6.2017609650247616e-06, "loss": 0.4906, "step": 7817 }, { "epoch": 0.64, "grad_norm": 0.9050595246873593, "learning_rate": 6.19932590480308e-06, "loss": 0.5166, "step": 7818 }, { "epoch": 0.64, "grad_norm": 0.9716180252733098, "learning_rate": 6.196891107964744e-06, "loss": 0.5476, "step": 7819 }, { "epoch": 0.64, "grad_norm": 0.8566243321600501, "learning_rate": 6.194456574678481e-06, "loss": 0.5152, "step": 7820 }, { "epoch": 0.64, "grad_norm": 0.9824046976752693, "learning_rate": 6.192022305112999e-06, "loss": 0.544, "step": 7821 }, { "epoch": 0.64, "grad_norm": 0.9305953427523077, "learning_rate": 6.189588299436997e-06, "loss": 0.5713, "step": 7822 }, { "epoch": 0.64, "grad_norm": 0.8070106072056493, "learning_rate": 6.187154557819146e-06, "loss": 0.4906, "step": 7823 }, { "epoch": 0.64, "grad_norm": 0.9498681934374452, "learning_rate": 6.184721080428098e-06, "loss": 0.5671, "step": 7824 }, { "epoch": 0.64, "grad_norm": 0.820186983805444, "learning_rate": 6.1822878674324995e-06, "loss": 0.4313, "step": 7825 }, { "epoch": 0.64, "grad_norm": 0.9463991323254897, "learning_rate": 6.179854919000965e-06, "loss": 0.507, "step": 7826 }, { "epoch": 0.64, "grad_norm": 0.9640186853955188, "learning_rate": 6.177422235302093e-06, "loss": 0.5476, "step": 7827 }, { "epoch": 0.64, "grad_norm": 0.9492051998384852, "learning_rate": 6.174989816504472e-06, "loss": 0.4895, "step": 7828 }, { "epoch": 0.64, "grad_norm": 0.8160052673463781, "learning_rate": 6.172557662776665e-06, "loss": 0.4485, "step": 7829 }, { "epoch": 0.64, "grad_norm": 0.832072497682488, "learning_rate": 6.170125774287212e-06, "loss": 0.4871, "step": 7830 }, { "epoch": 0.64, "grad_norm": 0.9821687629742838, "learning_rate": 6.167694151204651e-06, "loss": 0.5267, "step": 7831 }, { "epoch": 0.64, "grad_norm": 0.8475254419513637, "learning_rate": 6.165262793697486e-06, "loss": 0.4286, "step": 7832 }, { "epoch": 0.64, "grad_norm": 1.0397427201090619, "learning_rate": 6.162831701934203e-06, "loss": 0.5719, "step": 7833 }, { "epoch": 0.64, "grad_norm": 0.9163483237401832, "learning_rate": 6.160400876083283e-06, "loss": 0.4926, "step": 7834 }, { "epoch": 0.64, "grad_norm": 0.8156428618841962, "learning_rate": 6.157970316313179e-06, "loss": 0.4452, "step": 7835 }, { "epoch": 0.64, "grad_norm": 0.8687730375391427, "learning_rate": 6.155540022792319e-06, "loss": 0.543, "step": 7836 }, { "epoch": 0.64, "grad_norm": 0.7983037783374483, "learning_rate": 6.153109995689129e-06, "loss": 0.4492, "step": 7837 }, { "epoch": 0.64, "grad_norm": 0.9766388620578832, "learning_rate": 6.150680235172004e-06, "loss": 0.4917, "step": 7838 }, { "epoch": 0.64, "grad_norm": 0.8463802416443318, "learning_rate": 6.148250741409321e-06, "loss": 0.4741, "step": 7839 }, { "epoch": 0.64, "grad_norm": 0.894432537815772, "learning_rate": 6.145821514569449e-06, "loss": 0.4996, "step": 7840 }, { "epoch": 0.64, "grad_norm": 0.8991657851225787, "learning_rate": 6.143392554820726e-06, "loss": 0.4578, "step": 7841 }, { "epoch": 0.64, "grad_norm": 0.9054030277853536, "learning_rate": 6.140963862331476e-06, "loss": 0.4742, "step": 7842 }, { "epoch": 0.64, "grad_norm": 0.8797646494203251, "learning_rate": 6.13853543727001e-06, "loss": 0.5034, "step": 7843 }, { "epoch": 0.64, "grad_norm": 0.9484979833940284, "learning_rate": 6.1361072798046155e-06, "loss": 0.513, "step": 7844 }, { "epoch": 0.64, "grad_norm": 0.9038890293909475, "learning_rate": 6.1336793901035526e-06, "loss": 0.4885, "step": 7845 }, { "epoch": 0.64, "grad_norm": 0.9380943990182781, "learning_rate": 6.131251768335083e-06, "loss": 0.5606, "step": 7846 }, { "epoch": 0.64, "grad_norm": 0.8525723409370285, "learning_rate": 6.128824414667436e-06, "loss": 0.481, "step": 7847 }, { "epoch": 0.64, "grad_norm": 0.8900496438478617, "learning_rate": 6.126397329268817e-06, "loss": 0.5184, "step": 7848 }, { "epoch": 0.64, "grad_norm": 0.924435430812342, "learning_rate": 6.123970512307433e-06, "loss": 0.5249, "step": 7849 }, { "epoch": 0.64, "grad_norm": 1.0436953649832952, "learning_rate": 6.121543963951453e-06, "loss": 0.5956, "step": 7850 }, { "epoch": 0.64, "grad_norm": 0.967753485718807, "learning_rate": 6.119117684369033e-06, "loss": 0.5847, "step": 7851 }, { "epoch": 0.64, "grad_norm": 0.9268366254061351, "learning_rate": 6.116691673728319e-06, "loss": 0.5063, "step": 7852 }, { "epoch": 0.64, "grad_norm": 1.0184125987174175, "learning_rate": 6.114265932197427e-06, "loss": 0.5789, "step": 7853 }, { "epoch": 0.64, "grad_norm": 0.9061381924635158, "learning_rate": 6.111840459944456e-06, "loss": 0.4939, "step": 7854 }, { "epoch": 0.64, "grad_norm": 0.8112643985533705, "learning_rate": 6.109415257137496e-06, "loss": 0.4545, "step": 7855 }, { "epoch": 0.64, "grad_norm": 0.8822508528963398, "learning_rate": 6.1069903239446085e-06, "loss": 0.4922, "step": 7856 }, { "epoch": 0.64, "grad_norm": 0.9376464388034186, "learning_rate": 6.104565660533834e-06, "loss": 0.5353, "step": 7857 }, { "epoch": 0.64, "grad_norm": 1.1579114301426976, "learning_rate": 6.102141267073207e-06, "loss": 0.586, "step": 7858 }, { "epoch": 0.64, "grad_norm": 0.9165096943932053, "learning_rate": 6.099717143730735e-06, "loss": 0.4738, "step": 7859 }, { "epoch": 0.64, "grad_norm": 0.9121514176411212, "learning_rate": 6.0972932906744e-06, "loss": 0.5212, "step": 7860 }, { "epoch": 0.64, "grad_norm": 0.9866894805187648, "learning_rate": 6.094869708072182e-06, "loss": 0.5184, "step": 7861 }, { "epoch": 0.64, "grad_norm": 0.9143118932789693, "learning_rate": 6.092446396092029e-06, "loss": 0.5419, "step": 7862 }, { "epoch": 0.64, "grad_norm": 0.8796210889513394, "learning_rate": 6.0900233549018715e-06, "loss": 0.5014, "step": 7863 }, { "epoch": 0.64, "grad_norm": 0.9600479927730894, "learning_rate": 6.087600584669631e-06, "loss": 0.5166, "step": 7864 }, { "epoch": 0.64, "grad_norm": 0.8497125065287534, "learning_rate": 6.0851780855632005e-06, "loss": 0.4715, "step": 7865 }, { "epoch": 0.64, "grad_norm": 0.8294242835797552, "learning_rate": 6.082755857750451e-06, "loss": 0.4082, "step": 7866 }, { "epoch": 0.64, "grad_norm": 0.8411002825465396, "learning_rate": 6.080333901399252e-06, "loss": 0.4416, "step": 7867 }, { "epoch": 0.64, "grad_norm": 1.0470602635891193, "learning_rate": 6.077912216677435e-06, "loss": 0.5246, "step": 7868 }, { "epoch": 0.64, "grad_norm": 0.9070343840890479, "learning_rate": 6.075490803752818e-06, "loss": 0.4806, "step": 7869 }, { "epoch": 0.64, "grad_norm": 0.9051386050346829, "learning_rate": 6.073069662793213e-06, "loss": 0.4905, "step": 7870 }, { "epoch": 0.64, "grad_norm": 0.8752448696259255, "learning_rate": 6.070648793966396e-06, "loss": 0.4699, "step": 7871 }, { "epoch": 0.64, "grad_norm": 0.8925887090586246, "learning_rate": 6.068228197440129e-06, "loss": 0.5167, "step": 7872 }, { "epoch": 0.64, "grad_norm": 0.964748588002339, "learning_rate": 6.065807873382163e-06, "loss": 0.5169, "step": 7873 }, { "epoch": 0.64, "grad_norm": 0.9275308110947283, "learning_rate": 6.063387821960224e-06, "loss": 0.5562, "step": 7874 }, { "epoch": 0.64, "grad_norm": 0.8429438517721336, "learning_rate": 6.060968043342013e-06, "loss": 0.4673, "step": 7875 }, { "epoch": 0.64, "grad_norm": 0.8850378488006732, "learning_rate": 6.058548537695225e-06, "loss": 0.4752, "step": 7876 }, { "epoch": 0.64, "grad_norm": 0.9524802145799047, "learning_rate": 6.056129305187528e-06, "loss": 0.5744, "step": 7877 }, { "epoch": 0.64, "grad_norm": 0.9361706523073291, "learning_rate": 6.053710345986568e-06, "loss": 0.6133, "step": 7878 }, { "epoch": 0.64, "grad_norm": 0.9839367384874946, "learning_rate": 6.051291660259984e-06, "loss": 0.5047, "step": 7879 }, { "epoch": 0.64, "grad_norm": 0.7923563256741162, "learning_rate": 6.048873248175387e-06, "loss": 0.4301, "step": 7880 }, { "epoch": 0.64, "grad_norm": 0.8936789644649209, "learning_rate": 6.046455109900364e-06, "loss": 0.4995, "step": 7881 }, { "epoch": 0.64, "grad_norm": 0.868032478797101, "learning_rate": 6.044037245602498e-06, "loss": 0.5239, "step": 7882 }, { "epoch": 0.64, "grad_norm": 0.98929414338559, "learning_rate": 6.041619655449345e-06, "loss": 0.5709, "step": 7883 }, { "epoch": 0.64, "grad_norm": 0.8506950731385884, "learning_rate": 6.039202339608432e-06, "loss": 0.5399, "step": 7884 }, { "epoch": 0.64, "grad_norm": 0.8806249635423963, "learning_rate": 6.03678529824729e-06, "loss": 0.4679, "step": 7885 }, { "epoch": 0.64, "grad_norm": 0.8525185718459264, "learning_rate": 6.03436853153341e-06, "loss": 0.4969, "step": 7886 }, { "epoch": 0.64, "grad_norm": 0.9305266634188015, "learning_rate": 6.031952039634269e-06, "loss": 0.4984, "step": 7887 }, { "epoch": 0.64, "grad_norm": 0.902035425375862, "learning_rate": 6.029535822717336e-06, "loss": 0.443, "step": 7888 }, { "epoch": 0.64, "grad_norm": 0.8470784591374255, "learning_rate": 6.0271198809500495e-06, "loss": 0.5191, "step": 7889 }, { "epoch": 0.64, "grad_norm": 0.974557691623203, "learning_rate": 6.024704214499828e-06, "loss": 0.5018, "step": 7890 }, { "epoch": 0.64, "grad_norm": 0.787087408132767, "learning_rate": 6.02228882353408e-06, "loss": 0.5078, "step": 7891 }, { "epoch": 0.64, "grad_norm": 0.8542335974387377, "learning_rate": 6.019873708220187e-06, "loss": 0.4908, "step": 7892 }, { "epoch": 0.64, "grad_norm": 0.9474423848025191, "learning_rate": 6.0174588687255175e-06, "loss": 0.5605, "step": 7893 }, { "epoch": 0.64, "grad_norm": 0.8575669120750463, "learning_rate": 6.0150443052174165e-06, "loss": 0.5251, "step": 7894 }, { "epoch": 0.64, "grad_norm": 0.8612373472694415, "learning_rate": 6.012630017863207e-06, "loss": 0.4617, "step": 7895 }, { "epoch": 0.64, "grad_norm": 0.8586947151074552, "learning_rate": 6.010216006830204e-06, "loss": 0.4587, "step": 7896 }, { "epoch": 0.64, "grad_norm": 0.8632841159755757, "learning_rate": 6.007802272285693e-06, "loss": 0.5079, "step": 7897 }, { "epoch": 0.64, "grad_norm": 0.8379444445979347, "learning_rate": 6.0053888143969395e-06, "loss": 0.4695, "step": 7898 }, { "epoch": 0.64, "grad_norm": 1.0050526074923891, "learning_rate": 6.002975633331202e-06, "loss": 0.4807, "step": 7899 }, { "epoch": 0.64, "grad_norm": 0.9495555894351163, "learning_rate": 6.000562729255708e-06, "loss": 0.5104, "step": 7900 }, { "epoch": 0.64, "grad_norm": 0.8893561306967496, "learning_rate": 5.998150102337665e-06, "loss": 0.4987, "step": 7901 }, { "epoch": 0.64, "grad_norm": 0.9049187018469663, "learning_rate": 5.995737752744274e-06, "loss": 0.3982, "step": 7902 }, { "epoch": 0.64, "grad_norm": 0.9645879156768508, "learning_rate": 5.9933256806427056e-06, "loss": 0.52, "step": 7903 }, { "epoch": 0.64, "grad_norm": 0.8483830405355759, "learning_rate": 5.990913886200109e-06, "loss": 0.5376, "step": 7904 }, { "epoch": 0.64, "grad_norm": 0.8176645223893816, "learning_rate": 5.9885023695836285e-06, "loss": 0.4675, "step": 7905 }, { "epoch": 0.64, "grad_norm": 0.8683650406947293, "learning_rate": 5.986091130960374e-06, "loss": 0.4619, "step": 7906 }, { "epoch": 0.64, "grad_norm": 0.8638758477258518, "learning_rate": 5.983680170497441e-06, "loss": 0.4918, "step": 7907 }, { "epoch": 0.64, "grad_norm": 0.9072700579393461, "learning_rate": 5.981269488361915e-06, "loss": 0.5199, "step": 7908 }, { "epoch": 0.64, "grad_norm": 0.8330566847213845, "learning_rate": 5.978859084720847e-06, "loss": 0.4817, "step": 7909 }, { "epoch": 0.64, "grad_norm": 0.853580850835999, "learning_rate": 5.9764489597412744e-06, "loss": 0.4865, "step": 7910 }, { "epoch": 0.64, "grad_norm": 0.9350521083805324, "learning_rate": 5.974039113590224e-06, "loss": 0.5252, "step": 7911 }, { "epoch": 0.64, "grad_norm": 0.8370042228867535, "learning_rate": 5.971629546434692e-06, "loss": 0.4613, "step": 7912 }, { "epoch": 0.64, "grad_norm": 0.9258955551707068, "learning_rate": 5.969220258441656e-06, "loss": 0.464, "step": 7913 }, { "epoch": 0.64, "grad_norm": 1.004101425937656, "learning_rate": 5.966811249778084e-06, "loss": 0.5862, "step": 7914 }, { "epoch": 0.64, "grad_norm": 1.0387617616671656, "learning_rate": 5.964402520610915e-06, "loss": 0.5051, "step": 7915 }, { "epoch": 0.64, "grad_norm": 0.9856894626875013, "learning_rate": 5.961994071107067e-06, "loss": 0.5665, "step": 7916 }, { "epoch": 0.64, "grad_norm": 0.9105291820348035, "learning_rate": 5.959585901433453e-06, "loss": 0.4826, "step": 7917 }, { "epoch": 0.64, "grad_norm": 0.8617063645840858, "learning_rate": 5.957178011756952e-06, "loss": 0.4357, "step": 7918 }, { "epoch": 0.64, "grad_norm": 0.9170271796296493, "learning_rate": 5.954770402244425e-06, "loss": 0.5619, "step": 7919 }, { "epoch": 0.64, "grad_norm": 0.9411333802566632, "learning_rate": 5.9523630730627255e-06, "loss": 0.4756, "step": 7920 }, { "epoch": 0.64, "grad_norm": 0.7903009111273711, "learning_rate": 5.949956024378673e-06, "loss": 0.5194, "step": 7921 }, { "epoch": 0.64, "grad_norm": 0.8558134713436262, "learning_rate": 5.947549256359074e-06, "loss": 0.493, "step": 7922 }, { "epoch": 0.64, "grad_norm": 0.8332628028313226, "learning_rate": 5.94514276917072e-06, "loss": 0.488, "step": 7923 }, { "epoch": 0.64, "grad_norm": 0.9005806018388123, "learning_rate": 5.9427365629803756e-06, "loss": 0.5588, "step": 7924 }, { "epoch": 0.64, "grad_norm": 0.9971919710537726, "learning_rate": 5.940330637954783e-06, "loss": 0.478, "step": 7925 }, { "epoch": 0.64, "grad_norm": 0.8456390472224813, "learning_rate": 5.937924994260682e-06, "loss": 0.4745, "step": 7926 }, { "epoch": 0.64, "grad_norm": 0.8541660700367373, "learning_rate": 5.9355196320647745e-06, "loss": 0.4937, "step": 7927 }, { "epoch": 0.64, "grad_norm": 0.8553699609380877, "learning_rate": 5.933114551533749e-06, "loss": 0.4747, "step": 7928 }, { "epoch": 0.64, "grad_norm": 0.9374042630967032, "learning_rate": 5.930709752834281e-06, "loss": 0.4991, "step": 7929 }, { "epoch": 0.64, "grad_norm": 0.9049318778548395, "learning_rate": 5.928305236133016e-06, "loss": 0.5303, "step": 7930 }, { "epoch": 0.64, "grad_norm": 0.9678237556822508, "learning_rate": 5.925901001596586e-06, "loss": 0.5392, "step": 7931 }, { "epoch": 0.64, "grad_norm": 0.8766681404745948, "learning_rate": 5.923497049391605e-06, "loss": 0.4992, "step": 7932 }, { "epoch": 0.64, "grad_norm": 1.0277711153649378, "learning_rate": 5.9210933796846616e-06, "loss": 0.5993, "step": 7933 }, { "epoch": 0.64, "grad_norm": 0.9128322651045907, "learning_rate": 5.918689992642328e-06, "loss": 0.5476, "step": 7934 }, { "epoch": 0.64, "grad_norm": 0.835028262630942, "learning_rate": 5.9162868884311596e-06, "loss": 0.448, "step": 7935 }, { "epoch": 0.64, "grad_norm": 0.9091933878596423, "learning_rate": 5.913884067217686e-06, "loss": 0.4761, "step": 7936 }, { "epoch": 0.65, "grad_norm": 0.8631763279005022, "learning_rate": 5.911481529168421e-06, "loss": 0.4818, "step": 7937 }, { "epoch": 0.65, "grad_norm": 0.9481818857199612, "learning_rate": 5.9090792744498625e-06, "loss": 0.5641, "step": 7938 }, { "epoch": 0.65, "grad_norm": 0.8160053258398908, "learning_rate": 5.9066773032284804e-06, "loss": 0.448, "step": 7939 }, { "epoch": 0.65, "grad_norm": 0.9511732542896206, "learning_rate": 5.9042756156707295e-06, "loss": 0.5564, "step": 7940 }, { "epoch": 0.65, "grad_norm": 0.929582203893334, "learning_rate": 5.901874211943048e-06, "loss": 0.5347, "step": 7941 }, { "epoch": 0.65, "grad_norm": 0.8946384425547035, "learning_rate": 5.899473092211847e-06, "loss": 0.5131, "step": 7942 }, { "epoch": 0.65, "grad_norm": 0.8740876502947456, "learning_rate": 5.897072256643522e-06, "loss": 0.4932, "step": 7943 }, { "epoch": 0.65, "grad_norm": 0.8753668882927068, "learning_rate": 5.894671705404453e-06, "loss": 0.495, "step": 7944 }, { "epoch": 0.65, "grad_norm": 0.9465339505107648, "learning_rate": 5.892271438660993e-06, "loss": 0.5105, "step": 7945 }, { "epoch": 0.65, "grad_norm": 0.8751984352332609, "learning_rate": 5.889871456579477e-06, "loss": 0.5559, "step": 7946 }, { "epoch": 0.65, "grad_norm": 1.0290060841753401, "learning_rate": 5.887471759326223e-06, "loss": 0.5856, "step": 7947 }, { "epoch": 0.65, "grad_norm": 0.8780230357458085, "learning_rate": 5.885072347067531e-06, "loss": 0.5009, "step": 7948 }, { "epoch": 0.65, "grad_norm": 0.880023849643744, "learning_rate": 5.882673219969673e-06, "loss": 0.5137, "step": 7949 }, { "epoch": 0.65, "grad_norm": 0.865486218156423, "learning_rate": 5.880274378198909e-06, "loss": 0.4774, "step": 7950 }, { "epoch": 0.65, "grad_norm": 0.8907322492856702, "learning_rate": 5.877875821921479e-06, "loss": 0.4895, "step": 7951 }, { "epoch": 0.65, "grad_norm": 0.8918944661195479, "learning_rate": 5.875477551303596e-06, "loss": 0.5237, "step": 7952 }, { "epoch": 0.65, "grad_norm": 0.9662505920258414, "learning_rate": 5.873079566511459e-06, "loss": 0.5327, "step": 7953 }, { "epoch": 0.65, "grad_norm": 0.9372929223735276, "learning_rate": 5.870681867711252e-06, "loss": 0.4707, "step": 7954 }, { "epoch": 0.65, "grad_norm": 0.9801069798337949, "learning_rate": 5.868284455069124e-06, "loss": 0.4903, "step": 7955 }, { "epoch": 0.65, "grad_norm": 0.8265909750429461, "learning_rate": 5.865887328751221e-06, "loss": 0.5053, "step": 7956 }, { "epoch": 0.65, "grad_norm": 0.8773827747816701, "learning_rate": 5.86349048892366e-06, "loss": 0.5323, "step": 7957 }, { "epoch": 0.65, "grad_norm": 0.9125462407522994, "learning_rate": 5.8610939357525365e-06, "loss": 0.457, "step": 7958 }, { "epoch": 0.65, "grad_norm": 0.9009283341814897, "learning_rate": 5.8586976694039325e-06, "loss": 0.4907, "step": 7959 }, { "epoch": 0.65, "grad_norm": 0.8862610572080457, "learning_rate": 5.856301690043909e-06, "loss": 0.4928, "step": 7960 }, { "epoch": 0.65, "grad_norm": 0.8271397349442127, "learning_rate": 5.8539059978385e-06, "loss": 0.4825, "step": 7961 }, { "epoch": 0.65, "grad_norm": 1.0027140353575406, "learning_rate": 5.851510592953729e-06, "loss": 0.5539, "step": 7962 }, { "epoch": 0.65, "grad_norm": 0.9227278978093961, "learning_rate": 5.849115475555596e-06, "loss": 0.4464, "step": 7963 }, { "epoch": 0.65, "grad_norm": 0.872548999788522, "learning_rate": 5.846720645810073e-06, "loss": 0.4953, "step": 7964 }, { "epoch": 0.65, "grad_norm": 0.8580393334113791, "learning_rate": 5.844326103883131e-06, "loss": 0.4775, "step": 7965 }, { "epoch": 0.65, "grad_norm": 1.0315902936759194, "learning_rate": 5.841931849940704e-06, "loss": 0.494, "step": 7966 }, { "epoch": 0.65, "grad_norm": 1.0272260657495227, "learning_rate": 5.839537884148707e-06, "loss": 0.5617, "step": 7967 }, { "epoch": 0.65, "grad_norm": 0.8998302210454385, "learning_rate": 5.837144206673049e-06, "loss": 0.4609, "step": 7968 }, { "epoch": 0.65, "grad_norm": 0.9546931742771414, "learning_rate": 5.834750817679606e-06, "loss": 0.5295, "step": 7969 }, { "epoch": 0.65, "grad_norm": 0.9181160691313232, "learning_rate": 5.832357717334229e-06, "loss": 0.5299, "step": 7970 }, { "epoch": 0.65, "grad_norm": 0.866694492393031, "learning_rate": 5.829964905802774e-06, "loss": 0.4898, "step": 7971 }, { "epoch": 0.65, "grad_norm": 0.829496100447018, "learning_rate": 5.827572383251048e-06, "loss": 0.5281, "step": 7972 }, { "epoch": 0.65, "grad_norm": 0.8751261025322412, "learning_rate": 5.825180149844856e-06, "loss": 0.4339, "step": 7973 }, { "epoch": 0.65, "grad_norm": 0.8704829205465353, "learning_rate": 5.822788205749974e-06, "loss": 0.4777, "step": 7974 }, { "epoch": 0.65, "grad_norm": 0.849680191816542, "learning_rate": 5.82039655113217e-06, "loss": 0.4442, "step": 7975 }, { "epoch": 0.65, "grad_norm": 0.8376653229107791, "learning_rate": 5.8180051861571695e-06, "loss": 0.4852, "step": 7976 }, { "epoch": 0.65, "grad_norm": 0.8389294368790021, "learning_rate": 5.815614110990708e-06, "loss": 0.462, "step": 7977 }, { "epoch": 0.65, "grad_norm": 0.8807585814155449, "learning_rate": 5.813223325798473e-06, "loss": 0.5042, "step": 7978 }, { "epoch": 0.65, "grad_norm": 1.000691488112414, "learning_rate": 5.810832830746147e-06, "loss": 0.4713, "step": 7979 }, { "epoch": 0.65, "grad_norm": 0.8532742444248469, "learning_rate": 5.8084426259993905e-06, "loss": 0.4491, "step": 7980 }, { "epoch": 0.65, "grad_norm": 0.9100918922284562, "learning_rate": 5.8060527117238475e-06, "loss": 0.5095, "step": 7981 }, { "epoch": 0.65, "grad_norm": 0.8929274143229312, "learning_rate": 5.803663088085123e-06, "loss": 0.5257, "step": 7982 }, { "epoch": 0.65, "grad_norm": 0.8455248193661312, "learning_rate": 5.801273755248831e-06, "loss": 0.477, "step": 7983 }, { "epoch": 0.65, "grad_norm": 0.8811988221328768, "learning_rate": 5.798884713380542e-06, "loss": 0.5042, "step": 7984 }, { "epoch": 0.65, "grad_norm": 0.967260471731767, "learning_rate": 5.796495962645814e-06, "loss": 0.5802, "step": 7985 }, { "epoch": 0.65, "grad_norm": 0.98700331385454, "learning_rate": 5.794107503210187e-06, "loss": 0.5205, "step": 7986 }, { "epoch": 0.65, "grad_norm": 0.902464167796891, "learning_rate": 5.791719335239185e-06, "loss": 0.5258, "step": 7987 }, { "epoch": 0.65, "grad_norm": 0.852618831364867, "learning_rate": 5.7893314588982905e-06, "loss": 0.4333, "step": 7988 }, { "epoch": 0.65, "grad_norm": 0.9179412089940903, "learning_rate": 5.7869438743529994e-06, "loss": 0.5615, "step": 7989 }, { "epoch": 0.65, "grad_norm": 0.9335365738514484, "learning_rate": 5.784556581768757e-06, "loss": 0.5715, "step": 7990 }, { "epoch": 0.65, "grad_norm": 0.7787335792129625, "learning_rate": 5.782169581311006e-06, "loss": 0.478, "step": 7991 }, { "epoch": 0.65, "grad_norm": 0.9464878407470988, "learning_rate": 5.7797828731451596e-06, "loss": 0.4992, "step": 7992 }, { "epoch": 0.65, "grad_norm": 0.9479587984475946, "learning_rate": 5.7773964574366185e-06, "loss": 0.5334, "step": 7993 }, { "epoch": 0.65, "grad_norm": 0.8369870158085331, "learning_rate": 5.7750103343507565e-06, "loss": 0.4922, "step": 7994 }, { "epoch": 0.65, "grad_norm": 0.8797202532805338, "learning_rate": 5.772624504052935e-06, "loss": 0.5185, "step": 7995 }, { "epoch": 0.65, "grad_norm": 0.8525431115226239, "learning_rate": 5.770238966708482e-06, "loss": 0.4396, "step": 7996 }, { "epoch": 0.65, "grad_norm": 0.8919277953759788, "learning_rate": 5.767853722482717e-06, "loss": 0.4865, "step": 7997 }, { "epoch": 0.65, "grad_norm": 0.924505401141746, "learning_rate": 5.765468771540934e-06, "loss": 0.5014, "step": 7998 }, { "epoch": 0.65, "grad_norm": 0.9132203193112512, "learning_rate": 5.763084114048409e-06, "loss": 0.4474, "step": 7999 }, { "epoch": 0.65, "grad_norm": 0.9793395084081031, "learning_rate": 5.7606997501703975e-06, "loss": 0.5358, "step": 8000 }, { "epoch": 0.65, "grad_norm": 0.9415267497989768, "learning_rate": 5.758315680072137e-06, "loss": 0.4871, "step": 8001 }, { "epoch": 0.65, "grad_norm": 0.899446816115677, "learning_rate": 5.755931903918835e-06, "loss": 0.4913, "step": 8002 }, { "epoch": 0.65, "grad_norm": 0.8433015727474659, "learning_rate": 5.753548421875686e-06, "loss": 0.4995, "step": 8003 }, { "epoch": 0.65, "grad_norm": 0.928114233175902, "learning_rate": 5.751165234107864e-06, "loss": 0.5294, "step": 8004 }, { "epoch": 0.65, "grad_norm": 0.7961058992253185, "learning_rate": 5.748782340780523e-06, "loss": 0.4393, "step": 8005 }, { "epoch": 0.65, "grad_norm": 0.9787936172926667, "learning_rate": 5.746399742058796e-06, "loss": 0.5387, "step": 8006 }, { "epoch": 0.65, "grad_norm": 0.8751511295227653, "learning_rate": 5.744017438107796e-06, "loss": 0.4633, "step": 8007 }, { "epoch": 0.65, "grad_norm": 0.9077415186497616, "learning_rate": 5.741635429092611e-06, "loss": 0.5377, "step": 8008 }, { "epoch": 0.65, "grad_norm": 0.8952564586058097, "learning_rate": 5.7392537151783125e-06, "loss": 0.5049, "step": 8009 }, { "epoch": 0.65, "grad_norm": 0.8646877275625106, "learning_rate": 5.736872296529952e-06, "loss": 0.4616, "step": 8010 }, { "epoch": 0.65, "grad_norm": 1.022240202103064, "learning_rate": 5.734491173312559e-06, "loss": 0.5524, "step": 8011 }, { "epoch": 0.65, "grad_norm": 0.8915803775655604, "learning_rate": 5.732110345691146e-06, "loss": 0.511, "step": 8012 }, { "epoch": 0.65, "grad_norm": 0.8479179390848613, "learning_rate": 5.7297298138307e-06, "loss": 0.4507, "step": 8013 }, { "epoch": 0.65, "grad_norm": 0.9476076758828081, "learning_rate": 5.727349577896194e-06, "loss": 0.4865, "step": 8014 }, { "epoch": 0.65, "grad_norm": 0.8676077389176223, "learning_rate": 5.724969638052569e-06, "loss": 0.4834, "step": 8015 }, { "epoch": 0.65, "grad_norm": 0.9255473966779431, "learning_rate": 5.722589994464754e-06, "loss": 0.5681, "step": 8016 }, { "epoch": 0.65, "grad_norm": 0.8470769391341288, "learning_rate": 5.72021064729766e-06, "loss": 0.5295, "step": 8017 }, { "epoch": 0.65, "grad_norm": 0.8572510515489944, "learning_rate": 5.717831596716173e-06, "loss": 0.487, "step": 8018 }, { "epoch": 0.65, "grad_norm": 1.0118769672041892, "learning_rate": 5.715452842885157e-06, "loss": 0.5201, "step": 8019 }, { "epoch": 0.65, "grad_norm": 0.9132906756275612, "learning_rate": 5.713074385969457e-06, "loss": 0.5172, "step": 8020 }, { "epoch": 0.65, "grad_norm": 0.8406249617881854, "learning_rate": 5.710696226133905e-06, "loss": 0.4563, "step": 8021 }, { "epoch": 0.65, "grad_norm": 0.9288896517388993, "learning_rate": 5.708318363543297e-06, "loss": 0.4847, "step": 8022 }, { "epoch": 0.65, "grad_norm": 0.9714000918656338, "learning_rate": 5.705940798362417e-06, "loss": 0.5423, "step": 8023 }, { "epoch": 0.65, "grad_norm": 0.7738297426860237, "learning_rate": 5.703563530756033e-06, "loss": 0.4568, "step": 8024 }, { "epoch": 0.65, "grad_norm": 0.9015248782470349, "learning_rate": 5.701186560888885e-06, "loss": 0.4834, "step": 8025 }, { "epoch": 0.65, "grad_norm": 0.9615545542578514, "learning_rate": 5.698809888925696e-06, "loss": 0.4691, "step": 8026 }, { "epoch": 0.65, "grad_norm": 0.8640647567153519, "learning_rate": 5.696433515031169e-06, "loss": 0.4209, "step": 8027 }, { "epoch": 0.65, "grad_norm": 0.8546615520462587, "learning_rate": 5.694057439369979e-06, "loss": 0.4583, "step": 8028 }, { "epoch": 0.65, "grad_norm": 0.8921814771572696, "learning_rate": 5.6916816621067895e-06, "loss": 0.5259, "step": 8029 }, { "epoch": 0.65, "grad_norm": 0.8644158011998065, "learning_rate": 5.689306183406238e-06, "loss": 0.5409, "step": 8030 }, { "epoch": 0.65, "grad_norm": 0.9855746769335014, "learning_rate": 5.686931003432945e-06, "loss": 0.498, "step": 8031 }, { "epoch": 0.65, "grad_norm": 0.8982014572232642, "learning_rate": 5.684556122351508e-06, "loss": 0.4861, "step": 8032 }, { "epoch": 0.65, "grad_norm": 0.925016604280845, "learning_rate": 5.682181540326503e-06, "loss": 0.4698, "step": 8033 }, { "epoch": 0.65, "grad_norm": 0.9717081180975732, "learning_rate": 5.679807257522493e-06, "loss": 0.4723, "step": 8034 }, { "epoch": 0.65, "grad_norm": 0.8842071042921347, "learning_rate": 5.677433274104003e-06, "loss": 0.546, "step": 8035 }, { "epoch": 0.65, "grad_norm": 0.9502961755883366, "learning_rate": 5.675059590235553e-06, "loss": 0.5469, "step": 8036 }, { "epoch": 0.65, "grad_norm": 0.9981482851768781, "learning_rate": 5.672686206081638e-06, "loss": 0.5291, "step": 8037 }, { "epoch": 0.65, "grad_norm": 0.9835649935761642, "learning_rate": 5.67031312180673e-06, "loss": 0.5477, "step": 8038 }, { "epoch": 0.65, "grad_norm": 0.8502872797613393, "learning_rate": 5.6679403375752816e-06, "loss": 0.5428, "step": 8039 }, { "epoch": 0.65, "grad_norm": 0.9344665940312018, "learning_rate": 5.66556785355173e-06, "loss": 0.5195, "step": 8040 }, { "epoch": 0.65, "grad_norm": 0.9192075507681627, "learning_rate": 5.663195669900479e-06, "loss": 0.4879, "step": 8041 }, { "epoch": 0.65, "grad_norm": 0.986064051073003, "learning_rate": 5.6608237867859184e-06, "loss": 0.5824, "step": 8042 }, { "epoch": 0.65, "grad_norm": 0.8066618730198377, "learning_rate": 5.6584522043724226e-06, "loss": 0.4839, "step": 8043 }, { "epoch": 0.65, "grad_norm": 0.9818638450965491, "learning_rate": 5.656080922824337e-06, "loss": 0.5366, "step": 8044 }, { "epoch": 0.65, "grad_norm": 0.8365237997024432, "learning_rate": 5.65370994230599e-06, "loss": 0.4576, "step": 8045 }, { "epoch": 0.65, "grad_norm": 0.9445525770789083, "learning_rate": 5.651339262981694e-06, "loss": 0.5432, "step": 8046 }, { "epoch": 0.65, "grad_norm": 0.8473995922041537, "learning_rate": 5.648968885015726e-06, "loss": 0.4734, "step": 8047 }, { "epoch": 0.65, "grad_norm": 0.9149444795504126, "learning_rate": 5.646598808572355e-06, "loss": 0.5067, "step": 8048 }, { "epoch": 0.65, "grad_norm": 0.9382812760027435, "learning_rate": 5.644229033815823e-06, "loss": 0.5029, "step": 8049 }, { "epoch": 0.65, "grad_norm": 0.9131763915953596, "learning_rate": 5.641859560910356e-06, "loss": 0.5207, "step": 8050 }, { "epoch": 0.65, "grad_norm": 0.9083095218043912, "learning_rate": 5.639490390020158e-06, "loss": 0.4829, "step": 8051 }, { "epoch": 0.65, "grad_norm": 0.9217744465084563, "learning_rate": 5.637121521309411e-06, "loss": 0.5367, "step": 8052 }, { "epoch": 0.65, "grad_norm": 0.8950244905018768, "learning_rate": 5.634752954942264e-06, "loss": 0.4848, "step": 8053 }, { "epoch": 0.65, "grad_norm": 0.9220101284527779, "learning_rate": 5.632384691082874e-06, "loss": 0.4886, "step": 8054 }, { "epoch": 0.65, "grad_norm": 0.8510786318647385, "learning_rate": 5.630016729895346e-06, "loss": 0.4604, "step": 8055 }, { "epoch": 0.65, "grad_norm": 1.0006639059946054, "learning_rate": 5.627649071543784e-06, "loss": 0.5222, "step": 8056 }, { "epoch": 0.65, "grad_norm": 0.9580343958505725, "learning_rate": 5.6252817161922616e-06, "loss": 0.5535, "step": 8057 }, { "epoch": 0.65, "grad_norm": 0.9379452625036485, "learning_rate": 5.6229146640048415e-06, "loss": 0.5168, "step": 8058 }, { "epoch": 0.65, "grad_norm": 0.872992477532868, "learning_rate": 5.620547915145542e-06, "loss": 0.5281, "step": 8059 }, { "epoch": 0.66, "grad_norm": 0.7956540694611413, "learning_rate": 5.618181469778399e-06, "loss": 0.4395, "step": 8060 }, { "epoch": 0.66, "grad_norm": 0.8921079894021173, "learning_rate": 5.615815328067387e-06, "loss": 0.4747, "step": 8061 }, { "epoch": 0.66, "grad_norm": 0.8262492322669378, "learning_rate": 5.613449490176484e-06, "loss": 0.4567, "step": 8062 }, { "epoch": 0.66, "grad_norm": 0.8583148399648072, "learning_rate": 5.6110839562696404e-06, "loss": 0.4694, "step": 8063 }, { "epoch": 0.66, "grad_norm": 0.9328997683673895, "learning_rate": 5.608718726510791e-06, "loss": 0.5111, "step": 8064 }, { "epoch": 0.66, "grad_norm": 0.8097909207413837, "learning_rate": 5.60635380106383e-06, "loss": 0.4543, "step": 8065 }, { "epoch": 0.66, "grad_norm": 0.863117123650207, "learning_rate": 5.603989180092661e-06, "loss": 0.4795, "step": 8066 }, { "epoch": 0.66, "grad_norm": 0.9068547480911131, "learning_rate": 5.6016248637611395e-06, "loss": 0.4863, "step": 8067 }, { "epoch": 0.66, "grad_norm": 0.8251512618816547, "learning_rate": 5.5992608522331126e-06, "loss": 0.4601, "step": 8068 }, { "epoch": 0.66, "grad_norm": 0.9337837799868443, "learning_rate": 5.596897145672407e-06, "loss": 0.537, "step": 8069 }, { "epoch": 0.66, "grad_norm": 0.8894318690847395, "learning_rate": 5.594533744242825e-06, "loss": 0.4685, "step": 8070 }, { "epoch": 0.66, "grad_norm": 0.8633650959858853, "learning_rate": 5.5921706481081405e-06, "loss": 0.536, "step": 8071 }, { "epoch": 0.66, "grad_norm": 0.9743246388023774, "learning_rate": 5.589807857432128e-06, "loss": 0.5396, "step": 8072 }, { "epoch": 0.66, "grad_norm": 0.9548473977649928, "learning_rate": 5.587445372378515e-06, "loss": 0.5391, "step": 8073 }, { "epoch": 0.66, "grad_norm": 0.8957082635019312, "learning_rate": 5.585083193111025e-06, "loss": 0.4466, "step": 8074 }, { "epoch": 0.66, "grad_norm": 0.9149137254325227, "learning_rate": 5.582721319793351e-06, "loss": 0.4543, "step": 8075 }, { "epoch": 0.66, "grad_norm": 0.9338244908287366, "learning_rate": 5.580359752589178e-06, "loss": 0.4991, "step": 8076 }, { "epoch": 0.66, "grad_norm": 0.9282335254284353, "learning_rate": 5.5779984916621455e-06, "loss": 0.5303, "step": 8077 }, { "epoch": 0.66, "grad_norm": 0.9862482780242752, "learning_rate": 5.575637537175902e-06, "loss": 0.4957, "step": 8078 }, { "epoch": 0.66, "grad_norm": 0.7528909278795783, "learning_rate": 5.57327688929405e-06, "loss": 0.4195, "step": 8079 }, { "epoch": 0.66, "grad_norm": 0.9557139196501997, "learning_rate": 5.570916548180183e-06, "loss": 0.4914, "step": 8080 }, { "epoch": 0.66, "grad_norm": 0.8315133258006441, "learning_rate": 5.568556513997869e-06, "loss": 0.4918, "step": 8081 }, { "epoch": 0.66, "grad_norm": 1.0828275890412324, "learning_rate": 5.566196786910665e-06, "loss": 0.564, "step": 8082 }, { "epoch": 0.66, "grad_norm": 0.9521834237171527, "learning_rate": 5.56383736708208e-06, "loss": 0.4365, "step": 8083 }, { "epoch": 0.66, "grad_norm": 0.8505018319958945, "learning_rate": 5.561478254675639e-06, "loss": 0.4845, "step": 8084 }, { "epoch": 0.66, "grad_norm": 0.9605289024212322, "learning_rate": 5.559119449854815e-06, "loss": 0.5306, "step": 8085 }, { "epoch": 0.66, "grad_norm": 1.0344634196486486, "learning_rate": 5.556760952783073e-06, "loss": 0.4886, "step": 8086 }, { "epoch": 0.66, "grad_norm": 0.9630866882564898, "learning_rate": 5.554402763623857e-06, "loss": 0.4603, "step": 8087 }, { "epoch": 0.66, "grad_norm": 0.8845755841128609, "learning_rate": 5.55204488254059e-06, "loss": 0.4963, "step": 8088 }, { "epoch": 0.66, "grad_norm": 0.927336189724174, "learning_rate": 5.549687309696658e-06, "loss": 0.5091, "step": 8089 }, { "epoch": 0.66, "grad_norm": 0.9453205589847619, "learning_rate": 5.547330045255458e-06, "loss": 0.5375, "step": 8090 }, { "epoch": 0.66, "grad_norm": 0.9241048097039017, "learning_rate": 5.5449730893803326e-06, "loss": 0.4709, "step": 8091 }, { "epoch": 0.66, "grad_norm": 0.8701604477982022, "learning_rate": 5.542616442234618e-06, "loss": 0.4896, "step": 8092 }, { "epoch": 0.66, "grad_norm": 1.0247372373451231, "learning_rate": 5.5402601039816315e-06, "loss": 0.5078, "step": 8093 }, { "epoch": 0.66, "grad_norm": 0.9368300927082287, "learning_rate": 5.537904074784668e-06, "loss": 0.5267, "step": 8094 }, { "epoch": 0.66, "grad_norm": 0.8953371671011336, "learning_rate": 5.5355483548069866e-06, "loss": 0.4929, "step": 8095 }, { "epoch": 0.66, "grad_norm": 0.8632314786605364, "learning_rate": 5.533192944211852e-06, "loss": 0.4403, "step": 8096 }, { "epoch": 0.66, "grad_norm": 0.9249710661785733, "learning_rate": 5.53083784316248e-06, "loss": 0.5305, "step": 8097 }, { "epoch": 0.66, "grad_norm": 0.9478487611876848, "learning_rate": 5.528483051822082e-06, "loss": 0.485, "step": 8098 }, { "epoch": 0.66, "grad_norm": 1.1189668918359563, "learning_rate": 5.526128570353842e-06, "loss": 0.5108, "step": 8099 }, { "epoch": 0.66, "grad_norm": 1.011156273732329, "learning_rate": 5.523774398920927e-06, "loss": 0.5158, "step": 8100 }, { "epoch": 0.66, "grad_norm": 0.8582598832505969, "learning_rate": 5.521420537686468e-06, "loss": 0.5109, "step": 8101 }, { "epoch": 0.66, "grad_norm": 0.890431587765962, "learning_rate": 5.519066986813602e-06, "loss": 0.5406, "step": 8102 }, { "epoch": 0.66, "grad_norm": 0.9234301484449461, "learning_rate": 5.5167137464654155e-06, "loss": 0.5037, "step": 8103 }, { "epoch": 0.66, "grad_norm": 0.8827748934064557, "learning_rate": 5.514360816804989e-06, "loss": 0.5078, "step": 8104 }, { "epoch": 0.66, "grad_norm": 0.9646521521805869, "learning_rate": 5.512008197995379e-06, "loss": 0.5017, "step": 8105 }, { "epoch": 0.66, "grad_norm": 0.9690885937124468, "learning_rate": 5.5096558901996235e-06, "loss": 0.5432, "step": 8106 }, { "epoch": 0.66, "grad_norm": 0.8448217293276843, "learning_rate": 5.507303893580724e-06, "loss": 0.4992, "step": 8107 }, { "epoch": 0.66, "grad_norm": 0.9631252639189662, "learning_rate": 5.5049522083016895e-06, "loss": 0.5089, "step": 8108 }, { "epoch": 0.66, "grad_norm": 0.9643236838561009, "learning_rate": 5.502600834525475e-06, "loss": 0.5508, "step": 8109 }, { "epoch": 0.66, "grad_norm": 0.9953018292367835, "learning_rate": 5.500249772415033e-06, "loss": 0.5355, "step": 8110 }, { "epoch": 0.66, "grad_norm": 0.8863125003391487, "learning_rate": 5.49789902213329e-06, "loss": 0.543, "step": 8111 }, { "epoch": 0.66, "grad_norm": 0.8268057293234355, "learning_rate": 5.495548583843155e-06, "loss": 0.4978, "step": 8112 }, { "epoch": 0.66, "grad_norm": 0.8258851400525931, "learning_rate": 5.493198457707503e-06, "loss": 0.4619, "step": 8113 }, { "epoch": 0.66, "grad_norm": 0.9138777270458119, "learning_rate": 5.490848643889205e-06, "loss": 0.4819, "step": 8114 }, { "epoch": 0.66, "grad_norm": 0.8789485096300262, "learning_rate": 5.488499142551094e-06, "loss": 0.4584, "step": 8115 }, { "epoch": 0.66, "grad_norm": 0.9067069193255343, "learning_rate": 5.48614995385599e-06, "loss": 0.5312, "step": 8116 }, { "epoch": 0.66, "grad_norm": 0.9639711693883865, "learning_rate": 5.48380107796669e-06, "loss": 0.474, "step": 8117 }, { "epoch": 0.66, "grad_norm": 0.9871386684830713, "learning_rate": 5.481452515045974e-06, "loss": 0.5496, "step": 8118 }, { "epoch": 0.66, "grad_norm": 0.856246256936417, "learning_rate": 5.479104265256583e-06, "loss": 0.4469, "step": 8119 }, { "epoch": 0.66, "grad_norm": 0.9666843510279574, "learning_rate": 5.476756328761264e-06, "loss": 0.4889, "step": 8120 }, { "epoch": 0.66, "grad_norm": 0.8589689713812814, "learning_rate": 5.474408705722716e-06, "loss": 0.4909, "step": 8121 }, { "epoch": 0.66, "grad_norm": 0.8871792197882404, "learning_rate": 5.47206139630363e-06, "loss": 0.4712, "step": 8122 }, { "epoch": 0.66, "grad_norm": 0.8684447485140943, "learning_rate": 5.469714400666673e-06, "loss": 0.4664, "step": 8123 }, { "epoch": 0.66, "grad_norm": 0.9112182769895082, "learning_rate": 5.467367718974492e-06, "loss": 0.5288, "step": 8124 }, { "epoch": 0.66, "grad_norm": 0.8856249596176711, "learning_rate": 5.465021351389702e-06, "loss": 0.5061, "step": 8125 }, { "epoch": 0.66, "grad_norm": 1.0244913612133435, "learning_rate": 5.462675298074918e-06, "loss": 0.5295, "step": 8126 }, { "epoch": 0.66, "grad_norm": 0.8781385330939273, "learning_rate": 5.460329559192705e-06, "loss": 0.4622, "step": 8127 }, { "epoch": 0.66, "grad_norm": 0.9235091415547136, "learning_rate": 5.4579841349056285e-06, "loss": 0.548, "step": 8128 }, { "epoch": 0.66, "grad_norm": 0.8837638906811883, "learning_rate": 5.455639025376223e-06, "loss": 0.4918, "step": 8129 }, { "epoch": 0.66, "grad_norm": 0.9490852904400812, "learning_rate": 5.453294230767005e-06, "loss": 0.5258, "step": 8130 }, { "epoch": 0.66, "grad_norm": 0.8936978817795787, "learning_rate": 5.450949751240456e-06, "loss": 0.5531, "step": 8131 }, { "epoch": 0.66, "grad_norm": 0.8136657702496216, "learning_rate": 5.448605586959063e-06, "loss": 0.419, "step": 8132 }, { "epoch": 0.66, "grad_norm": 0.8734233583149454, "learning_rate": 5.446261738085261e-06, "loss": 0.4688, "step": 8133 }, { "epoch": 0.66, "grad_norm": 0.9808667220509426, "learning_rate": 5.443918204781482e-06, "loss": 0.5776, "step": 8134 }, { "epoch": 0.66, "grad_norm": 0.9728198474455235, "learning_rate": 5.44157498721013e-06, "loss": 0.4703, "step": 8135 }, { "epoch": 0.66, "grad_norm": 0.8547436340944682, "learning_rate": 5.439232085533592e-06, "loss": 0.4995, "step": 8136 }, { "epoch": 0.66, "grad_norm": 1.037319351927126, "learning_rate": 5.436889499914218e-06, "loss": 0.5527, "step": 8137 }, { "epoch": 0.66, "grad_norm": 0.9336300856188522, "learning_rate": 5.43454723051436e-06, "loss": 0.5334, "step": 8138 }, { "epoch": 0.66, "grad_norm": 0.91337608051943, "learning_rate": 5.432205277496327e-06, "loss": 0.4993, "step": 8139 }, { "epoch": 0.66, "grad_norm": 0.9486914224331876, "learning_rate": 5.429863641022416e-06, "loss": 0.5073, "step": 8140 }, { "epoch": 0.66, "grad_norm": 0.9595276005602759, "learning_rate": 5.427522321254901e-06, "loss": 0.5485, "step": 8141 }, { "epoch": 0.66, "grad_norm": 0.9541951421866232, "learning_rate": 5.425181318356035e-06, "loss": 0.5148, "step": 8142 }, { "epoch": 0.66, "grad_norm": 0.8624392948766714, "learning_rate": 5.4228406324880434e-06, "loss": 0.503, "step": 8143 }, { "epoch": 0.66, "grad_norm": 0.9007306472206367, "learning_rate": 5.420500263813141e-06, "loss": 0.5872, "step": 8144 }, { "epoch": 0.66, "grad_norm": 0.9184533974463028, "learning_rate": 5.418160212493501e-06, "loss": 0.5086, "step": 8145 }, { "epoch": 0.66, "grad_norm": 0.9301255956721959, "learning_rate": 5.415820478691301e-06, "loss": 0.6116, "step": 8146 }, { "epoch": 0.66, "grad_norm": 0.8777358719757851, "learning_rate": 5.413481062568672e-06, "loss": 0.4959, "step": 8147 }, { "epoch": 0.66, "grad_norm": 0.944307831775758, "learning_rate": 5.411141964287737e-06, "loss": 0.5362, "step": 8148 }, { "epoch": 0.66, "grad_norm": 0.9121351906003211, "learning_rate": 5.408803184010593e-06, "loss": 0.544, "step": 8149 }, { "epoch": 0.66, "grad_norm": 0.8415452275491403, "learning_rate": 5.406464721899323e-06, "loss": 0.497, "step": 8150 }, { "epoch": 0.66, "grad_norm": 0.9062392888284309, "learning_rate": 5.404126578115962e-06, "loss": 0.5131, "step": 8151 }, { "epoch": 0.66, "grad_norm": 0.98075689579646, "learning_rate": 5.401788752822562e-06, "loss": 0.5472, "step": 8152 }, { "epoch": 0.66, "grad_norm": 0.970371527601638, "learning_rate": 5.399451246181118e-06, "loss": 0.5397, "step": 8153 }, { "epoch": 0.66, "grad_norm": 0.9526438115570307, "learning_rate": 5.397114058353623e-06, "loss": 0.5376, "step": 8154 }, { "epoch": 0.66, "grad_norm": 0.8734375021626799, "learning_rate": 5.39477718950204e-06, "loss": 0.4983, "step": 8155 }, { "epoch": 0.66, "grad_norm": 0.9938007613913521, "learning_rate": 5.3924406397883174e-06, "loss": 0.536, "step": 8156 }, { "epoch": 0.66, "grad_norm": 1.0030332858642188, "learning_rate": 5.390104409374364e-06, "loss": 0.5175, "step": 8157 }, { "epoch": 0.66, "grad_norm": 0.9435118882877143, "learning_rate": 5.3877684984220945e-06, "loss": 0.5013, "step": 8158 }, { "epoch": 0.66, "grad_norm": 0.9512703874552919, "learning_rate": 5.385432907093371e-06, "loss": 0.5388, "step": 8159 }, { "epoch": 0.66, "grad_norm": 0.873374334708962, "learning_rate": 5.383097635550057e-06, "loss": 0.5086, "step": 8160 }, { "epoch": 0.66, "grad_norm": 1.0269739332825232, "learning_rate": 5.380762683953978e-06, "loss": 0.464, "step": 8161 }, { "epoch": 0.66, "grad_norm": 0.8972458312913809, "learning_rate": 5.37842805246695e-06, "loss": 0.5318, "step": 8162 }, { "epoch": 0.66, "grad_norm": 0.987479872006018, "learning_rate": 5.376093741250758e-06, "loss": 0.522, "step": 8163 }, { "epoch": 0.66, "grad_norm": 0.8746417642079718, "learning_rate": 5.373759750467173e-06, "loss": 0.4816, "step": 8164 }, { "epoch": 0.66, "grad_norm": 0.9936009159063266, "learning_rate": 5.371426080277928e-06, "loss": 0.5823, "step": 8165 }, { "epoch": 0.66, "grad_norm": 0.8921448928229553, "learning_rate": 5.369092730844752e-06, "loss": 0.5245, "step": 8166 }, { "epoch": 0.66, "grad_norm": 0.8386162750995261, "learning_rate": 5.366759702329343e-06, "loss": 0.4778, "step": 8167 }, { "epoch": 0.66, "grad_norm": 0.8330787084771702, "learning_rate": 5.364426994893375e-06, "loss": 0.483, "step": 8168 }, { "epoch": 0.66, "grad_norm": 0.9387651680510244, "learning_rate": 5.362094608698505e-06, "loss": 0.5306, "step": 8169 }, { "epoch": 0.66, "grad_norm": 0.8747296879461391, "learning_rate": 5.3597625439063685e-06, "loss": 0.486, "step": 8170 }, { "epoch": 0.66, "grad_norm": 0.9628154040176343, "learning_rate": 5.357430800678568e-06, "loss": 0.5462, "step": 8171 }, { "epoch": 0.66, "grad_norm": 0.8612974069576437, "learning_rate": 5.3550993791766955e-06, "loss": 0.4286, "step": 8172 }, { "epoch": 0.66, "grad_norm": 1.026048945583325, "learning_rate": 5.352768279562315e-06, "loss": 0.5305, "step": 8173 }, { "epoch": 0.66, "grad_norm": 0.852733927613396, "learning_rate": 5.350437501996972e-06, "loss": 0.5464, "step": 8174 }, { "epoch": 0.66, "grad_norm": 0.9309496207123703, "learning_rate": 5.348107046642186e-06, "loss": 0.535, "step": 8175 }, { "epoch": 0.66, "grad_norm": 0.9011431585683592, "learning_rate": 5.345776913659458e-06, "loss": 0.4597, "step": 8176 }, { "epoch": 0.66, "grad_norm": 0.9514530227899883, "learning_rate": 5.343447103210257e-06, "loss": 0.4818, "step": 8177 }, { "epoch": 0.66, "grad_norm": 0.9556001417671066, "learning_rate": 5.341117615456044e-06, "loss": 0.482, "step": 8178 }, { "epoch": 0.66, "grad_norm": 0.9221886068743322, "learning_rate": 5.338788450558246e-06, "loss": 0.4494, "step": 8179 }, { "epoch": 0.66, "grad_norm": 0.9579423098386334, "learning_rate": 5.336459608678275e-06, "loss": 0.5036, "step": 8180 }, { "epoch": 0.66, "grad_norm": 0.9695762173510455, "learning_rate": 5.334131089977516e-06, "loss": 0.5367, "step": 8181 }, { "epoch": 0.66, "grad_norm": 0.9761452544087317, "learning_rate": 5.331802894617333e-06, "loss": 0.5363, "step": 8182 }, { "epoch": 0.67, "grad_norm": 0.9141851363212332, "learning_rate": 5.329475022759074e-06, "loss": 0.5249, "step": 8183 }, { "epoch": 0.67, "grad_norm": 0.9093173516423304, "learning_rate": 5.32714747456405e-06, "loss": 0.4814, "step": 8184 }, { "epoch": 0.67, "grad_norm": 0.8240847030191094, "learning_rate": 5.324820250193559e-06, "loss": 0.4586, "step": 8185 }, { "epoch": 0.67, "grad_norm": 0.9576069962900918, "learning_rate": 5.322493349808878e-06, "loss": 0.5164, "step": 8186 }, { "epoch": 0.67, "grad_norm": 0.8550129467805008, "learning_rate": 5.32016677357126e-06, "loss": 0.4515, "step": 8187 }, { "epoch": 0.67, "grad_norm": 0.989239559909075, "learning_rate": 5.3178405216419325e-06, "loss": 0.5694, "step": 8188 }, { "epoch": 0.67, "grad_norm": 0.8402449147196775, "learning_rate": 5.31551459418211e-06, "loss": 0.4815, "step": 8189 }, { "epoch": 0.67, "grad_norm": 0.929650535088433, "learning_rate": 5.313188991352964e-06, "loss": 0.4851, "step": 8190 }, { "epoch": 0.67, "grad_norm": 0.8608747172187374, "learning_rate": 5.310863713315666e-06, "loss": 0.5316, "step": 8191 }, { "epoch": 0.67, "grad_norm": 0.8601414518555403, "learning_rate": 5.308538760231352e-06, "loss": 0.48, "step": 8192 }, { "epoch": 0.67, "grad_norm": 0.8621843116643283, "learning_rate": 5.306214132261141e-06, "loss": 0.5108, "step": 8193 }, { "epoch": 0.67, "grad_norm": 0.9873625363606519, "learning_rate": 5.303889829566128e-06, "loss": 0.5105, "step": 8194 }, { "epoch": 0.67, "grad_norm": 0.9533788876320213, "learning_rate": 5.301565852307388e-06, "loss": 0.562, "step": 8195 }, { "epoch": 0.67, "grad_norm": 0.9941344908746977, "learning_rate": 5.299242200645959e-06, "loss": 0.5008, "step": 8196 }, { "epoch": 0.67, "grad_norm": 0.8709593447651172, "learning_rate": 5.296918874742882e-06, "loss": 0.3924, "step": 8197 }, { "epoch": 0.67, "grad_norm": 0.889954834129633, "learning_rate": 5.294595874759154e-06, "loss": 0.483, "step": 8198 }, { "epoch": 0.67, "grad_norm": 0.8876593394083251, "learning_rate": 5.292273200855758e-06, "loss": 0.4491, "step": 8199 }, { "epoch": 0.67, "grad_norm": 0.8524918627473157, "learning_rate": 5.2899508531936526e-06, "loss": 0.5124, "step": 8200 }, { "epoch": 0.67, "grad_norm": 0.9321934019395298, "learning_rate": 5.2876288319337785e-06, "loss": 0.5354, "step": 8201 }, { "epoch": 0.67, "grad_norm": 0.9706937675078717, "learning_rate": 5.285307137237039e-06, "loss": 0.5392, "step": 8202 }, { "epoch": 0.67, "grad_norm": 0.888202310462275, "learning_rate": 5.282985769264342e-06, "loss": 0.4573, "step": 8203 }, { "epoch": 0.67, "grad_norm": 0.9661150146788169, "learning_rate": 5.280664728176542e-06, "loss": 0.5915, "step": 8204 }, { "epoch": 0.67, "grad_norm": 0.8687295099552259, "learning_rate": 5.278344014134491e-06, "loss": 0.4291, "step": 8205 }, { "epoch": 0.67, "grad_norm": 0.9305137050651606, "learning_rate": 5.276023627299011e-06, "loss": 0.545, "step": 8206 }, { "epoch": 0.67, "grad_norm": 0.9249269297925938, "learning_rate": 5.273703567830908e-06, "loss": 0.5775, "step": 8207 }, { "epoch": 0.67, "grad_norm": 0.8719197817568556, "learning_rate": 5.271383835890947e-06, "loss": 0.5072, "step": 8208 }, { "epoch": 0.67, "grad_norm": 1.035278658434689, "learning_rate": 5.269064431639901e-06, "loss": 0.5533, "step": 8209 }, { "epoch": 0.67, "grad_norm": 0.8978940252138314, "learning_rate": 5.266745355238489e-06, "loss": 0.5206, "step": 8210 }, { "epoch": 0.67, "grad_norm": 0.9912583159987712, "learning_rate": 5.264426606847426e-06, "loss": 0.5126, "step": 8211 }, { "epoch": 0.67, "grad_norm": 0.9517846552457596, "learning_rate": 5.262108186627397e-06, "loss": 0.5587, "step": 8212 }, { "epoch": 0.67, "grad_norm": 0.8812396536972811, "learning_rate": 5.259790094739073e-06, "loss": 0.4878, "step": 8213 }, { "epoch": 0.67, "grad_norm": 0.9195417195160543, "learning_rate": 5.257472331343083e-06, "loss": 0.5044, "step": 8214 }, { "epoch": 0.67, "grad_norm": 0.8872421603933094, "learning_rate": 5.2551548966000635e-06, "loss": 0.5249, "step": 8215 }, { "epoch": 0.67, "grad_norm": 0.9310784935184504, "learning_rate": 5.252837790670595e-06, "loss": 0.5033, "step": 8216 }, { "epoch": 0.67, "grad_norm": 0.997219302064515, "learning_rate": 5.250521013715257e-06, "loss": 0.5841, "step": 8217 }, { "epoch": 0.67, "grad_norm": 0.9227493754424385, "learning_rate": 5.2482045658946e-06, "loss": 0.5427, "step": 8218 }, { "epoch": 0.67, "grad_norm": 0.8252507468536766, "learning_rate": 5.245888447369157e-06, "loss": 0.4472, "step": 8219 }, { "epoch": 0.67, "grad_norm": 0.8750538145933262, "learning_rate": 5.243572658299418e-06, "loss": 0.5236, "step": 8220 }, { "epoch": 0.67, "grad_norm": 0.8899187586334513, "learning_rate": 5.241257198845884e-06, "loss": 0.497, "step": 8221 }, { "epoch": 0.67, "grad_norm": 0.9948214678414993, "learning_rate": 5.238942069169e-06, "loss": 0.5945, "step": 8222 }, { "epoch": 0.67, "grad_norm": 0.9692887420393211, "learning_rate": 5.236627269429208e-06, "loss": 0.535, "step": 8223 }, { "epoch": 0.67, "grad_norm": 0.990515988397188, "learning_rate": 5.234312799786921e-06, "loss": 0.4657, "step": 8224 }, { "epoch": 0.67, "grad_norm": 0.9534115892704664, "learning_rate": 5.231998660402535e-06, "loss": 0.5732, "step": 8225 }, { "epoch": 0.67, "grad_norm": 1.0066116250736143, "learning_rate": 5.229684851436403e-06, "loss": 0.4618, "step": 8226 }, { "epoch": 0.67, "grad_norm": 1.0698888040079477, "learning_rate": 5.2273713730488886e-06, "loss": 0.4996, "step": 8227 }, { "epoch": 0.67, "grad_norm": 0.9414620310160213, "learning_rate": 5.2250582254003016e-06, "loss": 0.4974, "step": 8228 }, { "epoch": 0.67, "grad_norm": 0.9323123866798388, "learning_rate": 5.222745408650942e-06, "loss": 0.5358, "step": 8229 }, { "epoch": 0.67, "grad_norm": 0.9696890580161104, "learning_rate": 5.220432922961089e-06, "loss": 0.552, "step": 8230 }, { "epoch": 0.67, "grad_norm": 0.7697669490610657, "learning_rate": 5.218120768491e-06, "loss": 0.3773, "step": 8231 }, { "epoch": 0.67, "grad_norm": 0.822697108368092, "learning_rate": 5.215808945400891e-06, "loss": 0.5099, "step": 8232 }, { "epoch": 0.67, "grad_norm": 0.9367819561071206, "learning_rate": 5.213497453850986e-06, "loss": 0.4753, "step": 8233 }, { "epoch": 0.67, "grad_norm": 0.9265513771122531, "learning_rate": 5.21118629400146e-06, "loss": 0.4978, "step": 8234 }, { "epoch": 0.67, "grad_norm": 0.8839751231544198, "learning_rate": 5.208875466012475e-06, "loss": 0.4512, "step": 8235 }, { "epoch": 0.67, "grad_norm": 0.853117385808319, "learning_rate": 5.20656497004417e-06, "loss": 0.4687, "step": 8236 }, { "epoch": 0.67, "grad_norm": 0.8691450740963526, "learning_rate": 5.2042548062566654e-06, "loss": 0.4995, "step": 8237 }, { "epoch": 0.67, "grad_norm": 0.9239210980343396, "learning_rate": 5.2019449748100405e-06, "loss": 0.4635, "step": 8238 }, { "epoch": 0.67, "grad_norm": 0.8869835721887451, "learning_rate": 5.19963547586438e-06, "loss": 0.4768, "step": 8239 }, { "epoch": 0.67, "grad_norm": 0.8739268679710483, "learning_rate": 5.197326309579721e-06, "loss": 0.5038, "step": 8240 }, { "epoch": 0.67, "grad_norm": 0.7983910758111474, "learning_rate": 5.195017476116089e-06, "loss": 0.4672, "step": 8241 }, { "epoch": 0.67, "grad_norm": 0.9395625394613684, "learning_rate": 5.192708975633483e-06, "loss": 0.5245, "step": 8242 }, { "epoch": 0.67, "grad_norm": 0.8647830622368983, "learning_rate": 5.190400808291884e-06, "loss": 0.4765, "step": 8243 }, { "epoch": 0.67, "grad_norm": 0.8477915105418694, "learning_rate": 5.1880929742512355e-06, "loss": 0.4986, "step": 8244 }, { "epoch": 0.67, "grad_norm": 0.9637514843118539, "learning_rate": 5.185785473671484e-06, "loss": 0.5349, "step": 8245 }, { "epoch": 0.67, "grad_norm": 0.8844263857942123, "learning_rate": 5.183478306712525e-06, "loss": 0.4652, "step": 8246 }, { "epoch": 0.67, "grad_norm": 0.9204972924084194, "learning_rate": 5.181171473534248e-06, "loss": 0.5243, "step": 8247 }, { "epoch": 0.67, "grad_norm": 0.987097280515057, "learning_rate": 5.178864974296511e-06, "loss": 0.5505, "step": 8248 }, { "epoch": 0.67, "grad_norm": 0.8715329192132406, "learning_rate": 5.176558809159161e-06, "loss": 0.473, "step": 8249 }, { "epoch": 0.67, "grad_norm": 0.7745699602973506, "learning_rate": 5.174252978281999e-06, "loss": 0.449, "step": 8250 }, { "epoch": 0.67, "grad_norm": 0.9377200872321961, "learning_rate": 5.171947481824832e-06, "loss": 0.4785, "step": 8251 }, { "epoch": 0.67, "grad_norm": 1.0250797101577, "learning_rate": 5.16964231994742e-06, "loss": 0.527, "step": 8252 }, { "epoch": 0.67, "grad_norm": 0.9173853321475266, "learning_rate": 5.16733749280951e-06, "loss": 0.5303, "step": 8253 }, { "epoch": 0.67, "grad_norm": 0.8971832660446327, "learning_rate": 5.165033000570825e-06, "loss": 0.5138, "step": 8254 }, { "epoch": 0.67, "grad_norm": 0.9360969938703607, "learning_rate": 5.162728843391067e-06, "loss": 0.532, "step": 8255 }, { "epoch": 0.67, "grad_norm": 0.8989978474119811, "learning_rate": 5.160425021429904e-06, "loss": 0.4812, "step": 8256 }, { "epoch": 0.67, "grad_norm": 1.0203611815836584, "learning_rate": 5.158121534847e-06, "loss": 0.4886, "step": 8257 }, { "epoch": 0.67, "grad_norm": 0.9387756661800578, "learning_rate": 5.155818383801976e-06, "loss": 0.4807, "step": 8258 }, { "epoch": 0.67, "grad_norm": 0.9140901137656047, "learning_rate": 5.153515568454441e-06, "loss": 0.4608, "step": 8259 }, { "epoch": 0.67, "grad_norm": 0.8470184521435462, "learning_rate": 5.1512130889639785e-06, "loss": 0.4343, "step": 8260 }, { "epoch": 0.67, "grad_norm": 0.9058316582478934, "learning_rate": 5.148910945490152e-06, "loss": 0.5013, "step": 8261 }, { "epoch": 0.67, "grad_norm": 0.9659521040360318, "learning_rate": 5.1466091381924864e-06, "loss": 0.5635, "step": 8262 }, { "epoch": 0.67, "grad_norm": 0.8385312611403691, "learning_rate": 5.144307667230511e-06, "loss": 0.4924, "step": 8263 }, { "epoch": 0.67, "grad_norm": 0.8757450526978969, "learning_rate": 5.142006532763698e-06, "loss": 0.4793, "step": 8264 }, { "epoch": 0.67, "grad_norm": 0.9538110402970061, "learning_rate": 5.139705734951532e-06, "loss": 0.5155, "step": 8265 }, { "epoch": 0.67, "grad_norm": 0.9180442618190727, "learning_rate": 5.137405273953443e-06, "loss": 0.4495, "step": 8266 }, { "epoch": 0.67, "grad_norm": 0.9031867790761441, "learning_rate": 5.1351051499288565e-06, "loss": 0.462, "step": 8267 }, { "epoch": 0.67, "grad_norm": 0.8721581631393269, "learning_rate": 5.1328053630371656e-06, "loss": 0.4448, "step": 8268 }, { "epoch": 0.67, "grad_norm": 0.9379064954460689, "learning_rate": 5.130505913437751e-06, "loss": 0.4893, "step": 8269 }, { "epoch": 0.67, "grad_norm": 0.914567607388558, "learning_rate": 5.12820680128995e-06, "loss": 0.4544, "step": 8270 }, { "epoch": 0.67, "grad_norm": 0.9786738451139384, "learning_rate": 5.125908026753105e-06, "loss": 0.5393, "step": 8271 }, { "epoch": 0.67, "grad_norm": 0.9384087179971159, "learning_rate": 5.123609589986505e-06, "loss": 0.5539, "step": 8272 }, { "epoch": 0.67, "grad_norm": 0.874501938753754, "learning_rate": 5.121311491149437e-06, "loss": 0.4775, "step": 8273 }, { "epoch": 0.67, "grad_norm": 0.9130947014954597, "learning_rate": 5.119013730401152e-06, "loss": 0.4939, "step": 8274 }, { "epoch": 0.67, "grad_norm": 0.8959183379839931, "learning_rate": 5.116716307900893e-06, "loss": 0.4896, "step": 8275 }, { "epoch": 0.67, "grad_norm": 0.8765212972837662, "learning_rate": 5.114419223807854e-06, "loss": 0.4999, "step": 8276 }, { "epoch": 0.67, "grad_norm": 0.9158320029828727, "learning_rate": 5.112122478281236e-06, "loss": 0.4995, "step": 8277 }, { "epoch": 0.67, "grad_norm": 0.9464418970680727, "learning_rate": 5.109826071480191e-06, "loss": 0.4647, "step": 8278 }, { "epoch": 0.67, "grad_norm": 0.9579586965573048, "learning_rate": 5.107530003563862e-06, "loss": 0.4866, "step": 8279 }, { "epoch": 0.67, "grad_norm": 0.8705674475250073, "learning_rate": 5.105234274691364e-06, "loss": 0.4887, "step": 8280 }, { "epoch": 0.67, "grad_norm": 0.8972048362169255, "learning_rate": 5.1029388850217935e-06, "loss": 0.4719, "step": 8281 }, { "epoch": 0.67, "grad_norm": 0.9114081984949848, "learning_rate": 5.100643834714206e-06, "loss": 0.461, "step": 8282 }, { "epoch": 0.67, "grad_norm": 1.0612571737985634, "learning_rate": 5.098349123927664e-06, "loss": 0.5402, "step": 8283 }, { "epoch": 0.67, "grad_norm": 0.9419682913709696, "learning_rate": 5.096054752821174e-06, "loss": 0.4755, "step": 8284 }, { "epoch": 0.67, "grad_norm": 0.9728192014206865, "learning_rate": 5.093760721553742e-06, "loss": 0.4933, "step": 8285 }, { "epoch": 0.67, "grad_norm": 0.9092408942177812, "learning_rate": 5.091467030284339e-06, "loss": 0.5053, "step": 8286 }, { "epoch": 0.67, "grad_norm": 0.9806559724074531, "learning_rate": 5.089173679171922e-06, "loss": 0.5024, "step": 8287 }, { "epoch": 0.67, "grad_norm": 0.8201955970089637, "learning_rate": 5.086880668375404e-06, "loss": 0.44, "step": 8288 }, { "epoch": 0.67, "grad_norm": 0.8804686334163445, "learning_rate": 5.084587998053706e-06, "loss": 0.4512, "step": 8289 }, { "epoch": 0.67, "grad_norm": 0.9055907430616575, "learning_rate": 5.082295668365695e-06, "loss": 0.5823, "step": 8290 }, { "epoch": 0.67, "grad_norm": 0.9471393593487665, "learning_rate": 5.080003679470234e-06, "loss": 0.5007, "step": 8291 }, { "epoch": 0.67, "grad_norm": 0.9592584808386907, "learning_rate": 5.077712031526153e-06, "loss": 0.4993, "step": 8292 }, { "epoch": 0.67, "grad_norm": 0.969919058632812, "learning_rate": 5.075420724692266e-06, "loss": 0.5404, "step": 8293 }, { "epoch": 0.67, "grad_norm": 0.9528343829077746, "learning_rate": 5.073129759127346e-06, "loss": 0.4887, "step": 8294 }, { "epoch": 0.67, "grad_norm": 0.9123961547566919, "learning_rate": 5.070839134990173e-06, "loss": 0.5215, "step": 8295 }, { "epoch": 0.67, "grad_norm": 0.9159772154630276, "learning_rate": 5.0685488524394725e-06, "loss": 0.5017, "step": 8296 }, { "epoch": 0.67, "grad_norm": 1.0016846413150464, "learning_rate": 5.06625891163396e-06, "loss": 0.5566, "step": 8297 }, { "epoch": 0.67, "grad_norm": 0.8644745935064114, "learning_rate": 5.063969312732331e-06, "loss": 0.484, "step": 8298 }, { "epoch": 0.67, "grad_norm": 0.9849624018056626, "learning_rate": 5.0616800558932525e-06, "loss": 0.5766, "step": 8299 }, { "epoch": 0.67, "grad_norm": 0.740789537902068, "learning_rate": 5.059391141275358e-06, "loss": 0.4458, "step": 8300 }, { "epoch": 0.67, "grad_norm": 0.9474555248146016, "learning_rate": 5.057102569037284e-06, "loss": 0.4613, "step": 8301 }, { "epoch": 0.67, "grad_norm": 0.9662314646834963, "learning_rate": 5.054814339337613e-06, "loss": 0.5123, "step": 8302 }, { "epoch": 0.67, "grad_norm": 0.8568849751085164, "learning_rate": 5.052526452334923e-06, "loss": 0.4924, "step": 8303 }, { "epoch": 0.67, "grad_norm": 0.8870863599394765, "learning_rate": 5.050238908187759e-06, "loss": 0.5371, "step": 8304 }, { "epoch": 0.67, "grad_norm": 0.9210411495904444, "learning_rate": 5.047951707054655e-06, "loss": 0.5217, "step": 8305 }, { "epoch": 0.68, "grad_norm": 0.8181478158838038, "learning_rate": 5.0456648490940966e-06, "loss": 0.4833, "step": 8306 }, { "epoch": 0.68, "grad_norm": 1.0159089504437258, "learning_rate": 5.043378334464576e-06, "loss": 0.5152, "step": 8307 }, { "epoch": 0.68, "grad_norm": 0.9093263342954216, "learning_rate": 5.041092163324537e-06, "loss": 0.4997, "step": 8308 }, { "epoch": 0.68, "grad_norm": 0.7751798119901546, "learning_rate": 5.038806335832414e-06, "loss": 0.4602, "step": 8309 }, { "epoch": 0.68, "grad_norm": 0.94501594818902, "learning_rate": 5.036520852146609e-06, "loss": 0.481, "step": 8310 }, { "epoch": 0.68, "grad_norm": 0.8825734086585589, "learning_rate": 5.034235712425508e-06, "loss": 0.4882, "step": 8311 }, { "epoch": 0.68, "grad_norm": 0.9411659546887424, "learning_rate": 5.031950916827467e-06, "loss": 0.5628, "step": 8312 }, { "epoch": 0.68, "grad_norm": 0.7861532836500162, "learning_rate": 5.029666465510825e-06, "loss": 0.4508, "step": 8313 }, { "epoch": 0.68, "grad_norm": 0.8417521207534142, "learning_rate": 5.027382358633884e-06, "loss": 0.4638, "step": 8314 }, { "epoch": 0.68, "grad_norm": 0.9312435206636756, "learning_rate": 5.0250985963549356e-06, "loss": 0.5199, "step": 8315 }, { "epoch": 0.68, "grad_norm": 0.8892045252922159, "learning_rate": 5.0228151788322414e-06, "loss": 0.454, "step": 8316 }, { "epoch": 0.68, "grad_norm": 0.8297226180785294, "learning_rate": 5.020532106224041e-06, "loss": 0.4403, "step": 8317 }, { "epoch": 0.68, "grad_norm": 0.8955990912624906, "learning_rate": 5.018249378688547e-06, "loss": 0.4802, "step": 8318 }, { "epoch": 0.68, "grad_norm": 0.8838311608237159, "learning_rate": 5.0159669963839575e-06, "loss": 0.4684, "step": 8319 }, { "epoch": 0.68, "grad_norm": 0.9126526422811626, "learning_rate": 5.01368495946843e-06, "loss": 0.4477, "step": 8320 }, { "epoch": 0.68, "grad_norm": 0.9647439971830661, "learning_rate": 5.011403268100112e-06, "loss": 0.5284, "step": 8321 }, { "epoch": 0.68, "grad_norm": 0.9468468051520715, "learning_rate": 5.009121922437124e-06, "loss": 0.492, "step": 8322 }, { "epoch": 0.68, "grad_norm": 0.9635052588870198, "learning_rate": 5.006840922637559e-06, "loss": 0.5155, "step": 8323 }, { "epoch": 0.68, "grad_norm": 0.858159908275672, "learning_rate": 5.004560268859488e-06, "loss": 0.4753, "step": 8324 }, { "epoch": 0.68, "grad_norm": 0.8783926198443746, "learning_rate": 5.002279961260965e-06, "loss": 0.463, "step": 8325 }, { "epoch": 0.68, "grad_norm": 0.9444190542938617, "learning_rate": 5.000000000000003e-06, "loss": 0.5454, "step": 8326 }, { "epoch": 0.68, "grad_norm": 0.8049607873816147, "learning_rate": 4.9977203852346054e-06, "loss": 0.4467, "step": 8327 }, { "epoch": 0.68, "grad_norm": 0.8703985735991734, "learning_rate": 4.995441117122749e-06, "loss": 0.4628, "step": 8328 }, { "epoch": 0.68, "grad_norm": 0.8775384023172903, "learning_rate": 4.993162195822383e-06, "loss": 0.5367, "step": 8329 }, { "epoch": 0.68, "grad_norm": 0.9282200217981799, "learning_rate": 4.990883621491437e-06, "loss": 0.5325, "step": 8330 }, { "epoch": 0.68, "grad_norm": 0.9705331260749077, "learning_rate": 4.988605394287813e-06, "loss": 0.5236, "step": 8331 }, { "epoch": 0.68, "grad_norm": 0.8734560259788998, "learning_rate": 4.986327514369393e-06, "loss": 0.5211, "step": 8332 }, { "epoch": 0.68, "grad_norm": 0.8882083363563135, "learning_rate": 4.9840499818940255e-06, "loss": 0.478, "step": 8333 }, { "epoch": 0.68, "grad_norm": 0.9391766553099113, "learning_rate": 4.981772797019546e-06, "loss": 0.4778, "step": 8334 }, { "epoch": 0.68, "grad_norm": 0.8743280481822453, "learning_rate": 4.979495959903759e-06, "loss": 0.5005, "step": 8335 }, { "epoch": 0.68, "grad_norm": 1.0280600118997267, "learning_rate": 4.977219470704451e-06, "loss": 0.5358, "step": 8336 }, { "epoch": 0.68, "grad_norm": 0.8990860486061055, "learning_rate": 4.974943329579377e-06, "loss": 0.4788, "step": 8337 }, { "epoch": 0.68, "grad_norm": 0.8991675776632321, "learning_rate": 4.972667536686276e-06, "loss": 0.4737, "step": 8338 }, { "epoch": 0.68, "grad_norm": 1.0160340949846591, "learning_rate": 4.970392092182853e-06, "loss": 0.5754, "step": 8339 }, { "epoch": 0.68, "grad_norm": 0.8822072783767629, "learning_rate": 4.9681169962267975e-06, "loss": 0.4731, "step": 8340 }, { "epoch": 0.68, "grad_norm": 0.8928340994522213, "learning_rate": 4.96584224897577e-06, "loss": 0.5183, "step": 8341 }, { "epoch": 0.68, "grad_norm": 0.8628952774436416, "learning_rate": 4.963567850587408e-06, "loss": 0.483, "step": 8342 }, { "epoch": 0.68, "grad_norm": 0.894242138765183, "learning_rate": 4.961293801219328e-06, "loss": 0.5149, "step": 8343 }, { "epoch": 0.68, "grad_norm": 0.9176497901536859, "learning_rate": 4.959020101029122e-06, "loss": 0.5161, "step": 8344 }, { "epoch": 0.68, "grad_norm": 0.9679310502922771, "learning_rate": 4.956746750174344e-06, "loss": 0.5017, "step": 8345 }, { "epoch": 0.68, "grad_norm": 0.9750828264808324, "learning_rate": 4.954473748812551e-06, "loss": 0.4805, "step": 8346 }, { "epoch": 0.68, "grad_norm": 0.8381740441734827, "learning_rate": 4.9522010971012465e-06, "loss": 0.4963, "step": 8347 }, { "epoch": 0.68, "grad_norm": 0.8706129568474391, "learning_rate": 4.949928795197931e-06, "loss": 0.5171, "step": 8348 }, { "epoch": 0.68, "grad_norm": 0.8983833831441445, "learning_rate": 4.947656843260068e-06, "loss": 0.5004, "step": 8349 }, { "epoch": 0.68, "grad_norm": 0.893544769279935, "learning_rate": 4.9453852414451085e-06, "loss": 0.454, "step": 8350 }, { "epoch": 0.68, "grad_norm": 0.9605367288494783, "learning_rate": 4.943113989910462e-06, "loss": 0.5191, "step": 8351 }, { "epoch": 0.68, "grad_norm": 0.8768862838630231, "learning_rate": 4.940843088813537e-06, "loss": 0.4355, "step": 8352 }, { "epoch": 0.68, "grad_norm": 0.9872960457295382, "learning_rate": 4.938572538311696e-06, "loss": 0.4885, "step": 8353 }, { "epoch": 0.68, "grad_norm": 0.9488391552128462, "learning_rate": 4.936302338562288e-06, "loss": 0.5248, "step": 8354 }, { "epoch": 0.68, "grad_norm": 0.8430325210263615, "learning_rate": 4.934032489722637e-06, "loss": 0.4987, "step": 8355 }, { "epoch": 0.68, "grad_norm": 0.9887525415033993, "learning_rate": 4.931762991950043e-06, "loss": 0.5418, "step": 8356 }, { "epoch": 0.68, "grad_norm": 0.8868405672461215, "learning_rate": 4.929493845401772e-06, "loss": 0.512, "step": 8357 }, { "epoch": 0.68, "grad_norm": 0.9305561141732096, "learning_rate": 4.927225050235087e-06, "loss": 0.4856, "step": 8358 }, { "epoch": 0.68, "grad_norm": 0.9093606578240055, "learning_rate": 4.924956606607203e-06, "loss": 0.4728, "step": 8359 }, { "epoch": 0.68, "grad_norm": 0.9255114452581292, "learning_rate": 4.922688514675325e-06, "loss": 0.5543, "step": 8360 }, { "epoch": 0.68, "grad_norm": 0.9108840803155676, "learning_rate": 4.9204207745966285e-06, "loss": 0.4961, "step": 8361 }, { "epoch": 0.68, "grad_norm": 0.9470093265948771, "learning_rate": 4.918153386528271e-06, "loss": 0.5079, "step": 8362 }, { "epoch": 0.68, "grad_norm": 0.9337221774130737, "learning_rate": 4.915886350627368e-06, "loss": 0.5819, "step": 8363 }, { "epoch": 0.68, "grad_norm": 0.8493640279177445, "learning_rate": 4.91361966705104e-06, "loss": 0.4399, "step": 8364 }, { "epoch": 0.68, "grad_norm": 0.896803616770482, "learning_rate": 4.911353335956353e-06, "loss": 0.4911, "step": 8365 }, { "epoch": 0.68, "grad_norm": 0.9681357654854811, "learning_rate": 4.909087357500366e-06, "loss": 0.5298, "step": 8366 }, { "epoch": 0.68, "grad_norm": 0.9894347696243155, "learning_rate": 4.906821731840109e-06, "loss": 0.5156, "step": 8367 }, { "epoch": 0.68, "grad_norm": 1.0480262385449974, "learning_rate": 4.904556459132593e-06, "loss": 0.5048, "step": 8368 }, { "epoch": 0.68, "grad_norm": 0.9904266056570253, "learning_rate": 4.902291539534787e-06, "loss": 0.5019, "step": 8369 }, { "epoch": 0.68, "grad_norm": 0.8826399168957701, "learning_rate": 4.900026973203663e-06, "loss": 0.4766, "step": 8370 }, { "epoch": 0.68, "grad_norm": 0.9321355444330982, "learning_rate": 4.897762760296143e-06, "loss": 0.4984, "step": 8371 }, { "epoch": 0.68, "grad_norm": 0.9711002397447057, "learning_rate": 4.895498900969138e-06, "loss": 0.5301, "step": 8372 }, { "epoch": 0.68, "grad_norm": 0.8176926757979365, "learning_rate": 4.893235395379531e-06, "loss": 0.4271, "step": 8373 }, { "epoch": 0.68, "grad_norm": 0.9430190210323978, "learning_rate": 4.890972243684185e-06, "loss": 0.4597, "step": 8374 }, { "epoch": 0.68, "grad_norm": 0.8833110729017636, "learning_rate": 4.888709446039923e-06, "loss": 0.4718, "step": 8375 }, { "epoch": 0.68, "grad_norm": 0.93005358908339, "learning_rate": 4.8864470026035715e-06, "loss": 0.5218, "step": 8376 }, { "epoch": 0.68, "grad_norm": 0.8287871369359837, "learning_rate": 4.8841849135319015e-06, "loss": 0.4328, "step": 8377 }, { "epoch": 0.68, "grad_norm": 0.9779055683662576, "learning_rate": 4.881923178981681e-06, "loss": 0.4185, "step": 8378 }, { "epoch": 0.68, "grad_norm": 1.0164807398807587, "learning_rate": 4.879661799109644e-06, "loss": 0.4969, "step": 8379 }, { "epoch": 0.68, "grad_norm": 0.9724315338703285, "learning_rate": 4.877400774072506e-06, "loss": 0.5074, "step": 8380 }, { "epoch": 0.68, "grad_norm": 1.039312544558339, "learning_rate": 4.875140104026943e-06, "loss": 0.5116, "step": 8381 }, { "epoch": 0.68, "grad_norm": 0.9431211589333425, "learning_rate": 4.872879789129632e-06, "loss": 0.5218, "step": 8382 }, { "epoch": 0.68, "grad_norm": 0.8790106929243705, "learning_rate": 4.870619829537201e-06, "loss": 0.4876, "step": 8383 }, { "epoch": 0.68, "grad_norm": 1.0114065824703244, "learning_rate": 4.8683602254062665e-06, "loss": 0.5155, "step": 8384 }, { "epoch": 0.68, "grad_norm": 0.8819030896745794, "learning_rate": 4.866100976893416e-06, "loss": 0.5177, "step": 8385 }, { "epoch": 0.68, "grad_norm": 0.9055194541725219, "learning_rate": 4.863842084155217e-06, "loss": 0.4772, "step": 8386 }, { "epoch": 0.68, "grad_norm": 0.8911414355791749, "learning_rate": 4.8615835473482e-06, "loss": 0.5097, "step": 8387 }, { "epoch": 0.68, "grad_norm": 0.8504547122828845, "learning_rate": 4.859325366628892e-06, "loss": 0.4843, "step": 8388 }, { "epoch": 0.68, "grad_norm": 0.8963887036025582, "learning_rate": 4.8570675421537685e-06, "loss": 0.5021, "step": 8389 }, { "epoch": 0.68, "grad_norm": 0.9273190455880552, "learning_rate": 4.854810074079311e-06, "loss": 0.5287, "step": 8390 }, { "epoch": 0.68, "grad_norm": 0.8755536442112973, "learning_rate": 4.852552962561946e-06, "loss": 0.5171, "step": 8391 }, { "epoch": 0.68, "grad_norm": 0.8631816572604099, "learning_rate": 4.8502962077580965e-06, "loss": 0.4588, "step": 8392 }, { "epoch": 0.68, "grad_norm": 0.7915483046368798, "learning_rate": 4.848039809824151e-06, "loss": 0.4609, "step": 8393 }, { "epoch": 0.68, "grad_norm": 0.8926376451506014, "learning_rate": 4.845783768916482e-06, "loss": 0.5605, "step": 8394 }, { "epoch": 0.68, "grad_norm": 0.8692178271144266, "learning_rate": 4.843528085191418e-06, "loss": 0.4615, "step": 8395 }, { "epoch": 0.68, "grad_norm": 0.8715520588557414, "learning_rate": 4.841272758805291e-06, "loss": 0.4289, "step": 8396 }, { "epoch": 0.68, "grad_norm": 0.8749040524844592, "learning_rate": 4.839017789914382e-06, "loss": 0.4879, "step": 8397 }, { "epoch": 0.68, "grad_norm": 0.8892874514883294, "learning_rate": 4.836763178674963e-06, "loss": 0.5299, "step": 8398 }, { "epoch": 0.68, "grad_norm": 0.9678418266925191, "learning_rate": 4.8345089252432765e-06, "loss": 0.4949, "step": 8399 }, { "epoch": 0.68, "grad_norm": 1.0388938109902268, "learning_rate": 4.832255029775542e-06, "loss": 0.5974, "step": 8400 }, { "epoch": 0.68, "grad_norm": 0.883887908245481, "learning_rate": 4.830001492427943e-06, "loss": 0.4758, "step": 8401 }, { "epoch": 0.68, "grad_norm": 0.8568929953037968, "learning_rate": 4.827748313356664e-06, "loss": 0.4778, "step": 8402 }, { "epoch": 0.68, "grad_norm": 0.84966750040475, "learning_rate": 4.825495492717833e-06, "loss": 0.433, "step": 8403 }, { "epoch": 0.68, "grad_norm": 0.8802543271044627, "learning_rate": 4.823243030667576e-06, "loss": 0.5092, "step": 8404 }, { "epoch": 0.68, "grad_norm": 0.8394079105908062, "learning_rate": 4.8209909273619845e-06, "loss": 0.448, "step": 8405 }, { "epoch": 0.68, "grad_norm": 0.9688607104012111, "learning_rate": 4.818739182957131e-06, "loss": 0.5156, "step": 8406 }, { "epoch": 0.68, "grad_norm": 0.9174760388846913, "learning_rate": 4.816487797609051e-06, "loss": 0.5143, "step": 8407 }, { "epoch": 0.68, "grad_norm": 0.8252282392609436, "learning_rate": 4.814236771473774e-06, "loss": 0.4832, "step": 8408 }, { "epoch": 0.68, "grad_norm": 0.9581235332847905, "learning_rate": 4.811986104707288e-06, "loss": 0.4909, "step": 8409 }, { "epoch": 0.68, "grad_norm": 0.9153656263449683, "learning_rate": 4.8097357974655615e-06, "loss": 0.5051, "step": 8410 }, { "epoch": 0.68, "grad_norm": 0.8799875134221885, "learning_rate": 4.8074858499045405e-06, "loss": 0.4633, "step": 8411 }, { "epoch": 0.68, "grad_norm": 0.9366573540901902, "learning_rate": 4.8052362621801484e-06, "loss": 0.5065, "step": 8412 }, { "epoch": 0.68, "grad_norm": 0.8184587541847153, "learning_rate": 4.802987034448267e-06, "loss": 0.4333, "step": 8413 }, { "epoch": 0.68, "grad_norm": 0.8854510262214775, "learning_rate": 4.800738166864784e-06, "loss": 0.5147, "step": 8414 }, { "epoch": 0.68, "grad_norm": 0.8934024712409966, "learning_rate": 4.798489659585529e-06, "loss": 0.4496, "step": 8415 }, { "epoch": 0.68, "grad_norm": 0.8190161995760137, "learning_rate": 4.7962415127663265e-06, "loss": 0.4901, "step": 8416 }, { "epoch": 0.68, "grad_norm": 0.9948716753673862, "learning_rate": 4.7939937265629725e-06, "loss": 0.5582, "step": 8417 }, { "epoch": 0.68, "grad_norm": 0.9359470085761421, "learning_rate": 4.79174630113124e-06, "loss": 0.5093, "step": 8418 }, { "epoch": 0.68, "grad_norm": 0.8856683904774265, "learning_rate": 4.789499236626859e-06, "loss": 0.4876, "step": 8419 }, { "epoch": 0.68, "grad_norm": 0.8698069429816057, "learning_rate": 4.7872525332055685e-06, "loss": 0.4606, "step": 8420 }, { "epoch": 0.68, "grad_norm": 0.951276069893926, "learning_rate": 4.7850061910230495e-06, "loss": 0.5241, "step": 8421 }, { "epoch": 0.68, "grad_norm": 0.9954898094083914, "learning_rate": 4.782760210234976e-06, "loss": 0.5543, "step": 8422 }, { "epoch": 0.68, "grad_norm": 0.8824398322853885, "learning_rate": 4.780514590996992e-06, "loss": 0.4602, "step": 8423 }, { "epoch": 0.68, "grad_norm": 0.8618284654301703, "learning_rate": 4.778269333464721e-06, "loss": 0.4473, "step": 8424 }, { "epoch": 0.68, "grad_norm": 0.8492717540748159, "learning_rate": 4.776024437793746e-06, "loss": 0.4279, "step": 8425 }, { "epoch": 0.68, "grad_norm": 0.9246816255673895, "learning_rate": 4.773779904139652e-06, "loss": 0.4564, "step": 8426 }, { "epoch": 0.68, "grad_norm": 0.919388251924666, "learning_rate": 4.7715357326579705e-06, "loss": 0.4705, "step": 8427 }, { "epoch": 0.68, "grad_norm": 0.8265756691259849, "learning_rate": 4.769291923504226e-06, "loss": 0.4794, "step": 8428 }, { "epoch": 0.69, "grad_norm": 0.8666817154876048, "learning_rate": 4.76704847683391e-06, "loss": 0.4728, "step": 8429 }, { "epoch": 0.69, "grad_norm": 0.9208247524740416, "learning_rate": 4.764805392802497e-06, "loss": 0.4782, "step": 8430 }, { "epoch": 0.69, "grad_norm": 0.8166297428420718, "learning_rate": 4.7625626715654205e-06, "loss": 0.5053, "step": 8431 }, { "epoch": 0.69, "grad_norm": 0.9197309838095528, "learning_rate": 4.760320313278112e-06, "loss": 0.4857, "step": 8432 }, { "epoch": 0.69, "grad_norm": 0.8730053148337936, "learning_rate": 4.758078318095953e-06, "loss": 0.5088, "step": 8433 }, { "epoch": 0.69, "grad_norm": 0.9183706154217698, "learning_rate": 4.755836686174319e-06, "loss": 0.4954, "step": 8434 }, { "epoch": 0.69, "grad_norm": 0.7651626575366501, "learning_rate": 4.753595417668551e-06, "loss": 0.4731, "step": 8435 }, { "epoch": 0.69, "grad_norm": 0.8658156966163371, "learning_rate": 4.75135451273397e-06, "loss": 0.4705, "step": 8436 }, { "epoch": 0.69, "grad_norm": 0.8977597260123354, "learning_rate": 4.749113971525858e-06, "loss": 0.515, "step": 8437 }, { "epoch": 0.69, "grad_norm": 1.0343449984371662, "learning_rate": 4.746873794199498e-06, "loss": 0.569, "step": 8438 }, { "epoch": 0.69, "grad_norm": 0.8206778279220518, "learning_rate": 4.744633980910122e-06, "loss": 0.4751, "step": 8439 }, { "epoch": 0.69, "grad_norm": 0.9921478178561506, "learning_rate": 4.742394531812949e-06, "loss": 0.5699, "step": 8440 }, { "epoch": 0.69, "grad_norm": 0.9236598458945292, "learning_rate": 4.74015544706317e-06, "loss": 0.5102, "step": 8441 }, { "epoch": 0.69, "grad_norm": 0.9029506730016744, "learning_rate": 4.737916726815958e-06, "loss": 0.4348, "step": 8442 }, { "epoch": 0.69, "grad_norm": 0.8466552613302887, "learning_rate": 4.7356783712264405e-06, "loss": 0.503, "step": 8443 }, { "epoch": 0.69, "grad_norm": 0.8989842391438065, "learning_rate": 4.733440380449752e-06, "loss": 0.4691, "step": 8444 }, { "epoch": 0.69, "grad_norm": 0.9437254308309423, "learning_rate": 4.731202754640969e-06, "loss": 0.4926, "step": 8445 }, { "epoch": 0.69, "grad_norm": 0.8748975753828981, "learning_rate": 4.728965493955162e-06, "loss": 0.4764, "step": 8446 }, { "epoch": 0.69, "grad_norm": 0.8641293354033068, "learning_rate": 4.72672859854737e-06, "loss": 0.4868, "step": 8447 }, { "epoch": 0.69, "grad_norm": 0.9008577718286481, "learning_rate": 4.724492068572614e-06, "loss": 0.5056, "step": 8448 }, { "epoch": 0.69, "grad_norm": 0.947620089849962, "learning_rate": 4.722255904185869e-06, "loss": 0.5184, "step": 8449 }, { "epoch": 0.69, "grad_norm": 0.9941821471275278, "learning_rate": 4.720020105542117e-06, "loss": 0.5347, "step": 8450 }, { "epoch": 0.69, "grad_norm": 0.8432679680464656, "learning_rate": 4.717784672796285e-06, "loss": 0.5014, "step": 8451 }, { "epoch": 0.69, "grad_norm": 0.8928988386363494, "learning_rate": 4.715549606103289e-06, "loss": 0.4781, "step": 8452 }, { "epoch": 0.69, "grad_norm": 1.0339335317195717, "learning_rate": 4.7133149056180185e-06, "loss": 0.5745, "step": 8453 }, { "epoch": 0.69, "grad_norm": 0.8003162042838572, "learning_rate": 4.7110805714953385e-06, "loss": 0.4694, "step": 8454 }, { "epoch": 0.69, "grad_norm": 0.8461276414515637, "learning_rate": 4.708846603890077e-06, "loss": 0.4899, "step": 8455 }, { "epoch": 0.69, "grad_norm": 0.8958768222352202, "learning_rate": 4.70661300295706e-06, "loss": 0.4872, "step": 8456 }, { "epoch": 0.69, "grad_norm": 0.9505494510294067, "learning_rate": 4.704379768851063e-06, "loss": 0.5211, "step": 8457 }, { "epoch": 0.69, "grad_norm": 0.8700792444455523, "learning_rate": 4.702146901726851e-06, "loss": 0.4902, "step": 8458 }, { "epoch": 0.69, "grad_norm": 0.9200429978046982, "learning_rate": 4.69991440173916e-06, "loss": 0.5211, "step": 8459 }, { "epoch": 0.69, "grad_norm": 0.8938147931028365, "learning_rate": 4.697682269042698e-06, "loss": 0.5195, "step": 8460 }, { "epoch": 0.69, "grad_norm": 0.8550642884636459, "learning_rate": 4.695450503792153e-06, "loss": 0.4923, "step": 8461 }, { "epoch": 0.69, "grad_norm": 1.0662928591320688, "learning_rate": 4.693219106142186e-06, "loss": 0.577, "step": 8462 }, { "epoch": 0.69, "grad_norm": 0.8924981782282854, "learning_rate": 4.690988076247425e-06, "loss": 0.4816, "step": 8463 }, { "epoch": 0.69, "grad_norm": 0.903820363407162, "learning_rate": 4.68875741426248e-06, "loss": 0.5177, "step": 8464 }, { "epoch": 0.69, "grad_norm": 1.0387163445050478, "learning_rate": 4.686527120341936e-06, "loss": 0.5164, "step": 8465 }, { "epoch": 0.69, "grad_norm": 0.9481820813313033, "learning_rate": 4.68429719464035e-06, "loss": 0.5185, "step": 8466 }, { "epoch": 0.69, "grad_norm": 0.8594449777348103, "learning_rate": 4.6820676373122535e-06, "loss": 0.4656, "step": 8467 }, { "epoch": 0.69, "grad_norm": 0.8502875164783491, "learning_rate": 4.679838448512155e-06, "loss": 0.4476, "step": 8468 }, { "epoch": 0.69, "grad_norm": 0.9414262894799519, "learning_rate": 4.677609628394529e-06, "loss": 0.4923, "step": 8469 }, { "epoch": 0.69, "grad_norm": 0.8068791737650673, "learning_rate": 4.675381177113837e-06, "loss": 0.4124, "step": 8470 }, { "epoch": 0.69, "grad_norm": 0.909949833574802, "learning_rate": 4.673153094824505e-06, "loss": 0.4937, "step": 8471 }, { "epoch": 0.69, "grad_norm": 0.9170348465696564, "learning_rate": 4.670925381680938e-06, "loss": 0.5086, "step": 8472 }, { "epoch": 0.69, "grad_norm": 0.9315108992160059, "learning_rate": 4.668698037837517e-06, "loss": 0.5393, "step": 8473 }, { "epoch": 0.69, "grad_norm": 0.873085812567832, "learning_rate": 4.666471063448595e-06, "loss": 0.5344, "step": 8474 }, { "epoch": 0.69, "grad_norm": 1.0079423729232555, "learning_rate": 4.664244458668496e-06, "loss": 0.5647, "step": 8475 }, { "epoch": 0.69, "grad_norm": 0.8207324962622179, "learning_rate": 4.662018223651521e-06, "loss": 0.4896, "step": 8476 }, { "epoch": 0.69, "grad_norm": 0.9446632078134728, "learning_rate": 4.65979235855195e-06, "loss": 0.5198, "step": 8477 }, { "epoch": 0.69, "grad_norm": 0.8965188809159295, "learning_rate": 4.65756686352403e-06, "loss": 0.467, "step": 8478 }, { "epoch": 0.69, "grad_norm": 0.8649659172553711, "learning_rate": 4.655341738721989e-06, "loss": 0.4583, "step": 8479 }, { "epoch": 0.69, "grad_norm": 0.9028168849235375, "learning_rate": 4.653116984300024e-06, "loss": 0.5207, "step": 8480 }, { "epoch": 0.69, "grad_norm": 0.8951444423118171, "learning_rate": 4.6508926004123145e-06, "loss": 0.4682, "step": 8481 }, { "epoch": 0.69, "grad_norm": 0.8852410562883132, "learning_rate": 4.648668587212998e-06, "loss": 0.4784, "step": 8482 }, { "epoch": 0.69, "grad_norm": 0.9167657976916679, "learning_rate": 4.646444944856202e-06, "loss": 0.5031, "step": 8483 }, { "epoch": 0.69, "grad_norm": 0.9260651434554181, "learning_rate": 4.644221673496023e-06, "loss": 0.5693, "step": 8484 }, { "epoch": 0.69, "grad_norm": 1.0223575682469483, "learning_rate": 4.641998773286531e-06, "loss": 0.4936, "step": 8485 }, { "epoch": 0.69, "grad_norm": 0.8672998551519863, "learning_rate": 4.639776244381772e-06, "loss": 0.491, "step": 8486 }, { "epoch": 0.69, "grad_norm": 0.8243916096199051, "learning_rate": 4.637554086935768e-06, "loss": 0.49, "step": 8487 }, { "epoch": 0.69, "grad_norm": 0.8811146797443113, "learning_rate": 4.635332301102507e-06, "loss": 0.4524, "step": 8488 }, { "epoch": 0.69, "grad_norm": 1.0189452727877433, "learning_rate": 4.633110887035957e-06, "loss": 0.5636, "step": 8489 }, { "epoch": 0.69, "grad_norm": 0.9832362112575355, "learning_rate": 4.630889844890063e-06, "loss": 0.4915, "step": 8490 }, { "epoch": 0.69, "grad_norm": 0.9609507067698836, "learning_rate": 4.628669174818741e-06, "loss": 0.5044, "step": 8491 }, { "epoch": 0.69, "grad_norm": 0.8886354571444008, "learning_rate": 4.6264488769758795e-06, "loss": 0.4868, "step": 8492 }, { "epoch": 0.69, "grad_norm": 0.9127612427732726, "learning_rate": 4.6242289515153495e-06, "loss": 0.5326, "step": 8493 }, { "epoch": 0.69, "grad_norm": 0.9293163321558532, "learning_rate": 4.622009398590976e-06, "loss": 0.4987, "step": 8494 }, { "epoch": 0.69, "grad_norm": 0.8595017051048842, "learning_rate": 4.619790218356589e-06, "loss": 0.4478, "step": 8495 }, { "epoch": 0.69, "grad_norm": 0.932634641983612, "learning_rate": 4.617571410965964e-06, "loss": 0.5015, "step": 8496 }, { "epoch": 0.69, "grad_norm": 0.9429550807269602, "learning_rate": 4.615352976572867e-06, "loss": 0.4838, "step": 8497 }, { "epoch": 0.69, "grad_norm": 0.9593082113581994, "learning_rate": 4.613134915331031e-06, "loss": 0.4767, "step": 8498 }, { "epoch": 0.69, "grad_norm": 0.8782623572538939, "learning_rate": 4.610917227394172e-06, "loss": 0.4841, "step": 8499 }, { "epoch": 0.69, "grad_norm": 0.9823091850599716, "learning_rate": 4.608699912915961e-06, "loss": 0.5373, "step": 8500 }, { "epoch": 0.69, "grad_norm": 0.9240946447050321, "learning_rate": 4.606482972050072e-06, "loss": 0.493, "step": 8501 }, { "epoch": 0.69, "grad_norm": 0.9995884422788283, "learning_rate": 4.604266404950124e-06, "loss": 0.4942, "step": 8502 }, { "epoch": 0.69, "grad_norm": 0.9315818382316459, "learning_rate": 4.60205021176973e-06, "loss": 0.5339, "step": 8503 }, { "epoch": 0.69, "grad_norm": 0.9313904947633198, "learning_rate": 4.599834392662467e-06, "loss": 0.5309, "step": 8504 }, { "epoch": 0.69, "grad_norm": 0.9234364248702774, "learning_rate": 4.5976189477818945e-06, "loss": 0.5381, "step": 8505 }, { "epoch": 0.69, "grad_norm": 0.9642841600311677, "learning_rate": 4.5954038772815305e-06, "loss": 0.5149, "step": 8506 }, { "epoch": 0.69, "grad_norm": 0.9758857960131119, "learning_rate": 4.5931891813148895e-06, "loss": 0.5616, "step": 8507 }, { "epoch": 0.69, "grad_norm": 0.9939874774025128, "learning_rate": 4.5909748600354395e-06, "loss": 0.5817, "step": 8508 }, { "epoch": 0.69, "grad_norm": 0.883662600989568, "learning_rate": 4.588760913596635e-06, "loss": 0.4887, "step": 8509 }, { "epoch": 0.69, "grad_norm": 1.0144586260482555, "learning_rate": 4.586547342151898e-06, "loss": 0.5689, "step": 8510 }, { "epoch": 0.69, "grad_norm": 0.9213786833800294, "learning_rate": 4.584334145854633e-06, "loss": 0.5044, "step": 8511 }, { "epoch": 0.69, "grad_norm": 0.8710235090027155, "learning_rate": 4.582121324858201e-06, "loss": 0.475, "step": 8512 }, { "epoch": 0.69, "grad_norm": 0.9984563099572079, "learning_rate": 4.579908879315962e-06, "loss": 0.5219, "step": 8513 }, { "epoch": 0.69, "grad_norm": 0.907665700346533, "learning_rate": 4.577696809381222e-06, "loss": 0.5097, "step": 8514 }, { "epoch": 0.69, "grad_norm": 0.8598035995585397, "learning_rate": 4.5754851152072935e-06, "loss": 0.4853, "step": 8515 }, { "epoch": 0.69, "grad_norm": 0.9093082298469464, "learning_rate": 4.573273796947429e-06, "loss": 0.5327, "step": 8516 }, { "epoch": 0.69, "grad_norm": 0.8808884117504743, "learning_rate": 4.571062854754878e-06, "loss": 0.5258, "step": 8517 }, { "epoch": 0.69, "grad_norm": 0.8533527701048154, "learning_rate": 4.568852288782854e-06, "loss": 0.4881, "step": 8518 }, { "epoch": 0.69, "grad_norm": 0.9163197742017514, "learning_rate": 4.566642099184555e-06, "loss": 0.5018, "step": 8519 }, { "epoch": 0.69, "grad_norm": 0.8215761882741016, "learning_rate": 4.564432286113131e-06, "loss": 0.4834, "step": 8520 }, { "epoch": 0.69, "grad_norm": 0.7933050125474624, "learning_rate": 4.562222849721735e-06, "loss": 0.4094, "step": 8521 }, { "epoch": 0.69, "grad_norm": 0.8897154668309718, "learning_rate": 4.560013790163469e-06, "loss": 0.5105, "step": 8522 }, { "epoch": 0.69, "grad_norm": 0.855198442809487, "learning_rate": 4.557805107591421e-06, "loss": 0.455, "step": 8523 }, { "epoch": 0.69, "grad_norm": 0.9524596667633722, "learning_rate": 4.555596802158653e-06, "loss": 0.5091, "step": 8524 }, { "epoch": 0.69, "grad_norm": 0.950194938204538, "learning_rate": 4.553388874018201e-06, "loss": 0.5583, "step": 8525 }, { "epoch": 0.69, "grad_norm": 0.9400851744243489, "learning_rate": 4.551181323323062e-06, "loss": 0.4941, "step": 8526 }, { "epoch": 0.69, "grad_norm": 0.9146266111978486, "learning_rate": 4.548974150226231e-06, "loss": 0.4986, "step": 8527 }, { "epoch": 0.69, "grad_norm": 0.9660153299413448, "learning_rate": 4.546767354880653e-06, "loss": 0.5393, "step": 8528 }, { "epoch": 0.69, "grad_norm": 0.9297055371592443, "learning_rate": 4.544560937439258e-06, "loss": 0.4953, "step": 8529 }, { "epoch": 0.69, "grad_norm": 0.9696637231242728, "learning_rate": 4.542354898054953e-06, "loss": 0.5172, "step": 8530 }, { "epoch": 0.69, "grad_norm": 0.9218306554310379, "learning_rate": 4.540149236880616e-06, "loss": 0.4773, "step": 8531 }, { "epoch": 0.69, "grad_norm": 0.9486063599967517, "learning_rate": 4.537943954069088e-06, "loss": 0.546, "step": 8532 }, { "epoch": 0.69, "grad_norm": 0.9924791760385479, "learning_rate": 4.535739049773206e-06, "loss": 0.5536, "step": 8533 }, { "epoch": 0.69, "grad_norm": 0.8468685421929781, "learning_rate": 4.533534524145756e-06, "loss": 0.4782, "step": 8534 }, { "epoch": 0.69, "grad_norm": 0.9512118915905358, "learning_rate": 4.531330377339516e-06, "loss": 0.4956, "step": 8535 }, { "epoch": 0.69, "grad_norm": 0.9716515137110536, "learning_rate": 4.529126609507229e-06, "loss": 0.5597, "step": 8536 }, { "epoch": 0.69, "grad_norm": 0.8938265448169714, "learning_rate": 4.52692322080162e-06, "loss": 0.4546, "step": 8537 }, { "epoch": 0.69, "grad_norm": 0.8921292523190358, "learning_rate": 4.52472021137537e-06, "loss": 0.4938, "step": 8538 }, { "epoch": 0.69, "grad_norm": 0.977254514957181, "learning_rate": 4.5225175813811585e-06, "loss": 0.4989, "step": 8539 }, { "epoch": 0.69, "grad_norm": 0.9901426180974761, "learning_rate": 4.520315330971617e-06, "loss": 0.523, "step": 8540 }, { "epoch": 0.69, "grad_norm": 0.9762476438745803, "learning_rate": 4.518113460299364e-06, "loss": 0.5033, "step": 8541 }, { "epoch": 0.69, "grad_norm": 0.9511493492501386, "learning_rate": 4.515911969516985e-06, "loss": 0.5201, "step": 8542 }, { "epoch": 0.69, "grad_norm": 0.9544821754504096, "learning_rate": 4.513710858777045e-06, "loss": 0.5167, "step": 8543 }, { "epoch": 0.69, "grad_norm": 1.0477136437004189, "learning_rate": 4.5115101282320695e-06, "loss": 0.5641, "step": 8544 }, { "epoch": 0.69, "grad_norm": 0.9212158833631946, "learning_rate": 4.509309778034582e-06, "loss": 0.4567, "step": 8545 }, { "epoch": 0.69, "grad_norm": 0.9859534975875366, "learning_rate": 4.507109808337051e-06, "loss": 0.5537, "step": 8546 }, { "epoch": 0.69, "grad_norm": 0.8521980359414838, "learning_rate": 4.504910219291941e-06, "loss": 0.4825, "step": 8547 }, { "epoch": 0.69, "grad_norm": 0.8726401564149688, "learning_rate": 4.502711011051676e-06, "loss": 0.4641, "step": 8548 }, { "epoch": 0.69, "grad_norm": 0.8850970445322484, "learning_rate": 4.500512183768666e-06, "loss": 0.4365, "step": 8549 }, { "epoch": 0.69, "grad_norm": 0.8528034962174915, "learning_rate": 4.498313737595277e-06, "loss": 0.4914, "step": 8550 }, { "epoch": 0.69, "grad_norm": 0.8943513341597491, "learning_rate": 4.4961156726838725e-06, "loss": 0.5079, "step": 8551 }, { "epoch": 0.7, "grad_norm": 0.8238291990363437, "learning_rate": 4.493917989186768e-06, "loss": 0.5233, "step": 8552 }, { "epoch": 0.7, "grad_norm": 0.935896226406557, "learning_rate": 4.491720687256261e-06, "loss": 0.492, "step": 8553 }, { "epoch": 0.7, "grad_norm": 0.8779507327285243, "learning_rate": 4.489523767044625e-06, "loss": 0.4805, "step": 8554 }, { "epoch": 0.7, "grad_norm": 0.998133203070941, "learning_rate": 4.487327228704108e-06, "loss": 0.5005, "step": 8555 }, { "epoch": 0.7, "grad_norm": 0.9016653665148767, "learning_rate": 4.4851310723869176e-06, "loss": 0.4611, "step": 8556 }, { "epoch": 0.7, "grad_norm": 0.9136562820143127, "learning_rate": 4.482935298245259e-06, "loss": 0.4689, "step": 8557 }, { "epoch": 0.7, "grad_norm": 0.990290997862627, "learning_rate": 4.480739906431287e-06, "loss": 0.5298, "step": 8558 }, { "epoch": 0.7, "grad_norm": 0.9447926331667892, "learning_rate": 4.478544897097144e-06, "loss": 0.521, "step": 8559 }, { "epoch": 0.7, "grad_norm": 0.8593961200741635, "learning_rate": 4.476350270394942e-06, "loss": 0.4391, "step": 8560 }, { "epoch": 0.7, "grad_norm": 0.8770524066755371, "learning_rate": 4.47415602647677e-06, "loss": 0.5104, "step": 8561 }, { "epoch": 0.7, "grad_norm": 0.9107096575839747, "learning_rate": 4.471962165494678e-06, "loss": 0.5393, "step": 8562 }, { "epoch": 0.7, "grad_norm": 0.9230544531802135, "learning_rate": 4.469768687600711e-06, "loss": 0.4659, "step": 8563 }, { "epoch": 0.7, "grad_norm": 0.9017244828244761, "learning_rate": 4.467575592946865e-06, "loss": 0.5099, "step": 8564 }, { "epoch": 0.7, "grad_norm": 0.9072309195504163, "learning_rate": 4.465382881685122e-06, "loss": 0.4922, "step": 8565 }, { "epoch": 0.7, "grad_norm": 0.9127834367641511, "learning_rate": 4.463190553967438e-06, "loss": 0.531, "step": 8566 }, { "epoch": 0.7, "grad_norm": 0.9630119394437752, "learning_rate": 4.46099860994574e-06, "loss": 0.4919, "step": 8567 }, { "epoch": 0.7, "grad_norm": 1.402515032284407, "learning_rate": 4.458807049771918e-06, "loss": 0.5254, "step": 8568 }, { "epoch": 0.7, "grad_norm": 0.979726220736822, "learning_rate": 4.4566158735978605e-06, "loss": 0.5463, "step": 8569 }, { "epoch": 0.7, "grad_norm": 0.9475301973882784, "learning_rate": 4.454425081575402e-06, "loss": 0.5353, "step": 8570 }, { "epoch": 0.7, "grad_norm": 0.939575507723578, "learning_rate": 4.452234673856366e-06, "loss": 0.5089, "step": 8571 }, { "epoch": 0.7, "grad_norm": 0.9487876723711443, "learning_rate": 4.450044650592547e-06, "loss": 0.5164, "step": 8572 }, { "epoch": 0.7, "grad_norm": 0.9522260285452261, "learning_rate": 4.447855011935714e-06, "loss": 0.5307, "step": 8573 }, { "epoch": 0.7, "grad_norm": 0.8633787236123219, "learning_rate": 4.4456657580375966e-06, "loss": 0.4582, "step": 8574 }, { "epoch": 0.7, "grad_norm": 0.9049310398365067, "learning_rate": 4.443476889049923e-06, "loss": 0.5123, "step": 8575 }, { "epoch": 0.7, "grad_norm": 0.9042445194944134, "learning_rate": 4.441288405124368e-06, "loss": 0.4971, "step": 8576 }, { "epoch": 0.7, "grad_norm": 0.9061722854430694, "learning_rate": 4.439100306412596e-06, "loss": 0.5561, "step": 8577 }, { "epoch": 0.7, "grad_norm": 0.9681196865332937, "learning_rate": 4.436912593066241e-06, "loss": 0.5445, "step": 8578 }, { "epoch": 0.7, "grad_norm": 0.9045333714159708, "learning_rate": 4.434725265236912e-06, "loss": 0.4744, "step": 8579 }, { "epoch": 0.7, "grad_norm": 0.9791358579263623, "learning_rate": 4.4325383230761785e-06, "loss": 0.5647, "step": 8580 }, { "epoch": 0.7, "grad_norm": 0.9190795854611128, "learning_rate": 4.430351766735609e-06, "loss": 0.4574, "step": 8581 }, { "epoch": 0.7, "grad_norm": 0.9443589825222185, "learning_rate": 4.428165596366717e-06, "loss": 0.4894, "step": 8582 }, { "epoch": 0.7, "grad_norm": 1.010699088324618, "learning_rate": 4.425979812121008e-06, "loss": 0.5239, "step": 8583 }, { "epoch": 0.7, "grad_norm": 1.0005524797056764, "learning_rate": 4.423794414149953e-06, "loss": 0.5694, "step": 8584 }, { "epoch": 0.7, "grad_norm": 0.9028625873596521, "learning_rate": 4.421609402605003e-06, "loss": 0.5101, "step": 8585 }, { "epoch": 0.7, "grad_norm": 0.9555413133270083, "learning_rate": 4.419424777637565e-06, "loss": 0.4983, "step": 8586 }, { "epoch": 0.7, "grad_norm": 1.018225022093693, "learning_rate": 4.4172405393990495e-06, "loss": 0.4761, "step": 8587 }, { "epoch": 0.7, "grad_norm": 0.9019231468353708, "learning_rate": 4.415056688040807e-06, "loss": 0.509, "step": 8588 }, { "epoch": 0.7, "grad_norm": 0.8944092264707802, "learning_rate": 4.412873223714184e-06, "loss": 0.4492, "step": 8589 }, { "epoch": 0.7, "grad_norm": 1.0205877351895996, "learning_rate": 4.41069014657049e-06, "loss": 0.5679, "step": 8590 }, { "epoch": 0.7, "grad_norm": 0.9175443585321001, "learning_rate": 4.408507456761014e-06, "loss": 0.4681, "step": 8591 }, { "epoch": 0.7, "grad_norm": 0.941166738333424, "learning_rate": 4.4063251544370055e-06, "loss": 0.5679, "step": 8592 }, { "epoch": 0.7, "grad_norm": 0.9257517601389404, "learning_rate": 4.404143239749709e-06, "loss": 0.5314, "step": 8593 }, { "epoch": 0.7, "grad_norm": 0.9041427478022955, "learning_rate": 4.401961712850318e-06, "loss": 0.4892, "step": 8594 }, { "epoch": 0.7, "grad_norm": 0.8909288731891772, "learning_rate": 4.399780573890016e-06, "loss": 0.4575, "step": 8595 }, { "epoch": 0.7, "grad_norm": 0.9788352851978374, "learning_rate": 4.397599823019953e-06, "loss": 0.4932, "step": 8596 }, { "epoch": 0.7, "grad_norm": 0.9628780507902193, "learning_rate": 4.395419460391256e-06, "loss": 0.5098, "step": 8597 }, { "epoch": 0.7, "grad_norm": 0.9626038196010682, "learning_rate": 4.393239486155011e-06, "loss": 0.5357, "step": 8598 }, { "epoch": 0.7, "grad_norm": 0.9024459623483713, "learning_rate": 4.391059900462305e-06, "loss": 0.5328, "step": 8599 }, { "epoch": 0.7, "grad_norm": 0.9620848702073997, "learning_rate": 4.3888807034641686e-06, "loss": 0.4771, "step": 8600 }, { "epoch": 0.7, "grad_norm": 0.973843994968412, "learning_rate": 4.386701895311622e-06, "loss": 0.4848, "step": 8601 }, { "epoch": 0.7, "grad_norm": 0.9457771770198199, "learning_rate": 4.384523476155657e-06, "loss": 0.5132, "step": 8602 }, { "epoch": 0.7, "grad_norm": 0.929037612513199, "learning_rate": 4.382345446147236e-06, "loss": 0.5633, "step": 8603 }, { "epoch": 0.7, "grad_norm": 0.8646828981519963, "learning_rate": 4.380167805437285e-06, "loss": 0.4819, "step": 8604 }, { "epoch": 0.7, "grad_norm": 0.9185236496323183, "learning_rate": 4.377990554176729e-06, "loss": 0.499, "step": 8605 }, { "epoch": 0.7, "grad_norm": 0.9021456393842846, "learning_rate": 4.375813692516437e-06, "loss": 0.493, "step": 8606 }, { "epoch": 0.7, "grad_norm": 0.9828541683033288, "learning_rate": 4.3736372206072666e-06, "loss": 0.5731, "step": 8607 }, { "epoch": 0.7, "grad_norm": 0.9002309994632519, "learning_rate": 4.371461138600047e-06, "loss": 0.554, "step": 8608 }, { "epoch": 0.7, "grad_norm": 0.960763045179828, "learning_rate": 4.369285446645578e-06, "loss": 0.5608, "step": 8609 }, { "epoch": 0.7, "grad_norm": 0.9058200343498363, "learning_rate": 4.367110144894633e-06, "loss": 0.5084, "step": 8610 }, { "epoch": 0.7, "grad_norm": 0.9395399780793992, "learning_rate": 4.364935233497962e-06, "loss": 0.5184, "step": 8611 }, { "epoch": 0.7, "grad_norm": 0.9215093750756229, "learning_rate": 4.362760712606278e-06, "loss": 0.4846, "step": 8612 }, { "epoch": 0.7, "grad_norm": 0.8610037956075174, "learning_rate": 4.360586582370275e-06, "loss": 0.4386, "step": 8613 }, { "epoch": 0.7, "grad_norm": 0.882746044843972, "learning_rate": 4.35841284294062e-06, "loss": 0.4848, "step": 8614 }, { "epoch": 0.7, "grad_norm": 0.8953735482621343, "learning_rate": 4.356239494467952e-06, "loss": 0.4674, "step": 8615 }, { "epoch": 0.7, "grad_norm": 0.943203974943108, "learning_rate": 4.35406653710288e-06, "loss": 0.492, "step": 8616 }, { "epoch": 0.7, "grad_norm": 0.8983064274161381, "learning_rate": 4.351893970995994e-06, "loss": 0.4969, "step": 8617 }, { "epoch": 0.7, "grad_norm": 0.9973530547501277, "learning_rate": 4.349721796297841e-06, "loss": 0.5303, "step": 8618 }, { "epoch": 0.7, "grad_norm": 0.9117438767939997, "learning_rate": 4.347550013158956e-06, "loss": 0.4307, "step": 8619 }, { "epoch": 0.7, "grad_norm": 1.0095913131582344, "learning_rate": 4.345378621729842e-06, "loss": 0.5226, "step": 8620 }, { "epoch": 0.7, "grad_norm": 0.8285377532732942, "learning_rate": 4.343207622160973e-06, "loss": 0.4997, "step": 8621 }, { "epoch": 0.7, "grad_norm": 0.9936775538811486, "learning_rate": 4.341037014602799e-06, "loss": 0.5532, "step": 8622 }, { "epoch": 0.7, "grad_norm": 0.8750442052146093, "learning_rate": 4.338866799205744e-06, "loss": 0.4762, "step": 8623 }, { "epoch": 0.7, "grad_norm": 1.012281747560933, "learning_rate": 4.3366969761201935e-06, "loss": 0.5108, "step": 8624 }, { "epoch": 0.7, "grad_norm": 0.9792624934234849, "learning_rate": 4.334527545496521e-06, "loss": 0.5235, "step": 8625 }, { "epoch": 0.7, "grad_norm": 0.9781064179748855, "learning_rate": 4.332358507485064e-06, "loss": 0.5202, "step": 8626 }, { "epoch": 0.7, "grad_norm": 0.9132213170356942, "learning_rate": 4.330189862236134e-06, "loss": 0.5103, "step": 8627 }, { "epoch": 0.7, "grad_norm": 0.8954379103279083, "learning_rate": 4.328021609900018e-06, "loss": 0.5055, "step": 8628 }, { "epoch": 0.7, "grad_norm": 0.9164004427300606, "learning_rate": 4.3258537506269735e-06, "loss": 0.4858, "step": 8629 }, { "epoch": 0.7, "grad_norm": 0.951325657558114, "learning_rate": 4.3236862845672355e-06, "loss": 0.5028, "step": 8630 }, { "epoch": 0.7, "grad_norm": 0.8636461833908563, "learning_rate": 4.3215192118709984e-06, "loss": 0.5028, "step": 8631 }, { "epoch": 0.7, "grad_norm": 1.1844359900170125, "learning_rate": 4.319352532688444e-06, "loss": 0.4569, "step": 8632 }, { "epoch": 0.7, "grad_norm": 0.9495900125986644, "learning_rate": 4.317186247169719e-06, "loss": 0.5037, "step": 8633 }, { "epoch": 0.7, "grad_norm": 0.9685745342821419, "learning_rate": 4.315020355464947e-06, "loss": 0.5319, "step": 8634 }, { "epoch": 0.7, "grad_norm": 0.8690042344461989, "learning_rate": 4.312854857724222e-06, "loss": 0.4636, "step": 8635 }, { "epoch": 0.7, "grad_norm": 0.9177292653684901, "learning_rate": 4.3106897540976154e-06, "loss": 0.5345, "step": 8636 }, { "epoch": 0.7, "grad_norm": 0.9735025676432103, "learning_rate": 4.308525044735158e-06, "loss": 0.5682, "step": 8637 }, { "epoch": 0.7, "grad_norm": 0.9660018093666137, "learning_rate": 4.306360729786867e-06, "loss": 0.5515, "step": 8638 }, { "epoch": 0.7, "grad_norm": 0.9595466170331142, "learning_rate": 4.304196809402726e-06, "loss": 0.4858, "step": 8639 }, { "epoch": 0.7, "grad_norm": 0.9330526134964297, "learning_rate": 4.302033283732695e-06, "loss": 0.4753, "step": 8640 }, { "epoch": 0.7, "grad_norm": 0.9228600220437362, "learning_rate": 4.299870152926703e-06, "loss": 0.4527, "step": 8641 }, { "epoch": 0.7, "grad_norm": 0.946059941678235, "learning_rate": 4.297707417134653e-06, "loss": 0.5222, "step": 8642 }, { "epoch": 0.7, "grad_norm": 0.9401672436336672, "learning_rate": 4.295545076506422e-06, "loss": 0.5587, "step": 8643 }, { "epoch": 0.7, "grad_norm": 0.9230695826194234, "learning_rate": 4.293383131191861e-06, "loss": 0.5095, "step": 8644 }, { "epoch": 0.7, "grad_norm": 0.9264723424024507, "learning_rate": 4.291221581340783e-06, "loss": 0.5024, "step": 8645 }, { "epoch": 0.7, "grad_norm": 0.7676869641906288, "learning_rate": 4.2890604271029855e-06, "loss": 0.444, "step": 8646 }, { "epoch": 0.7, "grad_norm": 0.9593290265798053, "learning_rate": 4.286899668628235e-06, "loss": 0.5213, "step": 8647 }, { "epoch": 0.7, "grad_norm": 0.9010995858897504, "learning_rate": 4.28473930606627e-06, "loss": 0.5134, "step": 8648 }, { "epoch": 0.7, "grad_norm": 0.9418499445234443, "learning_rate": 4.282579339566802e-06, "loss": 0.509, "step": 8649 }, { "epoch": 0.7, "grad_norm": 0.9286350494558199, "learning_rate": 4.280419769279518e-06, "loss": 0.5012, "step": 8650 }, { "epoch": 0.7, "grad_norm": 0.8983920940568924, "learning_rate": 4.278260595354067e-06, "loss": 0.5336, "step": 8651 }, { "epoch": 0.7, "grad_norm": 0.8294771280671885, "learning_rate": 4.276101817940082e-06, "loss": 0.4598, "step": 8652 }, { "epoch": 0.7, "grad_norm": 0.8855833075909199, "learning_rate": 4.273943437187163e-06, "loss": 0.4714, "step": 8653 }, { "epoch": 0.7, "grad_norm": 0.9034299559561075, "learning_rate": 4.271785453244886e-06, "loss": 0.4397, "step": 8654 }, { "epoch": 0.7, "grad_norm": 0.9608987870079996, "learning_rate": 4.269627866262794e-06, "loss": 0.5578, "step": 8655 }, { "epoch": 0.7, "grad_norm": 0.8949249243243168, "learning_rate": 4.267470676390414e-06, "loss": 0.4699, "step": 8656 }, { "epoch": 0.7, "grad_norm": 0.8901792843107094, "learning_rate": 4.2653138837772265e-06, "loss": 0.5048, "step": 8657 }, { "epoch": 0.7, "grad_norm": 0.9389581626819746, "learning_rate": 4.2631574885727e-06, "loss": 0.5082, "step": 8658 }, { "epoch": 0.7, "grad_norm": 0.9186421948955802, "learning_rate": 4.261001490926272e-06, "loss": 0.52, "step": 8659 }, { "epoch": 0.7, "grad_norm": 0.9401889077750639, "learning_rate": 4.25884589098735e-06, "loss": 0.4897, "step": 8660 }, { "epoch": 0.7, "grad_norm": 0.9039686624889018, "learning_rate": 4.256690688905315e-06, "loss": 0.5232, "step": 8661 }, { "epoch": 0.7, "grad_norm": 0.9773294716052752, "learning_rate": 4.254535884829524e-06, "loss": 0.5253, "step": 8662 }, { "epoch": 0.7, "grad_norm": 0.7950138750683371, "learning_rate": 4.252381478909293e-06, "loss": 0.4714, "step": 8663 }, { "epoch": 0.7, "grad_norm": 1.0925343616353147, "learning_rate": 4.2502274712939355e-06, "loss": 0.5992, "step": 8664 }, { "epoch": 0.7, "grad_norm": 0.8961489935419249, "learning_rate": 4.24807386213271e-06, "loss": 0.4854, "step": 8665 }, { "epoch": 0.7, "grad_norm": 0.8461885044508616, "learning_rate": 4.245920651574864e-06, "loss": 0.4316, "step": 8666 }, { "epoch": 0.7, "grad_norm": 0.8998814032194048, "learning_rate": 4.243767839769612e-06, "loss": 0.4834, "step": 8667 }, { "epoch": 0.7, "grad_norm": 0.8852003798729836, "learning_rate": 4.241615426866148e-06, "loss": 0.4867, "step": 8668 }, { "epoch": 0.7, "grad_norm": 0.9462793357999046, "learning_rate": 4.239463413013619e-06, "loss": 0.5006, "step": 8669 }, { "epoch": 0.7, "grad_norm": 0.8870614184576234, "learning_rate": 4.237311798361175e-06, "loss": 0.5095, "step": 8670 }, { "epoch": 0.7, "grad_norm": 0.8429600883646559, "learning_rate": 4.235160583057905e-06, "loss": 0.5015, "step": 8671 }, { "epoch": 0.7, "grad_norm": 0.948909062706813, "learning_rate": 4.233009767252896e-06, "loss": 0.5001, "step": 8672 }, { "epoch": 0.7, "grad_norm": 0.9127038362884151, "learning_rate": 4.230859351095193e-06, "loss": 0.5077, "step": 8673 }, { "epoch": 0.7, "grad_norm": 0.9305603826139454, "learning_rate": 4.2287093347338245e-06, "loss": 0.5122, "step": 8674 }, { "epoch": 0.71, "grad_norm": 0.8877865608484397, "learning_rate": 4.226559718317773e-06, "loss": 0.4166, "step": 8675 }, { "epoch": 0.71, "grad_norm": 0.9078446718113264, "learning_rate": 4.224410501996018e-06, "loss": 0.5177, "step": 8676 }, { "epoch": 0.71, "grad_norm": 0.8757235975774688, "learning_rate": 4.222261685917489e-06, "loss": 0.4597, "step": 8677 }, { "epoch": 0.71, "grad_norm": 0.9691724908290217, "learning_rate": 4.220113270231101e-06, "loss": 0.5702, "step": 8678 }, { "epoch": 0.71, "grad_norm": 0.9633912836541412, "learning_rate": 4.217965255085737e-06, "loss": 0.5597, "step": 8679 }, { "epoch": 0.71, "grad_norm": 0.8084054151127114, "learning_rate": 4.215817640630254e-06, "loss": 0.4092, "step": 8680 }, { "epoch": 0.71, "grad_norm": 0.9928820511774611, "learning_rate": 4.2136704270134725e-06, "loss": 0.5033, "step": 8681 }, { "epoch": 0.71, "grad_norm": 0.9771886848828312, "learning_rate": 4.2115236143842046e-06, "loss": 0.522, "step": 8682 }, { "epoch": 0.71, "grad_norm": 0.9075441822519663, "learning_rate": 4.209377202891212e-06, "loss": 0.4654, "step": 8683 }, { "epoch": 0.71, "grad_norm": 0.9183635386993099, "learning_rate": 4.207231192683243e-06, "loss": 0.5257, "step": 8684 }, { "epoch": 0.71, "grad_norm": 0.8588744030939618, "learning_rate": 4.205085583909014e-06, "loss": 0.4934, "step": 8685 }, { "epoch": 0.71, "grad_norm": 0.9081390911988166, "learning_rate": 4.2029403767172175e-06, "loss": 0.4586, "step": 8686 }, { "epoch": 0.71, "grad_norm": 0.8660306671818201, "learning_rate": 4.200795571256504e-06, "loss": 0.4527, "step": 8687 }, { "epoch": 0.71, "grad_norm": 0.9407829774136649, "learning_rate": 4.19865116767552e-06, "loss": 0.528, "step": 8688 }, { "epoch": 0.71, "grad_norm": 0.8590314558726129, "learning_rate": 4.196507166122862e-06, "loss": 0.4732, "step": 8689 }, { "epoch": 0.71, "grad_norm": 0.8731436134748567, "learning_rate": 4.1943635667471095e-06, "loss": 0.4915, "step": 8690 }, { "epoch": 0.71, "grad_norm": 0.8511358601352428, "learning_rate": 4.192220369696811e-06, "loss": 0.4785, "step": 8691 }, { "epoch": 0.71, "grad_norm": 1.068335180497299, "learning_rate": 4.190077575120493e-06, "loss": 0.5777, "step": 8692 }, { "epoch": 0.71, "grad_norm": 1.0273366727449977, "learning_rate": 4.187935183166641e-06, "loss": 0.5265, "step": 8693 }, { "epoch": 0.71, "grad_norm": 0.9019956570633716, "learning_rate": 4.1857931939837305e-06, "loss": 0.4879, "step": 8694 }, { "epoch": 0.71, "grad_norm": 0.8957925039835313, "learning_rate": 4.18365160772019e-06, "loss": 0.4607, "step": 8695 }, { "epoch": 0.71, "grad_norm": 0.8908531131530597, "learning_rate": 4.1815104245244364e-06, "loss": 0.5171, "step": 8696 }, { "epoch": 0.71, "grad_norm": 0.9426170847860915, "learning_rate": 4.179369644544849e-06, "loss": 0.4745, "step": 8697 }, { "epoch": 0.71, "grad_norm": 0.9223070695579061, "learning_rate": 4.177229267929785e-06, "loss": 0.5021, "step": 8698 }, { "epoch": 0.71, "grad_norm": 1.0669590657342318, "learning_rate": 4.17508929482756e-06, "loss": 0.5615, "step": 8699 }, { "epoch": 0.71, "grad_norm": 0.9170041445449919, "learning_rate": 4.172949725386488e-06, "loss": 0.4175, "step": 8700 }, { "epoch": 0.71, "grad_norm": 0.8346287732255635, "learning_rate": 4.170810559754829e-06, "loss": 0.4755, "step": 8701 }, { "epoch": 0.71, "grad_norm": 0.825095119982811, "learning_rate": 4.168671798080826e-06, "loss": 0.4639, "step": 8702 }, { "epoch": 0.71, "grad_norm": 0.9463481324513929, "learning_rate": 4.166533440512696e-06, "loss": 0.4778, "step": 8703 }, { "epoch": 0.71, "grad_norm": 1.1810244437305535, "learning_rate": 4.164395487198628e-06, "loss": 0.5336, "step": 8704 }, { "epoch": 0.71, "grad_norm": 1.0187738412895564, "learning_rate": 4.1622579382867686e-06, "loss": 0.5458, "step": 8705 }, { "epoch": 0.71, "grad_norm": 0.9659152585832806, "learning_rate": 4.160120793925264e-06, "loss": 0.5135, "step": 8706 }, { "epoch": 0.71, "grad_norm": 0.9033350716520201, "learning_rate": 4.157984054262205e-06, "loss": 0.5468, "step": 8707 }, { "epoch": 0.71, "grad_norm": 1.01327241202795, "learning_rate": 4.155847719445669e-06, "loss": 0.5685, "step": 8708 }, { "epoch": 0.71, "grad_norm": 0.8969467151084107, "learning_rate": 4.1537117896237026e-06, "loss": 0.4953, "step": 8709 }, { "epoch": 0.71, "grad_norm": 0.8190720523141264, "learning_rate": 4.151576264944326e-06, "loss": 0.5048, "step": 8710 }, { "epoch": 0.71, "grad_norm": 0.8853197781726923, "learning_rate": 4.14944114555552e-06, "loss": 0.511, "step": 8711 }, { "epoch": 0.71, "grad_norm": 0.9285788886413802, "learning_rate": 4.14730643160526e-06, "loss": 0.5235, "step": 8712 }, { "epoch": 0.71, "grad_norm": 0.9257368811326977, "learning_rate": 4.14517212324147e-06, "loss": 0.4874, "step": 8713 }, { "epoch": 0.71, "grad_norm": 0.881845676784708, "learning_rate": 4.143038220612058e-06, "loss": 0.4813, "step": 8714 }, { "epoch": 0.71, "grad_norm": 0.9902780238114853, "learning_rate": 4.140904723864903e-06, "loss": 0.5445, "step": 8715 }, { "epoch": 0.71, "grad_norm": 1.04868441240058, "learning_rate": 4.138771633147856e-06, "loss": 0.508, "step": 8716 }, { "epoch": 0.71, "grad_norm": 0.8659370475684836, "learning_rate": 4.13663894860873e-06, "loss": 0.5388, "step": 8717 }, { "epoch": 0.71, "grad_norm": 0.8893485951346831, "learning_rate": 4.13450667039533e-06, "loss": 0.4901, "step": 8718 }, { "epoch": 0.71, "grad_norm": 0.8054311327775409, "learning_rate": 4.132374798655413e-06, "loss": 0.4823, "step": 8719 }, { "epoch": 0.71, "grad_norm": 1.0250186548566913, "learning_rate": 4.130243333536718e-06, "loss": 0.5844, "step": 8720 }, { "epoch": 0.71, "grad_norm": 0.9198453007877438, "learning_rate": 4.128112275186952e-06, "loss": 0.5061, "step": 8721 }, { "epoch": 0.71, "grad_norm": 0.9542831606083931, "learning_rate": 4.125981623753801e-06, "loss": 0.5177, "step": 8722 }, { "epoch": 0.71, "grad_norm": 0.9138929453969917, "learning_rate": 4.1238513793849065e-06, "loss": 0.4643, "step": 8723 }, { "epoch": 0.71, "grad_norm": 1.008733247589399, "learning_rate": 4.121721542227906e-06, "loss": 0.4846, "step": 8724 }, { "epoch": 0.71, "grad_norm": 0.9343475870206165, "learning_rate": 4.1195921124303864e-06, "loss": 0.5228, "step": 8725 }, { "epoch": 0.71, "grad_norm": 0.9956146315203084, "learning_rate": 4.117463090139916e-06, "loss": 0.5167, "step": 8726 }, { "epoch": 0.71, "grad_norm": 0.9457657258776898, "learning_rate": 4.1153344755040355e-06, "loss": 0.5152, "step": 8727 }, { "epoch": 0.71, "grad_norm": 0.8830292824760735, "learning_rate": 4.11320626867026e-06, "loss": 0.4626, "step": 8728 }, { "epoch": 0.71, "grad_norm": 0.9490427326245271, "learning_rate": 4.111078469786062e-06, "loss": 0.4836, "step": 8729 }, { "epoch": 0.71, "grad_norm": 0.9092871656552273, "learning_rate": 4.10895107899891e-06, "loss": 0.458, "step": 8730 }, { "epoch": 0.71, "grad_norm": 0.93005774304165, "learning_rate": 4.106824096456217e-06, "loss": 0.4641, "step": 8731 }, { "epoch": 0.71, "grad_norm": 0.8860861090356404, "learning_rate": 4.104697522305388e-06, "loss": 0.5386, "step": 8732 }, { "epoch": 0.71, "grad_norm": 0.846867975757357, "learning_rate": 4.102571356693793e-06, "loss": 0.4506, "step": 8733 }, { "epoch": 0.71, "grad_norm": 0.9016018547200599, "learning_rate": 4.100445599768774e-06, "loss": 0.4755, "step": 8734 }, { "epoch": 0.71, "grad_norm": 0.9656100627050762, "learning_rate": 4.098320251677637e-06, "loss": 0.5042, "step": 8735 }, { "epoch": 0.71, "grad_norm": 0.8760587893762447, "learning_rate": 4.096195312567677e-06, "loss": 0.5041, "step": 8736 }, { "epoch": 0.71, "grad_norm": 0.9051519692958128, "learning_rate": 4.094070782586141e-06, "loss": 0.4741, "step": 8737 }, { "epoch": 0.71, "grad_norm": 0.9101216832063238, "learning_rate": 4.091946661880262e-06, "loss": 0.5247, "step": 8738 }, { "epoch": 0.71, "grad_norm": 0.9338495810228892, "learning_rate": 4.089822950597239e-06, "loss": 0.4601, "step": 8739 }, { "epoch": 0.71, "grad_norm": 0.9623678853244182, "learning_rate": 4.087699648884248e-06, "loss": 0.5257, "step": 8740 }, { "epoch": 0.71, "grad_norm": 1.3681873907347593, "learning_rate": 4.085576756888418e-06, "loss": 0.49, "step": 8741 }, { "epoch": 0.71, "grad_norm": 0.8947410753937106, "learning_rate": 4.083454274756881e-06, "loss": 0.5437, "step": 8742 }, { "epoch": 0.71, "grad_norm": 0.8560471468894103, "learning_rate": 4.081332202636711e-06, "loss": 0.553, "step": 8743 }, { "epoch": 0.71, "grad_norm": 0.8614050583496167, "learning_rate": 4.07921054067497e-06, "loss": 0.5101, "step": 8744 }, { "epoch": 0.71, "grad_norm": 0.8858829368669646, "learning_rate": 4.0770892890186854e-06, "loss": 0.5019, "step": 8745 }, { "epoch": 0.71, "grad_norm": 0.8358142772584893, "learning_rate": 4.074968447814865e-06, "loss": 0.4682, "step": 8746 }, { "epoch": 0.71, "grad_norm": 1.008198929727442, "learning_rate": 4.072848017210467e-06, "loss": 0.4819, "step": 8747 }, { "epoch": 0.71, "grad_norm": 0.831564582604135, "learning_rate": 4.070727997352451e-06, "loss": 0.4379, "step": 8748 }, { "epoch": 0.71, "grad_norm": 0.9825779377624262, "learning_rate": 4.068608388387722e-06, "loss": 0.5053, "step": 8749 }, { "epoch": 0.71, "grad_norm": 1.0133746817461498, "learning_rate": 4.066489190463171e-06, "loss": 0.4938, "step": 8750 }, { "epoch": 0.71, "grad_norm": 1.0040275358581465, "learning_rate": 4.0643704037256556e-06, "loss": 0.5035, "step": 8751 }, { "epoch": 0.71, "grad_norm": 0.8856134943574181, "learning_rate": 4.0622520283220115e-06, "loss": 0.491, "step": 8752 }, { "epoch": 0.71, "grad_norm": 0.8965321999288322, "learning_rate": 4.060134064399026e-06, "loss": 0.4345, "step": 8753 }, { "epoch": 0.71, "grad_norm": 0.8186774390161647, "learning_rate": 4.05801651210349e-06, "loss": 0.4117, "step": 8754 }, { "epoch": 0.71, "grad_norm": 1.0365583157244873, "learning_rate": 4.0558993715821335e-06, "loss": 0.5259, "step": 8755 }, { "epoch": 0.71, "grad_norm": 0.8711117721422899, "learning_rate": 4.053782642981679e-06, "loss": 0.4699, "step": 8756 }, { "epoch": 0.71, "grad_norm": 0.8808213604888707, "learning_rate": 4.0516663264488145e-06, "loss": 0.4718, "step": 8757 }, { "epoch": 0.71, "grad_norm": 0.9377918559791827, "learning_rate": 4.049550422130196e-06, "loss": 0.4345, "step": 8758 }, { "epoch": 0.71, "grad_norm": 0.8839879672109973, "learning_rate": 4.047434930172456e-06, "loss": 0.5011, "step": 8759 }, { "epoch": 0.71, "grad_norm": 0.8693425125764165, "learning_rate": 4.045319850722198e-06, "loss": 0.4625, "step": 8760 }, { "epoch": 0.71, "grad_norm": 0.9119674025694322, "learning_rate": 4.04320518392599e-06, "loss": 0.5342, "step": 8761 }, { "epoch": 0.71, "grad_norm": 1.7183687244172643, "learning_rate": 4.041090929930378e-06, "loss": 0.5476, "step": 8762 }, { "epoch": 0.71, "grad_norm": 0.9880235645483642, "learning_rate": 4.03897708888188e-06, "loss": 0.5248, "step": 8763 }, { "epoch": 0.71, "grad_norm": 0.871714760467819, "learning_rate": 4.036863660926982e-06, "loss": 0.4717, "step": 8764 }, { "epoch": 0.71, "grad_norm": 0.9458534642219324, "learning_rate": 4.0347506462121434e-06, "loss": 0.5022, "step": 8765 }, { "epoch": 0.71, "grad_norm": 0.893834833687743, "learning_rate": 4.032638044883796e-06, "loss": 0.4513, "step": 8766 }, { "epoch": 0.71, "grad_norm": 0.9164768678852234, "learning_rate": 4.0305258570883336e-06, "loss": 0.4428, "step": 8767 }, { "epoch": 0.71, "grad_norm": 0.9575711407404988, "learning_rate": 4.028414082972141e-06, "loss": 0.4718, "step": 8768 }, { "epoch": 0.71, "grad_norm": 1.1116004691640229, "learning_rate": 4.026302722681551e-06, "loss": 0.498, "step": 8769 }, { "epoch": 0.71, "grad_norm": 0.8936242693860171, "learning_rate": 4.024191776362884e-06, "loss": 0.4714, "step": 8770 }, { "epoch": 0.71, "grad_norm": 0.9006703505518066, "learning_rate": 4.022081244162428e-06, "loss": 0.5088, "step": 8771 }, { "epoch": 0.71, "grad_norm": 0.9518530865834943, "learning_rate": 4.019971126226442e-06, "loss": 0.5148, "step": 8772 }, { "epoch": 0.71, "grad_norm": 0.8983615821416384, "learning_rate": 4.017861422701144e-06, "loss": 0.5111, "step": 8773 }, { "epoch": 0.71, "grad_norm": 0.9018554754900465, "learning_rate": 4.015752133732752e-06, "loss": 0.5303, "step": 8774 }, { "epoch": 0.71, "grad_norm": 0.9615507125371773, "learning_rate": 4.013643259467426e-06, "loss": 0.5036, "step": 8775 }, { "epoch": 0.71, "grad_norm": 0.9221345131864341, "learning_rate": 4.011534800051311e-06, "loss": 0.509, "step": 8776 }, { "epoch": 0.71, "grad_norm": 0.9346443887263562, "learning_rate": 4.0094267556305236e-06, "loss": 0.5485, "step": 8777 }, { "epoch": 0.71, "grad_norm": 0.8919882717782461, "learning_rate": 4.0073191263511475e-06, "loss": 0.4993, "step": 8778 }, { "epoch": 0.71, "grad_norm": 0.9302969938319966, "learning_rate": 4.005211912359241e-06, "loss": 0.4702, "step": 8779 }, { "epoch": 0.71, "grad_norm": 0.9522390605496788, "learning_rate": 4.003105113800835e-06, "loss": 0.5209, "step": 8780 }, { "epoch": 0.71, "grad_norm": 0.8053579275810232, "learning_rate": 4.000998730821922e-06, "loss": 0.454, "step": 8781 }, { "epoch": 0.71, "grad_norm": 0.9899449056444444, "learning_rate": 3.998892763568476e-06, "loss": 0.5019, "step": 8782 }, { "epoch": 0.71, "grad_norm": 0.9943263088056388, "learning_rate": 3.996787212186438e-06, "loss": 0.5298, "step": 8783 }, { "epoch": 0.71, "grad_norm": 0.9242626375286663, "learning_rate": 3.994682076821721e-06, "loss": 0.501, "step": 8784 }, { "epoch": 0.71, "grad_norm": 0.9675664947615202, "learning_rate": 3.99257735762021e-06, "loss": 0.515, "step": 8785 }, { "epoch": 0.71, "grad_norm": 0.8811697102442329, "learning_rate": 3.990473054727764e-06, "loss": 0.4832, "step": 8786 }, { "epoch": 0.71, "grad_norm": 0.9777855258544456, "learning_rate": 3.988369168290199e-06, "loss": 0.5334, "step": 8787 }, { "epoch": 0.71, "grad_norm": 1.0989247905108661, "learning_rate": 3.98626569845332e-06, "loss": 0.5236, "step": 8788 }, { "epoch": 0.71, "grad_norm": 1.0143168723303095, "learning_rate": 3.984162645362893e-06, "loss": 0.5851, "step": 8789 }, { "epoch": 0.71, "grad_norm": 0.8706688945273519, "learning_rate": 3.98206000916466e-06, "loss": 0.5129, "step": 8790 }, { "epoch": 0.71, "grad_norm": 1.0117501135253928, "learning_rate": 3.97995779000433e-06, "loss": 0.4835, "step": 8791 }, { "epoch": 0.71, "grad_norm": 0.9361724235863483, "learning_rate": 3.977855988027585e-06, "loss": 0.5167, "step": 8792 }, { "epoch": 0.71, "grad_norm": 0.9441390871206049, "learning_rate": 3.975754603380082e-06, "loss": 0.52, "step": 8793 }, { "epoch": 0.71, "grad_norm": 0.9325045022867406, "learning_rate": 3.973653636207437e-06, "loss": 0.501, "step": 8794 }, { "epoch": 0.71, "grad_norm": 0.8737033160786428, "learning_rate": 3.971553086655251e-06, "loss": 0.5012, "step": 8795 }, { "epoch": 0.71, "grad_norm": 0.8631823070347903, "learning_rate": 3.969452954869089e-06, "loss": 0.5286, "step": 8796 }, { "epoch": 0.71, "grad_norm": 0.8817665001884151, "learning_rate": 3.967353240994487e-06, "loss": 0.4844, "step": 8797 }, { "epoch": 0.72, "grad_norm": 0.9297465007856393, "learning_rate": 3.9652539451769554e-06, "loss": 0.4972, "step": 8798 }, { "epoch": 0.72, "grad_norm": 1.0959093161662927, "learning_rate": 3.963155067561976e-06, "loss": 0.4475, "step": 8799 }, { "epoch": 0.72, "grad_norm": 0.8978766746333792, "learning_rate": 3.961056608294992e-06, "loss": 0.4925, "step": 8800 }, { "epoch": 0.72, "grad_norm": 0.9327436321983399, "learning_rate": 3.958958567521428e-06, "loss": 0.4779, "step": 8801 }, { "epoch": 0.72, "grad_norm": 0.923248650172493, "learning_rate": 3.956860945386677e-06, "loss": 0.4828, "step": 8802 }, { "epoch": 0.72, "grad_norm": 0.9605785520965624, "learning_rate": 3.954763742036103e-06, "loss": 0.5256, "step": 8803 }, { "epoch": 0.72, "grad_norm": 0.8488022308694663, "learning_rate": 3.952666957615039e-06, "loss": 0.4822, "step": 8804 }, { "epoch": 0.72, "grad_norm": 1.012607013833751, "learning_rate": 3.950570592268794e-06, "loss": 0.4769, "step": 8805 }, { "epoch": 0.72, "grad_norm": 0.8621331876085766, "learning_rate": 3.948474646142638e-06, "loss": 0.4088, "step": 8806 }, { "epoch": 0.72, "grad_norm": 0.9353129675040686, "learning_rate": 3.946379119381822e-06, "loss": 0.4666, "step": 8807 }, { "epoch": 0.72, "grad_norm": 0.928181887357087, "learning_rate": 3.9442840121315625e-06, "loss": 0.5957, "step": 8808 }, { "epoch": 0.72, "grad_norm": 0.8097859720618114, "learning_rate": 3.94218932453705e-06, "loss": 0.3988, "step": 8809 }, { "epoch": 0.72, "grad_norm": 0.8555817269549301, "learning_rate": 3.940095056743444e-06, "loss": 0.4944, "step": 8810 }, { "epoch": 0.72, "grad_norm": 0.8688428830218954, "learning_rate": 3.938001208895878e-06, "loss": 0.5357, "step": 8811 }, { "epoch": 0.72, "grad_norm": 0.8943758455626842, "learning_rate": 3.935907781139446e-06, "loss": 0.495, "step": 8812 }, { "epoch": 0.72, "grad_norm": 0.9526621111614739, "learning_rate": 3.933814773619232e-06, "loss": 0.4879, "step": 8813 }, { "epoch": 0.72, "grad_norm": 0.8697916155567965, "learning_rate": 3.93172218648027e-06, "loss": 0.474, "step": 8814 }, { "epoch": 0.72, "grad_norm": 1.009523382502148, "learning_rate": 3.929630019867579e-06, "loss": 0.4285, "step": 8815 }, { "epoch": 0.72, "grad_norm": 1.0902489236515984, "learning_rate": 3.927538273926141e-06, "loss": 0.4748, "step": 8816 }, { "epoch": 0.72, "grad_norm": 0.9224848262996478, "learning_rate": 3.92544694880092e-06, "loss": 0.4925, "step": 8817 }, { "epoch": 0.72, "grad_norm": 0.928690974890286, "learning_rate": 3.923356044636829e-06, "loss": 0.449, "step": 8818 }, { "epoch": 0.72, "grad_norm": 1.0201833658121, "learning_rate": 3.921265561578781e-06, "loss": 0.548, "step": 8819 }, { "epoch": 0.72, "grad_norm": 0.9215773732856094, "learning_rate": 3.919175499771635e-06, "loss": 0.4728, "step": 8820 }, { "epoch": 0.72, "grad_norm": 0.9183799946338584, "learning_rate": 3.917085859360234e-06, "loss": 0.4792, "step": 8821 }, { "epoch": 0.72, "grad_norm": 0.9020914043322761, "learning_rate": 3.9149966404893854e-06, "loss": 0.4599, "step": 8822 }, { "epoch": 0.72, "grad_norm": 1.0179660059920808, "learning_rate": 3.912907843303877e-06, "loss": 0.5157, "step": 8823 }, { "epoch": 0.72, "grad_norm": 0.8934013813007406, "learning_rate": 3.910819467948448e-06, "loss": 0.4946, "step": 8824 }, { "epoch": 0.72, "grad_norm": 0.9137460578656958, "learning_rate": 3.908731514567836e-06, "loss": 0.4284, "step": 8825 }, { "epoch": 0.72, "grad_norm": 0.8983840220601542, "learning_rate": 3.906643983306724e-06, "loss": 0.4622, "step": 8826 }, { "epoch": 0.72, "grad_norm": 0.8299218289902504, "learning_rate": 3.904556874309779e-06, "loss": 0.4446, "step": 8827 }, { "epoch": 0.72, "grad_norm": 0.8824793357068349, "learning_rate": 3.902470187721636e-06, "loss": 0.4705, "step": 8828 }, { "epoch": 0.72, "grad_norm": 0.9958803430976835, "learning_rate": 3.900383923686905e-06, "loss": 0.5064, "step": 8829 }, { "epoch": 0.72, "grad_norm": 0.908448281142226, "learning_rate": 3.898298082350149e-06, "loss": 0.5247, "step": 8830 }, { "epoch": 0.72, "grad_norm": 0.9807265834771793, "learning_rate": 3.896212663855932e-06, "loss": 0.5566, "step": 8831 }, { "epoch": 0.72, "grad_norm": 0.918857203642734, "learning_rate": 3.894127668348759e-06, "loss": 0.4687, "step": 8832 }, { "epoch": 0.72, "grad_norm": 0.9019988377097433, "learning_rate": 3.892043095973123e-06, "loss": 0.4991, "step": 8833 }, { "epoch": 0.72, "grad_norm": 0.9049263772974447, "learning_rate": 3.889958946873482e-06, "loss": 0.4594, "step": 8834 }, { "epoch": 0.72, "grad_norm": 1.1347811419110758, "learning_rate": 3.887875221194271e-06, "loss": 0.5158, "step": 8835 }, { "epoch": 0.72, "grad_norm": 0.827489198544077, "learning_rate": 3.885791919079878e-06, "loss": 0.4375, "step": 8836 }, { "epoch": 0.72, "grad_norm": 0.8444464095083356, "learning_rate": 3.883709040674688e-06, "loss": 0.4174, "step": 8837 }, { "epoch": 0.72, "grad_norm": 0.9260301120795823, "learning_rate": 3.881626586123034e-06, "loss": 0.4922, "step": 8838 }, { "epoch": 0.72, "grad_norm": 0.9377917778876699, "learning_rate": 3.8795445555692305e-06, "loss": 0.5823, "step": 8839 }, { "epoch": 0.72, "grad_norm": 0.9395718084838521, "learning_rate": 3.87746294915756e-06, "loss": 0.5049, "step": 8840 }, { "epoch": 0.72, "grad_norm": 0.9314406826381335, "learning_rate": 3.87538176703228e-06, "loss": 0.4928, "step": 8841 }, { "epoch": 0.72, "grad_norm": 0.9542956906849946, "learning_rate": 3.873301009337604e-06, "loss": 0.4691, "step": 8842 }, { "epoch": 0.72, "grad_norm": 0.8688695931633011, "learning_rate": 3.871220676217742e-06, "loss": 0.4676, "step": 8843 }, { "epoch": 0.72, "grad_norm": 0.986603792422116, "learning_rate": 3.869140767816846e-06, "loss": 0.5367, "step": 8844 }, { "epoch": 0.72, "grad_norm": 0.9600355737891579, "learning_rate": 3.867061284279058e-06, "loss": 0.5112, "step": 8845 }, { "epoch": 0.72, "grad_norm": 0.9573543631323532, "learning_rate": 3.864982225748481e-06, "loss": 0.5086, "step": 8846 }, { "epoch": 0.72, "grad_norm": 0.8153846929151147, "learning_rate": 3.862903592369199e-06, "loss": 0.455, "step": 8847 }, { "epoch": 0.72, "grad_norm": 0.9317872841841982, "learning_rate": 3.860825384285247e-06, "loss": 0.537, "step": 8848 }, { "epoch": 0.72, "grad_norm": 0.9100160868925283, "learning_rate": 3.858747601640658e-06, "loss": 0.4817, "step": 8849 }, { "epoch": 0.72, "grad_norm": 0.8958109692096803, "learning_rate": 3.856670244579409e-06, "loss": 0.4736, "step": 8850 }, { "epoch": 0.72, "grad_norm": 0.8956550140399889, "learning_rate": 3.854593313245463e-06, "loss": 0.4674, "step": 8851 }, { "epoch": 0.72, "grad_norm": 0.9936883431069679, "learning_rate": 3.852516807782749e-06, "loss": 0.5141, "step": 8852 }, { "epoch": 0.72, "grad_norm": 0.9475637588463383, "learning_rate": 3.850440728335171e-06, "loss": 0.4938, "step": 8853 }, { "epoch": 0.72, "grad_norm": 0.9297870299422121, "learning_rate": 3.848365075046589e-06, "loss": 0.526, "step": 8854 }, { "epoch": 0.72, "grad_norm": 0.8497992598755991, "learning_rate": 3.846289848060858e-06, "loss": 0.4832, "step": 8855 }, { "epoch": 0.72, "grad_norm": 0.9046064998492238, "learning_rate": 3.844215047521779e-06, "loss": 0.5173, "step": 8856 }, { "epoch": 0.72, "grad_norm": 1.0890530082437015, "learning_rate": 3.842140673573136e-06, "loss": 0.5348, "step": 8857 }, { "epoch": 0.72, "grad_norm": 1.002209227608526, "learning_rate": 3.840066726358683e-06, "loss": 0.4426, "step": 8858 }, { "epoch": 0.72, "grad_norm": 0.8119903241278402, "learning_rate": 3.837993206022146e-06, "loss": 0.4799, "step": 8859 }, { "epoch": 0.72, "grad_norm": 0.9668063740346428, "learning_rate": 3.8359201127072065e-06, "loss": 0.524, "step": 8860 }, { "epoch": 0.72, "grad_norm": 0.8836622588003586, "learning_rate": 3.8338474465575425e-06, "loss": 0.5279, "step": 8861 }, { "epoch": 0.72, "grad_norm": 0.9555312989641757, "learning_rate": 3.831775207716778e-06, "loss": 0.4856, "step": 8862 }, { "epoch": 0.72, "grad_norm": 0.9065472201631909, "learning_rate": 3.82970339632852e-06, "loss": 0.5127, "step": 8863 }, { "epoch": 0.72, "grad_norm": 0.8870497018587068, "learning_rate": 3.827632012536344e-06, "loss": 0.4403, "step": 8864 }, { "epoch": 0.72, "grad_norm": 0.8824030645406618, "learning_rate": 3.825561056483798e-06, "loss": 0.5422, "step": 8865 }, { "epoch": 0.72, "grad_norm": 0.8760984152703735, "learning_rate": 3.823490528314387e-06, "loss": 0.4916, "step": 8866 }, { "epoch": 0.72, "grad_norm": 0.9023598070151023, "learning_rate": 3.821420428171611e-06, "loss": 0.5103, "step": 8867 }, { "epoch": 0.72, "grad_norm": 0.9606187506813286, "learning_rate": 3.819350756198915e-06, "loss": 0.5248, "step": 8868 }, { "epoch": 0.72, "grad_norm": 0.9106250445068397, "learning_rate": 3.81728151253973e-06, "loss": 0.3984, "step": 8869 }, { "epoch": 0.72, "grad_norm": 0.8406825255854373, "learning_rate": 3.815212697337451e-06, "loss": 0.533, "step": 8870 }, { "epoch": 0.72, "grad_norm": 0.8989819504597925, "learning_rate": 3.8131443107354503e-06, "loss": 0.5037, "step": 8871 }, { "epoch": 0.72, "grad_norm": 0.7929612535636658, "learning_rate": 3.8110763528770543e-06, "loss": 0.3884, "step": 8872 }, { "epoch": 0.72, "grad_norm": 0.814849985403055, "learning_rate": 3.8090088239055843e-06, "loss": 0.4299, "step": 8873 }, { "epoch": 0.72, "grad_norm": 0.857194615734218, "learning_rate": 3.8069417239643082e-06, "loss": 0.4467, "step": 8874 }, { "epoch": 0.72, "grad_norm": 0.8907631497574886, "learning_rate": 3.804875053196477e-06, "loss": 0.4932, "step": 8875 }, { "epoch": 0.72, "grad_norm": 0.8830049019992933, "learning_rate": 3.80280881174531e-06, "loss": 0.486, "step": 8876 }, { "epoch": 0.72, "grad_norm": 0.9476380674101876, "learning_rate": 3.800742999753999e-06, "loss": 0.4813, "step": 8877 }, { "epoch": 0.72, "grad_norm": 0.9612638714238807, "learning_rate": 3.7986776173656927e-06, "loss": 0.4955, "step": 8878 }, { "epoch": 0.72, "grad_norm": 0.9275579157713626, "learning_rate": 3.7966126647235326e-06, "loss": 0.4744, "step": 8879 }, { "epoch": 0.72, "grad_norm": 0.9605038116462703, "learning_rate": 3.79454814197061e-06, "loss": 0.4821, "step": 8880 }, { "epoch": 0.72, "grad_norm": 0.871832545094725, "learning_rate": 3.792484049249996e-06, "loss": 0.5148, "step": 8881 }, { "epoch": 0.72, "grad_norm": 1.0045689213531521, "learning_rate": 3.790420386704733e-06, "loss": 0.5913, "step": 8882 }, { "epoch": 0.72, "grad_norm": 0.8229711090759823, "learning_rate": 3.788357154477831e-06, "loss": 0.4665, "step": 8883 }, { "epoch": 0.72, "grad_norm": 0.92957969362573, "learning_rate": 3.786294352712262e-06, "loss": 0.4311, "step": 8884 }, { "epoch": 0.72, "grad_norm": 0.8949166464230011, "learning_rate": 3.784231981550991e-06, "loss": 0.4892, "step": 8885 }, { "epoch": 0.72, "grad_norm": 0.9962607277158397, "learning_rate": 3.782170041136922e-06, "loss": 0.5495, "step": 8886 }, { "epoch": 0.72, "grad_norm": 0.958883598870349, "learning_rate": 3.7801085316129615e-06, "loss": 0.5129, "step": 8887 }, { "epoch": 0.72, "grad_norm": 0.9222815789807826, "learning_rate": 3.778047453121958e-06, "loss": 0.5116, "step": 8888 }, { "epoch": 0.72, "grad_norm": 1.030209109733231, "learning_rate": 3.7759868058067483e-06, "loss": 0.5329, "step": 8889 }, { "epoch": 0.72, "grad_norm": 1.0035102607125828, "learning_rate": 3.773926589810133e-06, "loss": 0.4956, "step": 8890 }, { "epoch": 0.72, "grad_norm": 0.8946525169233979, "learning_rate": 3.7718668052748842e-06, "loss": 0.4211, "step": 8891 }, { "epoch": 0.72, "grad_norm": 0.7977790198384506, "learning_rate": 3.7698074523437355e-06, "loss": 0.4643, "step": 8892 }, { "epoch": 0.72, "grad_norm": 1.0180235740982717, "learning_rate": 3.7677485311594107e-06, "loss": 0.5146, "step": 8893 }, { "epoch": 0.72, "grad_norm": 0.9956941872006838, "learning_rate": 3.7656900418645826e-06, "loss": 0.5284, "step": 8894 }, { "epoch": 0.72, "grad_norm": 0.8680531550447771, "learning_rate": 3.763631984601903e-06, "loss": 0.4795, "step": 8895 }, { "epoch": 0.72, "grad_norm": 0.8941870294776818, "learning_rate": 3.7615743595139965e-06, "loss": 0.4795, "step": 8896 }, { "epoch": 0.72, "grad_norm": 0.911033857549809, "learning_rate": 3.759517166743456e-06, "loss": 0.5178, "step": 8897 }, { "epoch": 0.72, "grad_norm": 0.9492113344650935, "learning_rate": 3.7574604064328336e-06, "loss": 0.4921, "step": 8898 }, { "epoch": 0.72, "grad_norm": 0.877478301058727, "learning_rate": 3.7554040787246746e-06, "loss": 0.4545, "step": 8899 }, { "epoch": 0.72, "grad_norm": 0.9549658963774811, "learning_rate": 3.7533481837614717e-06, "loss": 0.5422, "step": 8900 }, { "epoch": 0.72, "grad_norm": 0.8911457825497752, "learning_rate": 3.7512927216856987e-06, "loss": 0.5134, "step": 8901 }, { "epoch": 0.72, "grad_norm": 0.8854910809568246, "learning_rate": 3.7492376926397966e-06, "loss": 0.4683, "step": 8902 }, { "epoch": 0.72, "grad_norm": 0.9407344486002542, "learning_rate": 3.7471830967661815e-06, "loss": 0.489, "step": 8903 }, { "epoch": 0.72, "grad_norm": 0.8656497076194637, "learning_rate": 3.745128934207225e-06, "loss": 0.4724, "step": 8904 }, { "epoch": 0.72, "grad_norm": 0.8441131056004215, "learning_rate": 3.743075205105292e-06, "loss": 0.4291, "step": 8905 }, { "epoch": 0.72, "grad_norm": 0.9530959715206083, "learning_rate": 3.7410219096026944e-06, "loss": 0.5696, "step": 8906 }, { "epoch": 0.72, "grad_norm": 0.9786935110086086, "learning_rate": 3.7389690478417273e-06, "loss": 0.4892, "step": 8907 }, { "epoch": 0.72, "grad_norm": 0.9564410548228618, "learning_rate": 3.7369166199646502e-06, "loss": 0.492, "step": 8908 }, { "epoch": 0.72, "grad_norm": 0.9658684056753355, "learning_rate": 3.7348646261137e-06, "loss": 0.5069, "step": 8909 }, { "epoch": 0.72, "grad_norm": 0.8958230878728628, "learning_rate": 3.732813066431068e-06, "loss": 0.4948, "step": 8910 }, { "epoch": 0.72, "grad_norm": 0.9869591315367217, "learning_rate": 3.730761941058938e-06, "loss": 0.5447, "step": 8911 }, { "epoch": 0.72, "grad_norm": 0.92782933804282, "learning_rate": 3.7287112501394406e-06, "loss": 0.5225, "step": 8912 }, { "epoch": 0.72, "grad_norm": 0.9217797291013208, "learning_rate": 3.7266609938146912e-06, "loss": 0.4856, "step": 8913 }, { "epoch": 0.72, "grad_norm": 0.9822645901036696, "learning_rate": 3.724611172226771e-06, "loss": 0.457, "step": 8914 }, { "epoch": 0.72, "grad_norm": 0.8908297362107798, "learning_rate": 3.722561785517732e-06, "loss": 0.4382, "step": 8915 }, { "epoch": 0.72, "grad_norm": 1.0721806990810707, "learning_rate": 3.7205128338295884e-06, "loss": 0.4757, "step": 8916 }, { "epoch": 0.72, "grad_norm": 0.8454484712366738, "learning_rate": 3.718464317304341e-06, "loss": 0.455, "step": 8917 }, { "epoch": 0.72, "grad_norm": 0.8733536681919967, "learning_rate": 3.716416236083942e-06, "loss": 0.4972, "step": 8918 }, { "epoch": 0.72, "grad_norm": 0.9702493580649202, "learning_rate": 3.7143685903103242e-06, "loss": 0.5163, "step": 8919 }, { "epoch": 0.72, "grad_norm": 0.8858387912709786, "learning_rate": 3.7123213801253876e-06, "loss": 0.4982, "step": 8920 }, { "epoch": 0.73, "grad_norm": 0.9127452804890583, "learning_rate": 3.7102746056710025e-06, "loss": 0.4876, "step": 8921 }, { "epoch": 0.73, "grad_norm": 0.8992842307594613, "learning_rate": 3.708228267089008e-06, "loss": 0.5092, "step": 8922 }, { "epoch": 0.73, "grad_norm": 0.8843837452095896, "learning_rate": 3.706182364521217e-06, "loss": 0.4668, "step": 8923 }, { "epoch": 0.73, "grad_norm": 1.0186044909447514, "learning_rate": 3.704136898109403e-06, "loss": 0.5457, "step": 8924 }, { "epoch": 0.73, "grad_norm": 0.9663204881717411, "learning_rate": 3.7020918679953166e-06, "loss": 0.5576, "step": 8925 }, { "epoch": 0.73, "grad_norm": 0.9004867321863538, "learning_rate": 3.7000472743206773e-06, "loss": 0.4949, "step": 8926 }, { "epoch": 0.73, "grad_norm": 0.8848182522515886, "learning_rate": 3.698003117227175e-06, "loss": 0.5097, "step": 8927 }, { "epoch": 0.73, "grad_norm": 0.9150649446475405, "learning_rate": 3.6959593968564654e-06, "loss": 0.454, "step": 8928 }, { "epoch": 0.73, "grad_norm": 0.8919034910686628, "learning_rate": 3.6939161133501823e-06, "loss": 0.5015, "step": 8929 }, { "epoch": 0.73, "grad_norm": 0.953865934457871, "learning_rate": 3.691873266849916e-06, "loss": 0.5266, "step": 8930 }, { "epoch": 0.73, "grad_norm": 0.9061766912229933, "learning_rate": 3.6898308574972365e-06, "loss": 0.5061, "step": 8931 }, { "epoch": 0.73, "grad_norm": 1.1822247801890384, "learning_rate": 3.6877888854336808e-06, "loss": 0.5964, "step": 8932 }, { "epoch": 0.73, "grad_norm": 0.8468554061541979, "learning_rate": 3.6857473508007567e-06, "loss": 0.4568, "step": 8933 }, { "epoch": 0.73, "grad_norm": 0.9397863791139718, "learning_rate": 3.6837062537399414e-06, "loss": 0.5087, "step": 8934 }, { "epoch": 0.73, "grad_norm": 0.9095373378132872, "learning_rate": 3.6816655943926825e-06, "loss": 0.498, "step": 8935 }, { "epoch": 0.73, "grad_norm": 0.885010856329452, "learning_rate": 3.6796253729003905e-06, "loss": 0.5331, "step": 8936 }, { "epoch": 0.73, "grad_norm": 1.0204269253203586, "learning_rate": 3.6775855894044543e-06, "loss": 0.5541, "step": 8937 }, { "epoch": 0.73, "grad_norm": 0.947069376220401, "learning_rate": 3.6755462440462288e-06, "loss": 0.4936, "step": 8938 }, { "epoch": 0.73, "grad_norm": 0.9047457184730575, "learning_rate": 3.673507336967038e-06, "loss": 0.5206, "step": 8939 }, { "epoch": 0.73, "grad_norm": 1.045100952717416, "learning_rate": 3.6714688683081778e-06, "loss": 0.4995, "step": 8940 }, { "epoch": 0.73, "grad_norm": 0.9339554233023288, "learning_rate": 3.669430838210911e-06, "loss": 0.4846, "step": 8941 }, { "epoch": 0.73, "grad_norm": 0.9311600655124447, "learning_rate": 3.6673932468164763e-06, "loss": 0.5243, "step": 8942 }, { "epoch": 0.73, "grad_norm": 0.9600652156787634, "learning_rate": 3.6653560942660694e-06, "loss": 0.5025, "step": 8943 }, { "epoch": 0.73, "grad_norm": 0.875907664889498, "learning_rate": 3.663319380700865e-06, "loss": 0.4649, "step": 8944 }, { "epoch": 0.73, "grad_norm": 0.9481821720808001, "learning_rate": 3.661283106262008e-06, "loss": 0.4747, "step": 8945 }, { "epoch": 0.73, "grad_norm": 1.0168558120772304, "learning_rate": 3.659247271090609e-06, "loss": 0.4481, "step": 8946 }, { "epoch": 0.73, "grad_norm": 0.9162316079435071, "learning_rate": 3.6572118753277495e-06, "loss": 0.4684, "step": 8947 }, { "epoch": 0.73, "grad_norm": 0.9225979331389788, "learning_rate": 3.655176919114485e-06, "loss": 0.4973, "step": 8948 }, { "epoch": 0.73, "grad_norm": 0.9014094862874904, "learning_rate": 3.6531424025918284e-06, "loss": 0.5053, "step": 8949 }, { "epoch": 0.73, "grad_norm": 0.970179655664484, "learning_rate": 3.651108325900773e-06, "loss": 0.466, "step": 8950 }, { "epoch": 0.73, "grad_norm": 1.032032574074345, "learning_rate": 3.6490746891822806e-06, "loss": 0.5263, "step": 8951 }, { "epoch": 0.73, "grad_norm": 0.9129906619640558, "learning_rate": 3.647041492577278e-06, "loss": 0.5223, "step": 8952 }, { "epoch": 0.73, "grad_norm": 0.8550106008853208, "learning_rate": 3.645008736226664e-06, "loss": 0.4532, "step": 8953 }, { "epoch": 0.73, "grad_norm": 0.801384404024512, "learning_rate": 3.6429764202713124e-06, "loss": 0.4851, "step": 8954 }, { "epoch": 0.73, "grad_norm": 0.9889555676924645, "learning_rate": 3.6409445448520533e-06, "loss": 0.5049, "step": 8955 }, { "epoch": 0.73, "grad_norm": 0.9403531883891941, "learning_rate": 3.6389131101096953e-06, "loss": 0.536, "step": 8956 }, { "epoch": 0.73, "grad_norm": 1.0502677111502992, "learning_rate": 3.6368821161850176e-06, "loss": 0.5614, "step": 8957 }, { "epoch": 0.73, "grad_norm": 0.8630171779379783, "learning_rate": 3.6348515632187643e-06, "loss": 0.4034, "step": 8958 }, { "epoch": 0.73, "grad_norm": 0.9047027156195153, "learning_rate": 3.6328214513516523e-06, "loss": 0.4744, "step": 8959 }, { "epoch": 0.73, "grad_norm": 0.916757086523125, "learning_rate": 3.6307917807243697e-06, "loss": 0.4842, "step": 8960 }, { "epoch": 0.73, "grad_norm": 0.9940495108617984, "learning_rate": 3.6287625514775602e-06, "loss": 0.5406, "step": 8961 }, { "epoch": 0.73, "grad_norm": 0.9253795932773297, "learning_rate": 3.626733763751861e-06, "loss": 0.4978, "step": 8962 }, { "epoch": 0.73, "grad_norm": 0.9150118745581817, "learning_rate": 3.624705417687856e-06, "loss": 0.4843, "step": 8963 }, { "epoch": 0.73, "grad_norm": 0.8946597973175461, "learning_rate": 3.6226775134261106e-06, "loss": 0.455, "step": 8964 }, { "epoch": 0.73, "grad_norm": 0.8485633591778271, "learning_rate": 3.620650051107156e-06, "loss": 0.5026, "step": 8965 }, { "epoch": 0.73, "grad_norm": 0.8989951788879078, "learning_rate": 3.6186230308714985e-06, "loss": 0.4281, "step": 8966 }, { "epoch": 0.73, "grad_norm": 0.9932172971365646, "learning_rate": 3.6165964528595988e-06, "loss": 0.4487, "step": 8967 }, { "epoch": 0.73, "grad_norm": 0.9419138818036944, "learning_rate": 3.6145703172119085e-06, "loss": 0.5094, "step": 8968 }, { "epoch": 0.73, "grad_norm": 1.0242264441554165, "learning_rate": 3.6125446240688276e-06, "loss": 0.533, "step": 8969 }, { "epoch": 0.73, "grad_norm": 0.9880175132776277, "learning_rate": 3.61051937357074e-06, "loss": 0.5336, "step": 8970 }, { "epoch": 0.73, "grad_norm": 0.9623432157150716, "learning_rate": 3.6084945658579918e-06, "loss": 0.5216, "step": 8971 }, { "epoch": 0.73, "grad_norm": 0.9547697834135409, "learning_rate": 3.606470201070904e-06, "loss": 0.4688, "step": 8972 }, { "epoch": 0.73, "grad_norm": 0.9583453259256163, "learning_rate": 3.6044462793497526e-06, "loss": 0.5612, "step": 8973 }, { "epoch": 0.73, "grad_norm": 0.9720258126741251, "learning_rate": 3.6024228008348096e-06, "loss": 0.4992, "step": 8974 }, { "epoch": 0.73, "grad_norm": 0.9474217582347544, "learning_rate": 3.600399765666287e-06, "loss": 0.5311, "step": 8975 }, { "epoch": 0.73, "grad_norm": 0.9902690886560699, "learning_rate": 3.5983771739843855e-06, "loss": 0.5334, "step": 8976 }, { "epoch": 0.73, "grad_norm": 0.9613087078021019, "learning_rate": 3.596355025929267e-06, "loss": 0.4961, "step": 8977 }, { "epoch": 0.73, "grad_norm": 0.9757830970674216, "learning_rate": 3.594333321641068e-06, "loss": 0.5468, "step": 8978 }, { "epoch": 0.73, "grad_norm": 0.9347073297449352, "learning_rate": 3.5923120612598828e-06, "loss": 0.4568, "step": 8979 }, { "epoch": 0.73, "grad_norm": 0.9029241676866938, "learning_rate": 3.590291244925793e-06, "loss": 0.4688, "step": 8980 }, { "epoch": 0.73, "grad_norm": 0.8488345626658037, "learning_rate": 3.588270872778833e-06, "loss": 0.4678, "step": 8981 }, { "epoch": 0.73, "grad_norm": 0.9033708830753971, "learning_rate": 3.5862509449590135e-06, "loss": 0.4801, "step": 8982 }, { "epoch": 0.73, "grad_norm": 0.9888788145320088, "learning_rate": 3.5842314616063134e-06, "loss": 0.5105, "step": 8983 }, { "epoch": 0.73, "grad_norm": 0.8813975859723068, "learning_rate": 3.582212422860687e-06, "loss": 0.4764, "step": 8984 }, { "epoch": 0.73, "grad_norm": 1.0035285428927048, "learning_rate": 3.5801938288620395e-06, "loss": 0.4795, "step": 8985 }, { "epoch": 0.73, "grad_norm": 0.9402018886597426, "learning_rate": 3.5781756797502733e-06, "loss": 0.4551, "step": 8986 }, { "epoch": 0.73, "grad_norm": 0.9275798940011335, "learning_rate": 3.576157975665232e-06, "loss": 0.5709, "step": 8987 }, { "epoch": 0.73, "grad_norm": 0.88546274771075, "learning_rate": 3.5741407167467444e-06, "loss": 0.4388, "step": 8988 }, { "epoch": 0.73, "grad_norm": 0.8964212413484605, "learning_rate": 3.5721239031346067e-06, "loss": 0.4633, "step": 8989 }, { "epoch": 0.73, "grad_norm": 0.8716362942550768, "learning_rate": 3.5701075349685842e-06, "loss": 0.4732, "step": 8990 }, { "epoch": 0.73, "grad_norm": 0.872765870527491, "learning_rate": 3.568091612388399e-06, "loss": 0.4433, "step": 8991 }, { "epoch": 0.73, "grad_norm": 0.8806035542732507, "learning_rate": 3.566076135533767e-06, "loss": 0.4762, "step": 8992 }, { "epoch": 0.73, "grad_norm": 0.9063579768886735, "learning_rate": 3.5640611045443485e-06, "loss": 0.5312, "step": 8993 }, { "epoch": 0.73, "grad_norm": 1.1252243359435834, "learning_rate": 3.5620465195597865e-06, "loss": 0.4534, "step": 8994 }, { "epoch": 0.73, "grad_norm": 0.9411728999230131, "learning_rate": 3.5600323807196912e-06, "loss": 0.4822, "step": 8995 }, { "epoch": 0.73, "grad_norm": 0.9028802160371076, "learning_rate": 3.5580186881636414e-06, "loss": 0.519, "step": 8996 }, { "epoch": 0.73, "grad_norm": 0.918337987256864, "learning_rate": 3.5560054420311776e-06, "loss": 0.4788, "step": 8997 }, { "epoch": 0.73, "grad_norm": 0.9989192754355186, "learning_rate": 3.5539926424618265e-06, "loss": 0.5386, "step": 8998 }, { "epoch": 0.73, "grad_norm": 0.838803831436354, "learning_rate": 3.551980289595064e-06, "loss": 0.4483, "step": 8999 }, { "epoch": 0.73, "grad_norm": 0.8445468941893491, "learning_rate": 3.5499683835703493e-06, "loss": 0.4687, "step": 9000 }, { "epoch": 0.73, "grad_norm": 0.848857920123972, "learning_rate": 3.547956924527103e-06, "loss": 0.5277, "step": 9001 }, { "epoch": 0.73, "grad_norm": 0.8680873155519754, "learning_rate": 3.5459459126047226e-06, "loss": 0.4873, "step": 9002 }, { "epoch": 0.73, "grad_norm": 0.9469250103942338, "learning_rate": 3.5439353479425597e-06, "loss": 0.5117, "step": 9003 }, { "epoch": 0.73, "grad_norm": 1.0264133283896932, "learning_rate": 3.5419252306799567e-06, "loss": 0.5101, "step": 9004 }, { "epoch": 0.73, "grad_norm": 0.8621328186046997, "learning_rate": 3.539915560956204e-06, "loss": 0.4558, "step": 9005 }, { "epoch": 0.73, "grad_norm": 0.8616535313317472, "learning_rate": 3.5379063389105727e-06, "loss": 0.4193, "step": 9006 }, { "epoch": 0.73, "grad_norm": 0.8940689750239936, "learning_rate": 3.5358975646823002e-06, "loss": 0.5044, "step": 9007 }, { "epoch": 0.73, "grad_norm": 0.9983698752868682, "learning_rate": 3.533889238410596e-06, "loss": 0.5353, "step": 9008 }, { "epoch": 0.73, "grad_norm": 1.1563379381969516, "learning_rate": 3.5318813602346257e-06, "loss": 0.5731, "step": 9009 }, { "epoch": 0.73, "grad_norm": 0.8336786353501896, "learning_rate": 3.529873930293546e-06, "loss": 0.457, "step": 9010 }, { "epoch": 0.73, "grad_norm": 0.9314020854618265, "learning_rate": 3.5278669487264583e-06, "loss": 0.5305, "step": 9011 }, { "epoch": 0.73, "grad_norm": 0.9373727014930834, "learning_rate": 3.525860415672456e-06, "loss": 0.5118, "step": 9012 }, { "epoch": 0.73, "grad_norm": 1.036601676300659, "learning_rate": 3.523854331270582e-06, "loss": 0.5551, "step": 9013 }, { "epoch": 0.73, "grad_norm": 0.9470780237472647, "learning_rate": 3.5218486956598573e-06, "loss": 0.4952, "step": 9014 }, { "epoch": 0.73, "grad_norm": 0.9371280542011889, "learning_rate": 3.5198435089792726e-06, "loss": 0.5261, "step": 9015 }, { "epoch": 0.73, "grad_norm": 0.9707691354773395, "learning_rate": 3.5178387713677886e-06, "loss": 0.4942, "step": 9016 }, { "epoch": 0.73, "grad_norm": 0.9204704483209045, "learning_rate": 3.515834482964321e-06, "loss": 0.5014, "step": 9017 }, { "epoch": 0.73, "grad_norm": 0.8861801504688388, "learning_rate": 3.5138306439077784e-06, "loss": 0.4825, "step": 9018 }, { "epoch": 0.73, "grad_norm": 0.8744753752055189, "learning_rate": 3.5118272543370157e-06, "loss": 0.4632, "step": 9019 }, { "epoch": 0.73, "grad_norm": 0.9304866500057495, "learning_rate": 3.5098243143908694e-06, "loss": 0.5482, "step": 9020 }, { "epoch": 0.73, "grad_norm": 0.9796429237511085, "learning_rate": 3.507821824208142e-06, "loss": 0.5174, "step": 9021 }, { "epoch": 0.73, "grad_norm": 0.8162021792821621, "learning_rate": 3.5058197839276064e-06, "loss": 0.4494, "step": 9022 }, { "epoch": 0.73, "grad_norm": 0.8470310632620339, "learning_rate": 3.5038181936879932e-06, "loss": 0.4983, "step": 9023 }, { "epoch": 0.73, "grad_norm": 0.9326230422525369, "learning_rate": 3.5018170536280237e-06, "loss": 0.4876, "step": 9024 }, { "epoch": 0.73, "grad_norm": 1.081302621542103, "learning_rate": 3.4998163638863646e-06, "loss": 0.5806, "step": 9025 }, { "epoch": 0.73, "grad_norm": 0.8398561267240168, "learning_rate": 3.4978161246016664e-06, "loss": 0.5022, "step": 9026 }, { "epoch": 0.73, "grad_norm": 1.008510871462587, "learning_rate": 3.495816335912543e-06, "loss": 0.5308, "step": 9027 }, { "epoch": 0.73, "grad_norm": 0.813490241806617, "learning_rate": 3.493816997957582e-06, "loss": 0.3922, "step": 9028 }, { "epoch": 0.73, "grad_norm": 0.8522965620275039, "learning_rate": 3.4918181108753247e-06, "loss": 0.447, "step": 9029 }, { "epoch": 0.73, "grad_norm": 1.0595297523859928, "learning_rate": 3.4898196748043065e-06, "loss": 0.4964, "step": 9030 }, { "epoch": 0.73, "grad_norm": 0.9004845069622794, "learning_rate": 3.4878216898830074e-06, "loss": 0.4881, "step": 9031 }, { "epoch": 0.73, "grad_norm": 0.9295585565321477, "learning_rate": 3.4858241562498884e-06, "loss": 0.5394, "step": 9032 }, { "epoch": 0.73, "grad_norm": 0.9078302483781503, "learning_rate": 3.4838270740433776e-06, "loss": 0.4755, "step": 9033 }, { "epoch": 0.73, "grad_norm": 0.891665135254238, "learning_rate": 3.4818304434018734e-06, "loss": 0.4798, "step": 9034 }, { "epoch": 0.73, "grad_norm": 0.8578424459037337, "learning_rate": 3.4798342644637327e-06, "loss": 0.5138, "step": 9035 }, { "epoch": 0.73, "grad_norm": 0.8970354823693978, "learning_rate": 3.4778385373672996e-06, "loss": 0.4402, "step": 9036 }, { "epoch": 0.73, "grad_norm": 0.8832316717638571, "learning_rate": 3.4758432622508677e-06, "loss": 0.4979, "step": 9037 }, { "epoch": 0.73, "grad_norm": 0.8935845014449028, "learning_rate": 3.4738484392527107e-06, "loss": 0.4677, "step": 9038 }, { "epoch": 0.73, "grad_norm": 0.9709035988811506, "learning_rate": 3.471854068511068e-06, "loss": 0.4925, "step": 9039 }, { "epoch": 0.73, "grad_norm": 0.99935079521221, "learning_rate": 3.4698601501641517e-06, "loss": 0.4836, "step": 9040 }, { "epoch": 0.73, "grad_norm": 0.9496127903701702, "learning_rate": 3.4678666843501276e-06, "loss": 0.4915, "step": 9041 }, { "epoch": 0.73, "grad_norm": 0.8638931125885806, "learning_rate": 3.465873671207155e-06, "loss": 0.5133, "step": 9042 }, { "epoch": 0.73, "grad_norm": 0.8278711857326407, "learning_rate": 3.4638811108733383e-06, "loss": 0.4489, "step": 9043 }, { "epoch": 0.74, "grad_norm": 0.9039654110449227, "learning_rate": 3.4618890034867626e-06, "loss": 0.5066, "step": 9044 }, { "epoch": 0.74, "grad_norm": 0.9173666067331488, "learning_rate": 3.4598973491854804e-06, "loss": 0.473, "step": 9045 }, { "epoch": 0.74, "grad_norm": 1.0228975566013427, "learning_rate": 3.4579061481075137e-06, "loss": 0.5098, "step": 9046 }, { "epoch": 0.74, "grad_norm": 0.8442960933660839, "learning_rate": 3.455915400390841e-06, "loss": 0.4608, "step": 9047 }, { "epoch": 0.74, "grad_norm": 0.9276664713771727, "learning_rate": 3.4539251061734337e-06, "loss": 0.5207, "step": 9048 }, { "epoch": 0.74, "grad_norm": 0.8647425352503317, "learning_rate": 3.451935265593207e-06, "loss": 0.5113, "step": 9049 }, { "epoch": 0.74, "grad_norm": 0.9182509306051623, "learning_rate": 3.449945878788058e-06, "loss": 0.4752, "step": 9050 }, { "epoch": 0.74, "grad_norm": 0.9667703242017811, "learning_rate": 3.4479569458958494e-06, "loss": 0.5247, "step": 9051 }, { "epoch": 0.74, "grad_norm": 0.890490488057399, "learning_rate": 3.4459684670544157e-06, "loss": 0.4682, "step": 9052 }, { "epoch": 0.74, "grad_norm": 0.8894062584761423, "learning_rate": 3.4439804424015486e-06, "loss": 0.4623, "step": 9053 }, { "epoch": 0.74, "grad_norm": 1.1678266481326605, "learning_rate": 3.4419928720750274e-06, "loss": 0.5092, "step": 9054 }, { "epoch": 0.74, "grad_norm": 0.9180142808067063, "learning_rate": 3.44000575621258e-06, "loss": 0.4445, "step": 9055 }, { "epoch": 0.74, "grad_norm": 0.9747151736279898, "learning_rate": 3.4380190949519155e-06, "loss": 0.5326, "step": 9056 }, { "epoch": 0.74, "grad_norm": 0.8864171290008381, "learning_rate": 3.4360328884307058e-06, "loss": 0.4892, "step": 9057 }, { "epoch": 0.74, "grad_norm": 0.9903037454617845, "learning_rate": 3.4340471367865992e-06, "loss": 0.5742, "step": 9058 }, { "epoch": 0.74, "grad_norm": 0.8513222506288766, "learning_rate": 3.432061840157196e-06, "loss": 0.5226, "step": 9059 }, { "epoch": 0.74, "grad_norm": 1.0307240084812919, "learning_rate": 3.4300769986800863e-06, "loss": 0.5663, "step": 9060 }, { "epoch": 0.74, "grad_norm": 0.9968384356366111, "learning_rate": 3.4280926124928115e-06, "loss": 0.5288, "step": 9061 }, { "epoch": 0.74, "grad_norm": 0.9519071916218125, "learning_rate": 3.4261086817328882e-06, "loss": 0.4699, "step": 9062 }, { "epoch": 0.74, "grad_norm": 0.8450654754196741, "learning_rate": 3.424125206537803e-06, "loss": 0.4618, "step": 9063 }, { "epoch": 0.74, "grad_norm": 0.8698269817828128, "learning_rate": 3.422142187045011e-06, "loss": 0.4738, "step": 9064 }, { "epoch": 0.74, "grad_norm": 0.8777962455889311, "learning_rate": 3.4201596233919243e-06, "loss": 0.455, "step": 9065 }, { "epoch": 0.74, "grad_norm": 0.9974300937915439, "learning_rate": 3.418177515715947e-06, "loss": 0.6206, "step": 9066 }, { "epoch": 0.74, "grad_norm": 0.9838505746206851, "learning_rate": 3.416195864154426e-06, "loss": 0.525, "step": 9067 }, { "epoch": 0.74, "grad_norm": 1.0055224851762832, "learning_rate": 3.414214668844691e-06, "loss": 0.4833, "step": 9068 }, { "epoch": 0.74, "grad_norm": 1.012178258142545, "learning_rate": 3.4122339299240383e-06, "loss": 0.5122, "step": 9069 }, { "epoch": 0.74, "grad_norm": 0.9477903204487034, "learning_rate": 3.410253647529731e-06, "loss": 0.5254, "step": 9070 }, { "epoch": 0.74, "grad_norm": 0.9954868910750313, "learning_rate": 3.408273821799001e-06, "loss": 0.4825, "step": 9071 }, { "epoch": 0.74, "grad_norm": 0.9441065536533816, "learning_rate": 3.4062944528690512e-06, "loss": 0.4791, "step": 9072 }, { "epoch": 0.74, "grad_norm": 0.9837226393405486, "learning_rate": 3.4043155408770435e-06, "loss": 0.5217, "step": 9073 }, { "epoch": 0.74, "grad_norm": 0.883534359457053, "learning_rate": 3.4023370859601192e-06, "loss": 0.4569, "step": 9074 }, { "epoch": 0.74, "grad_norm": 0.8829348070197951, "learning_rate": 3.400359088255383e-06, "loss": 0.4665, "step": 9075 }, { "epoch": 0.74, "grad_norm": 0.9242326361724409, "learning_rate": 3.3983815478999073e-06, "loss": 0.4844, "step": 9076 }, { "epoch": 0.74, "grad_norm": 0.8751730630958149, "learning_rate": 3.396404465030735e-06, "loss": 0.5159, "step": 9077 }, { "epoch": 0.74, "grad_norm": 0.9524856623633947, "learning_rate": 3.3944278397848797e-06, "loss": 0.4986, "step": 9078 }, { "epoch": 0.74, "grad_norm": 0.9643769255032821, "learning_rate": 3.3924516722993115e-06, "loss": 0.5314, "step": 9079 }, { "epoch": 0.74, "grad_norm": 0.9979175564097665, "learning_rate": 3.3904759627109828e-06, "loss": 0.5332, "step": 9080 }, { "epoch": 0.74, "grad_norm": 0.9073140095491039, "learning_rate": 3.388500711156807e-06, "loss": 0.4793, "step": 9081 }, { "epoch": 0.74, "grad_norm": 0.8235618593206219, "learning_rate": 3.3865259177736663e-06, "loss": 0.4428, "step": 9082 }, { "epoch": 0.74, "grad_norm": 0.945033382039681, "learning_rate": 3.3845515826984143e-06, "loss": 0.576, "step": 9083 }, { "epoch": 0.74, "grad_norm": 0.8513977017652784, "learning_rate": 3.3825777060678734e-06, "loss": 0.4843, "step": 9084 }, { "epoch": 0.74, "grad_norm": 0.938115084213264, "learning_rate": 3.380604288018824e-06, "loss": 0.481, "step": 9085 }, { "epoch": 0.74, "grad_norm": 0.9257780529818783, "learning_rate": 3.3786313286880257e-06, "loss": 0.4521, "step": 9086 }, { "epoch": 0.74, "grad_norm": 0.9387396351436141, "learning_rate": 3.3766588282122037e-06, "loss": 0.458, "step": 9087 }, { "epoch": 0.74, "grad_norm": 0.9144270665316344, "learning_rate": 3.3746867867280496e-06, "loss": 0.5025, "step": 9088 }, { "epoch": 0.74, "grad_norm": 0.8722399311563757, "learning_rate": 3.3727152043722257e-06, "loss": 0.4524, "step": 9089 }, { "epoch": 0.74, "grad_norm": 0.955180665187194, "learning_rate": 3.3707440812813584e-06, "loss": 0.515, "step": 9090 }, { "epoch": 0.74, "grad_norm": 1.0856676436543693, "learning_rate": 3.3687734175920505e-06, "loss": 0.5452, "step": 9091 }, { "epoch": 0.74, "grad_norm": 0.9367234887166933, "learning_rate": 3.366803213440859e-06, "loss": 0.5332, "step": 9092 }, { "epoch": 0.74, "grad_norm": 0.9966112933019008, "learning_rate": 3.3648334689643214e-06, "loss": 0.4963, "step": 9093 }, { "epoch": 0.74, "grad_norm": 0.9522114454224588, "learning_rate": 3.3628641842989384e-06, "loss": 0.4583, "step": 9094 }, { "epoch": 0.74, "grad_norm": 0.8942989570626828, "learning_rate": 3.36089535958118e-06, "loss": 0.5099, "step": 9095 }, { "epoch": 0.74, "grad_norm": 0.924628658722338, "learning_rate": 3.3589269949474856e-06, "loss": 0.4925, "step": 9096 }, { "epoch": 0.74, "grad_norm": 0.890733017360491, "learning_rate": 3.356959090534262e-06, "loss": 0.4837, "step": 9097 }, { "epoch": 0.74, "grad_norm": 0.9854852437262702, "learning_rate": 3.3549916464778787e-06, "loss": 0.4994, "step": 9098 }, { "epoch": 0.74, "grad_norm": 0.9036658514233938, "learning_rate": 3.35302466291468e-06, "loss": 0.5012, "step": 9099 }, { "epoch": 0.74, "grad_norm": 0.9687654855435422, "learning_rate": 3.3510581399809762e-06, "loss": 0.4729, "step": 9100 }, { "epoch": 0.74, "grad_norm": 1.0067087474395098, "learning_rate": 3.3490920778130455e-06, "loss": 0.5084, "step": 9101 }, { "epoch": 0.74, "grad_norm": 0.9463734582922495, "learning_rate": 3.3471264765471346e-06, "loss": 0.5594, "step": 9102 }, { "epoch": 0.74, "grad_norm": 1.0202413779029968, "learning_rate": 3.3451613363194603e-06, "loss": 0.5247, "step": 9103 }, { "epoch": 0.74, "grad_norm": 0.8987810339759174, "learning_rate": 3.3431966572662e-06, "loss": 0.5141, "step": 9104 }, { "epoch": 0.74, "grad_norm": 0.8590651835433493, "learning_rate": 3.341232439523506e-06, "loss": 0.5129, "step": 9105 }, { "epoch": 0.74, "grad_norm": 0.9656965528739612, "learning_rate": 3.339268683227499e-06, "loss": 0.5474, "step": 9106 }, { "epoch": 0.74, "grad_norm": 0.8168953824209993, "learning_rate": 3.3373053885142636e-06, "loss": 0.4938, "step": 9107 }, { "epoch": 0.74, "grad_norm": 0.9425801989390152, "learning_rate": 3.335342555519855e-06, "loss": 0.5095, "step": 9108 }, { "epoch": 0.74, "grad_norm": 1.0167412583769833, "learning_rate": 3.3333801843802994e-06, "loss": 0.5314, "step": 9109 }, { "epoch": 0.74, "grad_norm": 0.8834281023977991, "learning_rate": 3.331418275231576e-06, "loss": 0.4723, "step": 9110 }, { "epoch": 0.74, "grad_norm": 0.9367658540260484, "learning_rate": 3.3294568282096586e-06, "loss": 0.4902, "step": 9111 }, { "epoch": 0.74, "grad_norm": 1.0125504424161789, "learning_rate": 3.3274958434504625e-06, "loss": 0.5003, "step": 9112 }, { "epoch": 0.74, "grad_norm": 0.8158336140834846, "learning_rate": 3.3255353210898866e-06, "loss": 0.4647, "step": 9113 }, { "epoch": 0.74, "grad_norm": 0.8216839414525001, "learning_rate": 3.3235752612637917e-06, "loss": 0.496, "step": 9114 }, { "epoch": 0.74, "grad_norm": 0.98806789784039, "learning_rate": 3.3216156641080134e-06, "loss": 0.4611, "step": 9115 }, { "epoch": 0.74, "grad_norm": 0.8943357113254146, "learning_rate": 3.319656529758339e-06, "loss": 0.5236, "step": 9116 }, { "epoch": 0.74, "grad_norm": 0.9066027966449727, "learning_rate": 3.317697858350548e-06, "loss": 0.4432, "step": 9117 }, { "epoch": 0.74, "grad_norm": 0.8431741448497101, "learning_rate": 3.3157396500203655e-06, "loss": 0.4948, "step": 9118 }, { "epoch": 0.74, "grad_norm": 0.9503603522033455, "learning_rate": 3.3137819049034957e-06, "loss": 0.4786, "step": 9119 }, { "epoch": 0.74, "grad_norm": 0.9381882710223372, "learning_rate": 3.31182462313561e-06, "loss": 0.5003, "step": 9120 }, { "epoch": 0.74, "grad_norm": 0.8472589492270928, "learning_rate": 3.309867804852348e-06, "loss": 0.4728, "step": 9121 }, { "epoch": 0.74, "grad_norm": 0.9965341501914862, "learning_rate": 3.3079114501893063e-06, "loss": 0.5548, "step": 9122 }, { "epoch": 0.74, "grad_norm": 0.9186851430217279, "learning_rate": 3.3059555592820726e-06, "loss": 0.4724, "step": 9123 }, { "epoch": 0.74, "grad_norm": 0.9803717640615625, "learning_rate": 3.3040001322661772e-06, "loss": 0.5643, "step": 9124 }, { "epoch": 0.74, "grad_norm": 1.011552371194042, "learning_rate": 3.3020451692771337e-06, "loss": 0.537, "step": 9125 }, { "epoch": 0.74, "grad_norm": 0.9319617645191561, "learning_rate": 3.3000906704504176e-06, "loss": 0.5305, "step": 9126 }, { "epoch": 0.74, "grad_norm": 0.98808305950421, "learning_rate": 3.2981366359214806e-06, "loss": 0.5001, "step": 9127 }, { "epoch": 0.74, "grad_norm": 1.028054694369749, "learning_rate": 3.296183065825722e-06, "loss": 0.5232, "step": 9128 }, { "epoch": 0.74, "grad_norm": 0.9378023811252546, "learning_rate": 3.294229960298537e-06, "loss": 0.5227, "step": 9129 }, { "epoch": 0.74, "grad_norm": 0.8934671402668811, "learning_rate": 3.2922773194752653e-06, "loss": 0.4626, "step": 9130 }, { "epoch": 0.74, "grad_norm": 0.8791588094117161, "learning_rate": 3.2903251434912265e-06, "loss": 0.4498, "step": 9131 }, { "epoch": 0.74, "grad_norm": 0.9675333837599392, "learning_rate": 3.288373432481703e-06, "loss": 0.5532, "step": 9132 }, { "epoch": 0.74, "grad_norm": 0.9734440759086094, "learning_rate": 3.28642218658195e-06, "loss": 0.4815, "step": 9133 }, { "epoch": 0.74, "grad_norm": 0.9726973591593845, "learning_rate": 3.2844714059271788e-06, "loss": 0.5354, "step": 9134 }, { "epoch": 0.74, "grad_norm": 0.8416426012871814, "learning_rate": 3.2825210906525885e-06, "loss": 0.4334, "step": 9135 }, { "epoch": 0.74, "grad_norm": 0.854242262963165, "learning_rate": 3.2805712408933223e-06, "loss": 0.428, "step": 9136 }, { "epoch": 0.74, "grad_norm": 0.9023628780098923, "learning_rate": 3.278621856784514e-06, "loss": 0.4703, "step": 9137 }, { "epoch": 0.74, "grad_norm": 0.8885723574629798, "learning_rate": 3.2766729384612473e-06, "loss": 0.4343, "step": 9138 }, { "epoch": 0.74, "grad_norm": 1.0417131985652142, "learning_rate": 3.2747244860585823e-06, "loss": 0.5197, "step": 9139 }, { "epoch": 0.74, "grad_norm": 0.8936134291252534, "learning_rate": 3.272776499711545e-06, "loss": 0.4929, "step": 9140 }, { "epoch": 0.74, "grad_norm": 1.0327830703736356, "learning_rate": 3.270828979555133e-06, "loss": 0.5942, "step": 9141 }, { "epoch": 0.74, "grad_norm": 0.950809911721351, "learning_rate": 3.2688819257242963e-06, "loss": 0.4976, "step": 9142 }, { "epoch": 0.74, "grad_norm": 0.9363799676811368, "learning_rate": 3.266935338353978e-06, "loss": 0.572, "step": 9143 }, { "epoch": 0.74, "grad_norm": 0.893436845428354, "learning_rate": 3.2649892175790667e-06, "loss": 0.4879, "step": 9144 }, { "epoch": 0.74, "grad_norm": 0.9416979646344061, "learning_rate": 3.2630435635344283e-06, "loss": 0.4463, "step": 9145 }, { "epoch": 0.74, "grad_norm": 0.9346917071558514, "learning_rate": 3.261098376354894e-06, "loss": 0.5248, "step": 9146 }, { "epoch": 0.74, "grad_norm": 0.9589850809495569, "learning_rate": 3.259153656175269e-06, "loss": 0.4937, "step": 9147 }, { "epoch": 0.74, "grad_norm": 0.8429765478620248, "learning_rate": 3.2572094031303103e-06, "loss": 0.4373, "step": 9148 }, { "epoch": 0.74, "grad_norm": 0.8736249174868056, "learning_rate": 3.255265617354766e-06, "loss": 0.4693, "step": 9149 }, { "epoch": 0.74, "grad_norm": 0.8879306103161134, "learning_rate": 3.253322298983327e-06, "loss": 0.4574, "step": 9150 }, { "epoch": 0.74, "grad_norm": 0.9695827910172448, "learning_rate": 3.25137944815067e-06, "loss": 0.5195, "step": 9151 }, { "epoch": 0.74, "grad_norm": 0.8645467946324216, "learning_rate": 3.2494370649914296e-06, "loss": 0.4675, "step": 9152 }, { "epoch": 0.74, "grad_norm": 1.0658706154975952, "learning_rate": 3.2474951496402175e-06, "loss": 0.567, "step": 9153 }, { "epoch": 0.74, "grad_norm": 0.8517465790920219, "learning_rate": 3.245553702231595e-06, "loss": 0.4684, "step": 9154 }, { "epoch": 0.74, "grad_norm": 1.0026469376848923, "learning_rate": 3.243612722900117e-06, "loss": 0.4907, "step": 9155 }, { "epoch": 0.74, "grad_norm": 0.9615501023379072, "learning_rate": 3.2416722117802803e-06, "loss": 0.5011, "step": 9156 }, { "epoch": 0.74, "grad_norm": 0.8908905189634679, "learning_rate": 3.2397321690065643e-06, "loss": 0.4729, "step": 9157 }, { "epoch": 0.74, "grad_norm": 0.9676487859660207, "learning_rate": 3.2377925947134137e-06, "loss": 0.4864, "step": 9158 }, { "epoch": 0.74, "grad_norm": 0.9666007142870987, "learning_rate": 3.235853489035241e-06, "loss": 0.5508, "step": 9159 }, { "epoch": 0.74, "grad_norm": 1.0605811965729126, "learning_rate": 3.2339148521064146e-06, "loss": 0.5257, "step": 9160 }, { "epoch": 0.74, "grad_norm": 1.0201172769043783, "learning_rate": 3.2319766840612954e-06, "loss": 0.4734, "step": 9161 }, { "epoch": 0.74, "grad_norm": 0.8410082186099735, "learning_rate": 3.230038985034184e-06, "loss": 0.4574, "step": 9162 }, { "epoch": 0.74, "grad_norm": 1.0832135483157863, "learning_rate": 3.2281017551593665e-06, "loss": 0.4434, "step": 9163 }, { "epoch": 0.74, "grad_norm": 1.0581813148791377, "learning_rate": 3.2261649945710916e-06, "loss": 0.5907, "step": 9164 }, { "epoch": 0.74, "grad_norm": 0.908821683888088, "learning_rate": 3.2242287034035756e-06, "loss": 0.4716, "step": 9165 }, { "epoch": 0.74, "grad_norm": 0.8791225901790715, "learning_rate": 3.222292881790996e-06, "loss": 0.4546, "step": 9166 }, { "epoch": 0.75, "grad_norm": 0.9342783962101158, "learning_rate": 3.2203575298675126e-06, "loss": 0.5162, "step": 9167 }, { "epoch": 0.75, "grad_norm": 0.9890614420190501, "learning_rate": 3.2184226477672366e-06, "loss": 0.5029, "step": 9168 }, { "epoch": 0.75, "grad_norm": 0.9082676055397952, "learning_rate": 3.2164882356242555e-06, "loss": 0.4896, "step": 9169 }, { "epoch": 0.75, "grad_norm": 0.9268421651069393, "learning_rate": 3.2145542935726224e-06, "loss": 0.4767, "step": 9170 }, { "epoch": 0.75, "grad_norm": 0.8961329360018759, "learning_rate": 3.212620821746362e-06, "loss": 0.4751, "step": 9171 }, { "epoch": 0.75, "grad_norm": 0.8984912218661776, "learning_rate": 3.2106878202794513e-06, "loss": 0.5043, "step": 9172 }, { "epoch": 0.75, "grad_norm": 0.9263734336016696, "learning_rate": 3.2087552893058594e-06, "loss": 0.4616, "step": 9173 }, { "epoch": 0.75, "grad_norm": 0.992989819188374, "learning_rate": 3.206823228959498e-06, "loss": 0.5415, "step": 9174 }, { "epoch": 0.75, "grad_norm": 0.96993392568458, "learning_rate": 3.2048916393742622e-06, "loss": 0.5095, "step": 9175 }, { "epoch": 0.75, "grad_norm": 0.9049184006651029, "learning_rate": 3.2029605206840088e-06, "loss": 0.4609, "step": 9176 }, { "epoch": 0.75, "grad_norm": 0.8877311555149989, "learning_rate": 3.201029873022565e-06, "loss": 0.4444, "step": 9177 }, { "epoch": 0.75, "grad_norm": 0.8661027546548056, "learning_rate": 3.1990996965237143e-06, "loss": 0.408, "step": 9178 }, { "epoch": 0.75, "grad_norm": 0.9130267941869538, "learning_rate": 3.1971699913212272e-06, "loss": 0.4907, "step": 9179 }, { "epoch": 0.75, "grad_norm": 0.9096717689460737, "learning_rate": 3.1952407575488243e-06, "loss": 0.5102, "step": 9180 }, { "epoch": 0.75, "grad_norm": 0.8384692208379614, "learning_rate": 3.1933119953402e-06, "loss": 0.4654, "step": 9181 }, { "epoch": 0.75, "grad_norm": 0.916298509249038, "learning_rate": 3.1913837048290176e-06, "loss": 0.4712, "step": 9182 }, { "epoch": 0.75, "grad_norm": 1.0794714537379706, "learning_rate": 3.189455886148908e-06, "loss": 0.509, "step": 9183 }, { "epoch": 0.75, "grad_norm": 0.9145557666072167, "learning_rate": 3.1875285394334575e-06, "loss": 0.5139, "step": 9184 }, { "epoch": 0.75, "grad_norm": 0.9588017745471442, "learning_rate": 3.1856016648162435e-06, "loss": 0.511, "step": 9185 }, { "epoch": 0.75, "grad_norm": 0.93977533850814, "learning_rate": 3.1836752624307878e-06, "loss": 0.4925, "step": 9186 }, { "epoch": 0.75, "grad_norm": 0.9905180889851486, "learning_rate": 3.1817493324105884e-06, "loss": 0.5343, "step": 9187 }, { "epoch": 0.75, "grad_norm": 0.9626382947667614, "learning_rate": 3.179823874889113e-06, "loss": 0.5192, "step": 9188 }, { "epoch": 0.75, "grad_norm": 1.008409226761721, "learning_rate": 3.1778988899997977e-06, "loss": 0.576, "step": 9189 }, { "epoch": 0.75, "grad_norm": 0.9254016781809032, "learning_rate": 3.175974377876031e-06, "loss": 0.5201, "step": 9190 }, { "epoch": 0.75, "grad_norm": 0.8827104917802043, "learning_rate": 3.1740503386511933e-06, "loss": 0.5004, "step": 9191 }, { "epoch": 0.75, "grad_norm": 0.9195045711783788, "learning_rate": 3.17212677245861e-06, "loss": 0.4723, "step": 9192 }, { "epoch": 0.75, "grad_norm": 0.9178177294623786, "learning_rate": 3.1702036794315837e-06, "loss": 0.52, "step": 9193 }, { "epoch": 0.75, "grad_norm": 0.930776420999054, "learning_rate": 3.1682810597033853e-06, "loss": 0.5447, "step": 9194 }, { "epoch": 0.75, "grad_norm": 0.8603670066733564, "learning_rate": 3.1663589134072537e-06, "loss": 0.539, "step": 9195 }, { "epoch": 0.75, "grad_norm": 0.9266579862289025, "learning_rate": 3.16443724067638e-06, "loss": 0.4666, "step": 9196 }, { "epoch": 0.75, "grad_norm": 0.8913826793152476, "learning_rate": 3.1625160416439503e-06, "loss": 0.4906, "step": 9197 }, { "epoch": 0.75, "grad_norm": 0.8448070499801077, "learning_rate": 3.1605953164430904e-06, "loss": 0.4222, "step": 9198 }, { "epoch": 0.75, "grad_norm": 0.8777398534519062, "learning_rate": 3.1586750652069077e-06, "loss": 0.5065, "step": 9199 }, { "epoch": 0.75, "grad_norm": 0.8780703673114545, "learning_rate": 3.156755288068475e-06, "loss": 0.494, "step": 9200 }, { "epoch": 0.75, "grad_norm": 1.002260390936769, "learning_rate": 3.1548359851608344e-06, "loss": 0.5149, "step": 9201 }, { "epoch": 0.75, "grad_norm": 0.9969350471910058, "learning_rate": 3.1529171566169825e-06, "loss": 0.5127, "step": 9202 }, { "epoch": 0.75, "grad_norm": 0.8782140061192855, "learning_rate": 3.1509988025699046e-06, "loss": 0.4238, "step": 9203 }, { "epoch": 0.75, "grad_norm": 0.9204540185363628, "learning_rate": 3.14908092315253e-06, "loss": 0.5102, "step": 9204 }, { "epoch": 0.75, "grad_norm": 0.8708557332916683, "learning_rate": 3.147163518497772e-06, "loss": 0.4824, "step": 9205 }, { "epoch": 0.75, "grad_norm": 0.9061520741038792, "learning_rate": 3.145246588738503e-06, "loss": 0.4841, "step": 9206 }, { "epoch": 0.75, "grad_norm": 0.9419277444337851, "learning_rate": 3.1433301340075694e-06, "loss": 0.4981, "step": 9207 }, { "epoch": 0.75, "grad_norm": 1.0182335668415685, "learning_rate": 3.1414141544377686e-06, "loss": 0.5721, "step": 9208 }, { "epoch": 0.75, "grad_norm": 0.9456494049006637, "learning_rate": 3.1394986501618897e-06, "loss": 0.4902, "step": 9209 }, { "epoch": 0.75, "grad_norm": 0.9234880089818398, "learning_rate": 3.1375836213126653e-06, "loss": 0.5534, "step": 9210 }, { "epoch": 0.75, "grad_norm": 0.8922445520981266, "learning_rate": 3.135669068022811e-06, "loss": 0.4862, "step": 9211 }, { "epoch": 0.75, "grad_norm": 1.0138485298431035, "learning_rate": 3.1337549904249996e-06, "loss": 0.5239, "step": 9212 }, { "epoch": 0.75, "grad_norm": 0.8779972974608878, "learning_rate": 3.1318413886518804e-06, "loss": 0.4709, "step": 9213 }, { "epoch": 0.75, "grad_norm": 0.9511817289467243, "learning_rate": 3.129928262836055e-06, "loss": 0.5514, "step": 9214 }, { "epoch": 0.75, "grad_norm": 0.9160031996320639, "learning_rate": 3.1280156131101136e-06, "loss": 0.4835, "step": 9215 }, { "epoch": 0.75, "grad_norm": 0.9136188010687992, "learning_rate": 3.1261034396065924e-06, "loss": 0.4356, "step": 9216 }, { "epoch": 0.75, "grad_norm": 0.9672584722550528, "learning_rate": 3.1241917424580047e-06, "loss": 0.5193, "step": 9217 }, { "epoch": 0.75, "grad_norm": 0.8269303323982298, "learning_rate": 3.122280521796831e-06, "loss": 0.4713, "step": 9218 }, { "epoch": 0.75, "grad_norm": 0.9173435143365514, "learning_rate": 3.1203697777555163e-06, "loss": 0.5053, "step": 9219 }, { "epoch": 0.75, "grad_norm": 0.8772695738063876, "learning_rate": 3.1184595104664726e-06, "loss": 0.5159, "step": 9220 }, { "epoch": 0.75, "grad_norm": 0.8953996339227956, "learning_rate": 3.1165497200620863e-06, "loss": 0.4851, "step": 9221 }, { "epoch": 0.75, "grad_norm": 0.9178968916849854, "learning_rate": 3.114640406674694e-06, "loss": 0.4623, "step": 9222 }, { "epoch": 0.75, "grad_norm": 0.9705439786022239, "learning_rate": 3.1127315704366144e-06, "loss": 0.4735, "step": 9223 }, { "epoch": 0.75, "grad_norm": 1.0537737416285637, "learning_rate": 3.1108232114801283e-06, "loss": 0.5305, "step": 9224 }, { "epoch": 0.75, "grad_norm": 1.006997561164009, "learning_rate": 3.108915329937483e-06, "loss": 0.4649, "step": 9225 }, { "epoch": 0.75, "grad_norm": 0.9484263275516899, "learning_rate": 3.1070079259408934e-06, "loss": 0.5857, "step": 9226 }, { "epoch": 0.75, "grad_norm": 0.8931141053668399, "learning_rate": 3.1051009996225434e-06, "loss": 0.433, "step": 9227 }, { "epoch": 0.75, "grad_norm": 0.9955212573195641, "learning_rate": 3.1031945511145744e-06, "loss": 0.4869, "step": 9228 }, { "epoch": 0.75, "grad_norm": 0.9445210297345634, "learning_rate": 3.101288580549107e-06, "loss": 0.5201, "step": 9229 }, { "epoch": 0.75, "grad_norm": 0.8845297197394515, "learning_rate": 3.09938308805822e-06, "loss": 0.4468, "step": 9230 }, { "epoch": 0.75, "grad_norm": 0.8677616975176144, "learning_rate": 3.0974780737739653e-06, "loss": 0.4696, "step": 9231 }, { "epoch": 0.75, "grad_norm": 0.8440699624266844, "learning_rate": 3.095573537828357e-06, "loss": 0.4719, "step": 9232 }, { "epoch": 0.75, "grad_norm": 1.0118764043381994, "learning_rate": 3.0936694803533817e-06, "loss": 0.5053, "step": 9233 }, { "epoch": 0.75, "grad_norm": 1.010118539516588, "learning_rate": 3.091765901480983e-06, "loss": 0.5169, "step": 9234 }, { "epoch": 0.75, "grad_norm": 0.8893225432422138, "learning_rate": 3.0898628013430787e-06, "loss": 0.4511, "step": 9235 }, { "epoch": 0.75, "grad_norm": 0.9298359284103193, "learning_rate": 3.087960180071553e-06, "loss": 0.5121, "step": 9236 }, { "epoch": 0.75, "grad_norm": 0.899101273379794, "learning_rate": 3.0860580377982563e-06, "loss": 0.5377, "step": 9237 }, { "epoch": 0.75, "grad_norm": 0.9002401268153558, "learning_rate": 3.084156374655005e-06, "loss": 0.4642, "step": 9238 }, { "epoch": 0.75, "grad_norm": 0.9113145964585156, "learning_rate": 3.0822551907735833e-06, "loss": 0.5161, "step": 9239 }, { "epoch": 0.75, "grad_norm": 0.8565877087319691, "learning_rate": 3.080354486285743e-06, "loss": 0.4561, "step": 9240 }, { "epoch": 0.75, "grad_norm": 0.9296165703485347, "learning_rate": 3.078454261323196e-06, "loss": 0.4638, "step": 9241 }, { "epoch": 0.75, "grad_norm": 0.9871738649945768, "learning_rate": 3.076554516017629e-06, "loss": 0.5422, "step": 9242 }, { "epoch": 0.75, "grad_norm": 0.9845398816258983, "learning_rate": 3.074655250500693e-06, "loss": 0.5418, "step": 9243 }, { "epoch": 0.75, "grad_norm": 0.8907187812517292, "learning_rate": 3.0727564649040066e-06, "loss": 0.4575, "step": 9244 }, { "epoch": 0.75, "grad_norm": 0.9005922717320375, "learning_rate": 3.0708581593591513e-06, "loss": 0.4844, "step": 9245 }, { "epoch": 0.75, "grad_norm": 0.8811211339067954, "learning_rate": 3.068960333997684e-06, "loss": 0.4851, "step": 9246 }, { "epoch": 0.75, "grad_norm": 0.8297490979293182, "learning_rate": 3.0670629889511128e-06, "loss": 0.4663, "step": 9247 }, { "epoch": 0.75, "grad_norm": 0.9965530095100212, "learning_rate": 3.0651661243509277e-06, "loss": 0.465, "step": 9248 }, { "epoch": 0.75, "grad_norm": 0.9304897978087547, "learning_rate": 3.063269740328579e-06, "loss": 0.5054, "step": 9249 }, { "epoch": 0.75, "grad_norm": 1.247859611749849, "learning_rate": 3.0613738370154853e-06, "loss": 0.5959, "step": 9250 }, { "epoch": 0.75, "grad_norm": 0.8559408643557311, "learning_rate": 3.059478414543029e-06, "loss": 0.4542, "step": 9251 }, { "epoch": 0.75, "grad_norm": 0.9487040433145538, "learning_rate": 3.0575834730425658e-06, "loss": 0.443, "step": 9252 }, { "epoch": 0.75, "grad_norm": 1.0418904384512588, "learning_rate": 3.0556890126454075e-06, "loss": 0.5321, "step": 9253 }, { "epoch": 0.75, "grad_norm": 0.9142524061866827, "learning_rate": 3.0537950334828405e-06, "loss": 0.5283, "step": 9254 }, { "epoch": 0.75, "grad_norm": 0.9849848109141258, "learning_rate": 3.051901535686116e-06, "loss": 0.5214, "step": 9255 }, { "epoch": 0.75, "grad_norm": 0.9474074774382758, "learning_rate": 3.0500085193864525e-06, "loss": 0.4811, "step": 9256 }, { "epoch": 0.75, "grad_norm": 0.9814885554674336, "learning_rate": 3.0481159847150343e-06, "loss": 0.5196, "step": 9257 }, { "epoch": 0.75, "grad_norm": 0.9491077327581923, "learning_rate": 3.046223931803015e-06, "loss": 0.4665, "step": 9258 }, { "epoch": 0.75, "grad_norm": 0.9047139713494868, "learning_rate": 3.044332360781502e-06, "loss": 0.4961, "step": 9259 }, { "epoch": 0.75, "grad_norm": 0.9322500438401069, "learning_rate": 3.0424412717815943e-06, "loss": 0.4989, "step": 9260 }, { "epoch": 0.75, "grad_norm": 1.048631041096126, "learning_rate": 3.040550664934332e-06, "loss": 0.5397, "step": 9261 }, { "epoch": 0.75, "grad_norm": 0.9549270032708144, "learning_rate": 3.0386605403707347e-06, "loss": 0.5324, "step": 9262 }, { "epoch": 0.75, "grad_norm": 0.9334118783548825, "learning_rate": 3.036770898221787e-06, "loss": 0.4805, "step": 9263 }, { "epoch": 0.75, "grad_norm": 0.8760160616514998, "learning_rate": 3.0348817386184403e-06, "loss": 0.523, "step": 9264 }, { "epoch": 0.75, "grad_norm": 0.8602677591239977, "learning_rate": 3.0329930616916114e-06, "loss": 0.4556, "step": 9265 }, { "epoch": 0.75, "grad_norm": 0.942749923235785, "learning_rate": 3.0311048675721865e-06, "loss": 0.4959, "step": 9266 }, { "epoch": 0.75, "grad_norm": 0.967553145530537, "learning_rate": 3.02921715639101e-06, "loss": 0.5025, "step": 9267 }, { "epoch": 0.75, "grad_norm": 0.875163075806297, "learning_rate": 3.0273299282789004e-06, "loss": 0.4791, "step": 9268 }, { "epoch": 0.75, "grad_norm": 0.9610020745516507, "learning_rate": 3.025443183366643e-06, "loss": 0.5296, "step": 9269 }, { "epoch": 0.75, "grad_norm": 1.0015074057092335, "learning_rate": 3.023556921784987e-06, "loss": 0.5247, "step": 9270 }, { "epoch": 0.75, "grad_norm": 0.93960616711754, "learning_rate": 3.021671143664647e-06, "loss": 0.481, "step": 9271 }, { "epoch": 0.75, "grad_norm": 0.9447586684544488, "learning_rate": 3.019785849136311e-06, "loss": 0.5358, "step": 9272 }, { "epoch": 0.75, "grad_norm": 0.9784661202493571, "learning_rate": 3.0179010383306208e-06, "loss": 0.4865, "step": 9273 }, { "epoch": 0.75, "grad_norm": 0.9071899625936168, "learning_rate": 3.0160167113781945e-06, "loss": 0.5029, "step": 9274 }, { "epoch": 0.75, "grad_norm": 0.839349146751607, "learning_rate": 3.014132868409617e-06, "loss": 0.5041, "step": 9275 }, { "epoch": 0.75, "grad_norm": 0.8927128357241134, "learning_rate": 3.012249509555435e-06, "loss": 0.4717, "step": 9276 }, { "epoch": 0.75, "grad_norm": 0.9196859640341792, "learning_rate": 3.0103666349461624e-06, "loss": 0.5086, "step": 9277 }, { "epoch": 0.75, "grad_norm": 1.0067864835070655, "learning_rate": 3.008484244712286e-06, "loss": 0.4523, "step": 9278 }, { "epoch": 0.75, "grad_norm": 0.933143092486881, "learning_rate": 3.0066023389842446e-06, "loss": 0.482, "step": 9279 }, { "epoch": 0.75, "grad_norm": 0.9386096596080008, "learning_rate": 3.004720917892464e-06, "loss": 0.4981, "step": 9280 }, { "epoch": 0.75, "grad_norm": 0.9007385757794655, "learning_rate": 3.0028399815673147e-06, "loss": 0.4798, "step": 9281 }, { "epoch": 0.75, "grad_norm": 1.080130380478513, "learning_rate": 3.0009595301391494e-06, "loss": 0.5987, "step": 9282 }, { "epoch": 0.75, "grad_norm": 1.0126379378704609, "learning_rate": 2.999079563738281e-06, "loss": 0.494, "step": 9283 }, { "epoch": 0.75, "grad_norm": 0.9617860043838014, "learning_rate": 2.9972000824949908e-06, "loss": 0.5165, "step": 9284 }, { "epoch": 0.75, "grad_norm": 0.8559597205644912, "learning_rate": 2.9953210865395176e-06, "loss": 0.4558, "step": 9285 }, { "epoch": 0.75, "grad_norm": 0.8985111470687281, "learning_rate": 2.9934425760020857e-06, "loss": 0.4772, "step": 9286 }, { "epoch": 0.75, "grad_norm": 0.9475458320896335, "learning_rate": 2.9915645510128666e-06, "loss": 0.454, "step": 9287 }, { "epoch": 0.75, "grad_norm": 0.935781669540675, "learning_rate": 2.9896870117020073e-06, "loss": 0.4811, "step": 9288 }, { "epoch": 0.75, "grad_norm": 0.9664864293034208, "learning_rate": 2.987809958199619e-06, "loss": 0.577, "step": 9289 }, { "epoch": 0.76, "grad_norm": 0.8836746825941467, "learning_rate": 2.9859333906357845e-06, "loss": 0.5378, "step": 9290 }, { "epoch": 0.76, "grad_norm": 0.9582144787011051, "learning_rate": 2.984057309140539e-06, "loss": 0.478, "step": 9291 }, { "epoch": 0.76, "grad_norm": 0.8878155885088148, "learning_rate": 2.9821817138439036e-06, "loss": 0.5188, "step": 9292 }, { "epoch": 0.76, "grad_norm": 0.8990987481542069, "learning_rate": 2.980306604875849e-06, "loss": 0.4875, "step": 9293 }, { "epoch": 0.76, "grad_norm": 0.927337620844019, "learning_rate": 2.9784319823663188e-06, "loss": 0.5161, "step": 9294 }, { "epoch": 0.76, "grad_norm": 0.8955209176918927, "learning_rate": 2.976557846445225e-06, "loss": 0.5026, "step": 9295 }, { "epoch": 0.76, "grad_norm": 0.9350116459240718, "learning_rate": 2.9746841972424456e-06, "loss": 0.5127, "step": 9296 }, { "epoch": 0.76, "grad_norm": 0.9022083479959, "learning_rate": 2.9728110348878135e-06, "loss": 0.4831, "step": 9297 }, { "epoch": 0.76, "grad_norm": 0.8905880164735378, "learning_rate": 2.9709383595111506e-06, "loss": 0.4788, "step": 9298 }, { "epoch": 0.76, "grad_norm": 0.9840888834902731, "learning_rate": 2.969066171242221e-06, "loss": 0.5257, "step": 9299 }, { "epoch": 0.76, "grad_norm": 0.8482427926265703, "learning_rate": 2.967194470210769e-06, "loss": 0.5342, "step": 9300 }, { "epoch": 0.76, "grad_norm": 0.8943824965526529, "learning_rate": 2.9653232565465017e-06, "loss": 0.4111, "step": 9301 }, { "epoch": 0.76, "grad_norm": 0.8255995194547043, "learning_rate": 2.9634525303790973e-06, "loss": 0.4847, "step": 9302 }, { "epoch": 0.76, "grad_norm": 0.9574954648262838, "learning_rate": 2.9615822918381844e-06, "loss": 0.5338, "step": 9303 }, { "epoch": 0.76, "grad_norm": 0.9660797147778306, "learning_rate": 2.959712541053381e-06, "loss": 0.4644, "step": 9304 }, { "epoch": 0.76, "grad_norm": 0.9252816124540626, "learning_rate": 2.9578432781542523e-06, "loss": 0.5135, "step": 9305 }, { "epoch": 0.76, "grad_norm": 0.9478822301015203, "learning_rate": 2.955974503270337e-06, "loss": 0.4625, "step": 9306 }, { "epoch": 0.76, "grad_norm": 0.962311200737902, "learning_rate": 2.954106216531141e-06, "loss": 0.5013, "step": 9307 }, { "epoch": 0.76, "grad_norm": 0.838888152056901, "learning_rate": 2.952238418066137e-06, "loss": 0.4996, "step": 9308 }, { "epoch": 0.76, "grad_norm": 0.98241779392544, "learning_rate": 2.9503711080047535e-06, "loss": 0.4344, "step": 9309 }, { "epoch": 0.76, "grad_norm": 1.0205651361476762, "learning_rate": 2.9485042864764047e-06, "loss": 0.5232, "step": 9310 }, { "epoch": 0.76, "grad_norm": 0.9279477006987505, "learning_rate": 2.9466379536104518e-06, "loss": 0.4812, "step": 9311 }, { "epoch": 0.76, "grad_norm": 0.8593933307119445, "learning_rate": 2.9447721095362325e-06, "loss": 0.4849, "step": 9312 }, { "epoch": 0.76, "grad_norm": 0.9690665581476634, "learning_rate": 2.942906754383048e-06, "loss": 0.4679, "step": 9313 }, { "epoch": 0.76, "grad_norm": 0.8951759511066802, "learning_rate": 2.9410418882801682e-06, "loss": 0.5178, "step": 9314 }, { "epoch": 0.76, "grad_norm": 0.9530573012082669, "learning_rate": 2.939177511356819e-06, "loss": 0.5129, "step": 9315 }, { "epoch": 0.76, "grad_norm": 1.0308139688157876, "learning_rate": 2.9373136237422107e-06, "loss": 0.5144, "step": 9316 }, { "epoch": 0.76, "grad_norm": 0.9297141613759037, "learning_rate": 2.9354502255655002e-06, "loss": 0.5619, "step": 9317 }, { "epoch": 0.76, "grad_norm": 0.8485557605262156, "learning_rate": 2.9335873169558236e-06, "loss": 0.4499, "step": 9318 }, { "epoch": 0.76, "grad_norm": 0.9753919721602036, "learning_rate": 2.9317248980422785e-06, "loss": 0.5164, "step": 9319 }, { "epoch": 0.76, "grad_norm": 0.9329553550167934, "learning_rate": 2.9298629689539315e-06, "loss": 0.4638, "step": 9320 }, { "epoch": 0.76, "grad_norm": 0.9864133114751812, "learning_rate": 2.9280015298198026e-06, "loss": 0.4754, "step": 9321 }, { "epoch": 0.76, "grad_norm": 0.9716573961646281, "learning_rate": 2.9261405807689014e-06, "loss": 0.5036, "step": 9322 }, { "epoch": 0.76, "grad_norm": 0.9333844851764159, "learning_rate": 2.9242801219301797e-06, "loss": 0.4928, "step": 9323 }, { "epoch": 0.76, "grad_norm": 0.8484772389666334, "learning_rate": 2.9224201534325703e-06, "loss": 0.4605, "step": 9324 }, { "epoch": 0.76, "grad_norm": 0.887435635454266, "learning_rate": 2.9205606754049667e-06, "loss": 0.4935, "step": 9325 }, { "epoch": 0.76, "grad_norm": 0.9015840885486097, "learning_rate": 2.918701687976231e-06, "loss": 0.4776, "step": 9326 }, { "epoch": 0.76, "grad_norm": 0.9225799390724025, "learning_rate": 2.9168431912751805e-06, "loss": 0.489, "step": 9327 }, { "epoch": 0.76, "grad_norm": 0.9216293988583227, "learning_rate": 2.914985185430621e-06, "loss": 0.4783, "step": 9328 }, { "epoch": 0.76, "grad_norm": 0.9834546685153829, "learning_rate": 2.9131276705713008e-06, "loss": 0.5459, "step": 9329 }, { "epoch": 0.76, "grad_norm": 0.9503774917454169, "learning_rate": 2.9112706468259478e-06, "loss": 0.5112, "step": 9330 }, { "epoch": 0.76, "grad_norm": 0.8558042581003334, "learning_rate": 2.90941411432325e-06, "loss": 0.4642, "step": 9331 }, { "epoch": 0.76, "grad_norm": 0.8959538771990759, "learning_rate": 2.9075580731918684e-06, "loss": 0.4843, "step": 9332 }, { "epoch": 0.76, "grad_norm": 0.9834208135406659, "learning_rate": 2.905702523560415e-06, "loss": 0.4608, "step": 9333 }, { "epoch": 0.76, "grad_norm": 0.926465065803759, "learning_rate": 2.90384746555749e-06, "loss": 0.4842, "step": 9334 }, { "epoch": 0.76, "grad_norm": 0.8753341930421762, "learning_rate": 2.9019928993116388e-06, "loss": 0.4617, "step": 9335 }, { "epoch": 0.76, "grad_norm": 0.87421173569085, "learning_rate": 2.900138824951383e-06, "loss": 0.5181, "step": 9336 }, { "epoch": 0.76, "grad_norm": 0.9219804198644024, "learning_rate": 2.89828524260521e-06, "loss": 0.4836, "step": 9337 }, { "epoch": 0.76, "grad_norm": 0.9697470723187763, "learning_rate": 2.8964321524015725e-06, "loss": 0.5239, "step": 9338 }, { "epoch": 0.76, "grad_norm": 0.9569066941173024, "learning_rate": 2.8945795544688814e-06, "loss": 0.544, "step": 9339 }, { "epoch": 0.76, "grad_norm": 0.982711465105853, "learning_rate": 2.8927274489355296e-06, "loss": 0.5016, "step": 9340 }, { "epoch": 0.76, "grad_norm": 0.9280691158631974, "learning_rate": 2.890875835929858e-06, "loss": 0.5355, "step": 9341 }, { "epoch": 0.76, "grad_norm": 0.9580433080173101, "learning_rate": 2.8890247155801864e-06, "loss": 0.5485, "step": 9342 }, { "epoch": 0.76, "grad_norm": 0.9684247483206608, "learning_rate": 2.8871740880147935e-06, "loss": 0.5603, "step": 9343 }, { "epoch": 0.76, "grad_norm": 0.8835115268579586, "learning_rate": 2.8853239533619314e-06, "loss": 0.5186, "step": 9344 }, { "epoch": 0.76, "grad_norm": 1.2424654229776397, "learning_rate": 2.883474311749802e-06, "loss": 0.5008, "step": 9345 }, { "epoch": 0.76, "grad_norm": 0.842275429217379, "learning_rate": 2.8816251633065963e-06, "loss": 0.4049, "step": 9346 }, { "epoch": 0.76, "grad_norm": 0.9070179199220347, "learning_rate": 2.87977650816045e-06, "loss": 0.5289, "step": 9347 }, { "epoch": 0.76, "grad_norm": 0.851064073144121, "learning_rate": 2.877928346439476e-06, "loss": 0.5164, "step": 9348 }, { "epoch": 0.76, "grad_norm": 0.9271901856488475, "learning_rate": 2.87608067827175e-06, "loss": 0.4729, "step": 9349 }, { "epoch": 0.76, "grad_norm": 0.9421025894776374, "learning_rate": 2.8742335037853173e-06, "loss": 0.4991, "step": 9350 }, { "epoch": 0.76, "grad_norm": 0.9507336913105746, "learning_rate": 2.8723868231081762e-06, "loss": 0.4617, "step": 9351 }, { "epoch": 0.76, "grad_norm": 0.8159297374452942, "learning_rate": 2.870540636368312e-06, "loss": 0.4482, "step": 9352 }, { "epoch": 0.76, "grad_norm": 1.0030324921252392, "learning_rate": 2.868694943693655e-06, "loss": 0.4944, "step": 9353 }, { "epoch": 0.76, "grad_norm": 0.9596171869334853, "learning_rate": 2.8668497452121137e-06, "loss": 0.5073, "step": 9354 }, { "epoch": 0.76, "grad_norm": 0.9262832524325116, "learning_rate": 2.8650050410515573e-06, "loss": 0.4648, "step": 9355 }, { "epoch": 0.76, "grad_norm": 0.9137971459810631, "learning_rate": 2.8631608313398252e-06, "loss": 0.4643, "step": 9356 }, { "epoch": 0.76, "grad_norm": 1.0017381152896594, "learning_rate": 2.8613171162047116e-06, "loss": 0.5519, "step": 9357 }, { "epoch": 0.76, "grad_norm": 1.0135694589387556, "learning_rate": 2.8594738957739964e-06, "loss": 0.4904, "step": 9358 }, { "epoch": 0.76, "grad_norm": 0.9381392192800052, "learning_rate": 2.8576311701754033e-06, "loss": 0.4843, "step": 9359 }, { "epoch": 0.76, "grad_norm": 0.9194632348387407, "learning_rate": 2.8557889395366344e-06, "loss": 0.4958, "step": 9360 }, { "epoch": 0.76, "grad_norm": 0.9099550661414307, "learning_rate": 2.8539472039853557e-06, "loss": 0.4452, "step": 9361 }, { "epoch": 0.76, "grad_norm": 0.9598811542751433, "learning_rate": 2.8521059636492e-06, "loss": 0.5969, "step": 9362 }, { "epoch": 0.76, "grad_norm": 0.9889306022941791, "learning_rate": 2.8502652186557546e-06, "loss": 0.5026, "step": 9363 }, { "epoch": 0.76, "grad_norm": 0.9599089363640353, "learning_rate": 2.8484249691325936e-06, "loss": 0.5357, "step": 9364 }, { "epoch": 0.76, "grad_norm": 0.8071106061246606, "learning_rate": 2.846585215207236e-06, "loss": 0.4196, "step": 9365 }, { "epoch": 0.76, "grad_norm": 0.8789960512738482, "learning_rate": 2.844745957007178e-06, "loss": 0.4865, "step": 9366 }, { "epoch": 0.76, "grad_norm": 1.004915994614177, "learning_rate": 2.8429071946598784e-06, "loss": 0.4525, "step": 9367 }, { "epoch": 0.76, "grad_norm": 0.949052188692021, "learning_rate": 2.841068928292762e-06, "loss": 0.515, "step": 9368 }, { "epoch": 0.76, "grad_norm": 0.8423956562674407, "learning_rate": 2.839231158033219e-06, "loss": 0.3948, "step": 9369 }, { "epoch": 0.76, "grad_norm": 0.8096642908837642, "learning_rate": 2.837393884008608e-06, "loss": 0.4039, "step": 9370 }, { "epoch": 0.76, "grad_norm": 0.8576437588563828, "learning_rate": 2.835557106346244e-06, "loss": 0.4479, "step": 9371 }, { "epoch": 0.76, "grad_norm": 0.9191350794963664, "learning_rate": 2.8337208251734183e-06, "loss": 0.4786, "step": 9372 }, { "epoch": 0.76, "grad_norm": 0.9644820042800843, "learning_rate": 2.8318850406173827e-06, "loss": 0.4845, "step": 9373 }, { "epoch": 0.76, "grad_norm": 0.9416056292197382, "learning_rate": 2.830049752805356e-06, "loss": 0.4415, "step": 9374 }, { "epoch": 0.76, "grad_norm": 0.9614306850353689, "learning_rate": 2.8282149618645215e-06, "loss": 0.5065, "step": 9375 }, { "epoch": 0.76, "grad_norm": 0.919458526211513, "learning_rate": 2.826380667922032e-06, "loss": 0.4998, "step": 9376 }, { "epoch": 0.76, "grad_norm": 0.9254357372660033, "learning_rate": 2.824546871104996e-06, "loss": 0.4881, "step": 9377 }, { "epoch": 0.76, "grad_norm": 0.940488112437101, "learning_rate": 2.8227135715404975e-06, "loss": 0.5278, "step": 9378 }, { "epoch": 0.76, "grad_norm": 0.9064149146132878, "learning_rate": 2.820880769355582e-06, "loss": 0.4802, "step": 9379 }, { "epoch": 0.76, "grad_norm": 0.9971665290177097, "learning_rate": 2.819048464677261e-06, "loss": 0.5214, "step": 9380 }, { "epoch": 0.76, "grad_norm": 0.9594023669258108, "learning_rate": 2.817216657632512e-06, "loss": 0.5152, "step": 9381 }, { "epoch": 0.76, "grad_norm": 0.9912338435984925, "learning_rate": 2.8153853483482817e-06, "loss": 0.4847, "step": 9382 }, { "epoch": 0.76, "grad_norm": 0.918143957172683, "learning_rate": 2.813554536951466e-06, "loss": 0.5439, "step": 9383 }, { "epoch": 0.76, "grad_norm": 0.9542177968776675, "learning_rate": 2.8117242235689546e-06, "loss": 0.5633, "step": 9384 }, { "epoch": 0.76, "grad_norm": 0.9676961660664399, "learning_rate": 2.8098944083275735e-06, "loss": 0.4745, "step": 9385 }, { "epoch": 0.76, "grad_norm": 0.8463354068720405, "learning_rate": 2.8080650913541343e-06, "loss": 0.4333, "step": 9386 }, { "epoch": 0.76, "grad_norm": 0.9071559562682353, "learning_rate": 2.8062362727754034e-06, "loss": 0.4202, "step": 9387 }, { "epoch": 0.76, "grad_norm": 1.0620416420487766, "learning_rate": 2.804407952718119e-06, "loss": 0.5192, "step": 9388 }, { "epoch": 0.76, "grad_norm": 0.9700130342590119, "learning_rate": 2.8025801313089808e-06, "loss": 0.4732, "step": 9389 }, { "epoch": 0.76, "grad_norm": 0.905026639613572, "learning_rate": 2.8007528086746574e-06, "loss": 0.4955, "step": 9390 }, { "epoch": 0.76, "grad_norm": 0.8949781812267134, "learning_rate": 2.798925984941776e-06, "loss": 0.4971, "step": 9391 }, { "epoch": 0.76, "grad_norm": 1.0893057417483687, "learning_rate": 2.797099660236937e-06, "loss": 0.5203, "step": 9392 }, { "epoch": 0.76, "grad_norm": 0.9056671218015045, "learning_rate": 2.7952738346867026e-06, "loss": 0.5157, "step": 9393 }, { "epoch": 0.76, "grad_norm": 0.9267444438266536, "learning_rate": 2.7934485084176012e-06, "loss": 0.4864, "step": 9394 }, { "epoch": 0.76, "grad_norm": 0.9428738338535183, "learning_rate": 2.791623681556125e-06, "loss": 0.4714, "step": 9395 }, { "epoch": 0.76, "grad_norm": 0.9593348069639677, "learning_rate": 2.789799354228737e-06, "loss": 0.4895, "step": 9396 }, { "epoch": 0.76, "grad_norm": 1.0047888951482236, "learning_rate": 2.7879755265618558e-06, "loss": 0.4577, "step": 9397 }, { "epoch": 0.76, "grad_norm": 1.0235280843992327, "learning_rate": 2.786152198681874e-06, "loss": 0.5219, "step": 9398 }, { "epoch": 0.76, "grad_norm": 0.9472089839708605, "learning_rate": 2.7843293707151455e-06, "loss": 0.5328, "step": 9399 }, { "epoch": 0.76, "grad_norm": 0.9448776068653041, "learning_rate": 2.782507042787991e-06, "loss": 0.4486, "step": 9400 }, { "epoch": 0.76, "grad_norm": 1.0305616862250657, "learning_rate": 2.7806852150266974e-06, "loss": 0.5552, "step": 9401 }, { "epoch": 0.76, "grad_norm": 0.8278570108296879, "learning_rate": 2.778863887557517e-06, "loss": 0.4195, "step": 9402 }, { "epoch": 0.76, "grad_norm": 0.9276535441346776, "learning_rate": 2.777043060506661e-06, "loss": 0.4551, "step": 9403 }, { "epoch": 0.76, "grad_norm": 0.9487354552538336, "learning_rate": 2.7752227340003145e-06, "loss": 0.5439, "step": 9404 }, { "epoch": 0.76, "grad_norm": 0.9832271050557329, "learning_rate": 2.773402908164625e-06, "loss": 0.5789, "step": 9405 }, { "epoch": 0.76, "grad_norm": 0.9550858429413105, "learning_rate": 2.771583583125703e-06, "loss": 0.4835, "step": 9406 }, { "epoch": 0.76, "grad_norm": 1.0062011570686709, "learning_rate": 2.7697647590096277e-06, "loss": 0.5558, "step": 9407 }, { "epoch": 0.76, "grad_norm": 0.868966115835517, "learning_rate": 2.76794643594244e-06, "loss": 0.4517, "step": 9408 }, { "epoch": 0.76, "grad_norm": 0.9535790756363516, "learning_rate": 2.766128614050154e-06, "loss": 0.4804, "step": 9409 }, { "epoch": 0.76, "grad_norm": 0.9308323891682322, "learning_rate": 2.7643112934587346e-06, "loss": 0.4971, "step": 9410 }, { "epoch": 0.76, "grad_norm": 1.0176657701318508, "learning_rate": 2.7624944742941253e-06, "loss": 0.5166, "step": 9411 }, { "epoch": 0.76, "grad_norm": 0.9501763096451348, "learning_rate": 2.760678156682229e-06, "loss": 0.4569, "step": 9412 }, { "epoch": 0.77, "grad_norm": 0.9236459156836029, "learning_rate": 2.7588623407489158e-06, "loss": 0.5252, "step": 9413 }, { "epoch": 0.77, "grad_norm": 0.8433794190856604, "learning_rate": 2.7570470266200177e-06, "loss": 0.4018, "step": 9414 }, { "epoch": 0.77, "grad_norm": 0.915106529891791, "learning_rate": 2.7552322144213405e-06, "loss": 0.4964, "step": 9415 }, { "epoch": 0.77, "grad_norm": 0.8850114828887584, "learning_rate": 2.753417904278641e-06, "loss": 0.5148, "step": 9416 }, { "epoch": 0.77, "grad_norm": 0.9452199835968088, "learning_rate": 2.751604096317655e-06, "loss": 0.4584, "step": 9417 }, { "epoch": 0.77, "grad_norm": 0.94196405618762, "learning_rate": 2.749790790664074e-06, "loss": 0.5041, "step": 9418 }, { "epoch": 0.77, "grad_norm": 0.9554860308445395, "learning_rate": 2.7479779874435607e-06, "loss": 0.4885, "step": 9419 }, { "epoch": 0.77, "grad_norm": 0.8251822970851799, "learning_rate": 2.7461656867817397e-06, "loss": 0.4526, "step": 9420 }, { "epoch": 0.77, "grad_norm": 0.9104051781210106, "learning_rate": 2.7443538888042065e-06, "loss": 0.4844, "step": 9421 }, { "epoch": 0.77, "grad_norm": 0.9173725950469258, "learning_rate": 2.742542593636509e-06, "loss": 0.4853, "step": 9422 }, { "epoch": 0.77, "grad_norm": 0.9440247777740798, "learning_rate": 2.7407318014041727e-06, "loss": 0.4653, "step": 9423 }, { "epoch": 0.77, "grad_norm": 0.9456146477999366, "learning_rate": 2.738921512232684e-06, "loss": 0.4939, "step": 9424 }, { "epoch": 0.77, "grad_norm": 0.8871216035281972, "learning_rate": 2.7371117262474945e-06, "loss": 0.4852, "step": 9425 }, { "epoch": 0.77, "grad_norm": 0.8784139534365634, "learning_rate": 2.7353024435740194e-06, "loss": 0.5012, "step": 9426 }, { "epoch": 0.77, "grad_norm": 0.9562799444064749, "learning_rate": 2.7334936643376443e-06, "loss": 0.5088, "step": 9427 }, { "epoch": 0.77, "grad_norm": 0.8914075216309687, "learning_rate": 2.7316853886637075e-06, "loss": 0.4959, "step": 9428 }, { "epoch": 0.77, "grad_norm": 0.8622154551462666, "learning_rate": 2.729877616677531e-06, "loss": 0.5002, "step": 9429 }, { "epoch": 0.77, "grad_norm": 0.8689274219881318, "learning_rate": 2.7280703485043846e-06, "loss": 0.445, "step": 9430 }, { "epoch": 0.77, "grad_norm": 0.8775797672203981, "learning_rate": 2.726263584269513e-06, "loss": 0.477, "step": 9431 }, { "epoch": 0.77, "grad_norm": 0.9325799673463466, "learning_rate": 2.724457324098123e-06, "loss": 0.5041, "step": 9432 }, { "epoch": 0.77, "grad_norm": 0.9626934216789831, "learning_rate": 2.7226515681153907e-06, "loss": 0.5497, "step": 9433 }, { "epoch": 0.77, "grad_norm": 0.9011603340435773, "learning_rate": 2.720846316446443e-06, "loss": 0.48, "step": 9434 }, { "epoch": 0.77, "grad_norm": 1.0063613488595504, "learning_rate": 2.7190415692163954e-06, "loss": 0.4801, "step": 9435 }, { "epoch": 0.77, "grad_norm": 0.9004373985017571, "learning_rate": 2.717237326550306e-06, "loss": 0.4982, "step": 9436 }, { "epoch": 0.77, "grad_norm": 0.9216341366680187, "learning_rate": 2.71543358857321e-06, "loss": 0.5106, "step": 9437 }, { "epoch": 0.77, "grad_norm": 1.0024610759720805, "learning_rate": 2.713630355410104e-06, "loss": 0.4884, "step": 9438 }, { "epoch": 0.77, "grad_norm": 0.9858367768202332, "learning_rate": 2.7118276271859555e-06, "loss": 0.5171, "step": 9439 }, { "epoch": 0.77, "grad_norm": 0.9180550622016094, "learning_rate": 2.7100254040256813e-06, "loss": 0.5102, "step": 9440 }, { "epoch": 0.77, "grad_norm": 0.839648095978644, "learning_rate": 2.7082236860541867e-06, "loss": 0.4548, "step": 9441 }, { "epoch": 0.77, "grad_norm": 0.965488004574052, "learning_rate": 2.7064224733963197e-06, "loss": 0.5141, "step": 9442 }, { "epoch": 0.77, "grad_norm": 0.9617855033127427, "learning_rate": 2.704621766176905e-06, "loss": 0.5163, "step": 9443 }, { "epoch": 0.77, "grad_norm": 0.9178157408943385, "learning_rate": 2.702821564520732e-06, "loss": 0.4896, "step": 9444 }, { "epoch": 0.77, "grad_norm": 0.9110526638080855, "learning_rate": 2.7010218685525545e-06, "loss": 0.462, "step": 9445 }, { "epoch": 0.77, "grad_norm": 0.9618892367926404, "learning_rate": 2.699222678397082e-06, "loss": 0.4735, "step": 9446 }, { "epoch": 0.77, "grad_norm": 1.0025662808262406, "learning_rate": 2.697423994179007e-06, "loss": 0.4799, "step": 9447 }, { "epoch": 0.77, "grad_norm": 0.9728581194952428, "learning_rate": 2.69562581602297e-06, "loss": 0.4956, "step": 9448 }, { "epoch": 0.77, "grad_norm": 0.8845833875493481, "learning_rate": 2.693828144053584e-06, "loss": 0.4821, "step": 9449 }, { "epoch": 0.77, "grad_norm": 0.9953067711145615, "learning_rate": 2.6920309783954277e-06, "loss": 0.5049, "step": 9450 }, { "epoch": 0.77, "grad_norm": 0.9240944266957233, "learning_rate": 2.690234319173045e-06, "loss": 0.4273, "step": 9451 }, { "epoch": 0.77, "grad_norm": 0.8056754390924662, "learning_rate": 2.688438166510935e-06, "loss": 0.4646, "step": 9452 }, { "epoch": 0.77, "grad_norm": 0.953057999448235, "learning_rate": 2.68664252053358e-06, "loss": 0.5191, "step": 9453 }, { "epoch": 0.77, "grad_norm": 0.8365867448944447, "learning_rate": 2.6848473813654087e-06, "loss": 0.4236, "step": 9454 }, { "epoch": 0.77, "grad_norm": 0.889545304390988, "learning_rate": 2.6830527491308257e-06, "loss": 0.459, "step": 9455 }, { "epoch": 0.77, "grad_norm": 0.9223982552953418, "learning_rate": 2.681258623954196e-06, "loss": 0.4824, "step": 9456 }, { "epoch": 0.77, "grad_norm": 0.9608557480794483, "learning_rate": 2.679465005959856e-06, "loss": 0.5088, "step": 9457 }, { "epoch": 0.77, "grad_norm": 1.0188180580969628, "learning_rate": 2.6776718952720903e-06, "loss": 0.458, "step": 9458 }, { "epoch": 0.77, "grad_norm": 0.8616425434444939, "learning_rate": 2.6758792920151745e-06, "loss": 0.4269, "step": 9459 }, { "epoch": 0.77, "grad_norm": 1.0145444858253545, "learning_rate": 2.6740871963133243e-06, "loss": 0.4662, "step": 9460 }, { "epoch": 0.77, "grad_norm": 0.9901094833309612, "learning_rate": 2.6722956082907334e-06, "loss": 0.4941, "step": 9461 }, { "epoch": 0.77, "grad_norm": 0.90990267972996, "learning_rate": 2.670504528071557e-06, "loss": 0.4996, "step": 9462 }, { "epoch": 0.77, "grad_norm": 0.8671739375567898, "learning_rate": 2.668713955779918e-06, "loss": 0.4801, "step": 9463 }, { "epoch": 0.77, "grad_norm": 0.8410079626584746, "learning_rate": 2.6669238915398943e-06, "loss": 0.4196, "step": 9464 }, { "epoch": 0.77, "grad_norm": 0.9790064700550006, "learning_rate": 2.6651343354755453e-06, "loss": 0.5025, "step": 9465 }, { "epoch": 0.77, "grad_norm": 0.989725553659738, "learning_rate": 2.663345287710878e-06, "loss": 0.4847, "step": 9466 }, { "epoch": 0.77, "grad_norm": 1.0120797707377107, "learning_rate": 2.6615567483698746e-06, "loss": 0.5814, "step": 9467 }, { "epoch": 0.77, "grad_norm": 1.0247258994347321, "learning_rate": 2.65976871757648e-06, "loss": 0.5506, "step": 9468 }, { "epoch": 0.77, "grad_norm": 0.9353296234328144, "learning_rate": 2.6579811954546054e-06, "loss": 0.4999, "step": 9469 }, { "epoch": 0.77, "grad_norm": 0.873564732962634, "learning_rate": 2.6561941821281145e-06, "loss": 0.4801, "step": 9470 }, { "epoch": 0.77, "grad_norm": 0.9463813696150183, "learning_rate": 2.6544076777208603e-06, "loss": 0.4883, "step": 9471 }, { "epoch": 0.77, "grad_norm": 0.9701094798832111, "learning_rate": 2.6526216823566342e-06, "loss": 0.4821, "step": 9472 }, { "epoch": 0.77, "grad_norm": 0.9290121340326827, "learning_rate": 2.65083619615921e-06, "loss": 0.5389, "step": 9473 }, { "epoch": 0.77, "grad_norm": 0.9520983634733708, "learning_rate": 2.6490512192523175e-06, "loss": 0.4464, "step": 9474 }, { "epoch": 0.77, "grad_norm": 0.8680418166970947, "learning_rate": 2.6472667517596584e-06, "loss": 0.461, "step": 9475 }, { "epoch": 0.77, "grad_norm": 0.8481399018890614, "learning_rate": 2.6454827938048855e-06, "loss": 0.4635, "step": 9476 }, { "epoch": 0.77, "grad_norm": 0.8970798829121703, "learning_rate": 2.643699345511638e-06, "loss": 0.4517, "step": 9477 }, { "epoch": 0.77, "grad_norm": 1.0208441448818784, "learning_rate": 2.6419164070034974e-06, "loss": 0.517, "step": 9478 }, { "epoch": 0.77, "grad_norm": 0.8962370261002454, "learning_rate": 2.6401339784040226e-06, "loss": 0.5019, "step": 9479 }, { "epoch": 0.77, "grad_norm": 0.8456090475029107, "learning_rate": 2.6383520598367363e-06, "loss": 0.4345, "step": 9480 }, { "epoch": 0.77, "grad_norm": 0.8804027202694467, "learning_rate": 2.6365706514251244e-06, "loss": 0.4929, "step": 9481 }, { "epoch": 0.77, "grad_norm": 0.9137295020872604, "learning_rate": 2.6347897532926293e-06, "loss": 0.5308, "step": 9482 }, { "epoch": 0.77, "grad_norm": 0.9212246006767905, "learning_rate": 2.6330093655626777e-06, "loss": 0.5352, "step": 9483 }, { "epoch": 0.77, "grad_norm": 1.042293489524842, "learning_rate": 2.6312294883586385e-06, "loss": 0.5123, "step": 9484 }, { "epoch": 0.77, "grad_norm": 0.8841512558148169, "learning_rate": 2.6294501218038603e-06, "loss": 0.446, "step": 9485 }, { "epoch": 0.77, "grad_norm": 0.8420400723533827, "learning_rate": 2.627671266021652e-06, "loss": 0.4708, "step": 9486 }, { "epoch": 0.77, "grad_norm": 0.9083137612965833, "learning_rate": 2.625892921135288e-06, "loss": 0.4783, "step": 9487 }, { "epoch": 0.77, "grad_norm": 0.9938593713070609, "learning_rate": 2.6241150872679968e-06, "loss": 0.5045, "step": 9488 }, { "epoch": 0.77, "grad_norm": 0.9119577080481137, "learning_rate": 2.6223377645429948e-06, "loss": 0.4987, "step": 9489 }, { "epoch": 0.77, "grad_norm": 1.0419405547070333, "learning_rate": 2.6205609530834388e-06, "loss": 0.4877, "step": 9490 }, { "epoch": 0.77, "grad_norm": 0.9126973386049844, "learning_rate": 2.6187846530124615e-06, "loss": 0.5284, "step": 9491 }, { "epoch": 0.77, "grad_norm": 0.9253444650389665, "learning_rate": 2.6170088644531623e-06, "loss": 0.5231, "step": 9492 }, { "epoch": 0.77, "grad_norm": 0.8321810930190079, "learning_rate": 2.6152335875286027e-06, "loss": 0.4274, "step": 9493 }, { "epoch": 0.77, "grad_norm": 0.9589537397942688, "learning_rate": 2.6134588223617995e-06, "loss": 0.5199, "step": 9494 }, { "epoch": 0.77, "grad_norm": 0.9208045079162227, "learning_rate": 2.6116845690757533e-06, "loss": 0.4873, "step": 9495 }, { "epoch": 0.77, "grad_norm": 0.9568307857077881, "learning_rate": 2.6099108277934105e-06, "loss": 0.5072, "step": 9496 }, { "epoch": 0.77, "grad_norm": 1.0430924603891203, "learning_rate": 2.6081375986376924e-06, "loss": 0.5863, "step": 9497 }, { "epoch": 0.77, "grad_norm": 0.8728274230320672, "learning_rate": 2.6063648817314825e-06, "loss": 0.4295, "step": 9498 }, { "epoch": 0.77, "grad_norm": 0.9260757150277996, "learning_rate": 2.6045926771976306e-06, "loss": 0.4946, "step": 9499 }, { "epoch": 0.77, "grad_norm": 0.9797267215825568, "learning_rate": 2.6028209851589403e-06, "loss": 0.5314, "step": 9500 }, { "epoch": 0.77, "grad_norm": 0.8235502974705514, "learning_rate": 2.6010498057382005e-06, "loss": 0.4063, "step": 9501 }, { "epoch": 0.77, "grad_norm": 0.9412399480520027, "learning_rate": 2.599279139058143e-06, "loss": 0.5289, "step": 9502 }, { "epoch": 0.77, "grad_norm": 0.9456699294244987, "learning_rate": 2.597508985241477e-06, "loss": 0.4849, "step": 9503 }, { "epoch": 0.77, "grad_norm": 0.8843131800728579, "learning_rate": 2.5957393444108724e-06, "loss": 0.462, "step": 9504 }, { "epoch": 0.77, "grad_norm": 0.9220872425336669, "learning_rate": 2.593970216688967e-06, "loss": 0.5103, "step": 9505 }, { "epoch": 0.77, "grad_norm": 0.9703291160659552, "learning_rate": 2.592201602198351e-06, "loss": 0.4847, "step": 9506 }, { "epoch": 0.77, "grad_norm": 1.0437899523711882, "learning_rate": 2.5904335010615976e-06, "loss": 0.4661, "step": 9507 }, { "epoch": 0.77, "grad_norm": 0.8931272353575075, "learning_rate": 2.588665913401226e-06, "loss": 0.4767, "step": 9508 }, { "epoch": 0.77, "grad_norm": 0.8971499854589432, "learning_rate": 2.5868988393397376e-06, "loss": 0.4886, "step": 9509 }, { "epoch": 0.77, "grad_norm": 0.9114204447284524, "learning_rate": 2.5851322789995815e-06, "loss": 0.4812, "step": 9510 }, { "epoch": 0.77, "grad_norm": 0.9433208349417566, "learning_rate": 2.5833662325031816e-06, "loss": 0.4965, "step": 9511 }, { "epoch": 0.77, "grad_norm": 0.9745841552592122, "learning_rate": 2.5816006999729225e-06, "loss": 0.5398, "step": 9512 }, { "epoch": 0.77, "grad_norm": 1.1338912061174495, "learning_rate": 2.5798356815311587e-06, "loss": 0.4673, "step": 9513 }, { "epoch": 0.77, "grad_norm": 0.9102039459425405, "learning_rate": 2.5780711773001943e-06, "loss": 0.4687, "step": 9514 }, { "epoch": 0.77, "grad_norm": 0.8972357697454645, "learning_rate": 2.5763071874023205e-06, "loss": 0.442, "step": 9515 }, { "epoch": 0.77, "grad_norm": 0.9881441739800304, "learning_rate": 2.5745437119597704e-06, "loss": 0.4863, "step": 9516 }, { "epoch": 0.77, "grad_norm": 0.9675120535718381, "learning_rate": 2.5727807510947545e-06, "loss": 0.5134, "step": 9517 }, { "epoch": 0.77, "grad_norm": 0.9015375721815215, "learning_rate": 2.5710183049294445e-06, "loss": 0.4743, "step": 9518 }, { "epoch": 0.77, "grad_norm": 0.9734237840850912, "learning_rate": 2.56925637358598e-06, "loss": 0.5198, "step": 9519 }, { "epoch": 0.77, "grad_norm": 0.9644298697485151, "learning_rate": 2.5674949571864517e-06, "loss": 0.4779, "step": 9520 }, { "epoch": 0.77, "grad_norm": 0.9969693321534621, "learning_rate": 2.5657340558529353e-06, "loss": 0.4861, "step": 9521 }, { "epoch": 0.77, "grad_norm": 0.9181947952380234, "learning_rate": 2.5639736697074525e-06, "loss": 0.4836, "step": 9522 }, { "epoch": 0.77, "grad_norm": 0.9606810410171697, "learning_rate": 2.5622137988719985e-06, "loss": 0.5083, "step": 9523 }, { "epoch": 0.77, "grad_norm": 0.940512663001103, "learning_rate": 2.5604544434685307e-06, "loss": 0.4637, "step": 9524 }, { "epoch": 0.77, "grad_norm": 0.9785135388690958, "learning_rate": 2.558695603618975e-06, "loss": 0.4966, "step": 9525 }, { "epoch": 0.77, "grad_norm": 0.9331808129454607, "learning_rate": 2.5569372794452063e-06, "loss": 0.5326, "step": 9526 }, { "epoch": 0.77, "grad_norm": 0.9294999568518234, "learning_rate": 2.555179471069089e-06, "loss": 0.48, "step": 9527 }, { "epoch": 0.77, "grad_norm": 0.824147125924437, "learning_rate": 2.553422178612427e-06, "loss": 0.4187, "step": 9528 }, { "epoch": 0.77, "grad_norm": 0.8936141587086273, "learning_rate": 2.5516654021970035e-06, "loss": 0.5049, "step": 9529 }, { "epoch": 0.77, "grad_norm": 0.9344279939268219, "learning_rate": 2.549909141944561e-06, "loss": 0.479, "step": 9530 }, { "epoch": 0.77, "grad_norm": 0.9227147305713791, "learning_rate": 2.5481533979768092e-06, "loss": 0.4666, "step": 9531 }, { "epoch": 0.77, "grad_norm": 0.9643823288376145, "learning_rate": 2.546398170415412e-06, "loss": 0.4633, "step": 9532 }, { "epoch": 0.77, "grad_norm": 0.9135184290568981, "learning_rate": 2.5446434593820156e-06, "loss": 0.4838, "step": 9533 }, { "epoch": 0.77, "grad_norm": 0.9140822658348559, "learning_rate": 2.5428892649982117e-06, "loss": 0.4997, "step": 9534 }, { "epoch": 0.77, "grad_norm": 0.9113683203067207, "learning_rate": 2.5411355873855683e-06, "loss": 0.4603, "step": 9535 }, { "epoch": 0.78, "grad_norm": 0.8322288199107464, "learning_rate": 2.539382426665611e-06, "loss": 0.4867, "step": 9536 }, { "epoch": 0.78, "grad_norm": 0.9147448880750836, "learning_rate": 2.537629782959835e-06, "loss": 0.4898, "step": 9537 }, { "epoch": 0.78, "grad_norm": 0.868170086743266, "learning_rate": 2.5358776563896957e-06, "loss": 0.5012, "step": 9538 }, { "epoch": 0.78, "grad_norm": 0.9781015542954844, "learning_rate": 2.5341260470766173e-06, "loss": 0.4501, "step": 9539 }, { "epoch": 0.78, "grad_norm": 0.933814696684243, "learning_rate": 2.5323749551419775e-06, "loss": 0.4684, "step": 9540 }, { "epoch": 0.78, "grad_norm": 0.8989376433156853, "learning_rate": 2.5306243807071305e-06, "loss": 0.5363, "step": 9541 }, { "epoch": 0.78, "grad_norm": 0.8773684970723838, "learning_rate": 2.5288743238933887e-06, "loss": 0.4537, "step": 9542 }, { "epoch": 0.78, "grad_norm": 0.9498142865358258, "learning_rate": 2.5271247848220294e-06, "loss": 0.5679, "step": 9543 }, { "epoch": 0.78, "grad_norm": 1.0550451747440102, "learning_rate": 2.525375763614294e-06, "loss": 0.5219, "step": 9544 }, { "epoch": 0.78, "grad_norm": 0.9255261798407214, "learning_rate": 2.5236272603913915e-06, "loss": 0.5036, "step": 9545 }, { "epoch": 0.78, "grad_norm": 0.9255632591648001, "learning_rate": 2.5218792752744847e-06, "loss": 0.5053, "step": 9546 }, { "epoch": 0.78, "grad_norm": 0.9945535840087542, "learning_rate": 2.5201318083847105e-06, "loss": 0.5119, "step": 9547 }, { "epoch": 0.78, "grad_norm": 0.9160402986044256, "learning_rate": 2.518384859843168e-06, "loss": 0.4775, "step": 9548 }, { "epoch": 0.78, "grad_norm": 0.9130901844914137, "learning_rate": 2.516638429770919e-06, "loss": 0.4579, "step": 9549 }, { "epoch": 0.78, "grad_norm": 0.8844580873802608, "learning_rate": 2.514892518288988e-06, "loss": 0.4504, "step": 9550 }, { "epoch": 0.78, "grad_norm": 0.9584403151191385, "learning_rate": 2.5131471255183705e-06, "loss": 0.525, "step": 9551 }, { "epoch": 0.78, "grad_norm": 0.9838552272295368, "learning_rate": 2.511402251580013e-06, "loss": 0.5415, "step": 9552 }, { "epoch": 0.78, "grad_norm": 0.9275636019742892, "learning_rate": 2.509657896594837e-06, "loss": 0.4796, "step": 9553 }, { "epoch": 0.78, "grad_norm": 0.9780448168288274, "learning_rate": 2.507914060683725e-06, "loss": 0.4797, "step": 9554 }, { "epoch": 0.78, "grad_norm": 1.0122611059232476, "learning_rate": 2.5061707439675222e-06, "loss": 0.4896, "step": 9555 }, { "epoch": 0.78, "grad_norm": 0.98051994871729, "learning_rate": 2.5044279465670408e-06, "loss": 0.5126, "step": 9556 }, { "epoch": 0.78, "grad_norm": 0.9005729862836364, "learning_rate": 2.502685668603053e-06, "loss": 0.5542, "step": 9557 }, { "epoch": 0.78, "grad_norm": 0.9593751701329121, "learning_rate": 2.5009439101963027e-06, "loss": 0.5163, "step": 9558 }, { "epoch": 0.78, "grad_norm": 0.9336429381380371, "learning_rate": 2.499202671467483e-06, "loss": 0.553, "step": 9559 }, { "epoch": 0.78, "grad_norm": 1.0034038590580017, "learning_rate": 2.497461952537267e-06, "loss": 0.5383, "step": 9560 }, { "epoch": 0.78, "grad_norm": 0.8763757759226454, "learning_rate": 2.4957217535262824e-06, "loss": 0.4981, "step": 9561 }, { "epoch": 0.78, "grad_norm": 0.8586560124179438, "learning_rate": 2.4939820745551235e-06, "loss": 0.4485, "step": 9562 }, { "epoch": 0.78, "grad_norm": 0.9065661965736719, "learning_rate": 2.4922429157443484e-06, "loss": 0.4697, "step": 9563 }, { "epoch": 0.78, "grad_norm": 0.8918877224490259, "learning_rate": 2.490504277214484e-06, "loss": 0.4276, "step": 9564 }, { "epoch": 0.78, "grad_norm": 0.8620501304929654, "learning_rate": 2.488766159086009e-06, "loss": 0.4448, "step": 9565 }, { "epoch": 0.78, "grad_norm": 0.9677343569180769, "learning_rate": 2.4870285614793764e-06, "loss": 0.5492, "step": 9566 }, { "epoch": 0.78, "grad_norm": 0.962363169336885, "learning_rate": 2.485291484515e-06, "loss": 0.5051, "step": 9567 }, { "epoch": 0.78, "grad_norm": 0.959581770332624, "learning_rate": 2.4835549283132597e-06, "loss": 0.5402, "step": 9568 }, { "epoch": 0.78, "grad_norm": 0.8514647633831238, "learning_rate": 2.4818188929944942e-06, "loss": 0.4795, "step": 9569 }, { "epoch": 0.78, "grad_norm": 0.9092107495200746, "learning_rate": 2.4800833786790145e-06, "loss": 0.4939, "step": 9570 }, { "epoch": 0.78, "grad_norm": 0.9327251184011716, "learning_rate": 2.478348385487084e-06, "loss": 0.4533, "step": 9571 }, { "epoch": 0.78, "grad_norm": 0.8735652553810023, "learning_rate": 2.476613913538938e-06, "loss": 0.4457, "step": 9572 }, { "epoch": 0.78, "grad_norm": 1.0708948579788935, "learning_rate": 2.474879962954775e-06, "loss": 0.5, "step": 9573 }, { "epoch": 0.78, "grad_norm": 0.9015351195751412, "learning_rate": 2.4731465338547556e-06, "loss": 0.5406, "step": 9574 }, { "epoch": 0.78, "grad_norm": 0.9339400308308337, "learning_rate": 2.471413626359005e-06, "loss": 0.4519, "step": 9575 }, { "epoch": 0.78, "grad_norm": 0.9899716519567556, "learning_rate": 2.4696812405876147e-06, "loss": 0.4821, "step": 9576 }, { "epoch": 0.78, "grad_norm": 0.9336255754909419, "learning_rate": 2.46794937666063e-06, "loss": 0.5054, "step": 9577 }, { "epoch": 0.78, "grad_norm": 0.9350326299793474, "learning_rate": 2.466218034698078e-06, "loss": 0.4756, "step": 9578 }, { "epoch": 0.78, "grad_norm": 0.896737867789691, "learning_rate": 2.4644872148199316e-06, "loss": 0.4517, "step": 9579 }, { "epoch": 0.78, "grad_norm": 0.9455232413075647, "learning_rate": 2.4627569171461363e-06, "loss": 0.5259, "step": 9580 }, { "epoch": 0.78, "grad_norm": 0.8955076931033673, "learning_rate": 2.4610271417966014e-06, "loss": 0.4832, "step": 9581 }, { "epoch": 0.78, "grad_norm": 0.866670299293872, "learning_rate": 2.4592978888912013e-06, "loss": 0.477, "step": 9582 }, { "epoch": 0.78, "grad_norm": 0.8821913909176877, "learning_rate": 2.457569158549763e-06, "loss": 0.4958, "step": 9583 }, { "epoch": 0.78, "grad_norm": 0.9863938976553523, "learning_rate": 2.455840950892099e-06, "loss": 0.5138, "step": 9584 }, { "epoch": 0.78, "grad_norm": 0.9181851836678736, "learning_rate": 2.4541132660379606e-06, "loss": 0.5052, "step": 9585 }, { "epoch": 0.78, "grad_norm": 0.87896082720906, "learning_rate": 2.4523861041070806e-06, "loss": 0.4395, "step": 9586 }, { "epoch": 0.78, "grad_norm": 0.9452044022637043, "learning_rate": 2.4506594652191485e-06, "loss": 0.4585, "step": 9587 }, { "epoch": 0.78, "grad_norm": 0.9581124622435845, "learning_rate": 2.448933349493823e-06, "loss": 0.4749, "step": 9588 }, { "epoch": 0.78, "grad_norm": 0.9126209879683963, "learning_rate": 2.4472077570507124e-06, "loss": 0.5034, "step": 9589 }, { "epoch": 0.78, "grad_norm": 0.8679783835342433, "learning_rate": 2.4454826880094106e-06, "loss": 0.5103, "step": 9590 }, { "epoch": 0.78, "grad_norm": 0.8997725986140164, "learning_rate": 2.443758142489454e-06, "loss": 0.4383, "step": 9591 }, { "epoch": 0.78, "grad_norm": 0.9282741810737473, "learning_rate": 2.442034120610357e-06, "loss": 0.4888, "step": 9592 }, { "epoch": 0.78, "grad_norm": 0.9426438609355607, "learning_rate": 2.4403106224915908e-06, "loss": 0.4972, "step": 9593 }, { "epoch": 0.78, "grad_norm": 0.9193151062029296, "learning_rate": 2.438587648252596e-06, "loss": 0.4282, "step": 9594 }, { "epoch": 0.78, "grad_norm": 0.8577142049418723, "learning_rate": 2.4368651980127644e-06, "loss": 0.4865, "step": 9595 }, { "epoch": 0.78, "grad_norm": 0.9277333955003407, "learning_rate": 2.4351432718914727e-06, "loss": 0.4682, "step": 9596 }, { "epoch": 0.78, "grad_norm": 0.8571646694128863, "learning_rate": 2.433421870008038e-06, "loss": 0.4404, "step": 9597 }, { "epoch": 0.78, "grad_norm": 0.9350419161187955, "learning_rate": 2.431700992481757e-06, "loss": 0.468, "step": 9598 }, { "epoch": 0.78, "grad_norm": 0.7483768064636855, "learning_rate": 2.4299806394318837e-06, "loss": 0.3972, "step": 9599 }, { "epoch": 0.78, "grad_norm": 1.0325912865114175, "learning_rate": 2.428260810977641e-06, "loss": 0.5122, "step": 9600 }, { "epoch": 0.78, "grad_norm": 0.9058356524191399, "learning_rate": 2.4265415072382016e-06, "loss": 0.5053, "step": 9601 }, { "epoch": 0.78, "grad_norm": 0.942026873185667, "learning_rate": 2.424822728332724e-06, "loss": 0.5308, "step": 9602 }, { "epoch": 0.78, "grad_norm": 0.9747815459252755, "learning_rate": 2.423104474380309e-06, "loss": 0.4839, "step": 9603 }, { "epoch": 0.78, "grad_norm": 0.8950242754724765, "learning_rate": 2.421386745500034e-06, "loss": 0.476, "step": 9604 }, { "epoch": 0.78, "grad_norm": 0.8927261857002168, "learning_rate": 2.419669541810934e-06, "loss": 0.4962, "step": 9605 }, { "epoch": 0.78, "grad_norm": 1.031794549609845, "learning_rate": 2.417952863432015e-06, "loss": 0.4638, "step": 9606 }, { "epoch": 0.78, "grad_norm": 0.941660614300367, "learning_rate": 2.4162367104822313e-06, "loss": 0.4732, "step": 9607 }, { "epoch": 0.78, "grad_norm": 0.9447255800757794, "learning_rate": 2.414521083080523e-06, "loss": 0.4594, "step": 9608 }, { "epoch": 0.78, "grad_norm": 1.0667354565333285, "learning_rate": 2.4128059813457716e-06, "loss": 0.5123, "step": 9609 }, { "epoch": 0.78, "grad_norm": 0.8808929346517816, "learning_rate": 2.411091405396836e-06, "loss": 0.4741, "step": 9610 }, { "epoch": 0.78, "grad_norm": 0.9070711715732838, "learning_rate": 2.409377355352536e-06, "loss": 0.4799, "step": 9611 }, { "epoch": 0.78, "grad_norm": 0.9931652854573912, "learning_rate": 2.4076638313316537e-06, "loss": 0.5847, "step": 9612 }, { "epoch": 0.78, "grad_norm": 0.9773303592488506, "learning_rate": 2.405950833452928e-06, "loss": 0.438, "step": 9613 }, { "epoch": 0.78, "grad_norm": 0.8959575285566649, "learning_rate": 2.4042383618350795e-06, "loss": 0.4453, "step": 9614 }, { "epoch": 0.78, "grad_norm": 0.907181588357818, "learning_rate": 2.402526416596772e-06, "loss": 0.4528, "step": 9615 }, { "epoch": 0.78, "grad_norm": 0.9749348938822829, "learning_rate": 2.400814997856645e-06, "loss": 0.515, "step": 9616 }, { "epoch": 0.78, "grad_norm": 0.901830383250741, "learning_rate": 2.399104105733299e-06, "loss": 0.4434, "step": 9617 }, { "epoch": 0.78, "grad_norm": 0.8893553496072407, "learning_rate": 2.3973937403452983e-06, "loss": 0.4025, "step": 9618 }, { "epoch": 0.78, "grad_norm": 0.9038113038070881, "learning_rate": 2.3956839018111634e-06, "loss": 0.4542, "step": 9619 }, { "epoch": 0.78, "grad_norm": 0.992165409961388, "learning_rate": 2.393974590249394e-06, "loss": 0.5901, "step": 9620 }, { "epoch": 0.78, "grad_norm": 0.8849862750002848, "learning_rate": 2.3922658057784355e-06, "loss": 0.5163, "step": 9621 }, { "epoch": 0.78, "grad_norm": 1.0192890227700766, "learning_rate": 2.3905575485167098e-06, "loss": 0.4513, "step": 9622 }, { "epoch": 0.78, "grad_norm": 0.8242519230545053, "learning_rate": 2.388849818582596e-06, "loss": 0.4359, "step": 9623 }, { "epoch": 0.78, "grad_norm": 0.91491375213291, "learning_rate": 2.387142616094441e-06, "loss": 0.4939, "step": 9624 }, { "epoch": 0.78, "grad_norm": 0.9370320013929132, "learning_rate": 2.385435941170544e-06, "loss": 0.534, "step": 9625 }, { "epoch": 0.78, "grad_norm": 1.0038811640559153, "learning_rate": 2.3837297939291893e-06, "loss": 0.5309, "step": 9626 }, { "epoch": 0.78, "grad_norm": 0.8885292541660332, "learning_rate": 2.382024174488601e-06, "loss": 0.4796, "step": 9627 }, { "epoch": 0.78, "grad_norm": 0.9018924053530425, "learning_rate": 2.38031908296698e-06, "loss": 0.4756, "step": 9628 }, { "epoch": 0.78, "grad_norm": 0.9536886642846818, "learning_rate": 2.378614519482487e-06, "loss": 0.476, "step": 9629 }, { "epoch": 0.78, "grad_norm": 0.8693603792931843, "learning_rate": 2.376910484153252e-06, "loss": 0.4338, "step": 9630 }, { "epoch": 0.78, "grad_norm": 0.9319492597355499, "learning_rate": 2.375206977097353e-06, "loss": 0.4931, "step": 9631 }, { "epoch": 0.78, "grad_norm": 0.8780828804269664, "learning_rate": 2.373503998432852e-06, "loss": 0.4885, "step": 9632 }, { "epoch": 0.78, "grad_norm": 0.869729081783994, "learning_rate": 2.3718015482777535e-06, "loss": 0.4, "step": 9633 }, { "epoch": 0.78, "grad_norm": 0.9881607907408684, "learning_rate": 2.3700996267500486e-06, "loss": 0.4615, "step": 9634 }, { "epoch": 0.78, "grad_norm": 0.8627314779565832, "learning_rate": 2.368398233967668e-06, "loss": 0.4861, "step": 9635 }, { "epoch": 0.78, "grad_norm": 0.9965660580842806, "learning_rate": 2.3666973700485207e-06, "loss": 0.4252, "step": 9636 }, { "epoch": 0.78, "grad_norm": 0.8821204773558616, "learning_rate": 2.3649970351104744e-06, "loss": 0.4828, "step": 9637 }, { "epoch": 0.78, "grad_norm": 0.8515620291830194, "learning_rate": 2.363297229271365e-06, "loss": 0.4645, "step": 9638 }, { "epoch": 0.78, "grad_norm": 0.9134141990975033, "learning_rate": 2.3615979526489773e-06, "loss": 0.4702, "step": 9639 }, { "epoch": 0.78, "grad_norm": 1.045250600324794, "learning_rate": 2.3598992053610826e-06, "loss": 0.514, "step": 9640 }, { "epoch": 0.78, "grad_norm": 0.9719330232433728, "learning_rate": 2.358200987525393e-06, "loss": 0.4843, "step": 9641 }, { "epoch": 0.78, "grad_norm": 0.9890798245689238, "learning_rate": 2.356503299259597e-06, "loss": 0.5332, "step": 9642 }, { "epoch": 0.78, "grad_norm": 0.951865022725942, "learning_rate": 2.354806140681343e-06, "loss": 0.477, "step": 9643 }, { "epoch": 0.78, "grad_norm": 0.9038312048330461, "learning_rate": 2.353109511908245e-06, "loss": 0.4452, "step": 9644 }, { "epoch": 0.78, "grad_norm": 0.9811003847716361, "learning_rate": 2.351413413057868e-06, "loss": 0.5233, "step": 9645 }, { "epoch": 0.78, "grad_norm": 0.9479546781437039, "learning_rate": 2.349717844247764e-06, "loss": 0.4685, "step": 9646 }, { "epoch": 0.78, "grad_norm": 0.8049590726514992, "learning_rate": 2.3480228055954246e-06, "loss": 0.4421, "step": 9647 }, { "epoch": 0.78, "grad_norm": 0.9733781462938889, "learning_rate": 2.3463282972183176e-06, "loss": 0.5053, "step": 9648 }, { "epoch": 0.78, "grad_norm": 0.9600108414024033, "learning_rate": 2.3446343192338705e-06, "loss": 0.5029, "step": 9649 }, { "epoch": 0.78, "grad_norm": 0.908930633454571, "learning_rate": 2.342940871759477e-06, "loss": 0.51, "step": 9650 }, { "epoch": 0.78, "grad_norm": 0.8563848690253233, "learning_rate": 2.3412479549124843e-06, "loss": 0.4161, "step": 9651 }, { "epoch": 0.78, "grad_norm": 0.9269137705402973, "learning_rate": 2.339555568810221e-06, "loss": 0.4778, "step": 9652 }, { "epoch": 0.78, "grad_norm": 0.9345994871074489, "learning_rate": 2.337863713569959e-06, "loss": 0.4064, "step": 9653 }, { "epoch": 0.78, "grad_norm": 0.8561141204612545, "learning_rate": 2.336172389308945e-06, "loss": 0.4461, "step": 9654 }, { "epoch": 0.78, "grad_norm": 0.9263354207130737, "learning_rate": 2.334481596144387e-06, "loss": 0.4931, "step": 9655 }, { "epoch": 0.78, "grad_norm": 0.9646139588123631, "learning_rate": 2.3327913341934573e-06, "loss": 0.4798, "step": 9656 }, { "epoch": 0.78, "grad_norm": 0.9821076222973479, "learning_rate": 2.3311016035732825e-06, "loss": 0.5305, "step": 9657 }, { "epoch": 0.78, "grad_norm": 0.8870591207772264, "learning_rate": 2.329412404400969e-06, "loss": 0.4557, "step": 9658 }, { "epoch": 0.79, "grad_norm": 0.8341473641915862, "learning_rate": 2.3277237367935702e-06, "loss": 0.3986, "step": 9659 }, { "epoch": 0.79, "grad_norm": 1.0667231560326589, "learning_rate": 2.3260356008681107e-06, "loss": 0.5143, "step": 9660 }, { "epoch": 0.79, "grad_norm": 0.9747798801186214, "learning_rate": 2.3243479967415773e-06, "loss": 0.4896, "step": 9661 }, { "epoch": 0.79, "grad_norm": 0.934749581531243, "learning_rate": 2.322660924530922e-06, "loss": 0.4454, "step": 9662 }, { "epoch": 0.79, "grad_norm": 0.9290559957629269, "learning_rate": 2.32097438435305e-06, "loss": 0.4966, "step": 9663 }, { "epoch": 0.79, "grad_norm": 1.0453163615005325, "learning_rate": 2.319288376324846e-06, "loss": 0.4847, "step": 9664 }, { "epoch": 0.79, "grad_norm": 0.9662583465504331, "learning_rate": 2.317602900563143e-06, "loss": 0.5262, "step": 9665 }, { "epoch": 0.79, "grad_norm": 1.0136411008586397, "learning_rate": 2.3159179571847446e-06, "loss": 0.5118, "step": 9666 }, { "epoch": 0.79, "grad_norm": 0.9338870766327807, "learning_rate": 2.314233546306416e-06, "loss": 0.4899, "step": 9667 }, { "epoch": 0.79, "grad_norm": 1.472138551273479, "learning_rate": 2.3125496680448877e-06, "loss": 0.5472, "step": 9668 }, { "epoch": 0.79, "grad_norm": 0.809799494625484, "learning_rate": 2.3108663225168436e-06, "loss": 0.424, "step": 9669 }, { "epoch": 0.79, "grad_norm": 0.9614148731201514, "learning_rate": 2.3091835098389493e-06, "loss": 0.4957, "step": 9670 }, { "epoch": 0.79, "grad_norm": 1.0110403015033504, "learning_rate": 2.307501230127812e-06, "loss": 0.4923, "step": 9671 }, { "epoch": 0.79, "grad_norm": 0.9439389881727236, "learning_rate": 2.3058194835000167e-06, "loss": 0.4792, "step": 9672 }, { "epoch": 0.79, "grad_norm": 0.9739089259424125, "learning_rate": 2.3041382700721073e-06, "loss": 0.5028, "step": 9673 }, { "epoch": 0.79, "grad_norm": 0.9288763116072336, "learning_rate": 2.3024575899605906e-06, "loss": 0.5383, "step": 9674 }, { "epoch": 0.79, "grad_norm": 1.0166464866010603, "learning_rate": 2.3007774432819308e-06, "loss": 0.4986, "step": 9675 }, { "epoch": 0.79, "grad_norm": 0.9510888787436993, "learning_rate": 2.2990978301525702e-06, "loss": 0.4687, "step": 9676 }, { "epoch": 0.79, "grad_norm": 1.015865022970959, "learning_rate": 2.297418750688897e-06, "loss": 0.4786, "step": 9677 }, { "epoch": 0.79, "grad_norm": 0.8650789735205615, "learning_rate": 2.2957402050072717e-06, "loss": 0.4507, "step": 9678 }, { "epoch": 0.79, "grad_norm": 1.0254599114877216, "learning_rate": 2.294062193224016e-06, "loss": 0.4999, "step": 9679 }, { "epoch": 0.79, "grad_norm": 0.9169938351661332, "learning_rate": 2.292384715455419e-06, "loss": 0.4526, "step": 9680 }, { "epoch": 0.79, "grad_norm": 1.0437066091326381, "learning_rate": 2.2907077718177183e-06, "loss": 0.5488, "step": 9681 }, { "epoch": 0.79, "grad_norm": 0.9602945871231334, "learning_rate": 2.2890313624271363e-06, "loss": 0.4523, "step": 9682 }, { "epoch": 0.79, "grad_norm": 0.9967843968821193, "learning_rate": 2.2873554873998393e-06, "loss": 0.5272, "step": 9683 }, { "epoch": 0.79, "grad_norm": 0.9266329545721437, "learning_rate": 2.285680146851965e-06, "loss": 0.4719, "step": 9684 }, { "epoch": 0.79, "grad_norm": 0.9560564901228441, "learning_rate": 2.2840053408996154e-06, "loss": 0.5117, "step": 9685 }, { "epoch": 0.79, "grad_norm": 0.9832147229170937, "learning_rate": 2.28233106965885e-06, "loss": 0.4882, "step": 9686 }, { "epoch": 0.79, "grad_norm": 0.9488749898311409, "learning_rate": 2.2806573332456973e-06, "loss": 0.4401, "step": 9687 }, { "epoch": 0.79, "grad_norm": 0.9169415250097036, "learning_rate": 2.278984131776145e-06, "loss": 0.4657, "step": 9688 }, { "epoch": 0.79, "grad_norm": 0.8472737030846431, "learning_rate": 2.2773114653661433e-06, "loss": 0.4264, "step": 9689 }, { "epoch": 0.79, "grad_norm": 0.9309930762749372, "learning_rate": 2.2756393341316065e-06, "loss": 0.4762, "step": 9690 }, { "epoch": 0.79, "grad_norm": 0.9340556544258505, "learning_rate": 2.2739677381884117e-06, "loss": 0.4586, "step": 9691 }, { "epoch": 0.79, "grad_norm": 0.8572938512075292, "learning_rate": 2.272296677652399e-06, "loss": 0.4299, "step": 9692 }, { "epoch": 0.79, "grad_norm": 0.9464483530843911, "learning_rate": 2.2706261526393734e-06, "loss": 0.4628, "step": 9693 }, { "epoch": 0.79, "grad_norm": 0.9382340497970053, "learning_rate": 2.2689561632651024e-06, "loss": 0.4493, "step": 9694 }, { "epoch": 0.79, "grad_norm": 0.9729695950363514, "learning_rate": 2.267286709645309e-06, "loss": 0.4547, "step": 9695 }, { "epoch": 0.79, "grad_norm": 0.9290966977858953, "learning_rate": 2.2656177918956867e-06, "loss": 0.499, "step": 9696 }, { "epoch": 0.79, "grad_norm": 0.9104737506886303, "learning_rate": 2.2639494101318914e-06, "loss": 0.4894, "step": 9697 }, { "epoch": 0.79, "grad_norm": 0.8565759329384154, "learning_rate": 2.262281564469541e-06, "loss": 0.472, "step": 9698 }, { "epoch": 0.79, "grad_norm": 0.9881347553725216, "learning_rate": 2.260614255024214e-06, "loss": 0.5355, "step": 9699 }, { "epoch": 0.79, "grad_norm": 0.8516162680336561, "learning_rate": 2.2589474819114564e-06, "loss": 0.4418, "step": 9700 }, { "epoch": 0.79, "grad_norm": 1.0157475120079382, "learning_rate": 2.2572812452467708e-06, "loss": 0.5232, "step": 9701 }, { "epoch": 0.79, "grad_norm": 0.9035265623515684, "learning_rate": 2.255615545145626e-06, "loss": 0.4444, "step": 9702 }, { "epoch": 0.79, "grad_norm": 0.9245265400864687, "learning_rate": 2.2539503817234553e-06, "loss": 0.5143, "step": 9703 }, { "epoch": 0.79, "grad_norm": 0.9118533686851575, "learning_rate": 2.252285755095652e-06, "loss": 0.5552, "step": 9704 }, { "epoch": 0.79, "grad_norm": 0.8264811753969631, "learning_rate": 2.2506216653775736e-06, "loss": 0.5094, "step": 9705 }, { "epoch": 0.79, "grad_norm": 0.8832554789712772, "learning_rate": 2.2489581126845408e-06, "loss": 0.4546, "step": 9706 }, { "epoch": 0.79, "grad_norm": 0.8941894011676278, "learning_rate": 2.2472950971318377e-06, "loss": 0.4624, "step": 9707 }, { "epoch": 0.79, "grad_norm": 0.9404419890377869, "learning_rate": 2.2456326188347045e-06, "loss": 0.5373, "step": 9708 }, { "epoch": 0.79, "grad_norm": 0.8772512482369041, "learning_rate": 2.2439706779083538e-06, "loss": 0.431, "step": 9709 }, { "epoch": 0.79, "grad_norm": 0.9547698813593285, "learning_rate": 2.2423092744679553e-06, "loss": 0.5533, "step": 9710 }, { "epoch": 0.79, "grad_norm": 0.93032348439044, "learning_rate": 2.240648408628643e-06, "loss": 0.4784, "step": 9711 }, { "epoch": 0.79, "grad_norm": 0.9153025168867757, "learning_rate": 2.238988080505513e-06, "loss": 0.4683, "step": 9712 }, { "epoch": 0.79, "grad_norm": 0.9114927690819346, "learning_rate": 2.2373282902136273e-06, "loss": 0.4661, "step": 9713 }, { "epoch": 0.79, "grad_norm": 0.8452718104433999, "learning_rate": 2.2356690378680036e-06, "loss": 0.4077, "step": 9714 }, { "epoch": 0.79, "grad_norm": 0.8560275740625997, "learning_rate": 2.2340103235836286e-06, "loss": 0.5184, "step": 9715 }, { "epoch": 0.79, "grad_norm": 1.0038013957013665, "learning_rate": 2.2323521474754508e-06, "loss": 0.5474, "step": 9716 }, { "epoch": 0.79, "grad_norm": 0.946783776261924, "learning_rate": 2.2306945096583775e-06, "loss": 0.4912, "step": 9717 }, { "epoch": 0.79, "grad_norm": 0.9489969255455457, "learning_rate": 2.2290374102472846e-06, "loss": 0.4942, "step": 9718 }, { "epoch": 0.79, "grad_norm": 0.9521301748260809, "learning_rate": 2.2273808493570082e-06, "loss": 0.4411, "step": 9719 }, { "epoch": 0.79, "grad_norm": 0.9577675331331204, "learning_rate": 2.2257248271023424e-06, "loss": 0.4104, "step": 9720 }, { "epoch": 0.79, "grad_norm": 1.0214226660713241, "learning_rate": 2.22406934359805e-06, "loss": 0.4496, "step": 9721 }, { "epoch": 0.79, "grad_norm": 0.8994988307598074, "learning_rate": 2.2224143989588545e-06, "loss": 0.4956, "step": 9722 }, { "epoch": 0.79, "grad_norm": 0.8937085342620766, "learning_rate": 2.2207599932994427e-06, "loss": 0.4643, "step": 9723 }, { "epoch": 0.79, "grad_norm": 0.9139353897263605, "learning_rate": 2.2191061267344636e-06, "loss": 0.4705, "step": 9724 }, { "epoch": 0.79, "grad_norm": 0.8709930127962902, "learning_rate": 2.217452799378531e-06, "loss": 0.4819, "step": 9725 }, { "epoch": 0.79, "grad_norm": 0.995164754817237, "learning_rate": 2.215800011346211e-06, "loss": 0.5095, "step": 9726 }, { "epoch": 0.79, "grad_norm": 0.8863722636206753, "learning_rate": 2.2141477627520504e-06, "loss": 0.4836, "step": 9727 }, { "epoch": 0.79, "grad_norm": 1.0011157201630954, "learning_rate": 2.212496053710541e-06, "loss": 0.5673, "step": 9728 }, { "epoch": 0.79, "grad_norm": 0.9278185762841324, "learning_rate": 2.2108448843361487e-06, "loss": 0.4849, "step": 9729 }, { "epoch": 0.79, "grad_norm": 0.9013886456943522, "learning_rate": 2.209194254743295e-06, "loss": 0.4775, "step": 9730 }, { "epoch": 0.79, "grad_norm": 0.9261698216325288, "learning_rate": 2.2075441650463734e-06, "loss": 0.4504, "step": 9731 }, { "epoch": 0.79, "grad_norm": 1.0073436550756663, "learning_rate": 2.205894615359724e-06, "loss": 0.5411, "step": 9732 }, { "epoch": 0.79, "grad_norm": 1.1251748379561548, "learning_rate": 2.2042456057976693e-06, "loss": 0.5031, "step": 9733 }, { "epoch": 0.79, "grad_norm": 1.012572403508521, "learning_rate": 2.2025971364744758e-06, "loss": 0.506, "step": 9734 }, { "epoch": 0.79, "grad_norm": 0.93453836437706, "learning_rate": 2.2009492075043847e-06, "loss": 0.5359, "step": 9735 }, { "epoch": 0.79, "grad_norm": 0.9697596467071894, "learning_rate": 2.199301819001597e-06, "loss": 0.5473, "step": 9736 }, { "epoch": 0.79, "grad_norm": 1.071892038087734, "learning_rate": 2.1976549710802754e-06, "loss": 0.4695, "step": 9737 }, { "epoch": 0.79, "grad_norm": 1.0109392674982929, "learning_rate": 2.1960086638545385e-06, "loss": 0.4593, "step": 9738 }, { "epoch": 0.79, "grad_norm": 0.8912902858755715, "learning_rate": 2.1943628974384858e-06, "loss": 0.4292, "step": 9739 }, { "epoch": 0.79, "grad_norm": 0.9169358523666661, "learning_rate": 2.192717671946156e-06, "loss": 0.5011, "step": 9740 }, { "epoch": 0.79, "grad_norm": 0.9412617557898649, "learning_rate": 2.191072987491567e-06, "loss": 0.479, "step": 9741 }, { "epoch": 0.79, "grad_norm": 0.9641902627726724, "learning_rate": 2.1894288441886946e-06, "loss": 0.484, "step": 9742 }, { "epoch": 0.79, "grad_norm": 0.9835215942561565, "learning_rate": 2.1877852421514767e-06, "loss": 0.5121, "step": 9743 }, { "epoch": 0.79, "grad_norm": 0.8855674381525747, "learning_rate": 2.1861421814938076e-06, "loss": 0.3979, "step": 9744 }, { "epoch": 0.79, "grad_norm": 0.9306720357373683, "learning_rate": 2.18449966232956e-06, "loss": 0.487, "step": 9745 }, { "epoch": 0.79, "grad_norm": 0.9644690013564537, "learning_rate": 2.18285768477255e-06, "loss": 0.4636, "step": 9746 }, { "epoch": 0.79, "grad_norm": 0.9276625157351951, "learning_rate": 2.1812162489365686e-06, "loss": 0.5162, "step": 9747 }, { "epoch": 0.79, "grad_norm": 0.9515009929127202, "learning_rate": 2.179575354935366e-06, "loss": 0.5239, "step": 9748 }, { "epoch": 0.79, "grad_norm": 0.9622644087270956, "learning_rate": 2.1779350028826584e-06, "loss": 0.5131, "step": 9749 }, { "epoch": 0.79, "grad_norm": 0.9126946605383349, "learning_rate": 2.1762951928921105e-06, "loss": 0.4957, "step": 9750 }, { "epoch": 0.79, "grad_norm": 1.0443250368096582, "learning_rate": 2.174655925077371e-06, "loss": 0.5188, "step": 9751 }, { "epoch": 0.79, "grad_norm": 0.8529784079548666, "learning_rate": 2.1730171995520334e-06, "loss": 0.452, "step": 9752 }, { "epoch": 0.79, "grad_norm": 0.9125903642765387, "learning_rate": 2.171379016429661e-06, "loss": 0.4801, "step": 9753 }, { "epoch": 0.79, "grad_norm": 1.2263550367912526, "learning_rate": 2.1697413758237785e-06, "loss": 0.5218, "step": 9754 }, { "epoch": 0.79, "grad_norm": 0.8015976310048045, "learning_rate": 2.1681042778478755e-06, "loss": 0.4424, "step": 9755 }, { "epoch": 0.79, "grad_norm": 1.0733405439258612, "learning_rate": 2.166467722615394e-06, "loss": 0.5043, "step": 9756 }, { "epoch": 0.79, "grad_norm": 0.9307547619272879, "learning_rate": 2.1648317102397565e-06, "loss": 0.4392, "step": 9757 }, { "epoch": 0.79, "grad_norm": 0.9033183363968029, "learning_rate": 2.1631962408343264e-06, "loss": 0.5032, "step": 9758 }, { "epoch": 0.79, "grad_norm": 1.0066450643953964, "learning_rate": 2.1615613145124514e-06, "loss": 0.5493, "step": 9759 }, { "epoch": 0.79, "grad_norm": 0.9444646156175104, "learning_rate": 2.1599269313874217e-06, "loss": 0.488, "step": 9760 }, { "epoch": 0.79, "grad_norm": 1.014755699670725, "learning_rate": 2.158293091572501e-06, "loss": 0.5632, "step": 9761 }, { "epoch": 0.79, "grad_norm": 1.022504963163021, "learning_rate": 2.156659795180913e-06, "loss": 0.5, "step": 9762 }, { "epoch": 0.79, "grad_norm": 0.8841065130850687, "learning_rate": 2.155027042325848e-06, "loss": 0.5152, "step": 9763 }, { "epoch": 0.79, "grad_norm": 0.8805351611241143, "learning_rate": 2.1533948331204445e-06, "loss": 0.4458, "step": 9764 }, { "epoch": 0.79, "grad_norm": 1.1318116744956215, "learning_rate": 2.151763167677825e-06, "loss": 0.4929, "step": 9765 }, { "epoch": 0.79, "grad_norm": 0.9358085439942705, "learning_rate": 2.150132046111054e-06, "loss": 0.5011, "step": 9766 }, { "epoch": 0.79, "grad_norm": 0.9357248606524596, "learning_rate": 2.1485014685331684e-06, "loss": 0.5508, "step": 9767 }, { "epoch": 0.79, "grad_norm": 0.9589413280149912, "learning_rate": 2.1468714350571683e-06, "loss": 0.46, "step": 9768 }, { "epoch": 0.79, "grad_norm": 0.9224443985715425, "learning_rate": 2.145241945796014e-06, "loss": 0.4801, "step": 9769 }, { "epoch": 0.79, "grad_norm": 0.8847139248412932, "learning_rate": 2.14361300086262e-06, "loss": 0.5088, "step": 9770 }, { "epoch": 0.79, "grad_norm": 0.962732416595997, "learning_rate": 2.141984600369882e-06, "loss": 0.4688, "step": 9771 }, { "epoch": 0.79, "grad_norm": 0.8677824980011563, "learning_rate": 2.1403567444306384e-06, "loss": 0.4751, "step": 9772 }, { "epoch": 0.79, "grad_norm": 0.9071070373566746, "learning_rate": 2.1387294331577e-06, "loss": 0.4645, "step": 9773 }, { "epoch": 0.79, "grad_norm": 0.9406593469392999, "learning_rate": 2.1371026666638404e-06, "loss": 0.4911, "step": 9774 }, { "epoch": 0.79, "grad_norm": 0.9851660325459202, "learning_rate": 2.1354764450617937e-06, "loss": 0.4579, "step": 9775 }, { "epoch": 0.79, "grad_norm": 0.9575769497626949, "learning_rate": 2.1338507684642483e-06, "loss": 0.4854, "step": 9776 }, { "epoch": 0.79, "grad_norm": 0.9043458981275626, "learning_rate": 2.1322256369838723e-06, "loss": 0.4312, "step": 9777 }, { "epoch": 0.79, "grad_norm": 0.9136899357618365, "learning_rate": 2.1306010507332787e-06, "loss": 0.4731, "step": 9778 }, { "epoch": 0.79, "grad_norm": 0.9489654635266751, "learning_rate": 2.128977009825052e-06, "loss": 0.4946, "step": 9779 }, { "epoch": 0.79, "grad_norm": 0.886178646999231, "learning_rate": 2.1273535143717372e-06, "loss": 0.4779, "step": 9780 }, { "epoch": 0.79, "grad_norm": 0.9274155006249445, "learning_rate": 2.125730564485844e-06, "loss": 0.4499, "step": 9781 }, { "epoch": 0.8, "grad_norm": 1.0206326818075697, "learning_rate": 2.124108160279832e-06, "loss": 0.5138, "step": 9782 }, { "epoch": 0.8, "grad_norm": 0.9908500142071262, "learning_rate": 2.1224863018661435e-06, "loss": 0.5542, "step": 9783 }, { "epoch": 0.8, "grad_norm": 0.9421564240045448, "learning_rate": 2.1208649893571653e-06, "loss": 0.472, "step": 9784 }, { "epoch": 0.8, "grad_norm": 0.9529297223093769, "learning_rate": 2.119244222865253e-06, "loss": 0.5093, "step": 9785 }, { "epoch": 0.8, "grad_norm": 0.983152381687885, "learning_rate": 2.117624002502727e-06, "loss": 0.5081, "step": 9786 }, { "epoch": 0.8, "grad_norm": 0.9269249520941745, "learning_rate": 2.1160043283818697e-06, "loss": 0.4889, "step": 9787 }, { "epoch": 0.8, "grad_norm": 0.9311810373511374, "learning_rate": 2.114385200614912e-06, "loss": 0.4725, "step": 9788 }, { "epoch": 0.8, "grad_norm": 0.9330190902902091, "learning_rate": 2.112766619314072e-06, "loss": 0.5116, "step": 9789 }, { "epoch": 0.8, "grad_norm": 0.9001553215481263, "learning_rate": 2.111148584591506e-06, "loss": 0.5007, "step": 9790 }, { "epoch": 0.8, "grad_norm": 1.0008529581665582, "learning_rate": 2.1095310965593463e-06, "loss": 0.5086, "step": 9791 }, { "epoch": 0.8, "grad_norm": 0.969189216671511, "learning_rate": 2.107914155329682e-06, "loss": 0.542, "step": 9792 }, { "epoch": 0.8, "grad_norm": 0.9746252765484869, "learning_rate": 2.1062977610145697e-06, "loss": 0.4838, "step": 9793 }, { "epoch": 0.8, "grad_norm": 0.9223795804782231, "learning_rate": 2.1046819137260155e-06, "loss": 0.4721, "step": 9794 }, { "epoch": 0.8, "grad_norm": 0.9041889187855691, "learning_rate": 2.103066613576007e-06, "loss": 0.476, "step": 9795 }, { "epoch": 0.8, "grad_norm": 0.9913638606757014, "learning_rate": 2.1014518606764744e-06, "loss": 0.4791, "step": 9796 }, { "epoch": 0.8, "grad_norm": 0.97369286260393, "learning_rate": 2.0998376551393218e-06, "loss": 0.5438, "step": 9797 }, { "epoch": 0.8, "grad_norm": 0.8437686252604286, "learning_rate": 2.0982239970764127e-06, "loss": 0.3833, "step": 9798 }, { "epoch": 0.8, "grad_norm": 0.8703143734179483, "learning_rate": 2.096610886599575e-06, "loss": 0.4263, "step": 9799 }, { "epoch": 0.8, "grad_norm": 1.035672950025825, "learning_rate": 2.0949983238205863e-06, "loss": 0.542, "step": 9800 }, { "epoch": 0.8, "grad_norm": 0.9362809465383324, "learning_rate": 2.0933863088512076e-06, "loss": 0.4959, "step": 9801 }, { "epoch": 0.8, "grad_norm": 0.9017408168518072, "learning_rate": 2.0917748418031415e-06, "loss": 0.4708, "step": 9802 }, { "epoch": 0.8, "grad_norm": 1.0087137373104949, "learning_rate": 2.0901639227880643e-06, "loss": 0.4836, "step": 9803 }, { "epoch": 0.8, "grad_norm": 0.8750531971104142, "learning_rate": 2.0885535519176115e-06, "loss": 0.4628, "step": 9804 }, { "epoch": 0.8, "grad_norm": 0.9406350767966913, "learning_rate": 2.0869437293033835e-06, "loss": 0.4725, "step": 9805 }, { "epoch": 0.8, "grad_norm": 0.9721435535796548, "learning_rate": 2.08533445505693e-06, "loss": 0.4899, "step": 9806 }, { "epoch": 0.8, "grad_norm": 0.9351073095710531, "learning_rate": 2.083725729289784e-06, "loss": 0.462, "step": 9807 }, { "epoch": 0.8, "grad_norm": 0.9166700362208122, "learning_rate": 2.0821175521134208e-06, "loss": 0.4798, "step": 9808 }, { "epoch": 0.8, "grad_norm": 0.9199003461402707, "learning_rate": 2.080509923639288e-06, "loss": 0.4413, "step": 9809 }, { "epoch": 0.8, "grad_norm": 0.9284407199731455, "learning_rate": 2.078902843978792e-06, "loss": 0.4966, "step": 9810 }, { "epoch": 0.8, "grad_norm": 0.882987750650245, "learning_rate": 2.0772963132433065e-06, "loss": 0.4945, "step": 9811 }, { "epoch": 0.8, "grad_norm": 0.9658350421424726, "learning_rate": 2.0756903315441535e-06, "loss": 0.5078, "step": 9812 }, { "epoch": 0.8, "grad_norm": 0.9092385679023566, "learning_rate": 2.0740848989926365e-06, "loss": 0.4854, "step": 9813 }, { "epoch": 0.8, "grad_norm": 1.0058460963863638, "learning_rate": 2.0724800157000034e-06, "loss": 0.5533, "step": 9814 }, { "epoch": 0.8, "grad_norm": 0.8937420971510914, "learning_rate": 2.0708756817774743e-06, "loss": 0.401, "step": 9815 }, { "epoch": 0.8, "grad_norm": 1.0443858846384912, "learning_rate": 2.069271897336227e-06, "loss": 0.5096, "step": 9816 }, { "epoch": 0.8, "grad_norm": 1.0056854570600682, "learning_rate": 2.0676686624874054e-06, "loss": 0.5215, "step": 9817 }, { "epoch": 0.8, "grad_norm": 0.9695529148626671, "learning_rate": 2.066065977342103e-06, "loss": 0.5354, "step": 9818 }, { "epoch": 0.8, "grad_norm": 0.8368337032103081, "learning_rate": 2.064463842011397e-06, "loss": 0.4196, "step": 9819 }, { "epoch": 0.8, "grad_norm": 0.8703363610587038, "learning_rate": 2.0628622566063063e-06, "loss": 0.481, "step": 9820 }, { "epoch": 0.8, "grad_norm": 0.8656780500628107, "learning_rate": 2.06126122123782e-06, "loss": 0.431, "step": 9821 }, { "epoch": 0.8, "grad_norm": 0.89463213892815, "learning_rate": 2.0596607360168897e-06, "loss": 0.5256, "step": 9822 }, { "epoch": 0.8, "grad_norm": 0.8925044512010746, "learning_rate": 2.058060801054429e-06, "loss": 0.4504, "step": 9823 }, { "epoch": 0.8, "grad_norm": 0.8702517722379945, "learning_rate": 2.0564614164613064e-06, "loss": 0.4574, "step": 9824 }, { "epoch": 0.8, "grad_norm": 0.9533218505721511, "learning_rate": 2.054862582348366e-06, "loss": 0.5221, "step": 9825 }, { "epoch": 0.8, "grad_norm": 0.9865782669574705, "learning_rate": 2.0532642988263994e-06, "loss": 0.5369, "step": 9826 }, { "epoch": 0.8, "grad_norm": 1.086952839581523, "learning_rate": 2.0516665660061675e-06, "loss": 0.5524, "step": 9827 }, { "epoch": 0.8, "grad_norm": 0.9065297806646161, "learning_rate": 2.050069383998393e-06, "loss": 0.4822, "step": 9828 }, { "epoch": 0.8, "grad_norm": 0.9407575239281395, "learning_rate": 2.0484727529137616e-06, "loss": 0.4421, "step": 9829 }, { "epoch": 0.8, "grad_norm": 1.0282959759015617, "learning_rate": 2.0468766728629084e-06, "loss": 0.5221, "step": 9830 }, { "epoch": 0.8, "grad_norm": 0.9230027591994061, "learning_rate": 2.045281143956455e-06, "loss": 0.4707, "step": 9831 }, { "epoch": 0.8, "grad_norm": 0.9706643303868061, "learning_rate": 2.0436861663049577e-06, "loss": 0.5287, "step": 9832 }, { "epoch": 0.8, "grad_norm": 0.8624615421512666, "learning_rate": 2.0420917400189532e-06, "loss": 0.4672, "step": 9833 }, { "epoch": 0.8, "grad_norm": 1.019489997515685, "learning_rate": 2.0404978652089325e-06, "loss": 0.5163, "step": 9834 }, { "epoch": 0.8, "grad_norm": 0.8595574720850923, "learning_rate": 2.0389045419853483e-06, "loss": 0.4102, "step": 9835 }, { "epoch": 0.8, "grad_norm": 1.0556470578827364, "learning_rate": 2.037311770458619e-06, "loss": 0.4998, "step": 9836 }, { "epoch": 0.8, "grad_norm": 0.9005746593140518, "learning_rate": 2.0357195507391237e-06, "loss": 0.4978, "step": 9837 }, { "epoch": 0.8, "grad_norm": 0.9848758102170952, "learning_rate": 2.034127882937197e-06, "loss": 0.5397, "step": 9838 }, { "epoch": 0.8, "grad_norm": 1.0766503341546003, "learning_rate": 2.032536767163141e-06, "loss": 0.5174, "step": 9839 }, { "epoch": 0.8, "grad_norm": 1.0545814409469874, "learning_rate": 2.0309462035272207e-06, "loss": 0.5166, "step": 9840 }, { "epoch": 0.8, "grad_norm": 0.9315275679597564, "learning_rate": 2.02935619213966e-06, "loss": 0.4328, "step": 9841 }, { "epoch": 0.8, "grad_norm": 0.9159068671624757, "learning_rate": 2.0277667331106456e-06, "loss": 0.4934, "step": 9842 }, { "epoch": 0.8, "grad_norm": 0.9956840495068574, "learning_rate": 2.026177826550326e-06, "loss": 0.5241, "step": 9843 }, { "epoch": 0.8, "grad_norm": 0.9763647677692302, "learning_rate": 2.0245894725688097e-06, "loss": 0.509, "step": 9844 }, { "epoch": 0.8, "grad_norm": 0.8663904354062106, "learning_rate": 2.023001671276168e-06, "loss": 0.476, "step": 9845 }, { "epoch": 0.8, "grad_norm": 0.9302822464015351, "learning_rate": 2.021414422782435e-06, "loss": 0.526, "step": 9846 }, { "epoch": 0.8, "grad_norm": 0.9151958944350562, "learning_rate": 2.019827727197605e-06, "loss": 0.4823, "step": 9847 }, { "epoch": 0.8, "grad_norm": 0.871270534613334, "learning_rate": 2.018241584631636e-06, "loss": 0.471, "step": 9848 }, { "epoch": 0.8, "grad_norm": 0.9485613903404995, "learning_rate": 2.0166559951944477e-06, "loss": 0.5482, "step": 9849 }, { "epoch": 0.8, "grad_norm": 1.0218535933745767, "learning_rate": 2.015070958995915e-06, "loss": 0.5702, "step": 9850 }, { "epoch": 0.8, "grad_norm": 0.9870914624477832, "learning_rate": 2.0134864761458815e-06, "loss": 0.5596, "step": 9851 }, { "epoch": 0.8, "grad_norm": 0.9066468027478812, "learning_rate": 2.011902546754152e-06, "loss": 0.4668, "step": 9852 }, { "epoch": 0.8, "grad_norm": 0.929133236447092, "learning_rate": 2.01031917093049e-06, "loss": 0.5044, "step": 9853 }, { "epoch": 0.8, "grad_norm": 0.9296472004568087, "learning_rate": 2.0087363487846236e-06, "loss": 0.464, "step": 9854 }, { "epoch": 0.8, "grad_norm": 0.8409541167939985, "learning_rate": 2.007154080426239e-06, "loss": 0.4338, "step": 9855 }, { "epoch": 0.8, "grad_norm": 0.8973893618611448, "learning_rate": 2.0055723659649907e-06, "loss": 0.4885, "step": 9856 }, { "epoch": 0.8, "grad_norm": 0.8864057016507616, "learning_rate": 2.0039912055104826e-06, "loss": 0.412, "step": 9857 }, { "epoch": 0.8, "grad_norm": 0.9103346867478691, "learning_rate": 2.002410599172292e-06, "loss": 0.4636, "step": 9858 }, { "epoch": 0.8, "grad_norm": 0.9040653111981451, "learning_rate": 2.0008305470599533e-06, "loss": 0.3976, "step": 9859 }, { "epoch": 0.8, "grad_norm": 0.9571850512409811, "learning_rate": 1.999251049282962e-06, "loss": 0.4924, "step": 9860 }, { "epoch": 0.8, "grad_norm": 0.9831040082324778, "learning_rate": 1.9976721059507766e-06, "loss": 0.5063, "step": 9861 }, { "epoch": 0.8, "grad_norm": 0.9674607752432008, "learning_rate": 1.996093717172819e-06, "loss": 0.4871, "step": 9862 }, { "epoch": 0.8, "grad_norm": 0.8735604631321405, "learning_rate": 1.994515883058464e-06, "loss": 0.443, "step": 9863 }, { "epoch": 0.8, "grad_norm": 0.8727484466187492, "learning_rate": 1.9929386037170574e-06, "loss": 0.4173, "step": 9864 }, { "epoch": 0.8, "grad_norm": 0.893569677723104, "learning_rate": 1.9913618792579037e-06, "loss": 0.4256, "step": 9865 }, { "epoch": 0.8, "grad_norm": 0.79604340010775, "learning_rate": 1.9897857097902683e-06, "loss": 0.4173, "step": 9866 }, { "epoch": 0.8, "grad_norm": 0.9552205687203998, "learning_rate": 1.9882100954233786e-06, "loss": 0.449, "step": 9867 }, { "epoch": 0.8, "grad_norm": 0.9992518118991707, "learning_rate": 1.9866350362664243e-06, "loss": 0.4373, "step": 9868 }, { "epoch": 0.8, "grad_norm": 0.9556895391027411, "learning_rate": 1.98506053242855e-06, "loss": 0.529, "step": 9869 }, { "epoch": 0.8, "grad_norm": 0.9899771953409056, "learning_rate": 1.9834865840188767e-06, "loss": 0.4856, "step": 9870 }, { "epoch": 0.8, "grad_norm": 0.9073343077121624, "learning_rate": 1.9819131911464682e-06, "loss": 0.4587, "step": 9871 }, { "epoch": 0.8, "grad_norm": 0.8637435483735759, "learning_rate": 1.9803403539203657e-06, "loss": 0.4847, "step": 9872 }, { "epoch": 0.8, "grad_norm": 0.8414267955167837, "learning_rate": 1.9787680724495617e-06, "loss": 0.4817, "step": 9873 }, { "epoch": 0.8, "grad_norm": 0.8750397831401274, "learning_rate": 1.977196346843019e-06, "loss": 0.4716, "step": 9874 }, { "epoch": 0.8, "grad_norm": 0.986056259393715, "learning_rate": 1.975625177209648e-06, "loss": 0.5231, "step": 9875 }, { "epoch": 0.8, "grad_norm": 0.9361734808532571, "learning_rate": 1.9740545636583397e-06, "loss": 0.5589, "step": 9876 }, { "epoch": 0.8, "grad_norm": 0.8785889300977264, "learning_rate": 1.9724845062979283e-06, "loss": 0.4577, "step": 9877 }, { "epoch": 0.8, "grad_norm": 0.9376677737805472, "learning_rate": 1.9709150052372206e-06, "loss": 0.4759, "step": 9878 }, { "epoch": 0.8, "grad_norm": 0.9065893288202759, "learning_rate": 1.96934606058498e-06, "loss": 0.5017, "step": 9879 }, { "epoch": 0.8, "grad_norm": 0.9532402399142695, "learning_rate": 1.9677776724499354e-06, "loss": 0.4666, "step": 9880 }, { "epoch": 0.8, "grad_norm": 0.956789992815111, "learning_rate": 1.9662098409407737e-06, "loss": 0.489, "step": 9881 }, { "epoch": 0.8, "grad_norm": 0.9697198209953076, "learning_rate": 1.964642566166146e-06, "loss": 0.4756, "step": 9882 }, { "epoch": 0.8, "grad_norm": 0.9394385645475665, "learning_rate": 1.963075848234659e-06, "loss": 0.4685, "step": 9883 }, { "epoch": 0.8, "grad_norm": 0.8343886173938521, "learning_rate": 1.9615096872548865e-06, "loss": 0.3947, "step": 9884 }, { "epoch": 0.8, "grad_norm": 0.8882866642715643, "learning_rate": 1.9599440833353624e-06, "loss": 0.468, "step": 9885 }, { "epoch": 0.8, "grad_norm": 0.9117902743086408, "learning_rate": 1.9583790365845823e-06, "loss": 0.5095, "step": 9886 }, { "epoch": 0.8, "grad_norm": 0.919139555747959, "learning_rate": 1.9568145471110024e-06, "loss": 0.4837, "step": 9887 }, { "epoch": 0.8, "grad_norm": 0.9313516499344854, "learning_rate": 1.955250615023042e-06, "loss": 0.4367, "step": 9888 }, { "epoch": 0.8, "grad_norm": 0.9586301700560018, "learning_rate": 1.953687240429073e-06, "loss": 0.5063, "step": 9889 }, { "epoch": 0.8, "grad_norm": 0.9736966037058312, "learning_rate": 1.952124423437447e-06, "loss": 0.5151, "step": 9890 }, { "epoch": 0.8, "grad_norm": 0.8183733956732533, "learning_rate": 1.9505621641564567e-06, "loss": 0.4253, "step": 9891 }, { "epoch": 0.8, "grad_norm": 0.9848703923333934, "learning_rate": 1.9490004626943693e-06, "loss": 0.5306, "step": 9892 }, { "epoch": 0.8, "grad_norm": 0.9515937598579888, "learning_rate": 1.9474393191594076e-06, "loss": 0.5488, "step": 9893 }, { "epoch": 0.8, "grad_norm": 0.9331171965430829, "learning_rate": 1.9458787336597617e-06, "loss": 0.5142, "step": 9894 }, { "epoch": 0.8, "grad_norm": 0.9951701520003015, "learning_rate": 1.9443187063035707e-06, "loss": 0.4915, "step": 9895 }, { "epoch": 0.8, "grad_norm": 1.0027301745746888, "learning_rate": 1.9427592371989533e-06, "loss": 0.4976, "step": 9896 }, { "epoch": 0.8, "grad_norm": 1.0164921417024477, "learning_rate": 1.94120032645397e-06, "loss": 0.4775, "step": 9897 }, { "epoch": 0.8, "grad_norm": 0.9411467082573, "learning_rate": 1.939641974176658e-06, "loss": 0.4446, "step": 9898 }, { "epoch": 0.8, "grad_norm": 0.9271568784067858, "learning_rate": 1.9380841804750063e-06, "loss": 0.4688, "step": 9899 }, { "epoch": 0.8, "grad_norm": 0.9318796970360436, "learning_rate": 1.9365269454569724e-06, "loss": 0.4144, "step": 9900 }, { "epoch": 0.8, "grad_norm": 0.9201807476325126, "learning_rate": 1.934970269230464e-06, "loss": 0.4805, "step": 9901 }, { "epoch": 0.8, "grad_norm": 0.9016938331848386, "learning_rate": 1.9334141519033676e-06, "loss": 0.4726, "step": 9902 }, { "epoch": 0.8, "grad_norm": 0.944460903658482, "learning_rate": 1.931858593583513e-06, "loss": 0.4517, "step": 9903 }, { "epoch": 0.8, "grad_norm": 0.9583119094345428, "learning_rate": 1.9303035943787017e-06, "loss": 0.4548, "step": 9904 }, { "epoch": 0.81, "grad_norm": 0.9361054748728511, "learning_rate": 1.928749154396693e-06, "loss": 0.4618, "step": 9905 }, { "epoch": 0.81, "grad_norm": 0.9730274138362711, "learning_rate": 1.9271952737452116e-06, "loss": 0.5136, "step": 9906 }, { "epoch": 0.81, "grad_norm": 0.9661630484610683, "learning_rate": 1.9256419525319316e-06, "loss": 0.5029, "step": 9907 }, { "epoch": 0.81, "grad_norm": 1.0001036297857349, "learning_rate": 1.9240891908645075e-06, "loss": 0.4867, "step": 9908 }, { "epoch": 0.81, "grad_norm": 0.928526629453305, "learning_rate": 1.9225369888505364e-06, "loss": 0.4901, "step": 9909 }, { "epoch": 0.81, "grad_norm": 0.9721509399919, "learning_rate": 1.920985346597588e-06, "loss": 0.469, "step": 9910 }, { "epoch": 0.81, "grad_norm": 0.9657540152337845, "learning_rate": 1.919434264213188e-06, "loss": 0.4857, "step": 9911 }, { "epoch": 0.81, "grad_norm": 0.8993769666472281, "learning_rate": 1.917883741804829e-06, "loss": 0.4626, "step": 9912 }, { "epoch": 0.81, "grad_norm": 0.9028837295832993, "learning_rate": 1.916333779479953e-06, "loss": 0.4623, "step": 9913 }, { "epoch": 0.81, "grad_norm": 0.9350439033146903, "learning_rate": 1.914784377345982e-06, "loss": 0.4892, "step": 9914 }, { "epoch": 0.81, "grad_norm": 0.9658679451666188, "learning_rate": 1.9132355355102772e-06, "loss": 0.5008, "step": 9915 }, { "epoch": 0.81, "grad_norm": 1.0958571373836872, "learning_rate": 1.911687254080179e-06, "loss": 0.5447, "step": 9916 }, { "epoch": 0.81, "grad_norm": 0.9567367250311967, "learning_rate": 1.910139533162978e-06, "loss": 0.5503, "step": 9917 }, { "epoch": 0.81, "grad_norm": 0.9283114470381654, "learning_rate": 1.908592372865935e-06, "loss": 0.4855, "step": 9918 }, { "epoch": 0.81, "grad_norm": 0.902042369443858, "learning_rate": 1.907045773296259e-06, "loss": 0.4982, "step": 9919 }, { "epoch": 0.81, "grad_norm": 0.904011334270541, "learning_rate": 1.905499734561137e-06, "loss": 0.4867, "step": 9920 }, { "epoch": 0.81, "grad_norm": 0.8789908542582429, "learning_rate": 1.9039542567677005e-06, "loss": 0.4767, "step": 9921 }, { "epoch": 0.81, "grad_norm": 0.9020209635226962, "learning_rate": 1.9024093400230537e-06, "loss": 0.4793, "step": 9922 }, { "epoch": 0.81, "grad_norm": 0.9024267922140833, "learning_rate": 1.9008649844342563e-06, "loss": 0.4655, "step": 9923 }, { "epoch": 0.81, "grad_norm": 0.8526771021123333, "learning_rate": 1.8993211901083353e-06, "loss": 0.4777, "step": 9924 }, { "epoch": 0.81, "grad_norm": 0.9247623057643407, "learning_rate": 1.8977779571522648e-06, "loss": 0.4666, "step": 9925 }, { "epoch": 0.81, "grad_norm": 0.904002456384038, "learning_rate": 1.8962352856729994e-06, "loss": 0.4571, "step": 9926 }, { "epoch": 0.81, "grad_norm": 0.9967168916175249, "learning_rate": 1.894693175777439e-06, "loss": 0.4864, "step": 9927 }, { "epoch": 0.81, "grad_norm": 0.9685133596114146, "learning_rate": 1.8931516275724527e-06, "loss": 0.4826, "step": 9928 }, { "epoch": 0.81, "grad_norm": 0.9063216564545956, "learning_rate": 1.8916106411648671e-06, "loss": 0.4927, "step": 9929 }, { "epoch": 0.81, "grad_norm": 0.9582577486773548, "learning_rate": 1.8900702166614748e-06, "loss": 0.5338, "step": 9930 }, { "epoch": 0.81, "grad_norm": 0.9567418434865032, "learning_rate": 1.888530354169017e-06, "loss": 0.5355, "step": 9931 }, { "epoch": 0.81, "grad_norm": 1.003573912510184, "learning_rate": 1.886991053794217e-06, "loss": 0.5383, "step": 9932 }, { "epoch": 0.81, "grad_norm": 0.9694975334450654, "learning_rate": 1.8854523156437378e-06, "loss": 0.484, "step": 9933 }, { "epoch": 0.81, "grad_norm": 1.098245385864126, "learning_rate": 1.8839141398242145e-06, "loss": 0.488, "step": 9934 }, { "epoch": 0.81, "grad_norm": 0.8761128268936527, "learning_rate": 1.8823765264422433e-06, "loss": 0.4723, "step": 9935 }, { "epoch": 0.81, "grad_norm": 0.9493800786184847, "learning_rate": 1.8808394756043813e-06, "loss": 0.4919, "step": 9936 }, { "epoch": 0.81, "grad_norm": 1.3088263147613775, "learning_rate": 1.8793029874171365e-06, "loss": 0.5155, "step": 9937 }, { "epoch": 0.81, "grad_norm": 0.9159160082187201, "learning_rate": 1.877767061986997e-06, "loss": 0.4776, "step": 9938 }, { "epoch": 0.81, "grad_norm": 0.9965361309784785, "learning_rate": 1.8762316994203933e-06, "loss": 0.5508, "step": 9939 }, { "epoch": 0.81, "grad_norm": 0.9365775609977088, "learning_rate": 1.874696899823727e-06, "loss": 0.4507, "step": 9940 }, { "epoch": 0.81, "grad_norm": 0.9352189676628262, "learning_rate": 1.8731626633033573e-06, "loss": 0.4177, "step": 9941 }, { "epoch": 0.81, "grad_norm": 1.001110662973336, "learning_rate": 1.8716289899656104e-06, "loss": 0.4951, "step": 9942 }, { "epoch": 0.81, "grad_norm": 0.9350494135836109, "learning_rate": 1.870095879916759e-06, "loss": 0.4889, "step": 9943 }, { "epoch": 0.81, "grad_norm": 0.897962644434671, "learning_rate": 1.868563333263057e-06, "loss": 0.4506, "step": 9944 }, { "epoch": 0.81, "grad_norm": 0.9040067942876323, "learning_rate": 1.8670313501107007e-06, "loss": 0.4252, "step": 9945 }, { "epoch": 0.81, "grad_norm": 0.9556567090230454, "learning_rate": 1.8654999305658584e-06, "loss": 0.4749, "step": 9946 }, { "epoch": 0.81, "grad_norm": 1.0508522995130318, "learning_rate": 1.863969074734655e-06, "loss": 0.5285, "step": 9947 }, { "epoch": 0.81, "grad_norm": 0.9940303655091299, "learning_rate": 1.8624387827231815e-06, "loss": 0.4676, "step": 9948 }, { "epoch": 0.81, "grad_norm": 1.0131111844858838, "learning_rate": 1.8609090546374764e-06, "loss": 0.4565, "step": 9949 }, { "epoch": 0.81, "grad_norm": 1.0139059658204395, "learning_rate": 1.8593798905835602e-06, "loss": 0.5247, "step": 9950 }, { "epoch": 0.81, "grad_norm": 1.0027110042791425, "learning_rate": 1.857851290667394e-06, "loss": 0.5166, "step": 9951 }, { "epoch": 0.81, "grad_norm": 0.9092026195676937, "learning_rate": 1.8563232549949107e-06, "loss": 0.4741, "step": 9952 }, { "epoch": 0.81, "grad_norm": 0.9812192496856625, "learning_rate": 1.8547957836720032e-06, "loss": 0.5114, "step": 9953 }, { "epoch": 0.81, "grad_norm": 0.9298750878339397, "learning_rate": 1.853268876804526e-06, "loss": 0.4892, "step": 9954 }, { "epoch": 0.81, "grad_norm": 0.9908936671001775, "learning_rate": 1.8517425344982831e-06, "loss": 0.4935, "step": 9955 }, { "epoch": 0.81, "grad_norm": 0.8566091038180487, "learning_rate": 1.8502167568590611e-06, "loss": 0.4603, "step": 9956 }, { "epoch": 0.81, "grad_norm": 0.857918949746223, "learning_rate": 1.8486915439925857e-06, "loss": 0.4569, "step": 9957 }, { "epoch": 0.81, "grad_norm": 0.9498102863681523, "learning_rate": 1.8471668960045575e-06, "loss": 0.4881, "step": 9958 }, { "epoch": 0.81, "grad_norm": 1.020434986705423, "learning_rate": 1.845642813000631e-06, "loss": 0.5073, "step": 9959 }, { "epoch": 0.81, "grad_norm": 0.8760589828007601, "learning_rate": 1.8441192950864273e-06, "loss": 0.4469, "step": 9960 }, { "epoch": 0.81, "grad_norm": 0.9536048276057314, "learning_rate": 1.8425963423675164e-06, "loss": 0.4702, "step": 9961 }, { "epoch": 0.81, "grad_norm": 0.9686770758998478, "learning_rate": 1.8410739549494494e-06, "loss": 0.5042, "step": 9962 }, { "epoch": 0.81, "grad_norm": 0.9767156015980288, "learning_rate": 1.8395521329377175e-06, "loss": 0.5231, "step": 9963 }, { "epoch": 0.81, "grad_norm": 0.8311337343768164, "learning_rate": 1.8380308764377841e-06, "loss": 0.4839, "step": 9964 }, { "epoch": 0.81, "grad_norm": 0.8986677240710658, "learning_rate": 1.8365101855550716e-06, "loss": 0.564, "step": 9965 }, { "epoch": 0.81, "grad_norm": 1.0756588962698206, "learning_rate": 1.8349900603949644e-06, "loss": 0.4841, "step": 9966 }, { "epoch": 0.81, "grad_norm": 0.8578985450159758, "learning_rate": 1.8334705010627996e-06, "loss": 0.4791, "step": 9967 }, { "epoch": 0.81, "grad_norm": 0.9483676107078279, "learning_rate": 1.8319515076638893e-06, "loss": 0.4844, "step": 9968 }, { "epoch": 0.81, "grad_norm": 0.8803822299638585, "learning_rate": 1.8304330803034932e-06, "loss": 0.4359, "step": 9969 }, { "epoch": 0.81, "grad_norm": 0.9990550895909772, "learning_rate": 1.8289152190868376e-06, "loss": 0.4617, "step": 9970 }, { "epoch": 0.81, "grad_norm": 1.0814653391866287, "learning_rate": 1.8273979241191087e-06, "loss": 0.5006, "step": 9971 }, { "epoch": 0.81, "grad_norm": 0.8372147293790334, "learning_rate": 1.8258811955054578e-06, "loss": 0.4535, "step": 9972 }, { "epoch": 0.81, "grad_norm": 0.90090598405615, "learning_rate": 1.8243650333509854e-06, "loss": 0.4859, "step": 9973 }, { "epoch": 0.81, "grad_norm": 0.8238375354825378, "learning_rate": 1.8228494377607686e-06, "loss": 0.3835, "step": 9974 }, { "epoch": 0.81, "grad_norm": 0.9275498090759045, "learning_rate": 1.82133440883983e-06, "loss": 0.4434, "step": 9975 }, { "epoch": 0.81, "grad_norm": 0.9679147524700789, "learning_rate": 1.819819946693162e-06, "loss": 0.4931, "step": 9976 }, { "epoch": 0.81, "grad_norm": 0.9938967763690725, "learning_rate": 1.8183060514257167e-06, "loss": 0.506, "step": 9977 }, { "epoch": 0.81, "grad_norm": 1.0073247438748036, "learning_rate": 1.8167927231424077e-06, "loss": 0.492, "step": 9978 }, { "epoch": 0.81, "grad_norm": 0.9441319716629862, "learning_rate": 1.8152799619480986e-06, "loss": 0.4797, "step": 9979 }, { "epoch": 0.81, "grad_norm": 0.8386503280748313, "learning_rate": 1.813767767947634e-06, "loss": 0.446, "step": 9980 }, { "epoch": 0.81, "grad_norm": 0.9339502064681194, "learning_rate": 1.8122561412457984e-06, "loss": 0.4854, "step": 9981 }, { "epoch": 0.81, "grad_norm": 0.9193184258857796, "learning_rate": 1.8107450819473505e-06, "loss": 0.4928, "step": 9982 }, { "epoch": 0.81, "grad_norm": 0.8856714395591396, "learning_rate": 1.8092345901570053e-06, "loss": 0.4528, "step": 9983 }, { "epoch": 0.81, "grad_norm": 0.9024058082729695, "learning_rate": 1.8077246659794368e-06, "loss": 0.4922, "step": 9984 }, { "epoch": 0.81, "grad_norm": 0.9992991290855968, "learning_rate": 1.8062153095192826e-06, "loss": 0.4805, "step": 9985 }, { "epoch": 0.81, "grad_norm": 0.9208122168752765, "learning_rate": 1.8047065208811421e-06, "loss": 0.5148, "step": 9986 }, { "epoch": 0.81, "grad_norm": 0.8837422771578278, "learning_rate": 1.8031983001695674e-06, "loss": 0.4828, "step": 9987 }, { "epoch": 0.81, "grad_norm": 0.9831378843029416, "learning_rate": 1.8016906474890805e-06, "loss": 0.5306, "step": 9988 }, { "epoch": 0.81, "grad_norm": 0.9996761669814016, "learning_rate": 1.80018356294416e-06, "loss": 0.4487, "step": 9989 }, { "epoch": 0.81, "grad_norm": 0.9537374004354636, "learning_rate": 1.7986770466392445e-06, "loss": 0.5528, "step": 9990 }, { "epoch": 0.81, "grad_norm": 0.9596722342287717, "learning_rate": 1.797171098678736e-06, "loss": 0.486, "step": 9991 }, { "epoch": 0.81, "grad_norm": 1.0236408628013114, "learning_rate": 1.7956657191669969e-06, "loss": 0.5186, "step": 9992 }, { "epoch": 0.81, "grad_norm": 0.8524064003382771, "learning_rate": 1.7941609082083434e-06, "loss": 0.4421, "step": 9993 }, { "epoch": 0.81, "grad_norm": 0.8894376608304295, "learning_rate": 1.792656665907061e-06, "loss": 0.4256, "step": 9994 }, { "epoch": 0.81, "grad_norm": 0.8876867329730918, "learning_rate": 1.7911529923673908e-06, "loss": 0.4742, "step": 9995 }, { "epoch": 0.81, "grad_norm": 1.0183024170797783, "learning_rate": 1.7896498876935374e-06, "loss": 0.5163, "step": 9996 }, { "epoch": 0.81, "grad_norm": 0.9581560806374572, "learning_rate": 1.7881473519896642e-06, "loss": 0.4665, "step": 9997 }, { "epoch": 0.81, "grad_norm": 0.8977743384520495, "learning_rate": 1.7866453853598985e-06, "loss": 0.497, "step": 9998 }, { "epoch": 0.81, "grad_norm": 0.967090112841617, "learning_rate": 1.7851439879083188e-06, "loss": 0.4658, "step": 9999 }, { "epoch": 0.81, "grad_norm": 0.9733260797622603, "learning_rate": 1.7836431597389758e-06, "loss": 0.5535, "step": 10000 }, { "epoch": 0.81, "grad_norm": 0.9988413406779859, "learning_rate": 1.7821429009558723e-06, "loss": 0.5021, "step": 10001 }, { "epoch": 0.81, "grad_norm": 0.9240963254362822, "learning_rate": 1.7806432116629779e-06, "loss": 0.4725, "step": 10002 }, { "epoch": 0.81, "grad_norm": 0.9397626741846741, "learning_rate": 1.7791440919642178e-06, "loss": 0.5162, "step": 10003 }, { "epoch": 0.81, "grad_norm": 1.0066418221971942, "learning_rate": 1.7776455419634797e-06, "loss": 0.4792, "step": 10004 }, { "epoch": 0.81, "grad_norm": 1.007951156924576, "learning_rate": 1.776147561764613e-06, "loss": 0.4439, "step": 10005 }, { "epoch": 0.81, "grad_norm": 0.900720562872108, "learning_rate": 1.7746501514714277e-06, "loss": 0.4627, "step": 10006 }, { "epoch": 0.81, "grad_norm": 0.8383180580559433, "learning_rate": 1.7731533111876887e-06, "loss": 0.4472, "step": 10007 }, { "epoch": 0.81, "grad_norm": 0.922154067229698, "learning_rate": 1.7716570410171285e-06, "loss": 0.4728, "step": 10008 }, { "epoch": 0.81, "grad_norm": 0.9071711336797791, "learning_rate": 1.7701613410634367e-06, "loss": 0.4782, "step": 10009 }, { "epoch": 0.81, "grad_norm": 0.9574378588931637, "learning_rate": 1.7686662114302633e-06, "loss": 0.4943, "step": 10010 }, { "epoch": 0.81, "grad_norm": 0.974259591928751, "learning_rate": 1.7671716522212212e-06, "loss": 0.4604, "step": 10011 }, { "epoch": 0.81, "grad_norm": 0.8729156997285382, "learning_rate": 1.7656776635398832e-06, "loss": 0.4835, "step": 10012 }, { "epoch": 0.81, "grad_norm": 0.9662586084214412, "learning_rate": 1.7641842454897772e-06, "loss": 0.472, "step": 10013 }, { "epoch": 0.81, "grad_norm": 0.8912794524180124, "learning_rate": 1.7626913981743975e-06, "loss": 0.4739, "step": 10014 }, { "epoch": 0.81, "grad_norm": 0.9871749963458629, "learning_rate": 1.761199121697197e-06, "loss": 0.4227, "step": 10015 }, { "epoch": 0.81, "grad_norm": 0.9448824397611246, "learning_rate": 1.75970741616159e-06, "loss": 0.5074, "step": 10016 }, { "epoch": 0.81, "grad_norm": 0.9480742488796361, "learning_rate": 1.7582162816709503e-06, "loss": 0.451, "step": 10017 }, { "epoch": 0.81, "grad_norm": 1.090097064319148, "learning_rate": 1.7567257183286113e-06, "loss": 0.5197, "step": 10018 }, { "epoch": 0.81, "grad_norm": 0.9388593712796945, "learning_rate": 1.7552357262378705e-06, "loss": 0.4594, "step": 10019 }, { "epoch": 0.81, "grad_norm": 0.9233057993892104, "learning_rate": 1.7537463055019788e-06, "loss": 0.4664, "step": 10020 }, { "epoch": 0.81, "grad_norm": 0.9478652667485742, "learning_rate": 1.7522574562241535e-06, "loss": 0.4949, "step": 10021 }, { "epoch": 0.81, "grad_norm": 0.880962159646419, "learning_rate": 1.750769178507571e-06, "loss": 0.5246, "step": 10022 }, { "epoch": 0.81, "grad_norm": 1.0028321411856618, "learning_rate": 1.7492814724553664e-06, "loss": 0.4973, "step": 10023 }, { "epoch": 0.81, "grad_norm": 0.9466321446941334, "learning_rate": 1.7477943381706386e-06, "loss": 0.4993, "step": 10024 }, { "epoch": 0.81, "grad_norm": 0.9446176667725231, "learning_rate": 1.7463077757564452e-06, "loss": 0.4882, "step": 10025 }, { "epoch": 0.81, "grad_norm": 0.8584036715987096, "learning_rate": 1.7448217853158e-06, "loss": 0.4834, "step": 10026 }, { "epoch": 0.81, "grad_norm": 0.9167014228298731, "learning_rate": 1.7433363669516823e-06, "loss": 0.4908, "step": 10027 }, { "epoch": 0.82, "grad_norm": 0.8828091074105395, "learning_rate": 1.7418515207670306e-06, "loss": 0.4757, "step": 10028 }, { "epoch": 0.82, "grad_norm": 0.9526950744639585, "learning_rate": 1.7403672468647436e-06, "loss": 0.5107, "step": 10029 }, { "epoch": 0.82, "grad_norm": 0.949876811338634, "learning_rate": 1.7388835453476805e-06, "loss": 0.5153, "step": 10030 }, { "epoch": 0.82, "grad_norm": 0.8093822357593842, "learning_rate": 1.737400416318663e-06, "loss": 0.4581, "step": 10031 }, { "epoch": 0.82, "grad_norm": 0.9160558834764048, "learning_rate": 1.7359178598804637e-06, "loss": 0.5068, "step": 10032 }, { "epoch": 0.82, "grad_norm": 0.9326020932997082, "learning_rate": 1.7344358761358283e-06, "loss": 0.5048, "step": 10033 }, { "epoch": 0.82, "grad_norm": 0.9103459982568537, "learning_rate": 1.7329544651874542e-06, "loss": 0.511, "step": 10034 }, { "epoch": 0.82, "grad_norm": 1.0039486942702247, "learning_rate": 1.7314736271380029e-06, "loss": 0.4903, "step": 10035 }, { "epoch": 0.82, "grad_norm": 0.9825062782601638, "learning_rate": 1.7299933620900945e-06, "loss": 0.5486, "step": 10036 }, { "epoch": 0.82, "grad_norm": 0.9997925465861273, "learning_rate": 1.7285136701463134e-06, "loss": 0.4759, "step": 10037 }, { "epoch": 0.82, "grad_norm": 0.9244469095884913, "learning_rate": 1.7270345514091936e-06, "loss": 0.4788, "step": 10038 }, { "epoch": 0.82, "grad_norm": 1.033859739615693, "learning_rate": 1.725556005981246e-06, "loss": 0.5205, "step": 10039 }, { "epoch": 0.82, "grad_norm": 1.0181678912579275, "learning_rate": 1.7240780339649255e-06, "loss": 0.5217, "step": 10040 }, { "epoch": 0.82, "grad_norm": 0.9828476813834534, "learning_rate": 1.7226006354626567e-06, "loss": 0.5503, "step": 10041 }, { "epoch": 0.82, "grad_norm": 0.9345978924819633, "learning_rate": 1.7211238105768213e-06, "loss": 0.5011, "step": 10042 }, { "epoch": 0.82, "grad_norm": 0.9078361413963695, "learning_rate": 1.719647559409765e-06, "loss": 0.4669, "step": 10043 }, { "epoch": 0.82, "grad_norm": 0.9774706241629969, "learning_rate": 1.7181718820637839e-06, "loss": 0.4891, "step": 10044 }, { "epoch": 0.82, "grad_norm": 0.8467688352719807, "learning_rate": 1.7166967786411493e-06, "loss": 0.456, "step": 10045 }, { "epoch": 0.82, "grad_norm": 0.9516124394986565, "learning_rate": 1.7152222492440796e-06, "loss": 0.444, "step": 10046 }, { "epoch": 0.82, "grad_norm": 0.99074343373286, "learning_rate": 1.7137482939747574e-06, "loss": 0.5475, "step": 10047 }, { "epoch": 0.82, "grad_norm": 0.9347063175331168, "learning_rate": 1.71227491293533e-06, "loss": 0.4965, "step": 10048 }, { "epoch": 0.82, "grad_norm": 0.9351374387744484, "learning_rate": 1.7108021062279023e-06, "loss": 0.5165, "step": 10049 }, { "epoch": 0.82, "grad_norm": 0.8743820993543441, "learning_rate": 1.7093298739545305e-06, "loss": 0.4837, "step": 10050 }, { "epoch": 0.82, "grad_norm": 0.964425055640185, "learning_rate": 1.7078582162172509e-06, "loss": 0.4377, "step": 10051 }, { "epoch": 0.82, "grad_norm": 0.993934678759803, "learning_rate": 1.7063871331180382e-06, "loss": 0.4785, "step": 10052 }, { "epoch": 0.82, "grad_norm": 0.9542850256323199, "learning_rate": 1.704916624758841e-06, "loss": 0.5095, "step": 10053 }, { "epoch": 0.82, "grad_norm": 1.003853157296499, "learning_rate": 1.7034466912415638e-06, "loss": 0.4687, "step": 10054 }, { "epoch": 0.82, "grad_norm": 0.9517152267431251, "learning_rate": 1.7019773326680745e-06, "loss": 0.5356, "step": 10055 }, { "epoch": 0.82, "grad_norm": 0.895878308211716, "learning_rate": 1.7005085491401908e-06, "loss": 0.4277, "step": 10056 }, { "epoch": 0.82, "grad_norm": 0.9082358180531004, "learning_rate": 1.6990403407597078e-06, "loss": 0.4525, "step": 10057 }, { "epoch": 0.82, "grad_norm": 0.9908955989458099, "learning_rate": 1.6975727076283642e-06, "loss": 0.5181, "step": 10058 }, { "epoch": 0.82, "grad_norm": 0.8707024973610689, "learning_rate": 1.6961056498478666e-06, "loss": 0.4356, "step": 10059 }, { "epoch": 0.82, "grad_norm": 0.9765181578225404, "learning_rate": 1.6946391675198838e-06, "loss": 0.5583, "step": 10060 }, { "epoch": 0.82, "grad_norm": 0.958357904820011, "learning_rate": 1.6931732607460405e-06, "loss": 0.52, "step": 10061 }, { "epoch": 0.82, "grad_norm": 1.0838303700246354, "learning_rate": 1.6917079296279181e-06, "loss": 0.5312, "step": 10062 }, { "epoch": 0.82, "grad_norm": 0.7970114790011827, "learning_rate": 1.690243174267071e-06, "loss": 0.4548, "step": 10063 }, { "epoch": 0.82, "grad_norm": 0.9075527996205083, "learning_rate": 1.6887789947649991e-06, "loss": 0.4227, "step": 10064 }, { "epoch": 0.82, "grad_norm": 0.9414766364746932, "learning_rate": 1.687315391223171e-06, "loss": 0.4893, "step": 10065 }, { "epoch": 0.82, "grad_norm": 0.9237635388169918, "learning_rate": 1.6858523637430136e-06, "loss": 0.4995, "step": 10066 }, { "epoch": 0.82, "grad_norm": 0.9143611340266095, "learning_rate": 1.6843899124259133e-06, "loss": 0.4894, "step": 10067 }, { "epoch": 0.82, "grad_norm": 0.9646461705809476, "learning_rate": 1.6829280373732126e-06, "loss": 0.4884, "step": 10068 }, { "epoch": 0.82, "grad_norm": 0.9470992263257096, "learning_rate": 1.681466738686227e-06, "loss": 0.5024, "step": 10069 }, { "epoch": 0.82, "grad_norm": 0.9202676930536429, "learning_rate": 1.6800060164662146e-06, "loss": 0.4791, "step": 10070 }, { "epoch": 0.82, "grad_norm": 0.9064573173443682, "learning_rate": 1.6785458708144053e-06, "loss": 0.517, "step": 10071 }, { "epoch": 0.82, "grad_norm": 0.9259280196769716, "learning_rate": 1.677086301831986e-06, "loss": 0.511, "step": 10072 }, { "epoch": 0.82, "grad_norm": 0.9364034530429901, "learning_rate": 1.675627309620107e-06, "loss": 0.5445, "step": 10073 }, { "epoch": 0.82, "grad_norm": 0.8440682150423323, "learning_rate": 1.6741688942798663e-06, "loss": 0.4847, "step": 10074 }, { "epoch": 0.82, "grad_norm": 0.94140642798752, "learning_rate": 1.6727110559123405e-06, "loss": 0.4846, "step": 10075 }, { "epoch": 0.82, "grad_norm": 0.9448124346191853, "learning_rate": 1.6712537946185503e-06, "loss": 0.4815, "step": 10076 }, { "epoch": 0.82, "grad_norm": 0.9390309876950016, "learning_rate": 1.6697971104994847e-06, "loss": 0.4681, "step": 10077 }, { "epoch": 0.82, "grad_norm": 1.5830720212508986, "learning_rate": 1.6683410036560899e-06, "loss": 0.4805, "step": 10078 }, { "epoch": 0.82, "grad_norm": 0.9370005137394446, "learning_rate": 1.666885474189276e-06, "loss": 0.463, "step": 10079 }, { "epoch": 0.82, "grad_norm": 0.8736685269958002, "learning_rate": 1.6654305221999035e-06, "loss": 0.465, "step": 10080 }, { "epoch": 0.82, "grad_norm": 1.006200032644295, "learning_rate": 1.663976147788806e-06, "loss": 0.4743, "step": 10081 }, { "epoch": 0.82, "grad_norm": 1.0545813247664437, "learning_rate": 1.6625223510567667e-06, "loss": 0.5324, "step": 10082 }, { "epoch": 0.82, "grad_norm": 1.0119780993174157, "learning_rate": 1.6610691321045325e-06, "loss": 0.5058, "step": 10083 }, { "epoch": 0.82, "grad_norm": 0.9609372264458037, "learning_rate": 1.6596164910328106e-06, "loss": 0.5189, "step": 10084 }, { "epoch": 0.82, "grad_norm": 0.937127977991446, "learning_rate": 1.6581644279422705e-06, "loss": 0.4489, "step": 10085 }, { "epoch": 0.82, "grad_norm": 0.9913881125396772, "learning_rate": 1.6567129429335316e-06, "loss": 0.4661, "step": 10086 }, { "epoch": 0.82, "grad_norm": 0.9612288385089394, "learning_rate": 1.6552620361071903e-06, "loss": 0.432, "step": 10087 }, { "epoch": 0.82, "grad_norm": 0.895201885032304, "learning_rate": 1.6538117075637849e-06, "loss": 0.4727, "step": 10088 }, { "epoch": 0.82, "grad_norm": 0.8722790693420445, "learning_rate": 1.6523619574038264e-06, "loss": 0.4433, "step": 10089 }, { "epoch": 0.82, "grad_norm": 0.8694586279318163, "learning_rate": 1.6509127857277784e-06, "loss": 0.4481, "step": 10090 }, { "epoch": 0.82, "grad_norm": 0.9235188229616803, "learning_rate": 1.6494641926360722e-06, "loss": 0.4391, "step": 10091 }, { "epoch": 0.82, "grad_norm": 1.035054049678654, "learning_rate": 1.6480161782290849e-06, "loss": 0.5072, "step": 10092 }, { "epoch": 0.82, "grad_norm": 0.9909914510220467, "learning_rate": 1.6465687426071741e-06, "loss": 0.5084, "step": 10093 }, { "epoch": 0.82, "grad_norm": 0.9044075400050785, "learning_rate": 1.6451218858706374e-06, "loss": 0.4573, "step": 10094 }, { "epoch": 0.82, "grad_norm": 0.9300078914155345, "learning_rate": 1.6436756081197426e-06, "loss": 0.5399, "step": 10095 }, { "epoch": 0.82, "grad_norm": 0.9039526767115124, "learning_rate": 1.6422299094547156e-06, "loss": 0.4677, "step": 10096 }, { "epoch": 0.82, "grad_norm": 0.9784978778245498, "learning_rate": 1.6407847899757468e-06, "loss": 0.4883, "step": 10097 }, { "epoch": 0.82, "grad_norm": 1.0152703015299072, "learning_rate": 1.6393402497829713e-06, "loss": 0.5826, "step": 10098 }, { "epoch": 0.82, "grad_norm": 0.9145163251531081, "learning_rate": 1.6378962889765048e-06, "loss": 0.4215, "step": 10099 }, { "epoch": 0.82, "grad_norm": 0.8961781171614732, "learning_rate": 1.6364529076564072e-06, "loss": 0.4545, "step": 10100 }, { "epoch": 0.82, "grad_norm": 0.9460249075728607, "learning_rate": 1.635010105922704e-06, "loss": 0.5136, "step": 10101 }, { "epoch": 0.82, "grad_norm": 0.9431549507296044, "learning_rate": 1.633567883875381e-06, "loss": 0.4878, "step": 10102 }, { "epoch": 0.82, "grad_norm": 0.966938572007313, "learning_rate": 1.6321262416143856e-06, "loss": 0.555, "step": 10103 }, { "epoch": 0.82, "grad_norm": 0.9395240377305119, "learning_rate": 1.6306851792396138e-06, "loss": 0.4301, "step": 10104 }, { "epoch": 0.82, "grad_norm": 0.9624464563853393, "learning_rate": 1.62924469685094e-06, "loss": 0.52, "step": 10105 }, { "epoch": 0.82, "grad_norm": 0.8621026113911863, "learning_rate": 1.6278047945481823e-06, "loss": 0.4814, "step": 10106 }, { "epoch": 0.82, "grad_norm": 0.8405037196377936, "learning_rate": 1.626365472431125e-06, "loss": 0.4594, "step": 10107 }, { "epoch": 0.82, "grad_norm": 0.9767441682840015, "learning_rate": 1.6249267305995141e-06, "loss": 0.496, "step": 10108 }, { "epoch": 0.82, "grad_norm": 0.8078450200011851, "learning_rate": 1.6234885691530543e-06, "loss": 0.4337, "step": 10109 }, { "epoch": 0.82, "grad_norm": 0.9512121505034593, "learning_rate": 1.6220509881914015e-06, "loss": 0.489, "step": 10110 }, { "epoch": 0.82, "grad_norm": 0.9531062549342812, "learning_rate": 1.620613987814189e-06, "loss": 0.5192, "step": 10111 }, { "epoch": 0.82, "grad_norm": 0.9742192433408755, "learning_rate": 1.6191775681209932e-06, "loss": 0.4612, "step": 10112 }, { "epoch": 0.82, "grad_norm": 0.922332170634465, "learning_rate": 1.6177417292113572e-06, "loss": 0.4994, "step": 10113 }, { "epoch": 0.82, "grad_norm": 0.8630218836631168, "learning_rate": 1.6163064711847842e-06, "loss": 0.4881, "step": 10114 }, { "epoch": 0.82, "grad_norm": 1.0168371457320893, "learning_rate": 1.6148717941407387e-06, "loss": 0.4818, "step": 10115 }, { "epoch": 0.82, "grad_norm": 1.0180019521693955, "learning_rate": 1.613437698178636e-06, "loss": 0.4638, "step": 10116 }, { "epoch": 0.82, "grad_norm": 0.9034584910700775, "learning_rate": 1.6120041833978662e-06, "loss": 0.4181, "step": 10117 }, { "epoch": 0.82, "grad_norm": 1.026478894740036, "learning_rate": 1.6105712498977644e-06, "loss": 0.5086, "step": 10118 }, { "epoch": 0.82, "grad_norm": 0.9523384413831647, "learning_rate": 1.6091388977776334e-06, "loss": 0.4532, "step": 10119 }, { "epoch": 0.82, "grad_norm": 0.8911241632838921, "learning_rate": 1.607707127136734e-06, "loss": 0.4532, "step": 10120 }, { "epoch": 0.82, "grad_norm": 0.9356572273897498, "learning_rate": 1.6062759380742898e-06, "loss": 0.4329, "step": 10121 }, { "epoch": 0.82, "grad_norm": 0.9142388310791838, "learning_rate": 1.6048453306894719e-06, "loss": 0.4623, "step": 10122 }, { "epoch": 0.82, "grad_norm": 0.8648947239779669, "learning_rate": 1.6034153050814315e-06, "loss": 0.4531, "step": 10123 }, { "epoch": 0.82, "grad_norm": 0.9380175538325769, "learning_rate": 1.601985861349261e-06, "loss": 0.5184, "step": 10124 }, { "epoch": 0.82, "grad_norm": 0.9619274961680155, "learning_rate": 1.60055699959202e-06, "loss": 0.5017, "step": 10125 }, { "epoch": 0.82, "grad_norm": 1.0598349113443915, "learning_rate": 1.599128719908729e-06, "loss": 0.5305, "step": 10126 }, { "epoch": 0.82, "grad_norm": 1.0604631061633714, "learning_rate": 1.5977010223983692e-06, "loss": 0.5205, "step": 10127 }, { "epoch": 0.82, "grad_norm": 0.932318399473858, "learning_rate": 1.5962739071598709e-06, "loss": 0.5136, "step": 10128 }, { "epoch": 0.82, "grad_norm": 1.0945709533633685, "learning_rate": 1.594847374292141e-06, "loss": 0.4599, "step": 10129 }, { "epoch": 0.82, "grad_norm": 0.9370398617218378, "learning_rate": 1.5934214238940282e-06, "loss": 0.4661, "step": 10130 }, { "epoch": 0.82, "grad_norm": 0.9230957099531405, "learning_rate": 1.5919960560643589e-06, "loss": 0.4422, "step": 10131 }, { "epoch": 0.82, "grad_norm": 0.9601615361009249, "learning_rate": 1.5905712709019017e-06, "loss": 0.5276, "step": 10132 }, { "epoch": 0.82, "grad_norm": 0.8953985465198216, "learning_rate": 1.589147068505398e-06, "loss": 0.4196, "step": 10133 }, { "epoch": 0.82, "grad_norm": 0.8861250406008324, "learning_rate": 1.5877234489735405e-06, "loss": 0.4784, "step": 10134 }, { "epoch": 0.82, "grad_norm": 0.8392301724786144, "learning_rate": 1.5863004124049897e-06, "loss": 0.4503, "step": 10135 }, { "epoch": 0.82, "grad_norm": 0.8732452868340083, "learning_rate": 1.584877958898352e-06, "loss": 0.476, "step": 10136 }, { "epoch": 0.82, "grad_norm": 1.0485661019215877, "learning_rate": 1.583456088552212e-06, "loss": 0.4975, "step": 10137 }, { "epoch": 0.82, "grad_norm": 0.9059777099372927, "learning_rate": 1.5820348014650977e-06, "loss": 0.5057, "step": 10138 }, { "epoch": 0.82, "grad_norm": 0.975352945782583, "learning_rate": 1.5806140977355056e-06, "loss": 0.5405, "step": 10139 }, { "epoch": 0.82, "grad_norm": 0.9568187340392101, "learning_rate": 1.579193977461887e-06, "loss": 0.4768, "step": 10140 }, { "epoch": 0.82, "grad_norm": 0.8368006214403227, "learning_rate": 1.5777744407426598e-06, "loss": 0.4269, "step": 10141 }, { "epoch": 0.82, "grad_norm": 0.9323506413597026, "learning_rate": 1.5763554876761888e-06, "loss": 0.4785, "step": 10142 }, { "epoch": 0.82, "grad_norm": 1.0340089929500242, "learning_rate": 1.5749371183608154e-06, "loss": 0.4363, "step": 10143 }, { "epoch": 0.82, "grad_norm": 0.8927867883701817, "learning_rate": 1.573519332894824e-06, "loss": 0.4133, "step": 10144 }, { "epoch": 0.82, "grad_norm": 0.8767356770638337, "learning_rate": 1.5721021313764684e-06, "loss": 0.5238, "step": 10145 }, { "epoch": 0.82, "grad_norm": 0.9740716993726325, "learning_rate": 1.57068551390396e-06, "loss": 0.5013, "step": 10146 }, { "epoch": 0.82, "grad_norm": 0.8677354945105021, "learning_rate": 1.5692694805754716e-06, "loss": 0.4156, "step": 10147 }, { "epoch": 0.82, "grad_norm": 1.031162555983234, "learning_rate": 1.5678540314891243e-06, "loss": 0.5158, "step": 10148 }, { "epoch": 0.82, "grad_norm": 0.9945018198037907, "learning_rate": 1.566439166743019e-06, "loss": 0.5158, "step": 10149 }, { "epoch": 0.82, "grad_norm": 1.0414349625809685, "learning_rate": 1.5650248864351957e-06, "loss": 0.5062, "step": 10150 }, { "epoch": 0.83, "grad_norm": 0.9229593872417223, "learning_rate": 1.5636111906636665e-06, "loss": 0.4811, "step": 10151 }, { "epoch": 0.83, "grad_norm": 0.9315096534678732, "learning_rate": 1.5621980795263981e-06, "loss": 0.4762, "step": 10152 }, { "epoch": 0.83, "grad_norm": 0.9962588003282389, "learning_rate": 1.560785553121319e-06, "loss": 0.5632, "step": 10153 }, { "epoch": 0.83, "grad_norm": 1.001513498736759, "learning_rate": 1.5593736115463154e-06, "loss": 0.4853, "step": 10154 }, { "epoch": 0.83, "grad_norm": 0.9768955139754243, "learning_rate": 1.5579622548992356e-06, "loss": 0.4419, "step": 10155 }, { "epoch": 0.83, "grad_norm": 0.8768967199113278, "learning_rate": 1.5565514832778816e-06, "loss": 0.4644, "step": 10156 }, { "epoch": 0.83, "grad_norm": 0.8558221097469616, "learning_rate": 1.5551412967800206e-06, "loss": 0.4429, "step": 10157 }, { "epoch": 0.83, "grad_norm": 0.881466729197389, "learning_rate": 1.5537316955033766e-06, "loss": 0.432, "step": 10158 }, { "epoch": 0.83, "grad_norm": 1.051284596756194, "learning_rate": 1.5523226795456349e-06, "loss": 0.5281, "step": 10159 }, { "epoch": 0.83, "grad_norm": 0.9155417122416348, "learning_rate": 1.5509142490044382e-06, "loss": 0.4846, "step": 10160 }, { "epoch": 0.83, "grad_norm": 1.0411739639118625, "learning_rate": 1.5495064039773921e-06, "loss": 0.5852, "step": 10161 }, { "epoch": 0.83, "grad_norm": 0.8887895632464202, "learning_rate": 1.5480991445620541e-06, "loss": 0.5016, "step": 10162 }, { "epoch": 0.83, "grad_norm": 0.9360948530755474, "learning_rate": 1.5466924708559483e-06, "loss": 0.4885, "step": 10163 }, { "epoch": 0.83, "grad_norm": 0.945720706242638, "learning_rate": 1.5452863829565568e-06, "loss": 0.5147, "step": 10164 }, { "epoch": 0.83, "grad_norm": 0.9668172070220067, "learning_rate": 1.5438808809613193e-06, "loss": 0.5072, "step": 10165 }, { "epoch": 0.83, "grad_norm": 0.8863857853165308, "learning_rate": 1.5424759649676357e-06, "loss": 0.5039, "step": 10166 }, { "epoch": 0.83, "grad_norm": 0.9821899836139799, "learning_rate": 1.5410716350728671e-06, "loss": 0.5183, "step": 10167 }, { "epoch": 0.83, "grad_norm": 0.869172883665788, "learning_rate": 1.5396678913743324e-06, "loss": 0.4712, "step": 10168 }, { "epoch": 0.83, "grad_norm": 0.8392357457508518, "learning_rate": 1.5382647339693068e-06, "loss": 0.4401, "step": 10169 }, { "epoch": 0.83, "grad_norm": 0.887219175964945, "learning_rate": 1.5368621629550295e-06, "loss": 0.4847, "step": 10170 }, { "epoch": 0.83, "grad_norm": 0.8524345364458238, "learning_rate": 1.535460178428697e-06, "loss": 0.4431, "step": 10171 }, { "epoch": 0.83, "grad_norm": 0.9630643844671861, "learning_rate": 1.5340587804874662e-06, "loss": 0.4392, "step": 10172 }, { "epoch": 0.83, "grad_norm": 0.9904964289364382, "learning_rate": 1.5326579692284537e-06, "loss": 0.4567, "step": 10173 }, { "epoch": 0.83, "grad_norm": 0.9282806035313729, "learning_rate": 1.5312577447487342e-06, "loss": 0.5784, "step": 10174 }, { "epoch": 0.83, "grad_norm": 1.0558418299723358, "learning_rate": 1.52985810714534e-06, "loss": 0.4927, "step": 10175 }, { "epoch": 0.83, "grad_norm": 1.1063856478240854, "learning_rate": 1.5284590565152658e-06, "loss": 0.4835, "step": 10176 }, { "epoch": 0.83, "grad_norm": 0.9117316773353915, "learning_rate": 1.527060592955464e-06, "loss": 0.5542, "step": 10177 }, { "epoch": 0.83, "grad_norm": 0.8713169752157789, "learning_rate": 1.525662716562849e-06, "loss": 0.4683, "step": 10178 }, { "epoch": 0.83, "grad_norm": 0.9073438743697106, "learning_rate": 1.5242654274342895e-06, "loss": 0.4708, "step": 10179 }, { "epoch": 0.83, "grad_norm": 0.8966897278534155, "learning_rate": 1.5228687256666209e-06, "loss": 0.4603, "step": 10180 }, { "epoch": 0.83, "grad_norm": 0.9327987256824378, "learning_rate": 1.521472611356628e-06, "loss": 0.5207, "step": 10181 }, { "epoch": 0.83, "grad_norm": 0.9177579625453101, "learning_rate": 1.5200770846010626e-06, "loss": 0.4408, "step": 10182 }, { "epoch": 0.83, "grad_norm": 0.8501020411606455, "learning_rate": 1.518682145496634e-06, "loss": 0.4893, "step": 10183 }, { "epoch": 0.83, "grad_norm": 0.9254580734831974, "learning_rate": 1.51728779414001e-06, "loss": 0.4761, "step": 10184 }, { "epoch": 0.83, "grad_norm": 0.8867520160877831, "learning_rate": 1.515894030627817e-06, "loss": 0.4256, "step": 10185 }, { "epoch": 0.83, "grad_norm": 0.8189185529115245, "learning_rate": 1.5145008550566454e-06, "loss": 0.4715, "step": 10186 }, { "epoch": 0.83, "grad_norm": 1.0093508277724619, "learning_rate": 1.5131082675230325e-06, "loss": 0.5081, "step": 10187 }, { "epoch": 0.83, "grad_norm": 0.9998210644300601, "learning_rate": 1.5117162681234932e-06, "loss": 0.4606, "step": 10188 }, { "epoch": 0.83, "grad_norm": 0.9591548783203413, "learning_rate": 1.510324856954486e-06, "loss": 0.4548, "step": 10189 }, { "epoch": 0.83, "grad_norm": 0.8416750074760201, "learning_rate": 1.5089340341124348e-06, "loss": 0.4433, "step": 10190 }, { "epoch": 0.83, "grad_norm": 0.8237614999712479, "learning_rate": 1.5075437996937248e-06, "loss": 0.458, "step": 10191 }, { "epoch": 0.83, "grad_norm": 0.8947058468112148, "learning_rate": 1.5061541537946979e-06, "loss": 0.4556, "step": 10192 }, { "epoch": 0.83, "grad_norm": 0.8301616530169087, "learning_rate": 1.50476509651165e-06, "loss": 0.4465, "step": 10193 }, { "epoch": 0.83, "grad_norm": 0.9188082792789959, "learning_rate": 1.5033766279408502e-06, "loss": 0.4811, "step": 10194 }, { "epoch": 0.83, "grad_norm": 1.0092638616018894, "learning_rate": 1.5019887481785112e-06, "loss": 0.5283, "step": 10195 }, { "epoch": 0.83, "grad_norm": 0.9427040337991623, "learning_rate": 1.500601457320814e-06, "loss": 0.4537, "step": 10196 }, { "epoch": 0.83, "grad_norm": 0.9217148432993036, "learning_rate": 1.499214755463898e-06, "loss": 0.4822, "step": 10197 }, { "epoch": 0.83, "grad_norm": 1.0911851164229525, "learning_rate": 1.4978286427038602e-06, "loss": 0.6013, "step": 10198 }, { "epoch": 0.83, "grad_norm": 0.9671555249802585, "learning_rate": 1.4964431191367524e-06, "loss": 0.4105, "step": 10199 }, { "epoch": 0.83, "grad_norm": 0.8743030992042353, "learning_rate": 1.4950581848585977e-06, "loss": 0.4241, "step": 10200 }, { "epoch": 0.83, "grad_norm": 0.88143266868202, "learning_rate": 1.493673839965365e-06, "loss": 0.4976, "step": 10201 }, { "epoch": 0.83, "grad_norm": 1.0492592623073465, "learning_rate": 1.4922900845529898e-06, "loss": 0.4855, "step": 10202 }, { "epoch": 0.83, "grad_norm": 0.9898697168016961, "learning_rate": 1.4909069187173652e-06, "loss": 0.4903, "step": 10203 }, { "epoch": 0.83, "grad_norm": 0.8645242451171236, "learning_rate": 1.4895243425543459e-06, "loss": 0.4411, "step": 10204 }, { "epoch": 0.83, "grad_norm": 0.877275663461517, "learning_rate": 1.4881423561597374e-06, "loss": 0.4829, "step": 10205 }, { "epoch": 0.83, "grad_norm": 0.8964463282505074, "learning_rate": 1.4867609596293165e-06, "loss": 0.4686, "step": 10206 }, { "epoch": 0.83, "grad_norm": 1.0755508778310003, "learning_rate": 1.4853801530588085e-06, "loss": 0.5166, "step": 10207 }, { "epoch": 0.83, "grad_norm": 0.9564994262029834, "learning_rate": 1.4839999365439039e-06, "loss": 0.4424, "step": 10208 }, { "epoch": 0.83, "grad_norm": 0.9465572798859154, "learning_rate": 1.4826203101802494e-06, "loss": 0.4262, "step": 10209 }, { "epoch": 0.83, "grad_norm": 0.8966023133247082, "learning_rate": 1.4812412740634552e-06, "loss": 0.4824, "step": 10210 }, { "epoch": 0.83, "grad_norm": 0.8745563853905437, "learning_rate": 1.4798628282890793e-06, "loss": 0.4788, "step": 10211 }, { "epoch": 0.83, "grad_norm": 1.0027440224193567, "learning_rate": 1.4784849729526573e-06, "loss": 0.5189, "step": 10212 }, { "epoch": 0.83, "grad_norm": 0.8894999547435524, "learning_rate": 1.4771077081496654e-06, "loss": 0.4733, "step": 10213 }, { "epoch": 0.83, "grad_norm": 0.8973434912002435, "learning_rate": 1.4757310339755494e-06, "loss": 0.4915, "step": 10214 }, { "epoch": 0.83, "grad_norm": 0.9867159710071842, "learning_rate": 1.4743549505257126e-06, "loss": 0.4814, "step": 10215 }, { "epoch": 0.83, "grad_norm": 0.9946006345156738, "learning_rate": 1.472979457895517e-06, "loss": 0.5211, "step": 10216 }, { "epoch": 0.83, "grad_norm": 0.9379457171519671, "learning_rate": 1.4716045561802772e-06, "loss": 0.4925, "step": 10217 }, { "epoch": 0.83, "grad_norm": 0.8979517459470215, "learning_rate": 1.4702302454752815e-06, "loss": 0.4941, "step": 10218 }, { "epoch": 0.83, "grad_norm": 1.018496605305982, "learning_rate": 1.4688565258757615e-06, "loss": 0.4438, "step": 10219 }, { "epoch": 0.83, "grad_norm": 0.9519939488958427, "learning_rate": 1.4674833974769166e-06, "loss": 0.4612, "step": 10220 }, { "epoch": 0.83, "grad_norm": 0.8882005768328872, "learning_rate": 1.466110860373905e-06, "loss": 0.428, "step": 10221 }, { "epoch": 0.83, "grad_norm": 0.9357707331145971, "learning_rate": 1.464738914661843e-06, "loss": 0.5477, "step": 10222 }, { "epoch": 0.83, "grad_norm": 1.0508828769107081, "learning_rate": 1.4633675604357988e-06, "loss": 0.5031, "step": 10223 }, { "epoch": 0.83, "grad_norm": 1.0270950208906446, "learning_rate": 1.4619967977908157e-06, "loss": 0.4976, "step": 10224 }, { "epoch": 0.83, "grad_norm": 0.987460185788436, "learning_rate": 1.4606266268218783e-06, "loss": 0.5491, "step": 10225 }, { "epoch": 0.83, "grad_norm": 0.9871122718704699, "learning_rate": 1.4592570476239421e-06, "loss": 0.4953, "step": 10226 }, { "epoch": 0.83, "grad_norm": 0.9820611895502117, "learning_rate": 1.4578880602919165e-06, "loss": 0.5188, "step": 10227 }, { "epoch": 0.83, "grad_norm": 0.9576539109395478, "learning_rate": 1.4565196649206737e-06, "loss": 0.5063, "step": 10228 }, { "epoch": 0.83, "grad_norm": 0.8461603678292988, "learning_rate": 1.4551518616050352e-06, "loss": 0.4568, "step": 10229 }, { "epoch": 0.83, "grad_norm": 0.9297742997758425, "learning_rate": 1.453784650439798e-06, "loss": 0.5646, "step": 10230 }, { "epoch": 0.83, "grad_norm": 0.9075481777747914, "learning_rate": 1.4524180315197023e-06, "loss": 0.4459, "step": 10231 }, { "epoch": 0.83, "grad_norm": 0.9125842697656487, "learning_rate": 1.451052004939455e-06, "loss": 0.4746, "step": 10232 }, { "epoch": 0.83, "grad_norm": 0.862288669193609, "learning_rate": 1.4496865707937201e-06, "loss": 0.4309, "step": 10233 }, { "epoch": 0.83, "grad_norm": 0.9072812931637467, "learning_rate": 1.4483217291771257e-06, "loss": 0.4439, "step": 10234 }, { "epoch": 0.83, "grad_norm": 0.8165505502618535, "learning_rate": 1.4469574801842445e-06, "loss": 0.4725, "step": 10235 }, { "epoch": 0.83, "grad_norm": 0.869627256256952, "learning_rate": 1.445593823909628e-06, "loss": 0.4678, "step": 10236 }, { "epoch": 0.83, "grad_norm": 0.9151138992538269, "learning_rate": 1.444230760447769e-06, "loss": 0.4892, "step": 10237 }, { "epoch": 0.83, "grad_norm": 1.2319204202974274, "learning_rate": 1.44286828989313e-06, "loss": 0.4873, "step": 10238 }, { "epoch": 0.83, "grad_norm": 0.9497741178270789, "learning_rate": 1.441506412340129e-06, "loss": 0.482, "step": 10239 }, { "epoch": 0.83, "grad_norm": 0.8981045686296287, "learning_rate": 1.4401451278831435e-06, "loss": 0.4408, "step": 10240 }, { "epoch": 0.83, "grad_norm": 0.9088762480381548, "learning_rate": 1.4387844366165038e-06, "loss": 0.5165, "step": 10241 }, { "epoch": 0.83, "grad_norm": 0.8337177904896431, "learning_rate": 1.4374243386345132e-06, "loss": 0.4659, "step": 10242 }, { "epoch": 0.83, "grad_norm": 1.0151898625090054, "learning_rate": 1.4360648340314188e-06, "loss": 0.5337, "step": 10243 }, { "epoch": 0.83, "grad_norm": 1.0033732105045368, "learning_rate": 1.4347059229014359e-06, "loss": 0.5321, "step": 10244 }, { "epoch": 0.83, "grad_norm": 0.9061509243075366, "learning_rate": 1.433347605338734e-06, "loss": 0.4419, "step": 10245 }, { "epoch": 0.83, "grad_norm": 1.0150086633032052, "learning_rate": 1.4319898814374477e-06, "loss": 0.4895, "step": 10246 }, { "epoch": 0.83, "grad_norm": 0.9799337144189451, "learning_rate": 1.4306327512916574e-06, "loss": 0.5107, "step": 10247 }, { "epoch": 0.83, "grad_norm": 0.9820232899095072, "learning_rate": 1.429276214995421e-06, "loss": 0.5189, "step": 10248 }, { "epoch": 0.83, "grad_norm": 1.1088600675559581, "learning_rate": 1.4279202726427387e-06, "loss": 0.5836, "step": 10249 }, { "epoch": 0.83, "grad_norm": 1.0022309856003913, "learning_rate": 1.4265649243275782e-06, "loss": 0.4901, "step": 10250 }, { "epoch": 0.83, "grad_norm": 0.8821887053125698, "learning_rate": 1.4252101701438636e-06, "loss": 0.4594, "step": 10251 }, { "epoch": 0.83, "grad_norm": 1.0464708818785635, "learning_rate": 1.4238560101854815e-06, "loss": 0.5232, "step": 10252 }, { "epoch": 0.83, "grad_norm": 0.8163985577430741, "learning_rate": 1.4225024445462654e-06, "loss": 0.4529, "step": 10253 }, { "epoch": 0.83, "grad_norm": 0.9243481894091906, "learning_rate": 1.421149473320026e-06, "loss": 0.4898, "step": 10254 }, { "epoch": 0.83, "grad_norm": 0.8764138643875491, "learning_rate": 1.4197970966005148e-06, "loss": 0.4318, "step": 10255 }, { "epoch": 0.83, "grad_norm": 0.92803108348694, "learning_rate": 1.418445314481458e-06, "loss": 0.5064, "step": 10256 }, { "epoch": 0.83, "grad_norm": 0.8426680871851131, "learning_rate": 1.4170941270565275e-06, "loss": 0.4832, "step": 10257 }, { "epoch": 0.83, "grad_norm": 0.9808605193966883, "learning_rate": 1.4157435344193605e-06, "loss": 0.4438, "step": 10258 }, { "epoch": 0.83, "grad_norm": 0.9243613521277586, "learning_rate": 1.4143935366635531e-06, "loss": 0.4952, "step": 10259 }, { "epoch": 0.83, "grad_norm": 0.9816508493061582, "learning_rate": 1.4130441338826595e-06, "loss": 0.454, "step": 10260 }, { "epoch": 0.83, "grad_norm": 0.9007035804833416, "learning_rate": 1.411695326170187e-06, "loss": 0.4572, "step": 10261 }, { "epoch": 0.83, "grad_norm": 0.9111091371833596, "learning_rate": 1.4103471136196145e-06, "loss": 0.4367, "step": 10262 }, { "epoch": 0.83, "grad_norm": 0.8489740540487186, "learning_rate": 1.4089994963243658e-06, "loss": 0.3963, "step": 10263 }, { "epoch": 0.83, "grad_norm": 0.9519934832289844, "learning_rate": 1.407652474377832e-06, "loss": 0.5052, "step": 10264 }, { "epoch": 0.83, "grad_norm": 0.9604546986315811, "learning_rate": 1.4063060478733604e-06, "loss": 0.4527, "step": 10265 }, { "epoch": 0.83, "grad_norm": 0.8780119658630006, "learning_rate": 1.4049602169042598e-06, "loss": 0.4459, "step": 10266 }, { "epoch": 0.83, "grad_norm": 0.8712225686758214, "learning_rate": 1.4036149815637866e-06, "loss": 0.4109, "step": 10267 }, { "epoch": 0.83, "grad_norm": 0.8832135456375555, "learning_rate": 1.4022703419451755e-06, "loss": 0.4775, "step": 10268 }, { "epoch": 0.83, "grad_norm": 0.8817635322244807, "learning_rate": 1.4009262981416016e-06, "loss": 0.4529, "step": 10269 }, { "epoch": 0.83, "grad_norm": 0.978087555119488, "learning_rate": 1.3995828502462072e-06, "loss": 0.4929, "step": 10270 }, { "epoch": 0.83, "grad_norm": 0.9898534652973374, "learning_rate": 1.3982399983520934e-06, "loss": 0.4752, "step": 10271 }, { "epoch": 0.83, "grad_norm": 0.8987085477583058, "learning_rate": 1.39689774255232e-06, "loss": 0.4729, "step": 10272 }, { "epoch": 0.83, "grad_norm": 0.9777419186698381, "learning_rate": 1.3955560829398974e-06, "loss": 0.5074, "step": 10273 }, { "epoch": 0.84, "grad_norm": 0.9044479183395163, "learning_rate": 1.3942150196078108e-06, "loss": 0.4735, "step": 10274 }, { "epoch": 0.84, "grad_norm": 0.955084130118838, "learning_rate": 1.3928745526489874e-06, "loss": 0.4806, "step": 10275 }, { "epoch": 0.84, "grad_norm": 1.0172347146907044, "learning_rate": 1.3915346821563235e-06, "loss": 0.5519, "step": 10276 }, { "epoch": 0.84, "grad_norm": 1.084576363094847, "learning_rate": 1.3901954082226698e-06, "loss": 0.4468, "step": 10277 }, { "epoch": 0.84, "grad_norm": 0.952157339824633, "learning_rate": 1.3888567309408396e-06, "loss": 0.5082, "step": 10278 }, { "epoch": 0.84, "grad_norm": 0.8740904269122499, "learning_rate": 1.3875186504035965e-06, "loss": 0.5169, "step": 10279 }, { "epoch": 0.84, "grad_norm": 0.9403765835118713, "learning_rate": 1.386181166703675e-06, "loss": 0.4676, "step": 10280 }, { "epoch": 0.84, "grad_norm": 0.8786768842079513, "learning_rate": 1.384844279933757e-06, "loss": 0.4222, "step": 10281 }, { "epoch": 0.84, "grad_norm": 0.9925467422216124, "learning_rate": 1.3835079901864878e-06, "loss": 0.5285, "step": 10282 }, { "epoch": 0.84, "grad_norm": 0.9809311190484911, "learning_rate": 1.3821722975544727e-06, "loss": 0.5496, "step": 10283 }, { "epoch": 0.84, "grad_norm": 0.9394321489302069, "learning_rate": 1.3808372021302752e-06, "loss": 0.4923, "step": 10284 }, { "epoch": 0.84, "grad_norm": 0.9952986041724703, "learning_rate": 1.37950270400641e-06, "loss": 0.493, "step": 10285 }, { "epoch": 0.84, "grad_norm": 0.934921142448034, "learning_rate": 1.378168803275366e-06, "loss": 0.4647, "step": 10286 }, { "epoch": 0.84, "grad_norm": 0.9848679192787415, "learning_rate": 1.376835500029573e-06, "loss": 0.4982, "step": 10287 }, { "epoch": 0.84, "grad_norm": 0.9792804880817738, "learning_rate": 1.375502794361432e-06, "loss": 0.5542, "step": 10288 }, { "epoch": 0.84, "grad_norm": 0.9281471341149863, "learning_rate": 1.3741706863632976e-06, "loss": 0.5077, "step": 10289 }, { "epoch": 0.84, "grad_norm": 0.9541313032337133, "learning_rate": 1.372839176127485e-06, "loss": 0.4716, "step": 10290 }, { "epoch": 0.84, "grad_norm": 0.8903951337293582, "learning_rate": 1.3715082637462607e-06, "loss": 0.514, "step": 10291 }, { "epoch": 0.84, "grad_norm": 0.926295498225597, "learning_rate": 1.370177949311866e-06, "loss": 0.4976, "step": 10292 }, { "epoch": 0.84, "grad_norm": 0.8380071746620592, "learning_rate": 1.368848232916481e-06, "loss": 0.4658, "step": 10293 }, { "epoch": 0.84, "grad_norm": 0.9757498423299027, "learning_rate": 1.3675191146522593e-06, "loss": 0.5021, "step": 10294 }, { "epoch": 0.84, "grad_norm": 0.8970210924053779, "learning_rate": 1.366190594611304e-06, "loss": 0.4657, "step": 10295 }, { "epoch": 0.84, "grad_norm": 0.8435500538474133, "learning_rate": 1.3648626728856862e-06, "loss": 0.4304, "step": 10296 }, { "epoch": 0.84, "grad_norm": 0.8889349239109522, "learning_rate": 1.3635353495674208e-06, "loss": 0.5115, "step": 10297 }, { "epoch": 0.84, "grad_norm": 0.8356644523131306, "learning_rate": 1.3622086247484989e-06, "loss": 0.4342, "step": 10298 }, { "epoch": 0.84, "grad_norm": 0.8973942966517006, "learning_rate": 1.3608824985208569e-06, "loss": 0.4747, "step": 10299 }, { "epoch": 0.84, "grad_norm": 0.9808056088548277, "learning_rate": 1.3595569709763934e-06, "loss": 0.4904, "step": 10300 }, { "epoch": 0.84, "grad_norm": 0.9684079981307847, "learning_rate": 1.3582320422069684e-06, "loss": 0.4608, "step": 10301 }, { "epoch": 0.84, "grad_norm": 0.9465905249500525, "learning_rate": 1.3569077123043973e-06, "loss": 0.4564, "step": 10302 }, { "epoch": 0.84, "grad_norm": 0.9502539835566461, "learning_rate": 1.3555839813604555e-06, "loss": 0.5308, "step": 10303 }, { "epoch": 0.84, "grad_norm": 0.8392779707666963, "learning_rate": 1.3542608494668785e-06, "loss": 0.4489, "step": 10304 }, { "epoch": 0.84, "grad_norm": 0.9518759760255114, "learning_rate": 1.3529383167153543e-06, "loss": 0.5635, "step": 10305 }, { "epoch": 0.84, "grad_norm": 0.9503279328972887, "learning_rate": 1.3516163831975337e-06, "loss": 0.446, "step": 10306 }, { "epoch": 0.84, "grad_norm": 1.0188594497418193, "learning_rate": 1.350295049005027e-06, "loss": 0.5087, "step": 10307 }, { "epoch": 0.84, "grad_norm": 0.9222776847545859, "learning_rate": 1.348974314229401e-06, "loss": 0.4797, "step": 10308 }, { "epoch": 0.84, "grad_norm": 0.8973687429402823, "learning_rate": 1.3476541789621822e-06, "loss": 0.4761, "step": 10309 }, { "epoch": 0.84, "grad_norm": 0.924539907796562, "learning_rate": 1.3463346432948555e-06, "loss": 0.4248, "step": 10310 }, { "epoch": 0.84, "grad_norm": 0.913652622748875, "learning_rate": 1.3450157073188608e-06, "loss": 0.4537, "step": 10311 }, { "epoch": 0.84, "grad_norm": 0.9662074046160266, "learning_rate": 1.3436973711256006e-06, "loss": 0.4851, "step": 10312 }, { "epoch": 0.84, "grad_norm": 0.8981108949974423, "learning_rate": 1.3423796348064343e-06, "loss": 0.4598, "step": 10313 }, { "epoch": 0.84, "grad_norm": 0.9353563433744401, "learning_rate": 1.34106249845268e-06, "loss": 0.5112, "step": 10314 }, { "epoch": 0.84, "grad_norm": 0.9043064502707608, "learning_rate": 1.339745962155613e-06, "loss": 0.4663, "step": 10315 }, { "epoch": 0.84, "grad_norm": 1.0139357510706588, "learning_rate": 1.338430026006471e-06, "loss": 0.4875, "step": 10316 }, { "epoch": 0.84, "grad_norm": 0.891505665915497, "learning_rate": 1.337114690096446e-06, "loss": 0.4168, "step": 10317 }, { "epoch": 0.84, "grad_norm": 1.3950374138336772, "learning_rate": 1.3357999545166878e-06, "loss": 0.5183, "step": 10318 }, { "epoch": 0.84, "grad_norm": 0.9195164034412875, "learning_rate": 1.3344858193583076e-06, "loss": 0.4681, "step": 10319 }, { "epoch": 0.84, "grad_norm": 0.9308858780333419, "learning_rate": 1.333172284712373e-06, "loss": 0.453, "step": 10320 }, { "epoch": 0.84, "grad_norm": 1.0416797789084544, "learning_rate": 1.3318593506699129e-06, "loss": 0.5003, "step": 10321 }, { "epoch": 0.84, "grad_norm": 1.041236641456775, "learning_rate": 1.3305470173219104e-06, "loss": 0.5134, "step": 10322 }, { "epoch": 0.84, "grad_norm": 0.8013652571175578, "learning_rate": 1.3292352847593115e-06, "loss": 0.4064, "step": 10323 }, { "epoch": 0.84, "grad_norm": 0.9611643454044503, "learning_rate": 1.3279241530730147e-06, "loss": 0.4909, "step": 10324 }, { "epoch": 0.84, "grad_norm": 0.8897865375319298, "learning_rate": 1.3266136223538827e-06, "loss": 0.4491, "step": 10325 }, { "epoch": 0.84, "grad_norm": 0.9901942315736499, "learning_rate": 1.325303692692732e-06, "loss": 0.4868, "step": 10326 }, { "epoch": 0.84, "grad_norm": 0.9971852320774585, "learning_rate": 1.323994364180342e-06, "loss": 0.5157, "step": 10327 }, { "epoch": 0.84, "grad_norm": 0.8725406497712802, "learning_rate": 1.322685636907447e-06, "loss": 0.4606, "step": 10328 }, { "epoch": 0.84, "grad_norm": 1.0280428865888989, "learning_rate": 1.321377510964742e-06, "loss": 0.526, "step": 10329 }, { "epoch": 0.84, "grad_norm": 0.9565540414208218, "learning_rate": 1.3200699864428757e-06, "loss": 0.5046, "step": 10330 }, { "epoch": 0.84, "grad_norm": 1.0174627243449055, "learning_rate": 1.31876306343246e-06, "loss": 0.5204, "step": 10331 }, { "epoch": 0.84, "grad_norm": 0.9857465322789754, "learning_rate": 1.3174567420240647e-06, "loss": 0.4811, "step": 10332 }, { "epoch": 0.84, "grad_norm": 0.938586286054605, "learning_rate": 1.3161510223082152e-06, "loss": 0.4589, "step": 10333 }, { "epoch": 0.84, "grad_norm": 1.0344397790703828, "learning_rate": 1.314845904375397e-06, "loss": 0.5439, "step": 10334 }, { "epoch": 0.84, "grad_norm": 0.9056043167338509, "learning_rate": 1.3135413883160564e-06, "loss": 0.4655, "step": 10335 }, { "epoch": 0.84, "grad_norm": 0.9769940959605952, "learning_rate": 1.3122374742205878e-06, "loss": 0.4396, "step": 10336 }, { "epoch": 0.84, "grad_norm": 0.9450823830694117, "learning_rate": 1.3109341621793614e-06, "loss": 0.4665, "step": 10337 }, { "epoch": 0.84, "grad_norm": 0.9852144192111556, "learning_rate": 1.309631452282688e-06, "loss": 0.5602, "step": 10338 }, { "epoch": 0.84, "grad_norm": 0.9717639282246133, "learning_rate": 1.3083293446208467e-06, "loss": 0.452, "step": 10339 }, { "epoch": 0.84, "grad_norm": 0.9232201932196542, "learning_rate": 1.3070278392840718e-06, "loss": 0.5083, "step": 10340 }, { "epoch": 0.84, "grad_norm": 0.9227017022484458, "learning_rate": 1.305726936362559e-06, "loss": 0.4331, "step": 10341 }, { "epoch": 0.84, "grad_norm": 1.0087015750858035, "learning_rate": 1.3044266359464542e-06, "loss": 0.5254, "step": 10342 }, { "epoch": 0.84, "grad_norm": 0.9414831130487048, "learning_rate": 1.3031269381258737e-06, "loss": 0.4517, "step": 10343 }, { "epoch": 0.84, "grad_norm": 0.9980339567237606, "learning_rate": 1.3018278429908815e-06, "loss": 0.5105, "step": 10344 }, { "epoch": 0.84, "grad_norm": 0.8688966377834313, "learning_rate": 1.3005293506315042e-06, "loss": 0.4611, "step": 10345 }, { "epoch": 0.84, "grad_norm": 0.945465953881035, "learning_rate": 1.2992314611377255e-06, "loss": 0.5041, "step": 10346 }, { "epoch": 0.84, "grad_norm": 0.9456803456419439, "learning_rate": 1.2979341745994922e-06, "loss": 0.4881, "step": 10347 }, { "epoch": 0.84, "grad_norm": 0.9546926099393971, "learning_rate": 1.296637491106697e-06, "loss": 0.4886, "step": 10348 }, { "epoch": 0.84, "grad_norm": 1.0014373713572333, "learning_rate": 1.295341410749208e-06, "loss": 0.5031, "step": 10349 }, { "epoch": 0.84, "grad_norm": 0.9578354382404265, "learning_rate": 1.2940459336168366e-06, "loss": 0.4757, "step": 10350 }, { "epoch": 0.84, "grad_norm": 0.9225824045489499, "learning_rate": 1.2927510597993598e-06, "loss": 0.4755, "step": 10351 }, { "epoch": 0.84, "grad_norm": 1.0158769338187628, "learning_rate": 1.2914567893865103e-06, "loss": 0.5099, "step": 10352 }, { "epoch": 0.84, "grad_norm": 0.9259783971877905, "learning_rate": 1.2901631224679844e-06, "loss": 0.4483, "step": 10353 }, { "epoch": 0.84, "grad_norm": 1.0430204010462292, "learning_rate": 1.2888700591334225e-06, "loss": 0.5591, "step": 10354 }, { "epoch": 0.84, "grad_norm": 0.9358738496499883, "learning_rate": 1.2875775994724448e-06, "loss": 0.4656, "step": 10355 }, { "epoch": 0.84, "grad_norm": 0.999405660521046, "learning_rate": 1.2862857435746078e-06, "loss": 0.5058, "step": 10356 }, { "epoch": 0.84, "grad_norm": 1.0669907720051155, "learning_rate": 1.284994491529441e-06, "loss": 0.5028, "step": 10357 }, { "epoch": 0.84, "grad_norm": 1.0632046162659259, "learning_rate": 1.283703843426425e-06, "loss": 0.4915, "step": 10358 }, { "epoch": 0.84, "grad_norm": 0.9965790286536484, "learning_rate": 1.2824137993550033e-06, "loss": 0.5406, "step": 10359 }, { "epoch": 0.84, "grad_norm": 0.9001904962554457, "learning_rate": 1.2811243594045697e-06, "loss": 0.5098, "step": 10360 }, { "epoch": 0.84, "grad_norm": 1.0280818029750178, "learning_rate": 1.2798355236644876e-06, "loss": 0.5532, "step": 10361 }, { "epoch": 0.84, "grad_norm": 0.931773872431827, "learning_rate": 1.278547292224067e-06, "loss": 0.4716, "step": 10362 }, { "epoch": 0.84, "grad_norm": 0.8939526381099582, "learning_rate": 1.2772596651725833e-06, "loss": 0.4572, "step": 10363 }, { "epoch": 0.84, "grad_norm": 0.9181306232702845, "learning_rate": 1.275972642599268e-06, "loss": 0.4845, "step": 10364 }, { "epoch": 0.84, "grad_norm": 0.9712261562760381, "learning_rate": 1.274686224593311e-06, "loss": 0.4899, "step": 10365 }, { "epoch": 0.84, "grad_norm": 1.011891784830918, "learning_rate": 1.273400411243857e-06, "loss": 0.4992, "step": 10366 }, { "epoch": 0.84, "grad_norm": 0.9281749249363781, "learning_rate": 1.2721152026400174e-06, "loss": 0.4891, "step": 10367 }, { "epoch": 0.84, "grad_norm": 1.0111059017453472, "learning_rate": 1.2708305988708502e-06, "loss": 0.4853, "step": 10368 }, { "epoch": 0.84, "grad_norm": 1.0195681428784356, "learning_rate": 1.2695466000253798e-06, "loss": 0.512, "step": 10369 }, { "epoch": 0.84, "grad_norm": 0.9951959453513336, "learning_rate": 1.268263206192587e-06, "loss": 0.5128, "step": 10370 }, { "epoch": 0.84, "grad_norm": 1.0410562492605302, "learning_rate": 1.2669804174614097e-06, "loss": 0.5366, "step": 10371 }, { "epoch": 0.84, "grad_norm": 0.8844639541632079, "learning_rate": 1.2656982339207401e-06, "loss": 0.4934, "step": 10372 }, { "epoch": 0.84, "grad_norm": 0.9036479544451441, "learning_rate": 1.2644166556594396e-06, "loss": 0.5045, "step": 10373 }, { "epoch": 0.84, "grad_norm": 1.0051379719537108, "learning_rate": 1.2631356827663144e-06, "loss": 0.5172, "step": 10374 }, { "epoch": 0.84, "grad_norm": 0.9048632205778424, "learning_rate": 1.2618553153301361e-06, "loss": 0.421, "step": 10375 }, { "epoch": 0.84, "grad_norm": 1.0617817877623916, "learning_rate": 1.2605755534396347e-06, "loss": 0.542, "step": 10376 }, { "epoch": 0.84, "grad_norm": 0.8293522861775061, "learning_rate": 1.259296397183497e-06, "loss": 0.427, "step": 10377 }, { "epoch": 0.84, "grad_norm": 1.0283502189905431, "learning_rate": 1.2580178466503623e-06, "loss": 0.5244, "step": 10378 }, { "epoch": 0.84, "grad_norm": 0.8610204224832774, "learning_rate": 1.2567399019288406e-06, "loss": 0.5077, "step": 10379 }, { "epoch": 0.84, "grad_norm": 0.9043700711845449, "learning_rate": 1.2554625631074846e-06, "loss": 0.4689, "step": 10380 }, { "epoch": 0.84, "grad_norm": 0.9362311134421188, "learning_rate": 1.2541858302748199e-06, "loss": 0.4184, "step": 10381 }, { "epoch": 0.84, "grad_norm": 0.9579535314078547, "learning_rate": 1.2529097035193183e-06, "loss": 0.5297, "step": 10382 }, { "epoch": 0.84, "grad_norm": 1.0046460874542875, "learning_rate": 1.2516341829294155e-06, "loss": 0.5359, "step": 10383 }, { "epoch": 0.84, "grad_norm": 0.859887174800114, "learning_rate": 1.2503592685935039e-06, "loss": 0.3874, "step": 10384 }, { "epoch": 0.84, "grad_norm": 1.1239221563293567, "learning_rate": 1.2490849605999355e-06, "loss": 0.5216, "step": 10385 }, { "epoch": 0.84, "grad_norm": 0.8692722620485404, "learning_rate": 1.2478112590370139e-06, "loss": 0.3922, "step": 10386 }, { "epoch": 0.84, "grad_norm": 0.9322363876829223, "learning_rate": 1.246538163993013e-06, "loss": 0.4898, "step": 10387 }, { "epoch": 0.84, "grad_norm": 0.944415903299271, "learning_rate": 1.2452656755561509e-06, "loss": 0.4815, "step": 10388 }, { "epoch": 0.84, "grad_norm": 0.914130294902286, "learning_rate": 1.2439937938146118e-06, "loss": 0.4974, "step": 10389 }, { "epoch": 0.84, "grad_norm": 0.8514860899931579, "learning_rate": 1.2427225188565362e-06, "loss": 0.4952, "step": 10390 }, { "epoch": 0.84, "grad_norm": 0.9783298964863831, "learning_rate": 1.2414518507700247e-06, "loss": 0.5409, "step": 10391 }, { "epoch": 0.84, "grad_norm": 0.9345235574956751, "learning_rate": 1.2401817896431268e-06, "loss": 0.5215, "step": 10392 }, { "epoch": 0.84, "grad_norm": 0.9557235832363349, "learning_rate": 1.2389123355638655e-06, "loss": 0.4642, "step": 10393 }, { "epoch": 0.84, "grad_norm": 0.9882934130814738, "learning_rate": 1.237643488620206e-06, "loss": 0.4993, "step": 10394 }, { "epoch": 0.84, "grad_norm": 0.9229014262843889, "learning_rate": 1.2363752489000802e-06, "loss": 0.5369, "step": 10395 }, { "epoch": 0.84, "grad_norm": 0.8677797825145028, "learning_rate": 1.2351076164913767e-06, "loss": 0.5179, "step": 10396 }, { "epoch": 0.85, "grad_norm": 0.9079183428436072, "learning_rate": 1.2338405914819428e-06, "loss": 0.4528, "step": 10397 }, { "epoch": 0.85, "grad_norm": 1.0616692072686915, "learning_rate": 1.2325741739595753e-06, "loss": 0.4966, "step": 10398 }, { "epoch": 0.85, "grad_norm": 1.017928690459014, "learning_rate": 1.2313083640120461e-06, "loss": 0.4986, "step": 10399 }, { "epoch": 0.85, "grad_norm": 1.0044277617256578, "learning_rate": 1.2300431617270669e-06, "loss": 0.5175, "step": 10400 }, { "epoch": 0.85, "grad_norm": 0.9618426399337734, "learning_rate": 1.228778567192318e-06, "loss": 0.5235, "step": 10401 }, { "epoch": 0.85, "grad_norm": 0.9374152649408706, "learning_rate": 1.2275145804954347e-06, "loss": 0.4962, "step": 10402 }, { "epoch": 0.85, "grad_norm": 0.8818312190067937, "learning_rate": 1.2262512017240113e-06, "loss": 0.4294, "step": 10403 }, { "epoch": 0.85, "grad_norm": 0.8987846617538389, "learning_rate": 1.2249884309655935e-06, "loss": 0.4726, "step": 10404 }, { "epoch": 0.85, "grad_norm": 0.8476644702669166, "learning_rate": 1.2237262683076979e-06, "loss": 0.4813, "step": 10405 }, { "epoch": 0.85, "grad_norm": 0.9203532221043269, "learning_rate": 1.2224647138377854e-06, "loss": 0.508, "step": 10406 }, { "epoch": 0.85, "grad_norm": 0.9444794075657026, "learning_rate": 1.221203767643282e-06, "loss": 0.494, "step": 10407 }, { "epoch": 0.85, "grad_norm": 0.9873844191944869, "learning_rate": 1.219943429811571e-06, "loss": 0.4555, "step": 10408 }, { "epoch": 0.85, "grad_norm": 0.9150066558016045, "learning_rate": 1.2186837004299957e-06, "loss": 0.4924, "step": 10409 }, { "epoch": 0.85, "grad_norm": 0.8881440228122434, "learning_rate": 1.2174245795858454e-06, "loss": 0.4637, "step": 10410 }, { "epoch": 0.85, "grad_norm": 0.9006863395728119, "learning_rate": 1.2161660673663855e-06, "loss": 0.4692, "step": 10411 }, { "epoch": 0.85, "grad_norm": 0.9597572812479213, "learning_rate": 1.2149081638588246e-06, "loss": 0.4673, "step": 10412 }, { "epoch": 0.85, "grad_norm": 0.8682495435123212, "learning_rate": 1.2136508691503357e-06, "loss": 0.4435, "step": 10413 }, { "epoch": 0.85, "grad_norm": 0.9486120778717507, "learning_rate": 1.2123941833280472e-06, "loss": 0.5288, "step": 10414 }, { "epoch": 0.85, "grad_norm": 0.9196492823660393, "learning_rate": 1.2111381064790506e-06, "loss": 0.5112, "step": 10415 }, { "epoch": 0.85, "grad_norm": 0.8973238884603776, "learning_rate": 1.2098826386903829e-06, "loss": 0.4586, "step": 10416 }, { "epoch": 0.85, "grad_norm": 0.9190498993137616, "learning_rate": 1.2086277800490554e-06, "loss": 0.4639, "step": 10417 }, { "epoch": 0.85, "grad_norm": 0.8949475889209494, "learning_rate": 1.207373530642022e-06, "loss": 0.4672, "step": 10418 }, { "epoch": 0.85, "grad_norm": 0.9286970525038918, "learning_rate": 1.2061198905562043e-06, "loss": 0.489, "step": 10419 }, { "epoch": 0.85, "grad_norm": 1.0214400088266034, "learning_rate": 1.2048668598784785e-06, "loss": 0.5502, "step": 10420 }, { "epoch": 0.85, "grad_norm": 0.9371816622979336, "learning_rate": 1.2036144386956805e-06, "loss": 0.5025, "step": 10421 }, { "epoch": 0.85, "grad_norm": 0.8994263459924541, "learning_rate": 1.2023626270945943e-06, "loss": 0.4472, "step": 10422 }, { "epoch": 0.85, "grad_norm": 1.0525060037546006, "learning_rate": 1.2011114251619792e-06, "loss": 0.5032, "step": 10423 }, { "epoch": 0.85, "grad_norm": 1.1654049372519806, "learning_rate": 1.1998608329845362e-06, "loss": 0.5165, "step": 10424 }, { "epoch": 0.85, "grad_norm": 1.0218906819426945, "learning_rate": 1.1986108506489314e-06, "loss": 0.5377, "step": 10425 }, { "epoch": 0.85, "grad_norm": 0.9387363153022205, "learning_rate": 1.1973614782417874e-06, "loss": 0.4903, "step": 10426 }, { "epoch": 0.85, "grad_norm": 0.8818776810322095, "learning_rate": 1.1961127158496866e-06, "loss": 0.4606, "step": 10427 }, { "epoch": 0.85, "grad_norm": 0.9745946305449849, "learning_rate": 1.1948645635591627e-06, "loss": 0.4892, "step": 10428 }, { "epoch": 0.85, "grad_norm": 1.1204945414201928, "learning_rate": 1.1936170214567177e-06, "loss": 0.5149, "step": 10429 }, { "epoch": 0.85, "grad_norm": 1.000795057220358, "learning_rate": 1.1923700896288004e-06, "loss": 0.5303, "step": 10430 }, { "epoch": 0.85, "grad_norm": 0.9685116179114811, "learning_rate": 1.1911237681618226e-06, "loss": 0.4936, "step": 10431 }, { "epoch": 0.85, "grad_norm": 0.9088048618312514, "learning_rate": 1.1898780571421554e-06, "loss": 0.4575, "step": 10432 }, { "epoch": 0.85, "grad_norm": 0.8908139699595279, "learning_rate": 1.1886329566561262e-06, "loss": 0.4623, "step": 10433 }, { "epoch": 0.85, "grad_norm": 0.9185747240565809, "learning_rate": 1.1873884667900125e-06, "loss": 0.4527, "step": 10434 }, { "epoch": 0.85, "grad_norm": 1.0019140346220445, "learning_rate": 1.186144587630066e-06, "loss": 0.5332, "step": 10435 }, { "epoch": 0.85, "grad_norm": 0.9822650328652477, "learning_rate": 1.184901319262479e-06, "loss": 0.5219, "step": 10436 }, { "epoch": 0.85, "grad_norm": 0.9366252276807113, "learning_rate": 1.1836586617734114e-06, "loss": 0.4705, "step": 10437 }, { "epoch": 0.85, "grad_norm": 0.9925375074413082, "learning_rate": 1.1824166152489791e-06, "loss": 0.493, "step": 10438 }, { "epoch": 0.85, "grad_norm": 0.9522970269239825, "learning_rate": 1.181175179775257e-06, "loss": 0.4728, "step": 10439 }, { "epoch": 0.85, "grad_norm": 0.9604610233217911, "learning_rate": 1.179934355438267e-06, "loss": 0.4928, "step": 10440 }, { "epoch": 0.85, "grad_norm": 0.8529289245362368, "learning_rate": 1.1786941423240072e-06, "loss": 0.4278, "step": 10441 }, { "epoch": 0.85, "grad_norm": 1.0000596664112409, "learning_rate": 1.1774545405184178e-06, "loss": 0.4782, "step": 10442 }, { "epoch": 0.85, "grad_norm": 0.9101942517684707, "learning_rate": 1.1762155501074024e-06, "loss": 0.5051, "step": 10443 }, { "epoch": 0.85, "grad_norm": 0.9253558360388284, "learning_rate": 1.1749771711768233e-06, "loss": 0.492, "step": 10444 }, { "epoch": 0.85, "grad_norm": 0.919619505554477, "learning_rate": 1.1737394038124994e-06, "loss": 0.4892, "step": 10445 }, { "epoch": 0.85, "grad_norm": 0.9461661472433754, "learning_rate": 1.1725022481002024e-06, "loss": 0.4437, "step": 10446 }, { "epoch": 0.85, "grad_norm": 0.9388303265636923, "learning_rate": 1.1712657041256737e-06, "loss": 0.4766, "step": 10447 }, { "epoch": 0.85, "grad_norm": 0.983735419601325, "learning_rate": 1.170029771974599e-06, "loss": 0.5595, "step": 10448 }, { "epoch": 0.85, "grad_norm": 0.9821181424572664, "learning_rate": 1.1687944517326289e-06, "loss": 0.4663, "step": 10449 }, { "epoch": 0.85, "grad_norm": 0.9464613846937909, "learning_rate": 1.1675597434853692e-06, "loss": 0.4726, "step": 10450 }, { "epoch": 0.85, "grad_norm": 0.9347967181397874, "learning_rate": 1.1663256473183858e-06, "loss": 0.4676, "step": 10451 }, { "epoch": 0.85, "grad_norm": 1.1063597249501547, "learning_rate": 1.1650921633171985e-06, "loss": 0.5331, "step": 10452 }, { "epoch": 0.85, "grad_norm": 0.8829825883905474, "learning_rate": 1.1638592915672908e-06, "loss": 0.4304, "step": 10453 }, { "epoch": 0.85, "grad_norm": 1.0064095451786466, "learning_rate": 1.1626270321540945e-06, "loss": 0.497, "step": 10454 }, { "epoch": 0.85, "grad_norm": 0.9513480613132469, "learning_rate": 1.1613953851630055e-06, "loss": 0.483, "step": 10455 }, { "epoch": 0.85, "grad_norm": 0.9381733547839735, "learning_rate": 1.160164350679377e-06, "loss": 0.4916, "step": 10456 }, { "epoch": 0.85, "grad_norm": 0.9758864109716439, "learning_rate": 1.158933928788518e-06, "loss": 0.4989, "step": 10457 }, { "epoch": 0.85, "grad_norm": 0.9612788161193682, "learning_rate": 1.1577041195756954e-06, "loss": 0.4806, "step": 10458 }, { "epoch": 0.85, "grad_norm": 0.8620183622668911, "learning_rate": 1.1564749231261364e-06, "loss": 0.4454, "step": 10459 }, { "epoch": 0.85, "grad_norm": 0.984063658349551, "learning_rate": 1.155246339525019e-06, "loss": 0.546, "step": 10460 }, { "epoch": 0.85, "grad_norm": 0.9123286243679032, "learning_rate": 1.1540183688574847e-06, "loss": 0.4465, "step": 10461 }, { "epoch": 0.85, "grad_norm": 0.9541839139858166, "learning_rate": 1.1527910112086315e-06, "loss": 0.4698, "step": 10462 }, { "epoch": 0.85, "grad_norm": 1.0870922549213533, "learning_rate": 1.151564266663514e-06, "loss": 0.5383, "step": 10463 }, { "epoch": 0.85, "grad_norm": 0.9692547053719215, "learning_rate": 1.150338135307144e-06, "loss": 0.4782, "step": 10464 }, { "epoch": 0.85, "grad_norm": 0.8367552594366106, "learning_rate": 1.1491126172244915e-06, "loss": 0.4565, "step": 10465 }, { "epoch": 0.85, "grad_norm": 0.9307612165354187, "learning_rate": 1.147887712500486e-06, "loss": 0.4579, "step": 10466 }, { "epoch": 0.85, "grad_norm": 0.8715305493419948, "learning_rate": 1.1466634212200079e-06, "loss": 0.4211, "step": 10467 }, { "epoch": 0.85, "grad_norm": 0.9695418907066585, "learning_rate": 1.1454397434679022e-06, "loss": 0.5061, "step": 10468 }, { "epoch": 0.85, "grad_norm": 0.9374443923373761, "learning_rate": 1.1442166793289677e-06, "loss": 0.566, "step": 10469 }, { "epoch": 0.85, "grad_norm": 0.9450478119481537, "learning_rate": 1.1429942288879626e-06, "loss": 0.5034, "step": 10470 }, { "epoch": 0.85, "grad_norm": 0.983148186080234, "learning_rate": 1.1417723922296008e-06, "loss": 0.492, "step": 10471 }, { "epoch": 0.85, "grad_norm": 0.8598251224417236, "learning_rate": 1.1405511694385584e-06, "loss": 0.4438, "step": 10472 }, { "epoch": 0.85, "grad_norm": 0.9462950488597034, "learning_rate": 1.1393305605994587e-06, "loss": 0.4756, "step": 10473 }, { "epoch": 0.85, "grad_norm": 0.9482638073194297, "learning_rate": 1.1381105657968916e-06, "loss": 0.4996, "step": 10474 }, { "epoch": 0.85, "grad_norm": 0.860704600578187, "learning_rate": 1.1368911851154019e-06, "loss": 0.5384, "step": 10475 }, { "epoch": 0.85, "grad_norm": 0.9570375760818484, "learning_rate": 1.1356724186394918e-06, "loss": 0.4809, "step": 10476 }, { "epoch": 0.85, "grad_norm": 0.9634767434215831, "learning_rate": 1.1344542664536196e-06, "loss": 0.5581, "step": 10477 }, { "epoch": 0.85, "grad_norm": 0.9795070369522804, "learning_rate": 1.1332367286422064e-06, "loss": 0.5486, "step": 10478 }, { "epoch": 0.85, "grad_norm": 0.8677691664078212, "learning_rate": 1.1320198052896203e-06, "loss": 0.474, "step": 10479 }, { "epoch": 0.85, "grad_norm": 0.9434054008110407, "learning_rate": 1.130803496480195e-06, "loss": 0.524, "step": 10480 }, { "epoch": 0.85, "grad_norm": 0.9553611331557014, "learning_rate": 1.129587802298222e-06, "loss": 0.4732, "step": 10481 }, { "epoch": 0.85, "grad_norm": 0.8139743634257981, "learning_rate": 1.128372722827945e-06, "loss": 0.4421, "step": 10482 }, { "epoch": 0.85, "grad_norm": 0.8881985728660753, "learning_rate": 1.12715825815357e-06, "loss": 0.4171, "step": 10483 }, { "epoch": 0.85, "grad_norm": 0.9326228528390237, "learning_rate": 1.1259444083592585e-06, "loss": 0.4373, "step": 10484 }, { "epoch": 0.85, "grad_norm": 0.958442471726399, "learning_rate": 1.1247311735291255e-06, "loss": 0.4941, "step": 10485 }, { "epoch": 0.85, "grad_norm": 0.9091981400958642, "learning_rate": 1.1235185537472537e-06, "loss": 0.5158, "step": 10486 }, { "epoch": 0.85, "grad_norm": 0.9715206218825831, "learning_rate": 1.1223065490976692e-06, "loss": 0.5159, "step": 10487 }, { "epoch": 0.85, "grad_norm": 0.8629851659158863, "learning_rate": 1.1210951596643682e-06, "loss": 0.4602, "step": 10488 }, { "epoch": 0.85, "grad_norm": 0.9510654460373925, "learning_rate": 1.1198843855312958e-06, "loss": 0.5051, "step": 10489 }, { "epoch": 0.85, "grad_norm": 0.9736674492886307, "learning_rate": 1.1186742267823614e-06, "loss": 0.5066, "step": 10490 }, { "epoch": 0.85, "grad_norm": 1.041070879920255, "learning_rate": 1.1174646835014213e-06, "loss": 0.5287, "step": 10491 }, { "epoch": 0.85, "grad_norm": 0.9975479878026994, "learning_rate": 1.1162557557723042e-06, "loss": 0.4458, "step": 10492 }, { "epoch": 0.85, "grad_norm": 0.9844734373597187, "learning_rate": 1.1150474436787806e-06, "loss": 0.5017, "step": 10493 }, { "epoch": 0.85, "grad_norm": 0.9716899197564739, "learning_rate": 1.113839747304588e-06, "loss": 0.4888, "step": 10494 }, { "epoch": 0.85, "grad_norm": 0.9370907891923271, "learning_rate": 1.1126326667334196e-06, "loss": 0.4597, "step": 10495 }, { "epoch": 0.85, "grad_norm": 0.9241490676999198, "learning_rate": 1.1114262020489264e-06, "loss": 0.456, "step": 10496 }, { "epoch": 0.85, "grad_norm": 0.8374254769318923, "learning_rate": 1.1102203533347089e-06, "loss": 0.4738, "step": 10497 }, { "epoch": 0.85, "grad_norm": 0.8877474626590348, "learning_rate": 1.1090151206743393e-06, "loss": 0.4866, "step": 10498 }, { "epoch": 0.85, "grad_norm": 0.9475470780816515, "learning_rate": 1.1078105041513343e-06, "loss": 0.4918, "step": 10499 }, { "epoch": 0.85, "grad_norm": 0.9275589409212524, "learning_rate": 1.1066065038491735e-06, "loss": 0.4926, "step": 10500 }, { "epoch": 0.85, "grad_norm": 0.8795569509194492, "learning_rate": 1.1054031198512938e-06, "loss": 0.4436, "step": 10501 }, { "epoch": 0.85, "grad_norm": 0.8691998892476426, "learning_rate": 1.1042003522410882e-06, "loss": 0.4589, "step": 10502 }, { "epoch": 0.85, "grad_norm": 0.9802431120077391, "learning_rate": 1.102998201101908e-06, "loss": 0.4924, "step": 10503 }, { "epoch": 0.85, "grad_norm": 0.9531451083089679, "learning_rate": 1.1017966665170632e-06, "loss": 0.4823, "step": 10504 }, { "epoch": 0.85, "grad_norm": 0.8918469153039225, "learning_rate": 1.1005957485698115e-06, "loss": 0.4249, "step": 10505 }, { "epoch": 0.85, "grad_norm": 0.8277939680529098, "learning_rate": 1.0993954473433854e-06, "loss": 0.474, "step": 10506 }, { "epoch": 0.85, "grad_norm": 0.9217263067232511, "learning_rate": 1.0981957629209584e-06, "loss": 0.4761, "step": 10507 }, { "epoch": 0.85, "grad_norm": 0.8490029541683193, "learning_rate": 1.096996695385668e-06, "loss": 0.4925, "step": 10508 }, { "epoch": 0.85, "grad_norm": 0.9751680603334909, "learning_rate": 1.0957982448206105e-06, "loss": 0.5118, "step": 10509 }, { "epoch": 0.85, "grad_norm": 0.9325847643538916, "learning_rate": 1.0946004113088381e-06, "loss": 0.4822, "step": 10510 }, { "epoch": 0.85, "grad_norm": 0.8462218962246554, "learning_rate": 1.0934031949333546e-06, "loss": 0.4943, "step": 10511 }, { "epoch": 0.85, "grad_norm": 0.8824927665558341, "learning_rate": 1.0922065957771332e-06, "loss": 0.4399, "step": 10512 }, { "epoch": 0.85, "grad_norm": 0.9460646657620165, "learning_rate": 1.0910106139230913e-06, "loss": 0.4845, "step": 10513 }, { "epoch": 0.85, "grad_norm": 1.010466471062037, "learning_rate": 1.0898152494541124e-06, "loss": 0.5382, "step": 10514 }, { "epoch": 0.85, "grad_norm": 1.0629758525690058, "learning_rate": 1.0886205024530327e-06, "loss": 0.4585, "step": 10515 }, { "epoch": 0.85, "grad_norm": 0.9760959154365196, "learning_rate": 1.0874263730026502e-06, "loss": 0.4848, "step": 10516 }, { "epoch": 0.85, "grad_norm": 0.9352196398984858, "learning_rate": 1.0862328611857109e-06, "loss": 0.4978, "step": 10517 }, { "epoch": 0.85, "grad_norm": 0.9499481473322007, "learning_rate": 1.085039967084931e-06, "loss": 0.4877, "step": 10518 }, { "epoch": 0.85, "grad_norm": 0.8820445238550193, "learning_rate": 1.083847690782972e-06, "loss": 0.4984, "step": 10519 }, { "epoch": 0.86, "grad_norm": 0.8875572183370747, "learning_rate": 1.0826560323624591e-06, "loss": 0.4319, "step": 10520 }, { "epoch": 0.86, "grad_norm": 0.9909807008910421, "learning_rate": 1.081464991905975e-06, "loss": 0.4924, "step": 10521 }, { "epoch": 0.86, "grad_norm": 0.923524185885788, "learning_rate": 1.080274569496057e-06, "loss": 0.4533, "step": 10522 }, { "epoch": 0.86, "grad_norm": 0.933531295118856, "learning_rate": 1.079084765215196e-06, "loss": 0.4548, "step": 10523 }, { "epoch": 0.86, "grad_norm": 0.9891970105285252, "learning_rate": 1.0778955791458513e-06, "loss": 0.4882, "step": 10524 }, { "epoch": 0.86, "grad_norm": 0.9041791644495576, "learning_rate": 1.076707011370427e-06, "loss": 0.4721, "step": 10525 }, { "epoch": 0.86, "grad_norm": 0.8494593233155678, "learning_rate": 1.075519061971293e-06, "loss": 0.4562, "step": 10526 }, { "epoch": 0.86, "grad_norm": 0.9632722345680252, "learning_rate": 1.074331731030771e-06, "loss": 0.5007, "step": 10527 }, { "epoch": 0.86, "grad_norm": 0.9903562734018743, "learning_rate": 1.0731450186311454e-06, "loss": 0.5376, "step": 10528 }, { "epoch": 0.86, "grad_norm": 0.8708142022778427, "learning_rate": 1.0719589248546469e-06, "loss": 0.4663, "step": 10529 }, { "epoch": 0.86, "grad_norm": 0.9039712102106406, "learning_rate": 1.070773449783481e-06, "loss": 0.4545, "step": 10530 }, { "epoch": 0.86, "grad_norm": 0.9787575342806338, "learning_rate": 1.069588593499793e-06, "loss": 0.4533, "step": 10531 }, { "epoch": 0.86, "grad_norm": 0.906339996163714, "learning_rate": 1.0684043560856928e-06, "loss": 0.4621, "step": 10532 }, { "epoch": 0.86, "grad_norm": 0.9995109838408539, "learning_rate": 1.067220737623249e-06, "loss": 0.4951, "step": 10533 }, { "epoch": 0.86, "grad_norm": 0.9337158432010438, "learning_rate": 1.0660377381944876e-06, "loss": 0.4803, "step": 10534 }, { "epoch": 0.86, "grad_norm": 0.8939043705730141, "learning_rate": 1.0648553578813813e-06, "loss": 0.5323, "step": 10535 }, { "epoch": 0.86, "grad_norm": 0.9014274053264499, "learning_rate": 1.0636735967658785e-06, "loss": 0.4608, "step": 10536 }, { "epoch": 0.86, "grad_norm": 0.9596380740959803, "learning_rate": 1.0624924549298666e-06, "loss": 0.5336, "step": 10537 }, { "epoch": 0.86, "grad_norm": 0.9184750871004159, "learning_rate": 1.061311932455199e-06, "loss": 0.472, "step": 10538 }, { "epoch": 0.86, "grad_norm": 0.9820976331082859, "learning_rate": 1.0601320294236872e-06, "loss": 0.4911, "step": 10539 }, { "epoch": 0.86, "grad_norm": 0.9766999400801963, "learning_rate": 1.0589527459170967e-06, "loss": 0.5862, "step": 10540 }, { "epoch": 0.86, "grad_norm": 0.9341904993019802, "learning_rate": 1.0577740820171468e-06, "loss": 0.4902, "step": 10541 }, { "epoch": 0.86, "grad_norm": 0.8466239721311413, "learning_rate": 1.0565960378055263e-06, "loss": 0.4735, "step": 10542 }, { "epoch": 0.86, "grad_norm": 0.9136573595418811, "learning_rate": 1.0554186133638643e-06, "loss": 0.4461, "step": 10543 }, { "epoch": 0.86, "grad_norm": 0.8845945755514436, "learning_rate": 1.0542418087737593e-06, "loss": 0.4808, "step": 10544 }, { "epoch": 0.86, "grad_norm": 0.9664680081992767, "learning_rate": 1.0530656241167613e-06, "loss": 0.4904, "step": 10545 }, { "epoch": 0.86, "grad_norm": 0.9369665727003559, "learning_rate": 1.051890059474382e-06, "loss": 0.4683, "step": 10546 }, { "epoch": 0.86, "grad_norm": 0.9122493649287006, "learning_rate": 1.0507151149280804e-06, "loss": 0.4651, "step": 10547 }, { "epoch": 0.86, "grad_norm": 0.9006968892963719, "learning_rate": 1.049540790559288e-06, "loss": 0.5234, "step": 10548 }, { "epoch": 0.86, "grad_norm": 0.894069435907734, "learning_rate": 1.0483670864493777e-06, "loss": 0.4758, "step": 10549 }, { "epoch": 0.86, "grad_norm": 0.9225468427309412, "learning_rate": 1.0471940026796878e-06, "loss": 0.3915, "step": 10550 }, { "epoch": 0.86, "grad_norm": 0.9415756739526433, "learning_rate": 1.046021539331512e-06, "loss": 0.5204, "step": 10551 }, { "epoch": 0.86, "grad_norm": 0.9793688442119916, "learning_rate": 1.0448496964861044e-06, "loss": 0.443, "step": 10552 }, { "epoch": 0.86, "grad_norm": 0.9143015605359969, "learning_rate": 1.0436784742246652e-06, "loss": 0.44, "step": 10553 }, { "epoch": 0.86, "grad_norm": 0.9957353280803909, "learning_rate": 1.0425078726283667e-06, "loss": 0.5254, "step": 10554 }, { "epoch": 0.86, "grad_norm": 1.095502882617657, "learning_rate": 1.0413378917783267e-06, "loss": 0.5872, "step": 10555 }, { "epoch": 0.86, "grad_norm": 1.018574131899571, "learning_rate": 1.0401685317556232e-06, "loss": 0.5307, "step": 10556 }, { "epoch": 0.86, "grad_norm": 1.0398118534675804, "learning_rate": 1.0389997926412942e-06, "loss": 0.5394, "step": 10557 }, { "epoch": 0.86, "grad_norm": 0.9024083075890255, "learning_rate": 1.037831674516332e-06, "loss": 0.4886, "step": 10558 }, { "epoch": 0.86, "grad_norm": 0.9127674671846514, "learning_rate": 1.0366641774616826e-06, "loss": 0.504, "step": 10559 }, { "epoch": 0.86, "grad_norm": 0.8940640858916057, "learning_rate": 1.0354973015582582e-06, "loss": 0.4301, "step": 10560 }, { "epoch": 0.86, "grad_norm": 0.9477172972392047, "learning_rate": 1.0343310468869171e-06, "loss": 0.4708, "step": 10561 }, { "epoch": 0.86, "grad_norm": 0.9215397595796041, "learning_rate": 1.033165413528483e-06, "loss": 0.4648, "step": 10562 }, { "epoch": 0.86, "grad_norm": 0.9870791421156618, "learning_rate": 1.0320004015637319e-06, "loss": 0.4537, "step": 10563 }, { "epoch": 0.86, "grad_norm": 0.9182962662979826, "learning_rate": 1.0308360110733994e-06, "loss": 0.4726, "step": 10564 }, { "epoch": 0.86, "grad_norm": 0.9498092146289201, "learning_rate": 1.0296722421381733e-06, "loss": 0.4866, "step": 10565 }, { "epoch": 0.86, "grad_norm": 1.003002994594343, "learning_rate": 1.0285090948387065e-06, "loss": 0.4608, "step": 10566 }, { "epoch": 0.86, "grad_norm": 0.8831435494424852, "learning_rate": 1.0273465692556006e-06, "loss": 0.4632, "step": 10567 }, { "epoch": 0.86, "grad_norm": 0.9964518745942484, "learning_rate": 1.0261846654694184e-06, "loss": 0.4706, "step": 10568 }, { "epoch": 0.86, "grad_norm": 0.8837425792652869, "learning_rate": 1.0250233835606805e-06, "loss": 0.4616, "step": 10569 }, { "epoch": 0.86, "grad_norm": 0.8938867215972366, "learning_rate": 1.0238627236098619e-06, "loss": 0.4166, "step": 10570 }, { "epoch": 0.86, "grad_norm": 0.8980332943128998, "learning_rate": 1.0227026856973909e-06, "loss": 0.5217, "step": 10571 }, { "epoch": 0.86, "grad_norm": 0.9194033443879248, "learning_rate": 1.0215432699036643e-06, "loss": 0.4979, "step": 10572 }, { "epoch": 0.86, "grad_norm": 1.0607340177473767, "learning_rate": 1.0203844763090243e-06, "loss": 0.4971, "step": 10573 }, { "epoch": 0.86, "grad_norm": 0.7714088041106679, "learning_rate": 1.0192263049937745e-06, "loss": 0.4175, "step": 10574 }, { "epoch": 0.86, "grad_norm": 0.8751825274589954, "learning_rate": 1.0180687560381764e-06, "loss": 0.4855, "step": 10575 }, { "epoch": 0.86, "grad_norm": 0.9285605238440512, "learning_rate": 1.0169118295224488e-06, "loss": 0.4911, "step": 10576 }, { "epoch": 0.86, "grad_norm": 0.8986266092966625, "learning_rate": 1.0157555255267581e-06, "loss": 0.4886, "step": 10577 }, { "epoch": 0.86, "grad_norm": 1.0638855808898067, "learning_rate": 1.0145998441312455e-06, "loss": 0.5261, "step": 10578 }, { "epoch": 0.86, "grad_norm": 0.9644109630050147, "learning_rate": 1.0134447854159913e-06, "loss": 0.4931, "step": 10579 }, { "epoch": 0.86, "grad_norm": 0.9806403030365124, "learning_rate": 1.0122903494610426e-06, "loss": 0.4277, "step": 10580 }, { "epoch": 0.86, "grad_norm": 0.9415593828325067, "learning_rate": 1.011136536346401e-06, "loss": 0.4543, "step": 10581 }, { "epoch": 0.86, "grad_norm": 0.8746620984571902, "learning_rate": 1.009983346152026e-06, "loss": 0.4319, "step": 10582 }, { "epoch": 0.86, "grad_norm": 0.9425651262758186, "learning_rate": 1.0088307789578266e-06, "loss": 0.4588, "step": 10583 }, { "epoch": 0.86, "grad_norm": 1.001027698974047, "learning_rate": 1.0076788348436827e-06, "loss": 0.5128, "step": 10584 }, { "epoch": 0.86, "grad_norm": 0.9521648979712655, "learning_rate": 1.0065275138894182e-06, "loss": 0.4982, "step": 10585 }, { "epoch": 0.86, "grad_norm": 0.8767109127856868, "learning_rate": 1.00537681617482e-06, "loss": 0.4291, "step": 10586 }, { "epoch": 0.86, "grad_norm": 0.9233767299328738, "learning_rate": 1.0042267417796292e-06, "loss": 0.4564, "step": 10587 }, { "epoch": 0.86, "grad_norm": 0.8818885464968629, "learning_rate": 1.0030772907835484e-06, "loss": 0.4689, "step": 10588 }, { "epoch": 0.86, "grad_norm": 0.8781990150711255, "learning_rate": 1.0019284632662274e-06, "loss": 0.4965, "step": 10589 }, { "epoch": 0.86, "grad_norm": 0.9439405869978958, "learning_rate": 1.000780259307287e-06, "loss": 0.5321, "step": 10590 }, { "epoch": 0.86, "grad_norm": 1.0273924686655207, "learning_rate": 9.996326789862897e-07, "loss": 0.5068, "step": 10591 }, { "epoch": 0.86, "grad_norm": 0.9148259332012102, "learning_rate": 9.984857223827637e-07, "loss": 0.5091, "step": 10592 }, { "epoch": 0.86, "grad_norm": 0.9510707852522677, "learning_rate": 9.97339389576194e-07, "loss": 0.4678, "step": 10593 }, { "epoch": 0.86, "grad_norm": 0.9856242720669215, "learning_rate": 9.961936806460194e-07, "loss": 0.4869, "step": 10594 }, { "epoch": 0.86, "grad_norm": 0.9662211813822499, "learning_rate": 9.950485956716349e-07, "loss": 0.478, "step": 10595 }, { "epoch": 0.86, "grad_norm": 0.9995700843908029, "learning_rate": 9.939041347323986e-07, "loss": 0.4696, "step": 10596 }, { "epoch": 0.86, "grad_norm": 0.9763678246865335, "learning_rate": 9.927602979076146e-07, "loss": 0.496, "step": 10597 }, { "epoch": 0.86, "grad_norm": 0.9359566163419666, "learning_rate": 9.91617085276554e-07, "loss": 0.5372, "step": 10598 }, { "epoch": 0.86, "grad_norm": 0.9833137658610944, "learning_rate": 9.904744969184377e-07, "loss": 0.5355, "step": 10599 }, { "epoch": 0.86, "grad_norm": 0.8636732279612003, "learning_rate": 9.89332532912447e-07, "loss": 0.4137, "step": 10600 }, { "epoch": 0.86, "grad_norm": 1.017749469597937, "learning_rate": 9.881911933377197e-07, "loss": 0.5332, "step": 10601 }, { "epoch": 0.86, "grad_norm": 0.9590708469565017, "learning_rate": 9.870504782733515e-07, "loss": 0.4546, "step": 10602 }, { "epoch": 0.86, "grad_norm": 0.875187921212929, "learning_rate": 9.85910387798389e-07, "loss": 0.4611, "step": 10603 }, { "epoch": 0.86, "grad_norm": 0.9268857981738736, "learning_rate": 9.8477092199184e-07, "loss": 0.4842, "step": 10604 }, { "epoch": 0.86, "grad_norm": 0.9375699374447519, "learning_rate": 9.836320809326704e-07, "loss": 0.524, "step": 10605 }, { "epoch": 0.86, "grad_norm": 1.0500442909628693, "learning_rate": 9.824938646998005e-07, "loss": 0.5259, "step": 10606 }, { "epoch": 0.86, "grad_norm": 0.9722789226492576, "learning_rate": 9.813562733721072e-07, "loss": 0.4629, "step": 10607 }, { "epoch": 0.86, "grad_norm": 0.8146063568062981, "learning_rate": 9.80219307028426e-07, "loss": 0.4079, "step": 10608 }, { "epoch": 0.86, "grad_norm": 1.0053294621074196, "learning_rate": 9.790829657475443e-07, "loss": 0.5076, "step": 10609 }, { "epoch": 0.86, "grad_norm": 0.9424850866724473, "learning_rate": 9.77947249608211e-07, "loss": 0.4957, "step": 10610 }, { "epoch": 0.86, "grad_norm": 0.9350508643965163, "learning_rate": 9.768121586891322e-07, "loss": 0.5018, "step": 10611 }, { "epoch": 0.86, "grad_norm": 0.9163531937596598, "learning_rate": 9.75677693068966e-07, "loss": 0.4881, "step": 10612 }, { "epoch": 0.86, "grad_norm": 1.0296132540056735, "learning_rate": 9.745438528263319e-07, "loss": 0.5175, "step": 10613 }, { "epoch": 0.86, "grad_norm": 0.907944549759089, "learning_rate": 9.734106380398022e-07, "loss": 0.4734, "step": 10614 }, { "epoch": 0.86, "grad_norm": 0.9589597429120368, "learning_rate": 9.722780487879124e-07, "loss": 0.4598, "step": 10615 }, { "epoch": 0.86, "grad_norm": 0.9015626629553082, "learning_rate": 9.711460851491427e-07, "loss": 0.4868, "step": 10616 }, { "epoch": 0.86, "grad_norm": 0.9564663604750182, "learning_rate": 9.700147472019416e-07, "loss": 0.5047, "step": 10617 }, { "epoch": 0.86, "grad_norm": 0.9176161448635084, "learning_rate": 9.688840350247085e-07, "loss": 0.464, "step": 10618 }, { "epoch": 0.86, "grad_norm": 0.9724339749225807, "learning_rate": 9.67753948695801e-07, "loss": 0.4263, "step": 10619 }, { "epoch": 0.86, "grad_norm": 1.0027820480186806, "learning_rate": 9.666244882935339e-07, "loss": 0.5469, "step": 10620 }, { "epoch": 0.86, "grad_norm": 1.0048726193719604, "learning_rate": 9.65495653896179e-07, "loss": 0.508, "step": 10621 }, { "epoch": 0.86, "grad_norm": 1.0317181478099673, "learning_rate": 9.643674455819597e-07, "loss": 0.4928, "step": 10622 }, { "epoch": 0.86, "grad_norm": 0.8361262764620936, "learning_rate": 9.632398634290607e-07, "loss": 0.4458, "step": 10623 }, { "epoch": 0.86, "grad_norm": 1.0278494857565195, "learning_rate": 9.621129075156256e-07, "loss": 0.4697, "step": 10624 }, { "epoch": 0.86, "grad_norm": 1.0175732831964581, "learning_rate": 9.60986577919748e-07, "loss": 0.5281, "step": 10625 }, { "epoch": 0.86, "grad_norm": 0.8834953084316942, "learning_rate": 9.598608747194826e-07, "loss": 0.4439, "step": 10626 }, { "epoch": 0.86, "grad_norm": 0.9985919876888131, "learning_rate": 9.587357979928414e-07, "loss": 0.5225, "step": 10627 }, { "epoch": 0.86, "grad_norm": 0.9658488397988649, "learning_rate": 9.576113478177905e-07, "loss": 0.4927, "step": 10628 }, { "epoch": 0.86, "grad_norm": 0.9535489105894337, "learning_rate": 9.564875242722516e-07, "loss": 0.472, "step": 10629 }, { "epoch": 0.86, "grad_norm": 1.0003753547033694, "learning_rate": 9.55364327434105e-07, "loss": 0.4753, "step": 10630 }, { "epoch": 0.86, "grad_norm": 0.8012825219021779, "learning_rate": 9.54241757381188e-07, "loss": 0.4108, "step": 10631 }, { "epoch": 0.86, "grad_norm": 1.0029078064696209, "learning_rate": 9.531198141912945e-07, "loss": 0.5254, "step": 10632 }, { "epoch": 0.86, "grad_norm": 1.0070371903454114, "learning_rate": 9.519984979421725e-07, "loss": 0.5345, "step": 10633 }, { "epoch": 0.86, "grad_norm": 0.9225172268554366, "learning_rate": 9.508778087115289e-07, "loss": 0.4823, "step": 10634 }, { "epoch": 0.86, "grad_norm": 0.9962315849223458, "learning_rate": 9.497577465770292e-07, "loss": 0.5062, "step": 10635 }, { "epoch": 0.86, "grad_norm": 0.9698496145273163, "learning_rate": 9.486383116162878e-07, "loss": 0.4988, "step": 10636 }, { "epoch": 0.86, "grad_norm": 0.9541529632180376, "learning_rate": 9.475195039068818e-07, "loss": 0.4303, "step": 10637 }, { "epoch": 0.86, "grad_norm": 0.9132257518289799, "learning_rate": 9.464013235263458e-07, "loss": 0.46, "step": 10638 }, { "epoch": 0.86, "grad_norm": 0.9288673980028853, "learning_rate": 9.452837705521678e-07, "loss": 0.4592, "step": 10639 }, { "epoch": 0.86, "grad_norm": 0.8436898988686481, "learning_rate": 9.441668450617924e-07, "loss": 0.4092, "step": 10640 }, { "epoch": 0.86, "grad_norm": 0.9952227583228156, "learning_rate": 9.430505471326246e-07, "loss": 0.503, "step": 10641 }, { "epoch": 0.86, "grad_norm": 0.9311932065408992, "learning_rate": 9.419348768420178e-07, "loss": 0.4727, "step": 10642 }, { "epoch": 0.87, "grad_norm": 0.9032988698297807, "learning_rate": 9.408198342672903e-07, "loss": 0.4627, "step": 10643 }, { "epoch": 0.87, "grad_norm": 0.8884037877380864, "learning_rate": 9.397054194857125e-07, "loss": 0.4278, "step": 10644 }, { "epoch": 0.87, "grad_norm": 0.923458523901903, "learning_rate": 9.385916325745115e-07, "loss": 0.4998, "step": 10645 }, { "epoch": 0.87, "grad_norm": 0.8204070866581368, "learning_rate": 9.374784736108744e-07, "loss": 0.4271, "step": 10646 }, { "epoch": 0.87, "grad_norm": 0.9939294169077609, "learning_rate": 9.363659426719418e-07, "loss": 0.5363, "step": 10647 }, { "epoch": 0.87, "grad_norm": 0.869244868759126, "learning_rate": 9.352540398348087e-07, "loss": 0.4782, "step": 10648 }, { "epoch": 0.87, "grad_norm": 1.0670999004444721, "learning_rate": 9.3414276517653e-07, "loss": 0.4762, "step": 10649 }, { "epoch": 0.87, "grad_norm": 0.9443594709032453, "learning_rate": 9.330321187741154e-07, "loss": 0.511, "step": 10650 }, { "epoch": 0.87, "grad_norm": 0.8889307971922469, "learning_rate": 9.319221007045331e-07, "loss": 0.5285, "step": 10651 }, { "epoch": 0.87, "grad_norm": 0.8981113533125855, "learning_rate": 9.308127110447063e-07, "loss": 0.4873, "step": 10652 }, { "epoch": 0.87, "grad_norm": 0.9017810176305928, "learning_rate": 9.297039498715155e-07, "loss": 0.4455, "step": 10653 }, { "epoch": 0.87, "grad_norm": 1.0546082206019811, "learning_rate": 9.285958172617926e-07, "loss": 0.5087, "step": 10654 }, { "epoch": 0.87, "grad_norm": 1.0022472235181799, "learning_rate": 9.274883132923362e-07, "loss": 0.4739, "step": 10655 }, { "epoch": 0.87, "grad_norm": 0.9002821987327692, "learning_rate": 9.263814380398917e-07, "loss": 0.4679, "step": 10656 }, { "epoch": 0.87, "grad_norm": 1.0302589475729587, "learning_rate": 9.252751915811642e-07, "loss": 0.5019, "step": 10657 }, { "epoch": 0.87, "grad_norm": 0.9775376726614513, "learning_rate": 9.241695739928169e-07, "loss": 0.5532, "step": 10658 }, { "epoch": 0.87, "grad_norm": 1.0537623712647077, "learning_rate": 9.230645853514697e-07, "loss": 0.4992, "step": 10659 }, { "epoch": 0.87, "grad_norm": 0.8776796418795295, "learning_rate": 9.219602257336913e-07, "loss": 0.4862, "step": 10660 }, { "epoch": 0.87, "grad_norm": 0.962781037418971, "learning_rate": 9.208564952160215e-07, "loss": 0.493, "step": 10661 }, { "epoch": 0.87, "grad_norm": 1.0364614771872267, "learning_rate": 9.197533938749414e-07, "loss": 0.5501, "step": 10662 }, { "epoch": 0.87, "grad_norm": 0.9556984026805189, "learning_rate": 9.186509217868966e-07, "loss": 0.4743, "step": 10663 }, { "epoch": 0.87, "grad_norm": 0.9616683878097968, "learning_rate": 9.175490790282882e-07, "loss": 0.4714, "step": 10664 }, { "epoch": 0.87, "grad_norm": 0.9021918453873632, "learning_rate": 9.164478656754739e-07, "loss": 0.4759, "step": 10665 }, { "epoch": 0.87, "grad_norm": 0.9790575486335737, "learning_rate": 9.153472818047627e-07, "loss": 0.5063, "step": 10666 }, { "epoch": 0.87, "grad_norm": 0.8468403295990357, "learning_rate": 9.142473274924291e-07, "loss": 0.4937, "step": 10667 }, { "epoch": 0.87, "grad_norm": 0.9145902214127684, "learning_rate": 9.131480028146955e-07, "loss": 0.4602, "step": 10668 }, { "epoch": 0.87, "grad_norm": 0.9168147252846369, "learning_rate": 9.120493078477455e-07, "loss": 0.4463, "step": 10669 }, { "epoch": 0.87, "grad_norm": 0.8323131927350101, "learning_rate": 9.109512426677169e-07, "loss": 0.4231, "step": 10670 }, { "epoch": 0.87, "grad_norm": 0.9473418109738616, "learning_rate": 9.09853807350708e-07, "loss": 0.5073, "step": 10671 }, { "epoch": 0.87, "grad_norm": 0.9973434046450055, "learning_rate": 9.08757001972762e-07, "loss": 0.5461, "step": 10672 }, { "epoch": 0.87, "grad_norm": 0.9805885913133771, "learning_rate": 9.076608266098974e-07, "loss": 0.4762, "step": 10673 }, { "epoch": 0.87, "grad_norm": 0.9568985687028682, "learning_rate": 9.065652813380699e-07, "loss": 0.5519, "step": 10674 }, { "epoch": 0.87, "grad_norm": 0.92717019860699, "learning_rate": 9.054703662332021e-07, "loss": 0.477, "step": 10675 }, { "epoch": 0.87, "grad_norm": 0.8976396587311715, "learning_rate": 9.04376081371171e-07, "loss": 0.4784, "step": 10676 }, { "epoch": 0.87, "grad_norm": 0.8976010389778204, "learning_rate": 9.032824268278129e-07, "loss": 0.411, "step": 10677 }, { "epoch": 0.87, "grad_norm": 0.9962401365447401, "learning_rate": 9.021894026789091e-07, "loss": 0.5412, "step": 10678 }, { "epoch": 0.87, "grad_norm": 0.9492064342678247, "learning_rate": 9.010970090002135e-07, "loss": 0.4523, "step": 10679 }, { "epoch": 0.87, "grad_norm": 0.8457178835348091, "learning_rate": 9.000052458674224e-07, "loss": 0.4362, "step": 10680 }, { "epoch": 0.87, "grad_norm": 1.1243935410473074, "learning_rate": 8.989141133561974e-07, "loss": 0.5494, "step": 10681 }, { "epoch": 0.87, "grad_norm": 0.9755791044220888, "learning_rate": 8.978236115421501e-07, "loss": 0.5408, "step": 10682 }, { "epoch": 0.87, "grad_norm": 0.9845268895265248, "learning_rate": 8.967337405008558e-07, "loss": 0.4561, "step": 10683 }, { "epoch": 0.87, "grad_norm": 0.8520746587525816, "learning_rate": 8.956445003078351e-07, "loss": 0.4592, "step": 10684 }, { "epoch": 0.87, "grad_norm": 0.9837026512736496, "learning_rate": 8.945558910385776e-07, "loss": 0.4943, "step": 10685 }, { "epoch": 0.87, "grad_norm": 0.9434157332007918, "learning_rate": 8.934679127685197e-07, "loss": 0.4327, "step": 10686 }, { "epoch": 0.87, "grad_norm": 1.0156228938382437, "learning_rate": 8.923805655730577e-07, "loss": 0.5323, "step": 10687 }, { "epoch": 0.87, "grad_norm": 1.0581383360974685, "learning_rate": 8.912938495275436e-07, "loss": 0.4858, "step": 10688 }, { "epoch": 0.87, "grad_norm": 0.989591810706298, "learning_rate": 8.902077647072883e-07, "loss": 0.4476, "step": 10689 }, { "epoch": 0.87, "grad_norm": 0.9795339881955004, "learning_rate": 8.891223111875513e-07, "loss": 0.438, "step": 10690 }, { "epoch": 0.87, "grad_norm": 0.8201781263068513, "learning_rate": 8.880374890435595e-07, "loss": 0.4644, "step": 10691 }, { "epoch": 0.87, "grad_norm": 0.9322419967945786, "learning_rate": 8.869532983504859e-07, "loss": 0.4709, "step": 10692 }, { "epoch": 0.87, "grad_norm": 0.8780674815289228, "learning_rate": 8.858697391834658e-07, "loss": 0.4736, "step": 10693 }, { "epoch": 0.87, "grad_norm": 1.0073703685587576, "learning_rate": 8.847868116175883e-07, "loss": 0.5376, "step": 10694 }, { "epoch": 0.87, "grad_norm": 0.8760429696909396, "learning_rate": 8.837045157279023e-07, "loss": 0.4444, "step": 10695 }, { "epoch": 0.87, "grad_norm": 0.8900133031097341, "learning_rate": 8.82622851589402e-07, "loss": 0.516, "step": 10696 }, { "epoch": 0.87, "grad_norm": 1.0236461508169916, "learning_rate": 8.815418192770553e-07, "loss": 0.5519, "step": 10697 }, { "epoch": 0.87, "grad_norm": 0.9442346465952964, "learning_rate": 8.804614188657712e-07, "loss": 0.5174, "step": 10698 }, { "epoch": 0.87, "grad_norm": 1.0651480720040911, "learning_rate": 8.793816504304209e-07, "loss": 0.4848, "step": 10699 }, { "epoch": 0.87, "grad_norm": 0.9156691302175118, "learning_rate": 8.783025140458334e-07, "loss": 0.4594, "step": 10700 }, { "epoch": 0.87, "grad_norm": 0.9191454740517091, "learning_rate": 8.772240097867912e-07, "loss": 0.4513, "step": 10701 }, { "epoch": 0.87, "grad_norm": 0.842071309153491, "learning_rate": 8.761461377280311e-07, "loss": 0.4486, "step": 10702 }, { "epoch": 0.87, "grad_norm": 1.001306254241895, "learning_rate": 8.750688979442534e-07, "loss": 0.5239, "step": 10703 }, { "epoch": 0.87, "grad_norm": 0.8248599393774404, "learning_rate": 8.739922905101051e-07, "loss": 0.4204, "step": 10704 }, { "epoch": 0.87, "grad_norm": 0.9491459913050245, "learning_rate": 8.729163155001975e-07, "loss": 0.5073, "step": 10705 }, { "epoch": 0.87, "grad_norm": 1.2972173703532743, "learning_rate": 8.71840972989092e-07, "loss": 0.4949, "step": 10706 }, { "epoch": 0.87, "grad_norm": 0.8906322183288898, "learning_rate": 8.707662630513136e-07, "loss": 0.4899, "step": 10707 }, { "epoch": 0.87, "grad_norm": 0.976107320047754, "learning_rate": 8.696921857613317e-07, "loss": 0.521, "step": 10708 }, { "epoch": 0.87, "grad_norm": 0.9772816546434508, "learning_rate": 8.686187411935854e-07, "loss": 0.4138, "step": 10709 }, { "epoch": 0.87, "grad_norm": 0.9880903316140534, "learning_rate": 8.675459294224597e-07, "loss": 0.5624, "step": 10710 }, { "epoch": 0.87, "grad_norm": 0.9692471090732485, "learning_rate": 8.664737505223009e-07, "loss": 0.4838, "step": 10711 }, { "epoch": 0.87, "grad_norm": 0.9192191189203573, "learning_rate": 8.654022045674093e-07, "loss": 0.4744, "step": 10712 }, { "epoch": 0.87, "grad_norm": 0.8948526661133622, "learning_rate": 8.643312916320446e-07, "loss": 0.4988, "step": 10713 }, { "epoch": 0.87, "grad_norm": 0.9549774148056884, "learning_rate": 8.632610117904139e-07, "loss": 0.4531, "step": 10714 }, { "epoch": 0.87, "grad_norm": 0.8601975926054979, "learning_rate": 8.621913651166947e-07, "loss": 0.4734, "step": 10715 }, { "epoch": 0.87, "grad_norm": 0.9024200533857866, "learning_rate": 8.611223516850076e-07, "loss": 0.4966, "step": 10716 }, { "epoch": 0.87, "grad_norm": 0.8210537135329794, "learning_rate": 8.600539715694344e-07, "loss": 0.4491, "step": 10717 }, { "epoch": 0.87, "grad_norm": 1.0435012893610338, "learning_rate": 8.58986224844014e-07, "loss": 0.492, "step": 10718 }, { "epoch": 0.87, "grad_norm": 0.905447549472864, "learning_rate": 8.579191115827423e-07, "loss": 0.4656, "step": 10719 }, { "epoch": 0.87, "grad_norm": 0.9702528122983963, "learning_rate": 8.568526318595638e-07, "loss": 0.5027, "step": 10720 }, { "epoch": 0.87, "grad_norm": 0.9028108539852475, "learning_rate": 8.557867857483915e-07, "loss": 0.4443, "step": 10721 }, { "epoch": 0.87, "grad_norm": 0.9828446495002682, "learning_rate": 8.547215733230818e-07, "loss": 0.4912, "step": 10722 }, { "epoch": 0.87, "grad_norm": 1.1314179435472174, "learning_rate": 8.536569946574546e-07, "loss": 0.5257, "step": 10723 }, { "epoch": 0.87, "grad_norm": 1.0384549778518128, "learning_rate": 8.525930498252855e-07, "loss": 0.5085, "step": 10724 }, { "epoch": 0.87, "grad_norm": 1.0259094490019722, "learning_rate": 8.515297389003063e-07, "loss": 0.4782, "step": 10725 }, { "epoch": 0.87, "grad_norm": 0.8670612352353387, "learning_rate": 8.504670619561983e-07, "loss": 0.4417, "step": 10726 }, { "epoch": 0.87, "grad_norm": 0.9064697350293002, "learning_rate": 8.494050190666103e-07, "loss": 0.4855, "step": 10727 }, { "epoch": 0.87, "grad_norm": 0.9116173287893347, "learning_rate": 8.483436103051357e-07, "loss": 0.4691, "step": 10728 }, { "epoch": 0.87, "grad_norm": 0.9671185448154255, "learning_rate": 8.472828357453323e-07, "loss": 0.5249, "step": 10729 }, { "epoch": 0.87, "grad_norm": 0.9073917664980375, "learning_rate": 8.4622269546071e-07, "loss": 0.4435, "step": 10730 }, { "epoch": 0.87, "grad_norm": 0.9893708961286303, "learning_rate": 8.45163189524737e-07, "loss": 0.5178, "step": 10731 }, { "epoch": 0.87, "grad_norm": 0.8057742513191817, "learning_rate": 8.441043180108299e-07, "loss": 0.4719, "step": 10732 }, { "epoch": 0.87, "grad_norm": 0.9424113091164239, "learning_rate": 8.430460809923768e-07, "loss": 0.4641, "step": 10733 }, { "epoch": 0.87, "grad_norm": 1.0577210260964864, "learning_rate": 8.419884785427068e-07, "loss": 0.5043, "step": 10734 }, { "epoch": 0.87, "grad_norm": 1.017229119006042, "learning_rate": 8.409315107351112e-07, "loss": 0.4864, "step": 10735 }, { "epoch": 0.87, "grad_norm": 0.9160592998089141, "learning_rate": 8.398751776428393e-07, "loss": 0.483, "step": 10736 }, { "epoch": 0.87, "grad_norm": 0.8600297093969423, "learning_rate": 8.388194793390924e-07, "loss": 0.4128, "step": 10737 }, { "epoch": 0.87, "grad_norm": 0.8505559599580625, "learning_rate": 8.377644158970277e-07, "loss": 0.4854, "step": 10738 }, { "epoch": 0.87, "grad_norm": 0.9108433349901007, "learning_rate": 8.367099873897644e-07, "loss": 0.541, "step": 10739 }, { "epoch": 0.87, "grad_norm": 0.9776569227810032, "learning_rate": 8.356561938903707e-07, "loss": 0.5111, "step": 10740 }, { "epoch": 0.87, "grad_norm": 0.9698205117012005, "learning_rate": 8.346030354718727e-07, "loss": 0.4707, "step": 10741 }, { "epoch": 0.87, "grad_norm": 0.9094940212730316, "learning_rate": 8.335505122072551e-07, "loss": 0.4906, "step": 10742 }, { "epoch": 0.87, "grad_norm": 0.9679125119758466, "learning_rate": 8.324986241694566e-07, "loss": 0.4892, "step": 10743 }, { "epoch": 0.87, "grad_norm": 0.9341757886112941, "learning_rate": 8.31447371431372e-07, "loss": 0.4337, "step": 10744 }, { "epoch": 0.87, "grad_norm": 0.8865816044503693, "learning_rate": 8.303967540658531e-07, "loss": 0.4954, "step": 10745 }, { "epoch": 0.87, "grad_norm": 1.0152423724483588, "learning_rate": 8.293467721457038e-07, "loss": 0.5048, "step": 10746 }, { "epoch": 0.87, "grad_norm": 1.0778633743848953, "learning_rate": 8.282974257436904e-07, "loss": 0.4798, "step": 10747 }, { "epoch": 0.87, "grad_norm": 0.9664900899900147, "learning_rate": 8.272487149325281e-07, "loss": 0.504, "step": 10748 }, { "epoch": 0.87, "grad_norm": 0.8752434555018468, "learning_rate": 8.262006397848954e-07, "loss": 0.4622, "step": 10749 }, { "epoch": 0.87, "grad_norm": 0.9312621506287185, "learning_rate": 8.251532003734197e-07, "loss": 0.4799, "step": 10750 }, { "epoch": 0.87, "grad_norm": 0.9890867364594251, "learning_rate": 8.24106396770692e-07, "loss": 0.4658, "step": 10751 }, { "epoch": 0.87, "grad_norm": 0.7633149847762869, "learning_rate": 8.230602290492485e-07, "loss": 0.4521, "step": 10752 }, { "epoch": 0.87, "grad_norm": 0.8720687155099099, "learning_rate": 8.220146972815946e-07, "loss": 0.4463, "step": 10753 }, { "epoch": 0.87, "grad_norm": 0.9359147457315855, "learning_rate": 8.209698015401791e-07, "loss": 0.4866, "step": 10754 }, { "epoch": 0.87, "grad_norm": 0.958715743537337, "learning_rate": 8.19925541897415e-07, "loss": 0.5369, "step": 10755 }, { "epoch": 0.87, "grad_norm": 1.0407037298595796, "learning_rate": 8.188819184256669e-07, "loss": 0.5594, "step": 10756 }, { "epoch": 0.87, "grad_norm": 0.9444534547445447, "learning_rate": 8.178389311972612e-07, "loss": 0.5005, "step": 10757 }, { "epoch": 0.87, "grad_norm": 0.8801319988525427, "learning_rate": 8.167965802844691e-07, "loss": 0.4225, "step": 10758 }, { "epoch": 0.87, "grad_norm": 0.9247134193102192, "learning_rate": 8.157548657595327e-07, "loss": 0.4827, "step": 10759 }, { "epoch": 0.87, "grad_norm": 0.9280867544437789, "learning_rate": 8.147137876946354e-07, "loss": 0.427, "step": 10760 }, { "epoch": 0.87, "grad_norm": 0.9742366224152857, "learning_rate": 8.136733461619251e-07, "loss": 0.4937, "step": 10761 }, { "epoch": 0.87, "grad_norm": 0.9089698408617726, "learning_rate": 8.12633541233504e-07, "loss": 0.4541, "step": 10762 }, { "epoch": 0.87, "grad_norm": 0.9281472717487014, "learning_rate": 8.11594372981429e-07, "loss": 0.4826, "step": 10763 }, { "epoch": 0.87, "grad_norm": 0.8543302871144066, "learning_rate": 8.105558414777137e-07, "loss": 0.4622, "step": 10764 }, { "epoch": 0.87, "grad_norm": 0.8930667784194314, "learning_rate": 8.095179467943293e-07, "loss": 0.4421, "step": 10765 }, { "epoch": 0.88, "grad_norm": 1.0264206424491298, "learning_rate": 8.084806890031982e-07, "loss": 0.5471, "step": 10766 }, { "epoch": 0.88, "grad_norm": 0.8440403105832182, "learning_rate": 8.074440681762019e-07, "loss": 0.4352, "step": 10767 }, { "epoch": 0.88, "grad_norm": 0.9838278479205321, "learning_rate": 8.064080843851785e-07, "loss": 0.5155, "step": 10768 }, { "epoch": 0.88, "grad_norm": 0.9654762427606757, "learning_rate": 8.053727377019194e-07, "loss": 0.5047, "step": 10769 }, { "epoch": 0.88, "grad_norm": 0.9212823039571989, "learning_rate": 8.043380281981739e-07, "loss": 0.4555, "step": 10770 }, { "epoch": 0.88, "grad_norm": 0.9962002914394492, "learning_rate": 8.03303955945649e-07, "loss": 0.4671, "step": 10771 }, { "epoch": 0.88, "grad_norm": 0.9551262436521267, "learning_rate": 8.022705210159997e-07, "loss": 0.4842, "step": 10772 }, { "epoch": 0.88, "grad_norm": 1.0214526838284614, "learning_rate": 8.012377234808455e-07, "loss": 0.4988, "step": 10773 }, { "epoch": 0.88, "grad_norm": 0.8715570587673264, "learning_rate": 8.002055634117578e-07, "loss": 0.4734, "step": 10774 }, { "epoch": 0.88, "grad_norm": 0.9266589001004784, "learning_rate": 7.991740408802651e-07, "loss": 0.5139, "step": 10775 }, { "epoch": 0.88, "grad_norm": 1.0169422673348818, "learning_rate": 7.98143155957849e-07, "loss": 0.4969, "step": 10776 }, { "epoch": 0.88, "grad_norm": 1.0076439713948204, "learning_rate": 7.971129087159524e-07, "loss": 0.5081, "step": 10777 }, { "epoch": 0.88, "grad_norm": 0.9002053839249186, "learning_rate": 7.960832992259671e-07, "loss": 0.4021, "step": 10778 }, { "epoch": 0.88, "grad_norm": 0.9091524190557226, "learning_rate": 7.950543275592449e-07, "loss": 0.4644, "step": 10779 }, { "epoch": 0.88, "grad_norm": 1.0333079754352386, "learning_rate": 7.94025993787092e-07, "loss": 0.514, "step": 10780 }, { "epoch": 0.88, "grad_norm": 0.9823506631557626, "learning_rate": 7.929982979807738e-07, "loss": 0.5355, "step": 10781 }, { "epoch": 0.88, "grad_norm": 0.9000299533512557, "learning_rate": 7.919712402115054e-07, "loss": 0.422, "step": 10782 }, { "epoch": 0.88, "grad_norm": 1.0293709792663799, "learning_rate": 7.909448205504633e-07, "loss": 0.536, "step": 10783 }, { "epoch": 0.88, "grad_norm": 0.9110279158047219, "learning_rate": 7.899190390687783e-07, "loss": 0.4659, "step": 10784 }, { "epoch": 0.88, "grad_norm": 0.9492518309308532, "learning_rate": 7.888938958375325e-07, "loss": 0.5341, "step": 10785 }, { "epoch": 0.88, "grad_norm": 1.0609084182976722, "learning_rate": 7.878693909277702e-07, "loss": 0.5124, "step": 10786 }, { "epoch": 0.88, "grad_norm": 1.0009821719336907, "learning_rate": 7.868455244104878e-07, "loss": 0.4542, "step": 10787 }, { "epoch": 0.88, "grad_norm": 0.8843299754398897, "learning_rate": 7.858222963566386e-07, "loss": 0.4965, "step": 10788 }, { "epoch": 0.88, "grad_norm": 0.8676271737361042, "learning_rate": 7.847997068371305e-07, "loss": 0.4224, "step": 10789 }, { "epoch": 0.88, "grad_norm": 0.9568586304329556, "learning_rate": 7.83777755922831e-07, "loss": 0.4841, "step": 10790 }, { "epoch": 0.88, "grad_norm": 0.9568289014292817, "learning_rate": 7.827564436845569e-07, "loss": 0.4305, "step": 10791 }, { "epoch": 0.88, "grad_norm": 0.9007924177189064, "learning_rate": 7.81735770193085e-07, "loss": 0.4777, "step": 10792 }, { "epoch": 0.88, "grad_norm": 1.0356572627261904, "learning_rate": 7.807157355191475e-07, "loss": 0.4975, "step": 10793 }, { "epoch": 0.88, "grad_norm": 0.9153262416116543, "learning_rate": 7.796963397334323e-07, "loss": 0.4281, "step": 10794 }, { "epoch": 0.88, "grad_norm": 0.9513672979130858, "learning_rate": 7.786775829065829e-07, "loss": 0.5165, "step": 10795 }, { "epoch": 0.88, "grad_norm": 0.9632117362919128, "learning_rate": 7.776594651091995e-07, "loss": 0.487, "step": 10796 }, { "epoch": 0.88, "grad_norm": 0.9401247195442137, "learning_rate": 7.766419864118325e-07, "loss": 0.4191, "step": 10797 }, { "epoch": 0.88, "grad_norm": 0.9176285200237425, "learning_rate": 7.756251468849951e-07, "loss": 0.4436, "step": 10798 }, { "epoch": 0.88, "grad_norm": 0.8611310925495618, "learning_rate": 7.746089465991525e-07, "loss": 0.4971, "step": 10799 }, { "epoch": 0.88, "grad_norm": 0.9305324159779296, "learning_rate": 7.735933856247269e-07, "loss": 0.4685, "step": 10800 }, { "epoch": 0.88, "grad_norm": 0.9107641464674445, "learning_rate": 7.725784640320966e-07, "loss": 0.4931, "step": 10801 }, { "epoch": 0.88, "grad_norm": 0.9806018240071644, "learning_rate": 7.715641818915953e-07, "loss": 0.5458, "step": 10802 }, { "epoch": 0.88, "grad_norm": 0.9257241322988893, "learning_rate": 7.70550539273508e-07, "loss": 0.4582, "step": 10803 }, { "epoch": 0.88, "grad_norm": 0.8949923140018721, "learning_rate": 7.695375362480839e-07, "loss": 0.4443, "step": 10804 }, { "epoch": 0.88, "grad_norm": 1.0077955943455112, "learning_rate": 7.685251728855203e-07, "loss": 0.4722, "step": 10805 }, { "epoch": 0.88, "grad_norm": 0.8865636886945156, "learning_rate": 7.675134492559733e-07, "loss": 0.5064, "step": 10806 }, { "epoch": 0.88, "grad_norm": 1.0190987369655813, "learning_rate": 7.665023654295556e-07, "loss": 0.4824, "step": 10807 }, { "epoch": 0.88, "grad_norm": 0.9215282547212966, "learning_rate": 7.654919214763357e-07, "loss": 0.4839, "step": 10808 }, { "epoch": 0.88, "grad_norm": 0.9555787188807393, "learning_rate": 7.644821174663308e-07, "loss": 0.4842, "step": 10809 }, { "epoch": 0.88, "grad_norm": 0.9429919388180836, "learning_rate": 7.634729534695273e-07, "loss": 0.4577, "step": 10810 }, { "epoch": 0.88, "grad_norm": 0.8458292575931327, "learning_rate": 7.624644295558525e-07, "loss": 0.4069, "step": 10811 }, { "epoch": 0.88, "grad_norm": 0.8661683221124166, "learning_rate": 7.614565457952005e-07, "loss": 0.4705, "step": 10812 }, { "epoch": 0.88, "grad_norm": 0.9333855764767786, "learning_rate": 7.604493022574144e-07, "loss": 0.5074, "step": 10813 }, { "epoch": 0.88, "grad_norm": 0.9956363320104379, "learning_rate": 7.594426990122972e-07, "loss": 0.5214, "step": 10814 }, { "epoch": 0.88, "grad_norm": 0.9448774180808857, "learning_rate": 7.58436736129603e-07, "loss": 0.4514, "step": 10815 }, { "epoch": 0.88, "grad_norm": 0.9361065086180625, "learning_rate": 7.574314136790472e-07, "loss": 0.4398, "step": 10816 }, { "epoch": 0.88, "grad_norm": 0.9122194932866072, "learning_rate": 7.564267317302965e-07, "loss": 0.4273, "step": 10817 }, { "epoch": 0.88, "grad_norm": 0.9753113897717414, "learning_rate": 7.554226903529726e-07, "loss": 0.5101, "step": 10818 }, { "epoch": 0.88, "grad_norm": 0.914937109857151, "learning_rate": 7.544192896166569e-07, "loss": 0.4923, "step": 10819 }, { "epoch": 0.88, "grad_norm": 0.9201451087784769, "learning_rate": 7.534165295908857e-07, "loss": 0.4767, "step": 10820 }, { "epoch": 0.88, "grad_norm": 0.9650755957588284, "learning_rate": 7.524144103451436e-07, "loss": 0.4629, "step": 10821 }, { "epoch": 0.88, "grad_norm": 0.9972999225253916, "learning_rate": 7.514129319488839e-07, "loss": 0.5472, "step": 10822 }, { "epoch": 0.88, "grad_norm": 0.9633231367308103, "learning_rate": 7.504120944715021e-07, "loss": 0.4474, "step": 10823 }, { "epoch": 0.88, "grad_norm": 0.9708221061585173, "learning_rate": 7.494118979823584e-07, "loss": 0.4915, "step": 10824 }, { "epoch": 0.88, "grad_norm": 0.9672049816339459, "learning_rate": 7.48412342550765e-07, "loss": 0.5398, "step": 10825 }, { "epoch": 0.88, "grad_norm": 0.8233298296033982, "learning_rate": 7.47413428245991e-07, "loss": 0.4419, "step": 10826 }, { "epoch": 0.88, "grad_norm": 0.9569979497855227, "learning_rate": 7.464151551372567e-07, "loss": 0.4963, "step": 10827 }, { "epoch": 0.88, "grad_norm": 0.9372064091819019, "learning_rate": 7.454175232937478e-07, "loss": 0.557, "step": 10828 }, { "epoch": 0.88, "grad_norm": 0.9857819829803073, "learning_rate": 7.444205327845932e-07, "loss": 0.5188, "step": 10829 }, { "epoch": 0.88, "grad_norm": 0.8485091146600624, "learning_rate": 7.43424183678887e-07, "loss": 0.445, "step": 10830 }, { "epoch": 0.88, "grad_norm": 0.9093988479334427, "learning_rate": 7.424284760456734e-07, "loss": 0.4602, "step": 10831 }, { "epoch": 0.88, "grad_norm": 0.8697372110048283, "learning_rate": 7.414334099539577e-07, "loss": 0.3928, "step": 10832 }, { "epoch": 0.88, "grad_norm": 0.8500637707605192, "learning_rate": 7.404389854726901e-07, "loss": 0.4605, "step": 10833 }, { "epoch": 0.88, "grad_norm": 0.9450728224522065, "learning_rate": 7.39445202670791e-07, "loss": 0.495, "step": 10834 }, { "epoch": 0.88, "grad_norm": 0.9922786195641765, "learning_rate": 7.384520616171232e-07, "loss": 0.5178, "step": 10835 }, { "epoch": 0.88, "grad_norm": 0.9618426009645406, "learning_rate": 7.374595623805137e-07, "loss": 0.4966, "step": 10836 }, { "epoch": 0.88, "grad_norm": 0.9462859467040637, "learning_rate": 7.364677050297398e-07, "loss": 0.4739, "step": 10837 }, { "epoch": 0.88, "grad_norm": 1.0227678882461722, "learning_rate": 7.354764896335398e-07, "loss": 0.5576, "step": 10838 }, { "epoch": 0.88, "grad_norm": 0.9980851593236497, "learning_rate": 7.344859162605966e-07, "loss": 0.4946, "step": 10839 }, { "epoch": 0.88, "grad_norm": 0.9237865868634112, "learning_rate": 7.334959849795653e-07, "loss": 0.4839, "step": 10840 }, { "epoch": 0.88, "grad_norm": 0.9178993647703343, "learning_rate": 7.32506695859041e-07, "loss": 0.4641, "step": 10841 }, { "epoch": 0.88, "grad_norm": 0.9519479260351561, "learning_rate": 7.315180489675822e-07, "loss": 0.5189, "step": 10842 }, { "epoch": 0.88, "grad_norm": 0.960758292084623, "learning_rate": 7.305300443737018e-07, "loss": 0.5433, "step": 10843 }, { "epoch": 0.88, "grad_norm": 0.9801777325896154, "learning_rate": 7.295426821458684e-07, "loss": 0.5057, "step": 10844 }, { "epoch": 0.88, "grad_norm": 0.9286446248352245, "learning_rate": 7.285559623525018e-07, "loss": 0.4899, "step": 10845 }, { "epoch": 0.88, "grad_norm": 0.9399120756588593, "learning_rate": 7.275698850619861e-07, "loss": 0.5041, "step": 10846 }, { "epoch": 0.88, "grad_norm": 0.8796425834099239, "learning_rate": 7.265844503426512e-07, "loss": 0.4751, "step": 10847 }, { "epoch": 0.88, "grad_norm": 0.9478441410449189, "learning_rate": 7.255996582627878e-07, "loss": 0.4731, "step": 10848 }, { "epoch": 0.88, "grad_norm": 0.9602936406666883, "learning_rate": 7.246155088906426e-07, "loss": 0.4869, "step": 10849 }, { "epoch": 0.88, "grad_norm": 0.8380944081829118, "learning_rate": 7.236320022944166e-07, "loss": 0.4285, "step": 10850 }, { "epoch": 0.88, "grad_norm": 1.0144426336924712, "learning_rate": 7.226491385422618e-07, "loss": 0.5351, "step": 10851 }, { "epoch": 0.88, "grad_norm": 0.9866428772036293, "learning_rate": 7.21666917702295e-07, "loss": 0.4802, "step": 10852 }, { "epoch": 0.88, "grad_norm": 1.0833877967513377, "learning_rate": 7.206853398425806e-07, "loss": 0.4986, "step": 10853 }, { "epoch": 0.88, "grad_norm": 0.9699105979255526, "learning_rate": 7.197044050311408e-07, "loss": 0.495, "step": 10854 }, { "epoch": 0.88, "grad_norm": 0.8461947804009808, "learning_rate": 7.187241133359535e-07, "loss": 0.4701, "step": 10855 }, { "epoch": 0.88, "grad_norm": 1.0087068121306357, "learning_rate": 7.177444648249554e-07, "loss": 0.5119, "step": 10856 }, { "epoch": 0.88, "grad_norm": 0.927842032367001, "learning_rate": 7.167654595660279e-07, "loss": 0.4756, "step": 10857 }, { "epoch": 0.88, "grad_norm": 1.0954507707339836, "learning_rate": 7.157870976270243e-07, "loss": 0.5273, "step": 10858 }, { "epoch": 0.88, "grad_norm": 0.9649063436888304, "learning_rate": 7.148093790757371e-07, "loss": 0.5375, "step": 10859 }, { "epoch": 0.88, "grad_norm": 0.8600627356479995, "learning_rate": 7.138323039799256e-07, "loss": 0.4304, "step": 10860 }, { "epoch": 0.88, "grad_norm": 1.0326935851609784, "learning_rate": 7.128558724072976e-07, "loss": 0.489, "step": 10861 }, { "epoch": 0.88, "grad_norm": 0.8968902380842844, "learning_rate": 7.118800844255214e-07, "loss": 0.5305, "step": 10862 }, { "epoch": 0.88, "grad_norm": 0.9482496308476169, "learning_rate": 7.109049401022139e-07, "loss": 0.5105, "step": 10863 }, { "epoch": 0.88, "grad_norm": 1.0149047865729435, "learning_rate": 7.099304395049566e-07, "loss": 0.554, "step": 10864 }, { "epoch": 0.88, "grad_norm": 0.9135791307678812, "learning_rate": 7.089565827012801e-07, "loss": 0.5188, "step": 10865 }, { "epoch": 0.88, "grad_norm": 0.8845742531981535, "learning_rate": 7.079833697586702e-07, "loss": 0.4493, "step": 10866 }, { "epoch": 0.88, "grad_norm": 1.0187836100942078, "learning_rate": 7.070108007445708e-07, "loss": 0.5208, "step": 10867 }, { "epoch": 0.88, "grad_norm": 0.9641153712470332, "learning_rate": 7.060388757263815e-07, "loss": 0.4721, "step": 10868 }, { "epoch": 0.88, "grad_norm": 0.9580963683240067, "learning_rate": 7.050675947714514e-07, "loss": 0.525, "step": 10869 }, { "epoch": 0.88, "grad_norm": 0.9812100492397047, "learning_rate": 7.040969579470947e-07, "loss": 0.4649, "step": 10870 }, { "epoch": 0.88, "grad_norm": 0.9725844565798529, "learning_rate": 7.03126965320573e-07, "loss": 0.5216, "step": 10871 }, { "epoch": 0.88, "grad_norm": 1.0468445203398065, "learning_rate": 7.02157616959106e-07, "loss": 0.4856, "step": 10872 }, { "epoch": 0.88, "grad_norm": 0.938268266463854, "learning_rate": 7.011889129298688e-07, "loss": 0.4724, "step": 10873 }, { "epoch": 0.88, "grad_norm": 0.9344094084559463, "learning_rate": 7.002208532999933e-07, "loss": 0.4824, "step": 10874 }, { "epoch": 0.88, "grad_norm": 1.0943727000052077, "learning_rate": 6.992534381365612e-07, "loss": 0.4795, "step": 10875 }, { "epoch": 0.88, "grad_norm": 0.9361214667194621, "learning_rate": 6.98286667506618e-07, "loss": 0.46, "step": 10876 }, { "epoch": 0.88, "grad_norm": 0.9742787944574746, "learning_rate": 6.973205414771567e-07, "loss": 0.4995, "step": 10877 }, { "epoch": 0.88, "grad_norm": 0.9823861996132334, "learning_rate": 6.963550601151326e-07, "loss": 0.4549, "step": 10878 }, { "epoch": 0.88, "grad_norm": 0.8710597836786871, "learning_rate": 6.953902234874488e-07, "loss": 0.4224, "step": 10879 }, { "epoch": 0.88, "grad_norm": 0.9640184246732854, "learning_rate": 6.944260316609696e-07, "loss": 0.4772, "step": 10880 }, { "epoch": 0.88, "grad_norm": 0.9443593319000072, "learning_rate": 6.934624847025117e-07, "loss": 0.4872, "step": 10881 }, { "epoch": 0.88, "grad_norm": 0.9102355113413119, "learning_rate": 6.924995826788516e-07, "loss": 0.4886, "step": 10882 }, { "epoch": 0.88, "grad_norm": 0.8545741021936641, "learning_rate": 6.915373256567104e-07, "loss": 0.4124, "step": 10883 }, { "epoch": 0.88, "grad_norm": 0.8849429457331734, "learning_rate": 6.905757137027791e-07, "loss": 0.5111, "step": 10884 }, { "epoch": 0.88, "grad_norm": 0.9274095690131027, "learning_rate": 6.896147468836923e-07, "loss": 0.4492, "step": 10885 }, { "epoch": 0.88, "grad_norm": 0.9868491348465521, "learning_rate": 6.886544252660455e-07, "loss": 0.4611, "step": 10886 }, { "epoch": 0.88, "grad_norm": 0.9705796300856186, "learning_rate": 6.876947489163877e-07, "loss": 0.5528, "step": 10887 }, { "epoch": 0.88, "grad_norm": 0.8459196623914732, "learning_rate": 6.867357179012257e-07, "loss": 0.4765, "step": 10888 }, { "epoch": 0.88, "grad_norm": 0.9718706349770099, "learning_rate": 6.857773322870132e-07, "loss": 0.5085, "step": 10889 }, { "epoch": 0.89, "grad_norm": 1.0231757989355228, "learning_rate": 6.848195921401745e-07, "loss": 0.5608, "step": 10890 }, { "epoch": 0.89, "grad_norm": 0.9926785324714231, "learning_rate": 6.838624975270724e-07, "loss": 0.556, "step": 10891 }, { "epoch": 0.89, "grad_norm": 0.8648754199032377, "learning_rate": 6.829060485140371e-07, "loss": 0.4641, "step": 10892 }, { "epoch": 0.89, "grad_norm": 0.9565565653738822, "learning_rate": 6.819502451673477e-07, "loss": 0.4483, "step": 10893 }, { "epoch": 0.89, "grad_norm": 0.9210008914980209, "learning_rate": 6.809950875532434e-07, "loss": 0.4526, "step": 10894 }, { "epoch": 0.89, "grad_norm": 0.9462179584061005, "learning_rate": 6.800405757379103e-07, "loss": 0.458, "step": 10895 }, { "epoch": 0.89, "grad_norm": 0.9380696791158867, "learning_rate": 6.790867097875009e-07, "loss": 0.4847, "step": 10896 }, { "epoch": 0.89, "grad_norm": 0.8742693957607353, "learning_rate": 6.781334897681136e-07, "loss": 0.4233, "step": 10897 }, { "epoch": 0.89, "grad_norm": 0.9531969331088594, "learning_rate": 6.771809157458076e-07, "loss": 0.4273, "step": 10898 }, { "epoch": 0.89, "grad_norm": 0.9797729723207618, "learning_rate": 6.762289877865946e-07, "loss": 0.5301, "step": 10899 }, { "epoch": 0.89, "grad_norm": 0.9302885601403638, "learning_rate": 6.752777059564431e-07, "loss": 0.498, "step": 10900 }, { "epoch": 0.89, "grad_norm": 0.9756389310003124, "learning_rate": 6.743270703212734e-07, "loss": 0.5063, "step": 10901 }, { "epoch": 0.89, "grad_norm": 0.8954492108953634, "learning_rate": 6.733770809469686e-07, "loss": 0.4318, "step": 10902 }, { "epoch": 0.89, "grad_norm": 0.9195584154690314, "learning_rate": 6.724277378993582e-07, "loss": 0.463, "step": 10903 }, { "epoch": 0.89, "grad_norm": 0.8998700767219938, "learning_rate": 6.714790412442318e-07, "loss": 0.4945, "step": 10904 }, { "epoch": 0.89, "grad_norm": 0.9843924532722907, "learning_rate": 6.705309910473334e-07, "loss": 0.5438, "step": 10905 }, { "epoch": 0.89, "grad_norm": 0.8902657261878095, "learning_rate": 6.695835873743639e-07, "loss": 0.4437, "step": 10906 }, { "epoch": 0.89, "grad_norm": 0.9797169219451506, "learning_rate": 6.686368302909729e-07, "loss": 0.5289, "step": 10907 }, { "epoch": 0.89, "grad_norm": 1.074011462740834, "learning_rate": 6.676907198627758e-07, "loss": 0.5319, "step": 10908 }, { "epoch": 0.89, "grad_norm": 0.8932783969377371, "learning_rate": 6.667452561553312e-07, "loss": 0.4721, "step": 10909 }, { "epoch": 0.89, "grad_norm": 1.007817520165382, "learning_rate": 6.658004392341633e-07, "loss": 0.4764, "step": 10910 }, { "epoch": 0.89, "grad_norm": 0.9753415517502435, "learning_rate": 6.648562691647443e-07, "loss": 0.5611, "step": 10911 }, { "epoch": 0.89, "grad_norm": 0.8876119878062856, "learning_rate": 6.639127460125061e-07, "loss": 0.4456, "step": 10912 }, { "epoch": 0.89, "grad_norm": 0.8825191330874997, "learning_rate": 6.629698698428333e-07, "loss": 0.4839, "step": 10913 }, { "epoch": 0.89, "grad_norm": 0.8801074389167012, "learning_rate": 6.620276407210691e-07, "loss": 0.435, "step": 10914 }, { "epoch": 0.89, "grad_norm": 0.907815847163721, "learning_rate": 6.610860587125046e-07, "loss": 0.4488, "step": 10915 }, { "epoch": 0.89, "grad_norm": 0.8857465115695343, "learning_rate": 6.60145123882392e-07, "loss": 0.4601, "step": 10916 }, { "epoch": 0.89, "grad_norm": 1.0385760901105907, "learning_rate": 6.59204836295938e-07, "loss": 0.5081, "step": 10917 }, { "epoch": 0.89, "grad_norm": 0.8568978773404449, "learning_rate": 6.582651960183039e-07, "loss": 0.4356, "step": 10918 }, { "epoch": 0.89, "grad_norm": 0.988066516943962, "learning_rate": 6.573262031146055e-07, "loss": 0.4722, "step": 10919 }, { "epoch": 0.89, "grad_norm": 0.8581986976490972, "learning_rate": 6.563878576499161e-07, "loss": 0.4413, "step": 10920 }, { "epoch": 0.89, "grad_norm": 0.7929721822827478, "learning_rate": 6.554501596892582e-07, "loss": 0.4118, "step": 10921 }, { "epoch": 0.89, "grad_norm": 0.9392852715664914, "learning_rate": 6.545131092976165e-07, "loss": 0.5108, "step": 10922 }, { "epoch": 0.89, "grad_norm": 0.8630871527253957, "learning_rate": 6.535767065399268e-07, "loss": 0.4296, "step": 10923 }, { "epoch": 0.89, "grad_norm": 0.9950951675346889, "learning_rate": 6.526409514810805e-07, "loss": 0.4728, "step": 10924 }, { "epoch": 0.89, "grad_norm": 0.9887197876289022, "learning_rate": 6.517058441859248e-07, "loss": 0.4998, "step": 10925 }, { "epoch": 0.89, "grad_norm": 0.9341212052113047, "learning_rate": 6.507713847192643e-07, "loss": 0.488, "step": 10926 }, { "epoch": 0.89, "grad_norm": 0.9132481462189752, "learning_rate": 6.498375731458529e-07, "loss": 0.4746, "step": 10927 }, { "epoch": 0.89, "grad_norm": 0.9152190176417002, "learning_rate": 6.489044095304031e-07, "loss": 0.4698, "step": 10928 }, { "epoch": 0.89, "grad_norm": 0.90130991738369, "learning_rate": 6.479718939375846e-07, "loss": 0.4477, "step": 10929 }, { "epoch": 0.89, "grad_norm": 0.91942422370572, "learning_rate": 6.470400264320176e-07, "loss": 0.5178, "step": 10930 }, { "epoch": 0.89, "grad_norm": 1.0685961246932574, "learning_rate": 6.461088070782806e-07, "loss": 0.5362, "step": 10931 }, { "epoch": 0.89, "grad_norm": 0.8916758914035164, "learning_rate": 6.451782359409076e-07, "loss": 0.4689, "step": 10932 }, { "epoch": 0.89, "grad_norm": 0.872337058372946, "learning_rate": 6.442483130843857e-07, "loss": 0.4741, "step": 10933 }, { "epoch": 0.89, "grad_norm": 0.9024681459121816, "learning_rate": 6.433190385731558e-07, "loss": 0.4499, "step": 10934 }, { "epoch": 0.89, "grad_norm": 0.9169060857895663, "learning_rate": 6.423904124716174e-07, "loss": 0.4825, "step": 10935 }, { "epoch": 0.89, "grad_norm": 0.9640055684093934, "learning_rate": 6.414624348441223e-07, "loss": 0.4722, "step": 10936 }, { "epoch": 0.89, "grad_norm": 0.9129689547723554, "learning_rate": 6.405351057549803e-07, "loss": 0.4385, "step": 10937 }, { "epoch": 0.89, "grad_norm": 1.0296726654633954, "learning_rate": 6.396084252684532e-07, "loss": 0.4515, "step": 10938 }, { "epoch": 0.89, "grad_norm": 0.9326947083487568, "learning_rate": 6.386823934487619e-07, "loss": 0.5332, "step": 10939 }, { "epoch": 0.89, "grad_norm": 0.9960249084282743, "learning_rate": 6.377570103600749e-07, "loss": 0.4267, "step": 10940 }, { "epoch": 0.89, "grad_norm": 0.8830118434965389, "learning_rate": 6.368322760665235e-07, "loss": 0.467, "step": 10941 }, { "epoch": 0.89, "grad_norm": 0.9758683488076908, "learning_rate": 6.359081906321896e-07, "loss": 0.4982, "step": 10942 }, { "epoch": 0.89, "grad_norm": 0.9488295398679685, "learning_rate": 6.349847541211119e-07, "loss": 0.4827, "step": 10943 }, { "epoch": 0.89, "grad_norm": 0.9750251787938423, "learning_rate": 6.340619665972847e-07, "loss": 0.46, "step": 10944 }, { "epoch": 0.89, "grad_norm": 1.0234616225302677, "learning_rate": 6.33139828124657e-07, "loss": 0.4685, "step": 10945 }, { "epoch": 0.89, "grad_norm": 1.0455466925977317, "learning_rate": 6.322183387671299e-07, "loss": 0.4855, "step": 10946 }, { "epoch": 0.89, "grad_norm": 0.9754433315723913, "learning_rate": 6.312974985885612e-07, "loss": 0.4757, "step": 10947 }, { "epoch": 0.89, "grad_norm": 0.9357797846488201, "learning_rate": 6.303773076527663e-07, "loss": 0.5077, "step": 10948 }, { "epoch": 0.89, "grad_norm": 0.9014709161998172, "learning_rate": 6.294577660235146e-07, "loss": 0.4161, "step": 10949 }, { "epoch": 0.89, "grad_norm": 0.968122195587275, "learning_rate": 6.28538873764526e-07, "loss": 0.413, "step": 10950 }, { "epoch": 0.89, "grad_norm": 0.9321219084135877, "learning_rate": 6.276206309394839e-07, "loss": 0.4747, "step": 10951 }, { "epoch": 0.89, "grad_norm": 0.9857430350810453, "learning_rate": 6.267030376120154e-07, "loss": 0.4877, "step": 10952 }, { "epoch": 0.89, "grad_norm": 0.9257006654534886, "learning_rate": 6.25786093845715e-07, "loss": 0.5431, "step": 10953 }, { "epoch": 0.89, "grad_norm": 0.7813395537425469, "learning_rate": 6.248697997041219e-07, "loss": 0.3824, "step": 10954 }, { "epoch": 0.89, "grad_norm": 0.9357085513534893, "learning_rate": 6.239541552507367e-07, "loss": 0.5074, "step": 10955 }, { "epoch": 0.89, "grad_norm": 0.9781819541718835, "learning_rate": 6.230391605490105e-07, "loss": 0.5044, "step": 10956 }, { "epoch": 0.89, "grad_norm": 1.0309509193357862, "learning_rate": 6.22124815662356e-07, "loss": 0.5121, "step": 10957 }, { "epoch": 0.89, "grad_norm": 1.0824573399034947, "learning_rate": 6.212111206541305e-07, "loss": 0.5119, "step": 10958 }, { "epoch": 0.89, "grad_norm": 1.0262200241800767, "learning_rate": 6.202980755876575e-07, "loss": 0.4679, "step": 10959 }, { "epoch": 0.89, "grad_norm": 0.9675571655609572, "learning_rate": 6.193856805262078e-07, "loss": 0.483, "step": 10960 }, { "epoch": 0.89, "grad_norm": 0.9213920082584263, "learning_rate": 6.184739355330083e-07, "loss": 0.5297, "step": 10961 }, { "epoch": 0.89, "grad_norm": 1.052124394282512, "learning_rate": 6.175628406712452e-07, "loss": 0.4136, "step": 10962 }, { "epoch": 0.89, "grad_norm": 0.9134209043859236, "learning_rate": 6.166523960040549e-07, "loss": 0.4796, "step": 10963 }, { "epoch": 0.89, "grad_norm": 1.0368144330888827, "learning_rate": 6.157426015945289e-07, "loss": 0.5225, "step": 10964 }, { "epoch": 0.89, "grad_norm": 1.0664674535151522, "learning_rate": 6.148334575057191e-07, "loss": 0.5076, "step": 10965 }, { "epoch": 0.89, "grad_norm": 1.0175371255955596, "learning_rate": 6.139249638006251e-07, "loss": 0.5053, "step": 10966 }, { "epoch": 0.89, "grad_norm": 1.0233060979369943, "learning_rate": 6.130171205422053e-07, "loss": 0.4673, "step": 10967 }, { "epoch": 0.89, "grad_norm": 0.9631441370141315, "learning_rate": 6.121099277933728e-07, "loss": 0.5094, "step": 10968 }, { "epoch": 0.89, "grad_norm": 1.0057396520217246, "learning_rate": 6.112033856169974e-07, "loss": 0.5465, "step": 10969 }, { "epoch": 0.89, "grad_norm": 0.9561951029569171, "learning_rate": 6.102974940758954e-07, "loss": 0.4974, "step": 10970 }, { "epoch": 0.89, "grad_norm": 0.9866723996930895, "learning_rate": 6.093922532328522e-07, "loss": 0.542, "step": 10971 }, { "epoch": 0.89, "grad_norm": 1.052357825958954, "learning_rate": 6.084876631505943e-07, "loss": 0.5748, "step": 10972 }, { "epoch": 0.89, "grad_norm": 0.8457013897677658, "learning_rate": 6.075837238918114e-07, "loss": 0.4256, "step": 10973 }, { "epoch": 0.89, "grad_norm": 0.9843655807602537, "learning_rate": 6.066804355191458e-07, "loss": 0.5103, "step": 10974 }, { "epoch": 0.89, "grad_norm": 0.9114613086221101, "learning_rate": 6.057777980951951e-07, "loss": 0.3927, "step": 10975 }, { "epoch": 0.89, "grad_norm": 1.0110177998447731, "learning_rate": 6.048758116825071e-07, "loss": 0.5294, "step": 10976 }, { "epoch": 0.89, "grad_norm": 0.9426252423474122, "learning_rate": 6.039744763435951e-07, "loss": 0.4731, "step": 10977 }, { "epoch": 0.89, "grad_norm": 0.9087938185398369, "learning_rate": 6.030737921409169e-07, "loss": 0.4269, "step": 10978 }, { "epoch": 0.89, "grad_norm": 0.8901526154211044, "learning_rate": 6.021737591368892e-07, "loss": 0.483, "step": 10979 }, { "epoch": 0.89, "grad_norm": 0.929054298926748, "learning_rate": 6.012743773938845e-07, "loss": 0.4684, "step": 10980 }, { "epoch": 0.89, "grad_norm": 0.9998305319817405, "learning_rate": 6.003756469742294e-07, "loss": 0.5043, "step": 10981 }, { "epoch": 0.89, "grad_norm": 0.9654263645064304, "learning_rate": 5.994775679402021e-07, "loss": 0.4799, "step": 10982 }, { "epoch": 0.89, "grad_norm": 0.8812144427476013, "learning_rate": 5.985801403540436e-07, "loss": 0.4791, "step": 10983 }, { "epoch": 0.89, "grad_norm": 1.0297036512654214, "learning_rate": 5.976833642779422e-07, "loss": 0.4931, "step": 10984 }, { "epoch": 0.89, "grad_norm": 1.0959168985236405, "learning_rate": 5.967872397740427e-07, "loss": 0.5191, "step": 10985 }, { "epoch": 0.89, "grad_norm": 0.9548852649181496, "learning_rate": 5.958917669044472e-07, "loss": 0.4726, "step": 10986 }, { "epoch": 0.89, "grad_norm": 0.8860636695272686, "learning_rate": 5.949969457312122e-07, "loss": 0.4133, "step": 10987 }, { "epoch": 0.89, "grad_norm": 0.9551069857313534, "learning_rate": 5.941027763163432e-07, "loss": 0.4779, "step": 10988 }, { "epoch": 0.89, "grad_norm": 1.9587762764673151, "learning_rate": 5.93209258721812e-07, "loss": 0.4449, "step": 10989 }, { "epoch": 0.89, "grad_norm": 0.9764154005326386, "learning_rate": 5.923163930095344e-07, "loss": 0.5057, "step": 10990 }, { "epoch": 0.89, "grad_norm": 0.8694608537022765, "learning_rate": 5.914241792413855e-07, "loss": 0.433, "step": 10991 }, { "epoch": 0.89, "grad_norm": 0.8938395567692832, "learning_rate": 5.905326174791959e-07, "loss": 0.4547, "step": 10992 }, { "epoch": 0.89, "grad_norm": 0.8700693290069454, "learning_rate": 5.896417077847505e-07, "loss": 0.443, "step": 10993 }, { "epoch": 0.89, "grad_norm": 0.9365351744115846, "learning_rate": 5.887514502197855e-07, "loss": 0.5051, "step": 10994 }, { "epoch": 0.89, "grad_norm": 0.953988434394032, "learning_rate": 5.878618448460005e-07, "loss": 0.4766, "step": 10995 }, { "epoch": 0.89, "grad_norm": 1.0255533396332113, "learning_rate": 5.869728917250394e-07, "loss": 0.4966, "step": 10996 }, { "epoch": 0.89, "grad_norm": 0.9351106480000976, "learning_rate": 5.860845909185076e-07, "loss": 0.4979, "step": 10997 }, { "epoch": 0.89, "grad_norm": 0.9267295875852679, "learning_rate": 5.851969424879633e-07, "loss": 0.5132, "step": 10998 }, { "epoch": 0.89, "grad_norm": 1.044249424301423, "learning_rate": 5.843099464949198e-07, "loss": 0.5185, "step": 10999 }, { "epoch": 0.89, "grad_norm": 1.064022787085525, "learning_rate": 5.834236030008455e-07, "loss": 0.5138, "step": 11000 }, { "epoch": 0.89, "grad_norm": 0.8423978228749698, "learning_rate": 5.825379120671649e-07, "loss": 0.4502, "step": 11001 }, { "epoch": 0.89, "grad_norm": 0.8854906110383404, "learning_rate": 5.816528737552496e-07, "loss": 0.4603, "step": 11002 }, { "epoch": 0.89, "grad_norm": 0.9662435120986029, "learning_rate": 5.807684881264397e-07, "loss": 0.4929, "step": 11003 }, { "epoch": 0.89, "grad_norm": 0.8918757606853558, "learning_rate": 5.798847552420184e-07, "loss": 0.4759, "step": 11004 }, { "epoch": 0.89, "grad_norm": 0.9781143034468275, "learning_rate": 5.790016751632266e-07, "loss": 0.529, "step": 11005 }, { "epoch": 0.89, "grad_norm": 0.9408427539253995, "learning_rate": 5.781192479512621e-07, "loss": 0.4402, "step": 11006 }, { "epoch": 0.89, "grad_norm": 0.9169600366711169, "learning_rate": 5.772374736672793e-07, "loss": 0.4747, "step": 11007 }, { "epoch": 0.89, "grad_norm": 0.9420777741526181, "learning_rate": 5.763563523723769e-07, "loss": 0.4748, "step": 11008 }, { "epoch": 0.89, "grad_norm": 0.9565403624864097, "learning_rate": 5.754758841276243e-07, "loss": 0.4469, "step": 11009 }, { "epoch": 0.89, "grad_norm": 0.991885868237849, "learning_rate": 5.745960689940322e-07, "loss": 0.4439, "step": 11010 }, { "epoch": 0.89, "grad_norm": 0.9151544363228036, "learning_rate": 5.73716907032572e-07, "loss": 0.4787, "step": 11011 }, { "epoch": 0.89, "grad_norm": 0.8132533514402628, "learning_rate": 5.728383983041696e-07, "loss": 0.5255, "step": 11012 }, { "epoch": 0.9, "grad_norm": 0.9310339443213517, "learning_rate": 5.719605428697051e-07, "loss": 0.5024, "step": 11013 }, { "epoch": 0.9, "grad_norm": 0.8574936208717879, "learning_rate": 5.710833407900096e-07, "loss": 0.433, "step": 11014 }, { "epoch": 0.9, "grad_norm": 0.8741036577067072, "learning_rate": 5.70206792125878e-07, "loss": 0.4794, "step": 11015 }, { "epoch": 0.9, "grad_norm": 0.967796820842037, "learning_rate": 5.693308969380495e-07, "loss": 0.479, "step": 11016 }, { "epoch": 0.9, "grad_norm": 0.9932310538013736, "learning_rate": 5.684556552872256e-07, "loss": 0.4961, "step": 11017 }, { "epoch": 0.9, "grad_norm": 0.9871267665159187, "learning_rate": 5.675810672340587e-07, "loss": 0.4598, "step": 11018 }, { "epoch": 0.9, "grad_norm": 0.986056563416071, "learning_rate": 5.667071328391593e-07, "loss": 0.532, "step": 11019 }, { "epoch": 0.9, "grad_norm": 0.9476270127764385, "learning_rate": 5.658338521630846e-07, "loss": 0.4759, "step": 11020 }, { "epoch": 0.9, "grad_norm": 0.9657676173085136, "learning_rate": 5.649612252663583e-07, "loss": 0.5025, "step": 11021 }, { "epoch": 0.9, "grad_norm": 0.8858286395635273, "learning_rate": 5.640892522094499e-07, "loss": 0.434, "step": 11022 }, { "epoch": 0.9, "grad_norm": 0.9552774366100201, "learning_rate": 5.632179330527865e-07, "loss": 0.4871, "step": 11023 }, { "epoch": 0.9, "grad_norm": 0.9143189633790539, "learning_rate": 5.623472678567498e-07, "loss": 0.4927, "step": 11024 }, { "epoch": 0.9, "grad_norm": 0.8714387741551404, "learning_rate": 5.614772566816773e-07, "loss": 0.4832, "step": 11025 }, { "epoch": 0.9, "grad_norm": 0.8858930867826196, "learning_rate": 5.606078995878562e-07, "loss": 0.4106, "step": 11026 }, { "epoch": 0.9, "grad_norm": 0.9568935096034147, "learning_rate": 5.597391966355381e-07, "loss": 0.4394, "step": 11027 }, { "epoch": 0.9, "grad_norm": 1.0728162873381937, "learning_rate": 5.588711478849185e-07, "loss": 0.5342, "step": 11028 }, { "epoch": 0.9, "grad_norm": 0.9180312152603842, "learning_rate": 5.580037533961546e-07, "loss": 0.4655, "step": 11029 }, { "epoch": 0.9, "grad_norm": 0.9517370360129557, "learning_rate": 5.571370132293552e-07, "loss": 0.4426, "step": 11030 }, { "epoch": 0.9, "grad_norm": 0.9945553716176935, "learning_rate": 5.562709274445866e-07, "loss": 0.5017, "step": 11031 }, { "epoch": 0.9, "grad_norm": 0.8730934042078565, "learning_rate": 5.554054961018628e-07, "loss": 0.4803, "step": 11032 }, { "epoch": 0.9, "grad_norm": 0.9385185317818835, "learning_rate": 5.54540719261164e-07, "loss": 0.441, "step": 11033 }, { "epoch": 0.9, "grad_norm": 0.9249336081996793, "learning_rate": 5.536765969824132e-07, "loss": 0.4769, "step": 11034 }, { "epoch": 0.9, "grad_norm": 0.9050719472637894, "learning_rate": 5.528131293254957e-07, "loss": 0.4702, "step": 11035 }, { "epoch": 0.9, "grad_norm": 0.9136625775583729, "learning_rate": 5.519503163502493e-07, "loss": 0.4389, "step": 11036 }, { "epoch": 0.9, "grad_norm": 0.9086878888383371, "learning_rate": 5.510881581164662e-07, "loss": 0.3929, "step": 11037 }, { "epoch": 0.9, "grad_norm": 0.92276744581274, "learning_rate": 5.502266546838897e-07, "loss": 0.5428, "step": 11038 }, { "epoch": 0.9, "grad_norm": 0.8805940036916493, "learning_rate": 5.493658061122276e-07, "loss": 0.4426, "step": 11039 }, { "epoch": 0.9, "grad_norm": 0.9771763240276137, "learning_rate": 5.48505612461131e-07, "loss": 0.4668, "step": 11040 }, { "epoch": 0.9, "grad_norm": 0.8734235333940492, "learning_rate": 5.476460737902111e-07, "loss": 0.4037, "step": 11041 }, { "epoch": 0.9, "grad_norm": 1.0466353923440146, "learning_rate": 5.467871901590349e-07, "loss": 0.5033, "step": 11042 }, { "epoch": 0.9, "grad_norm": 0.9785685435203199, "learning_rate": 5.459289616271224e-07, "loss": 0.4902, "step": 11043 }, { "epoch": 0.9, "grad_norm": 0.9503035291633423, "learning_rate": 5.450713882539449e-07, "loss": 0.4766, "step": 11044 }, { "epoch": 0.9, "grad_norm": 1.0429553331877732, "learning_rate": 5.44214470098936e-07, "loss": 0.5244, "step": 11045 }, { "epoch": 0.9, "grad_norm": 0.97021397045449, "learning_rate": 5.43358207221476e-07, "loss": 0.5031, "step": 11046 }, { "epoch": 0.9, "grad_norm": 0.9445804788065731, "learning_rate": 5.425025996809042e-07, "loss": 0.4727, "step": 11047 }, { "epoch": 0.9, "grad_norm": 0.9205907205100843, "learning_rate": 5.416476475365129e-07, "loss": 0.4863, "step": 11048 }, { "epoch": 0.9, "grad_norm": 1.3195746670830866, "learning_rate": 5.407933508475515e-07, "loss": 0.5233, "step": 11049 }, { "epoch": 0.9, "grad_norm": 0.9794249235949587, "learning_rate": 5.399397096732184e-07, "loss": 0.5045, "step": 11050 }, { "epoch": 0.9, "grad_norm": 0.894756142856474, "learning_rate": 5.39086724072675e-07, "loss": 0.3943, "step": 11051 }, { "epoch": 0.9, "grad_norm": 0.9204342660541922, "learning_rate": 5.382343941050272e-07, "loss": 0.471, "step": 11052 }, { "epoch": 0.9, "grad_norm": 0.9181966409748372, "learning_rate": 5.373827198293446e-07, "loss": 0.4943, "step": 11053 }, { "epoch": 0.9, "grad_norm": 0.9282760705343676, "learning_rate": 5.365317013046456e-07, "loss": 0.4279, "step": 11054 }, { "epoch": 0.9, "grad_norm": 0.9893124938884263, "learning_rate": 5.356813385899074e-07, "loss": 0.5161, "step": 11055 }, { "epoch": 0.9, "grad_norm": 0.9524158665869225, "learning_rate": 5.348316317440549e-07, "loss": 0.4846, "step": 11056 }, { "epoch": 0.9, "grad_norm": 0.9733809276947557, "learning_rate": 5.339825808259779e-07, "loss": 0.4934, "step": 11057 }, { "epoch": 0.9, "grad_norm": 0.9904644382388924, "learning_rate": 5.331341858945094e-07, "loss": 0.5697, "step": 11058 }, { "epoch": 0.9, "grad_norm": 0.9217495342550637, "learning_rate": 5.322864470084455e-07, "loss": 0.4578, "step": 11059 }, { "epoch": 0.9, "grad_norm": 0.864877204773377, "learning_rate": 5.314393642265314e-07, "loss": 0.4253, "step": 11060 }, { "epoch": 0.9, "grad_norm": 0.981346430166337, "learning_rate": 5.305929376074725e-07, "loss": 0.5363, "step": 11061 }, { "epoch": 0.9, "grad_norm": 0.972993618315643, "learning_rate": 5.29747167209923e-07, "loss": 0.4663, "step": 11062 }, { "epoch": 0.9, "grad_norm": 0.8667091636509169, "learning_rate": 5.28902053092496e-07, "loss": 0.4178, "step": 11063 }, { "epoch": 0.9, "grad_norm": 0.8573274759690804, "learning_rate": 5.280575953137545e-07, "loss": 0.4511, "step": 11064 }, { "epoch": 0.9, "grad_norm": 0.9976398282977128, "learning_rate": 5.272137939322208e-07, "loss": 0.487, "step": 11065 }, { "epoch": 0.9, "grad_norm": 0.9493121201956117, "learning_rate": 5.26370649006368e-07, "loss": 0.4588, "step": 11066 }, { "epoch": 0.9, "grad_norm": 0.8258826117106118, "learning_rate": 5.255281605946261e-07, "loss": 0.4348, "step": 11067 }, { "epoch": 0.9, "grad_norm": 0.9915494613066878, "learning_rate": 5.246863287553794e-07, "loss": 0.4855, "step": 11068 }, { "epoch": 0.9, "grad_norm": 0.9091035541846134, "learning_rate": 5.238451535469658e-07, "loss": 0.4527, "step": 11069 }, { "epoch": 0.9, "grad_norm": 0.8570386649899411, "learning_rate": 5.230046350276774e-07, "loss": 0.4344, "step": 11070 }, { "epoch": 0.9, "grad_norm": 0.9304912751627228, "learning_rate": 5.221647732557611e-07, "loss": 0.5141, "step": 11071 }, { "epoch": 0.9, "grad_norm": 1.0122734919257559, "learning_rate": 5.213255682894192e-07, "loss": 0.483, "step": 11072 }, { "epoch": 0.9, "grad_norm": 0.9107296385416488, "learning_rate": 5.204870201868084e-07, "loss": 0.4708, "step": 11073 }, { "epoch": 0.9, "grad_norm": 0.9662778251197135, "learning_rate": 5.19649129006039e-07, "loss": 0.5313, "step": 11074 }, { "epoch": 0.9, "grad_norm": 0.9539025751341297, "learning_rate": 5.188118948051768e-07, "loss": 0.4911, "step": 11075 }, { "epoch": 0.9, "grad_norm": 0.9027565677142386, "learning_rate": 5.179753176422386e-07, "loss": 0.4842, "step": 11076 }, { "epoch": 0.9, "grad_norm": 1.100122923406752, "learning_rate": 5.171393975752015e-07, "loss": 0.5235, "step": 11077 }, { "epoch": 0.9, "grad_norm": 0.9943585472020804, "learning_rate": 5.163041346619913e-07, "loss": 0.4866, "step": 11078 }, { "epoch": 0.9, "grad_norm": 0.9150676597442815, "learning_rate": 5.154695289604938e-07, "loss": 0.4607, "step": 11079 }, { "epoch": 0.9, "grad_norm": 0.8694083085530301, "learning_rate": 5.146355805285452e-07, "loss": 0.465, "step": 11080 }, { "epoch": 0.9, "grad_norm": 1.134853604728181, "learning_rate": 5.138022894239369e-07, "loss": 0.5574, "step": 11081 }, { "epoch": 0.9, "grad_norm": 0.8853817334786239, "learning_rate": 5.129696557044173e-07, "loss": 0.4293, "step": 11082 }, { "epoch": 0.9, "grad_norm": 0.98048161606491, "learning_rate": 5.121376794276834e-07, "loss": 0.5062, "step": 11083 }, { "epoch": 0.9, "grad_norm": 0.9456961383840721, "learning_rate": 5.113063606513935e-07, "loss": 0.4381, "step": 11084 }, { "epoch": 0.9, "grad_norm": 0.9519112022167273, "learning_rate": 5.104756994331561e-07, "loss": 0.4312, "step": 11085 }, { "epoch": 0.9, "grad_norm": 0.948613829358276, "learning_rate": 5.096456958305351e-07, "loss": 0.4807, "step": 11086 }, { "epoch": 0.9, "grad_norm": 0.9351289986800121, "learning_rate": 5.088163499010502e-07, "loss": 0.5065, "step": 11087 }, { "epoch": 0.9, "grad_norm": 0.8956220061900297, "learning_rate": 5.07987661702174e-07, "loss": 0.4606, "step": 11088 }, { "epoch": 0.9, "grad_norm": 0.9545684634715169, "learning_rate": 5.071596312913329e-07, "loss": 0.4764, "step": 11089 }, { "epoch": 0.9, "grad_norm": 1.012292041949086, "learning_rate": 5.06332258725909e-07, "loss": 0.4834, "step": 11090 }, { "epoch": 0.9, "grad_norm": 0.9546015528272861, "learning_rate": 5.055055440632384e-07, "loss": 0.4927, "step": 11091 }, { "epoch": 0.9, "grad_norm": 0.8922578203787093, "learning_rate": 5.046794873606131e-07, "loss": 0.464, "step": 11092 }, { "epoch": 0.9, "grad_norm": 1.002581031774825, "learning_rate": 5.038540886752752e-07, "loss": 0.5291, "step": 11093 }, { "epoch": 0.9, "grad_norm": 0.956594306713076, "learning_rate": 5.030293480644289e-07, "loss": 0.5018, "step": 11094 }, { "epoch": 0.9, "grad_norm": 1.0593410366903622, "learning_rate": 5.022052655852228e-07, "loss": 0.554, "step": 11095 }, { "epoch": 0.9, "grad_norm": 0.9620155031743906, "learning_rate": 5.013818412947669e-07, "loss": 0.4683, "step": 11096 }, { "epoch": 0.9, "grad_norm": 0.9369348023729992, "learning_rate": 5.005590752501244e-07, "loss": 0.4979, "step": 11097 }, { "epoch": 0.9, "grad_norm": 0.9417310972907301, "learning_rate": 4.997369675083131e-07, "loss": 0.4617, "step": 11098 }, { "epoch": 0.9, "grad_norm": 0.934368976889229, "learning_rate": 4.989155181263017e-07, "loss": 0.487, "step": 11099 }, { "epoch": 0.9, "grad_norm": 0.9313517760063982, "learning_rate": 4.980947271610192e-07, "loss": 0.4887, "step": 11100 }, { "epoch": 0.9, "grad_norm": 0.9351666968503773, "learning_rate": 4.972745946693414e-07, "loss": 0.4626, "step": 11101 }, { "epoch": 0.9, "grad_norm": 0.9262804697487227, "learning_rate": 4.964551207081081e-07, "loss": 0.4518, "step": 11102 }, { "epoch": 0.9, "grad_norm": 0.9592488579633498, "learning_rate": 4.95636305334104e-07, "loss": 0.5235, "step": 11103 }, { "epoch": 0.9, "grad_norm": 0.99004950050435, "learning_rate": 4.948181486040737e-07, "loss": 0.4683, "step": 11104 }, { "epoch": 0.9, "grad_norm": 0.9846886660796365, "learning_rate": 4.940006505747142e-07, "loss": 0.513, "step": 11105 }, { "epoch": 0.9, "grad_norm": 0.9454931036011615, "learning_rate": 4.931838113026798e-07, "loss": 0.541, "step": 11106 }, { "epoch": 0.9, "grad_norm": 0.920001078733727, "learning_rate": 4.923676308445713e-07, "loss": 0.5109, "step": 11107 }, { "epoch": 0.9, "grad_norm": 0.9192704813594433, "learning_rate": 4.915521092569553e-07, "loss": 0.4743, "step": 11108 }, { "epoch": 0.9, "grad_norm": 0.955057808863621, "learning_rate": 4.907372465963434e-07, "loss": 0.4339, "step": 11109 }, { "epoch": 0.9, "grad_norm": 0.9254482314436216, "learning_rate": 4.899230429192059e-07, "loss": 0.466, "step": 11110 }, { "epoch": 0.9, "grad_norm": 0.8747271004206972, "learning_rate": 4.891094982819656e-07, "loss": 0.4546, "step": 11111 }, { "epoch": 0.9, "grad_norm": 0.9775214852283891, "learning_rate": 4.882966127410016e-07, "loss": 0.4646, "step": 11112 }, { "epoch": 0.9, "grad_norm": 1.0915192222629713, "learning_rate": 4.874843863526435e-07, "loss": 0.5919, "step": 11113 }, { "epoch": 0.9, "grad_norm": 0.990407126815721, "learning_rate": 4.866728191731829e-07, "loss": 0.4546, "step": 11114 }, { "epoch": 0.9, "grad_norm": 0.9709797991935482, "learning_rate": 4.858619112588559e-07, "loss": 0.4967, "step": 11115 }, { "epoch": 0.9, "grad_norm": 0.9241489813877177, "learning_rate": 4.850516626658585e-07, "loss": 0.4624, "step": 11116 }, { "epoch": 0.9, "grad_norm": 0.9445547169009616, "learning_rate": 4.842420734503428e-07, "loss": 0.48, "step": 11117 }, { "epoch": 0.9, "grad_norm": 0.9599734450815728, "learning_rate": 4.834331436684114e-07, "loss": 0.4661, "step": 11118 }, { "epoch": 0.9, "grad_norm": 0.9320491380802918, "learning_rate": 4.826248733761185e-07, "loss": 0.5014, "step": 11119 }, { "epoch": 0.9, "grad_norm": 0.964224195814961, "learning_rate": 4.818172626294837e-07, "loss": 0.4735, "step": 11120 }, { "epoch": 0.9, "grad_norm": 0.9579047810051492, "learning_rate": 4.810103114844688e-07, "loss": 0.4381, "step": 11121 }, { "epoch": 0.9, "grad_norm": 0.8961668308564504, "learning_rate": 4.802040199969959e-07, "loss": 0.462, "step": 11122 }, { "epoch": 0.9, "grad_norm": 0.9865262747521256, "learning_rate": 4.793983882229402e-07, "loss": 0.4953, "step": 11123 }, { "epoch": 0.9, "grad_norm": 1.106608268670239, "learning_rate": 4.785934162181305e-07, "loss": 0.572, "step": 11124 }, { "epoch": 0.9, "grad_norm": 0.928105720357525, "learning_rate": 4.777891040383531e-07, "loss": 0.4759, "step": 11125 }, { "epoch": 0.9, "grad_norm": 0.9034973838841738, "learning_rate": 4.769854517393447e-07, "loss": 0.4502, "step": 11126 }, { "epoch": 0.9, "grad_norm": 0.9056139419125202, "learning_rate": 4.761824593767961e-07, "loss": 0.4717, "step": 11127 }, { "epoch": 0.9, "grad_norm": 0.91672933848264, "learning_rate": 4.753801270063574e-07, "loss": 0.4453, "step": 11128 }, { "epoch": 0.9, "grad_norm": 0.9868633642457375, "learning_rate": 4.7457845468362627e-07, "loss": 0.522, "step": 11129 }, { "epoch": 0.9, "grad_norm": 0.9925761450136723, "learning_rate": 4.7377744246415837e-07, "loss": 0.5019, "step": 11130 }, { "epoch": 0.9, "grad_norm": 0.8302684218548028, "learning_rate": 4.7297709040346474e-07, "loss": 0.466, "step": 11131 }, { "epoch": 0.9, "grad_norm": 0.8809889972200473, "learning_rate": 4.7217739855700995e-07, "loss": 0.4608, "step": 11132 }, { "epoch": 0.9, "grad_norm": 0.949125194133086, "learning_rate": 4.7137836698020747e-07, "loss": 0.5073, "step": 11133 }, { "epoch": 0.9, "grad_norm": 0.9828922076326778, "learning_rate": 4.7057999572843516e-07, "loss": 0.5245, "step": 11134 }, { "epoch": 0.9, "grad_norm": 0.9807408745494204, "learning_rate": 4.6978228485701437e-07, "loss": 0.4997, "step": 11135 }, { "epoch": 0.91, "grad_norm": 0.8783668016785334, "learning_rate": 4.689852344212287e-07, "loss": 0.416, "step": 11136 }, { "epoch": 0.91, "grad_norm": 0.8545384196573856, "learning_rate": 4.681888444763116e-07, "loss": 0.4716, "step": 11137 }, { "epoch": 0.91, "grad_norm": 1.006594980432949, "learning_rate": 4.673931150774547e-07, "loss": 0.5269, "step": 11138 }, { "epoch": 0.91, "grad_norm": 0.9954424066558223, "learning_rate": 4.6659804627979697e-07, "loss": 0.5066, "step": 11139 }, { "epoch": 0.91, "grad_norm": 0.9063227086751301, "learning_rate": 4.658036381384412e-07, "loss": 0.4942, "step": 11140 }, { "epoch": 0.91, "grad_norm": 0.871552804947988, "learning_rate": 4.650098907084355e-07, "loss": 0.4441, "step": 11141 }, { "epoch": 0.91, "grad_norm": 1.0236407870819795, "learning_rate": 4.6421680404478587e-07, "loss": 0.5124, "step": 11142 }, { "epoch": 0.91, "grad_norm": 0.824417457432014, "learning_rate": 4.634243782024539e-07, "loss": 0.4079, "step": 11143 }, { "epoch": 0.91, "grad_norm": 1.133583472750711, "learning_rate": 4.6263261323635455e-07, "loss": 0.5321, "step": 11144 }, { "epoch": 0.91, "grad_norm": 0.8843697218228761, "learning_rate": 4.6184150920135395e-07, "loss": 0.4356, "step": 11145 }, { "epoch": 0.91, "grad_norm": 0.9000063294594467, "learning_rate": 4.610510661522782e-07, "loss": 0.4835, "step": 11146 }, { "epoch": 0.91, "grad_norm": 0.9434582772507041, "learning_rate": 4.602612841439014e-07, "loss": 0.5203, "step": 11147 }, { "epoch": 0.91, "grad_norm": 0.9692831264058703, "learning_rate": 4.594721632309551e-07, "loss": 0.4553, "step": 11148 }, { "epoch": 0.91, "grad_norm": 0.9482907368711121, "learning_rate": 4.5868370346812685e-07, "loss": 0.4821, "step": 11149 }, { "epoch": 0.91, "grad_norm": 0.848294913758441, "learning_rate": 4.5789590491005507e-07, "loss": 0.433, "step": 11150 }, { "epoch": 0.91, "grad_norm": 0.9580137905855243, "learning_rate": 4.571087676113306e-07, "loss": 0.4262, "step": 11151 }, { "epoch": 0.91, "grad_norm": 0.9885469522737916, "learning_rate": 4.5632229162650755e-07, "loss": 0.4879, "step": 11152 }, { "epoch": 0.91, "grad_norm": 0.9540913556936466, "learning_rate": 4.555364770100823e-07, "loss": 0.4256, "step": 11153 }, { "epoch": 0.91, "grad_norm": 0.901228943473374, "learning_rate": 4.5475132381651356e-07, "loss": 0.5005, "step": 11154 }, { "epoch": 0.91, "grad_norm": 0.8633625027678152, "learning_rate": 4.5396683210021107e-07, "loss": 0.4495, "step": 11155 }, { "epoch": 0.91, "grad_norm": 0.975255504720242, "learning_rate": 4.531830019155425e-07, "loss": 0.5066, "step": 11156 }, { "epoch": 0.91, "grad_norm": 0.9191865185057262, "learning_rate": 4.5239983331682e-07, "loss": 0.4824, "step": 11157 }, { "epoch": 0.91, "grad_norm": 0.9105743658096604, "learning_rate": 4.516173263583234e-07, "loss": 0.475, "step": 11158 }, { "epoch": 0.91, "grad_norm": 1.0033483384909352, "learning_rate": 4.50835481094275e-07, "loss": 0.518, "step": 11159 }, { "epoch": 0.91, "grad_norm": 0.9174121202867608, "learning_rate": 4.50054297578858e-07, "loss": 0.5079, "step": 11160 }, { "epoch": 0.91, "grad_norm": 0.9138216995553686, "learning_rate": 4.492737758662069e-07, "loss": 0.4926, "step": 11161 }, { "epoch": 0.91, "grad_norm": 0.8698376707960717, "learning_rate": 4.484939160104129e-07, "loss": 0.478, "step": 11162 }, { "epoch": 0.91, "grad_norm": 1.0407956612094187, "learning_rate": 4.4771471806551614e-07, "loss": 0.5134, "step": 11163 }, { "epoch": 0.91, "grad_norm": 0.9911654660963026, "learning_rate": 4.469361820855189e-07, "loss": 0.4979, "step": 11164 }, { "epoch": 0.91, "grad_norm": 0.8965315448269694, "learning_rate": 4.4615830812437035e-07, "loss": 0.5161, "step": 11165 }, { "epoch": 0.91, "grad_norm": 0.9476143911761191, "learning_rate": 4.4538109623597617e-07, "loss": 0.4384, "step": 11166 }, { "epoch": 0.91, "grad_norm": 0.9106293415533426, "learning_rate": 4.446045464741966e-07, "loss": 0.4845, "step": 11167 }, { "epoch": 0.91, "grad_norm": 0.9679582344937153, "learning_rate": 4.4382865889284756e-07, "loss": 0.5291, "step": 11168 }, { "epoch": 0.91, "grad_norm": 1.003241904827064, "learning_rate": 4.4305343354569483e-07, "loss": 0.5008, "step": 11169 }, { "epoch": 0.91, "grad_norm": 0.9200873262321915, "learning_rate": 4.4227887048646335e-07, "loss": 0.4728, "step": 11170 }, { "epoch": 0.91, "grad_norm": 0.8862449874934346, "learning_rate": 4.4150496976882783e-07, "loss": 0.4437, "step": 11171 }, { "epoch": 0.91, "grad_norm": 0.8813859946917624, "learning_rate": 4.407317314464199e-07, "loss": 0.4773, "step": 11172 }, { "epoch": 0.91, "grad_norm": 0.9116257945503873, "learning_rate": 4.399591555728233e-07, "loss": 0.457, "step": 11173 }, { "epoch": 0.91, "grad_norm": 0.8952447845796265, "learning_rate": 4.391872422015786e-07, "loss": 0.4996, "step": 11174 }, { "epoch": 0.91, "grad_norm": 1.0264398330218911, "learning_rate": 4.384159913861752e-07, "loss": 0.5262, "step": 11175 }, { "epoch": 0.91, "grad_norm": 0.8986090002767066, "learning_rate": 4.3764540318006587e-07, "loss": 0.5164, "step": 11176 }, { "epoch": 0.91, "grad_norm": 0.8649641214293282, "learning_rate": 4.368754776366457e-07, "loss": 0.4174, "step": 11177 }, { "epoch": 0.91, "grad_norm": 0.880916340853826, "learning_rate": 4.3610621480927315e-07, "loss": 0.4489, "step": 11178 }, { "epoch": 0.91, "grad_norm": 0.9090094229803269, "learning_rate": 4.3533761475125666e-07, "loss": 0.4797, "step": 11179 }, { "epoch": 0.91, "grad_norm": 0.9788562410334294, "learning_rate": 4.3456967751586143e-07, "loss": 0.4673, "step": 11180 }, { "epoch": 0.91, "grad_norm": 1.0350007495095783, "learning_rate": 4.338024031562993e-07, "loss": 0.4968, "step": 11181 }, { "epoch": 0.91, "grad_norm": 0.9862582443544196, "learning_rate": 4.3303579172574884e-07, "loss": 0.5074, "step": 11182 }, { "epoch": 0.91, "grad_norm": 0.9358507276894988, "learning_rate": 4.3226984327733093e-07, "loss": 0.4914, "step": 11183 }, { "epoch": 0.91, "grad_norm": 0.9753101507137873, "learning_rate": 4.3150455786412526e-07, "loss": 0.4711, "step": 11184 }, { "epoch": 0.91, "grad_norm": 0.944450308660901, "learning_rate": 4.3073993553916726e-07, "loss": 0.4919, "step": 11185 }, { "epoch": 0.91, "grad_norm": 0.8749586270650181, "learning_rate": 4.2997597635544563e-07, "loss": 0.4242, "step": 11186 }, { "epoch": 0.91, "grad_norm": 0.9996020749611297, "learning_rate": 4.292126803658969e-07, "loss": 0.4821, "step": 11187 }, { "epoch": 0.91, "grad_norm": 0.863509502257761, "learning_rate": 4.2845004762342325e-07, "loss": 0.459, "step": 11188 }, { "epoch": 0.91, "grad_norm": 0.8681959186479662, "learning_rate": 4.27688078180869e-07, "loss": 0.4579, "step": 11189 }, { "epoch": 0.91, "grad_norm": 0.8916377298074757, "learning_rate": 4.269267720910419e-07, "loss": 0.4552, "step": 11190 }, { "epoch": 0.91, "grad_norm": 0.913625486076061, "learning_rate": 4.2616612940669657e-07, "loss": 0.4839, "step": 11191 }, { "epoch": 0.91, "grad_norm": 0.9608375155236945, "learning_rate": 4.254061501805484e-07, "loss": 0.5321, "step": 11192 }, { "epoch": 0.91, "grad_norm": 0.9390229983486676, "learning_rate": 4.246468344652599e-07, "loss": 0.5257, "step": 11193 }, { "epoch": 0.91, "grad_norm": 1.028975729366445, "learning_rate": 4.238881823134533e-07, "loss": 0.4979, "step": 11194 }, { "epoch": 0.91, "grad_norm": 0.873398927746758, "learning_rate": 4.2313019377770104e-07, "loss": 0.435, "step": 11195 }, { "epoch": 0.91, "grad_norm": 0.9497373872488999, "learning_rate": 4.223728689105322e-07, "loss": 0.5039, "step": 11196 }, { "epoch": 0.91, "grad_norm": 0.9519321022576744, "learning_rate": 4.216162077644281e-07, "loss": 0.5677, "step": 11197 }, { "epoch": 0.91, "grad_norm": 0.8908282880627073, "learning_rate": 4.208602103918258e-07, "loss": 0.4711, "step": 11198 }, { "epoch": 0.91, "grad_norm": 1.0498824109635534, "learning_rate": 4.2010487684511105e-07, "loss": 0.5055, "step": 11199 }, { "epoch": 0.91, "grad_norm": 0.9252361422177733, "learning_rate": 4.1935020717663423e-07, "loss": 0.4992, "step": 11200 }, { "epoch": 0.91, "grad_norm": 0.9402917514441262, "learning_rate": 4.1859620143868793e-07, "loss": 0.4984, "step": 11201 }, { "epoch": 0.91, "grad_norm": 1.0012578522166926, "learning_rate": 4.178428596835271e-07, "loss": 0.4871, "step": 11202 }, { "epoch": 0.91, "grad_norm": 0.872894891355588, "learning_rate": 4.170901819633566e-07, "loss": 0.4665, "step": 11203 }, { "epoch": 0.91, "grad_norm": 0.977140130196723, "learning_rate": 4.1633816833033804e-07, "loss": 0.497, "step": 11204 }, { "epoch": 0.91, "grad_norm": 1.0056578141335417, "learning_rate": 4.155868188365808e-07, "loss": 0.4943, "step": 11205 }, { "epoch": 0.91, "grad_norm": 0.9423785526526659, "learning_rate": 4.1483613353415775e-07, "loss": 0.4813, "step": 11206 }, { "epoch": 0.91, "grad_norm": 1.0023662451744835, "learning_rate": 4.1408611247508723e-07, "loss": 0.491, "step": 11207 }, { "epoch": 0.91, "grad_norm": 1.0159130710754287, "learning_rate": 4.133367557113477e-07, "loss": 0.5373, "step": 11208 }, { "epoch": 0.91, "grad_norm": 0.8312726422881775, "learning_rate": 4.1258806329486644e-07, "loss": 0.4175, "step": 11209 }, { "epoch": 0.91, "grad_norm": 0.8091738993603446, "learning_rate": 4.118400352775287e-07, "loss": 0.4436, "step": 11210 }, { "epoch": 0.91, "grad_norm": 0.8804197075085122, "learning_rate": 4.1109267171117184e-07, "loss": 0.468, "step": 11211 }, { "epoch": 0.91, "grad_norm": 0.9020380820861756, "learning_rate": 4.103459726475889e-07, "loss": 0.4497, "step": 11212 }, { "epoch": 0.91, "grad_norm": 0.9761918417426805, "learning_rate": 4.095999381385229e-07, "loss": 0.5338, "step": 11213 }, { "epoch": 0.91, "grad_norm": 0.8782319385368993, "learning_rate": 4.088545682356748e-07, "loss": 0.433, "step": 11214 }, { "epoch": 0.91, "grad_norm": 0.950696236898299, "learning_rate": 4.0810986299069656e-07, "loss": 0.436, "step": 11215 }, { "epoch": 0.91, "grad_norm": 0.9189322841765247, "learning_rate": 4.0736582245519795e-07, "loss": 0.4815, "step": 11216 }, { "epoch": 0.91, "grad_norm": 0.884327380500117, "learning_rate": 4.066224466807389e-07, "loss": 0.4702, "step": 11217 }, { "epoch": 0.91, "grad_norm": 0.9787955704244248, "learning_rate": 4.0587973571883596e-07, "loss": 0.5555, "step": 11218 }, { "epoch": 0.91, "grad_norm": 0.9858389648304431, "learning_rate": 4.051376896209558e-07, "loss": 0.5023, "step": 11219 }, { "epoch": 0.91, "grad_norm": 0.9994956181583511, "learning_rate": 4.0439630843852383e-07, "loss": 0.5066, "step": 11220 }, { "epoch": 0.91, "grad_norm": 1.0228356795696085, "learning_rate": 4.0365559222291684e-07, "loss": 0.4589, "step": 11221 }, { "epoch": 0.91, "grad_norm": 0.9459753139302353, "learning_rate": 4.029155410254637e-07, "loss": 0.4816, "step": 11222 }, { "epoch": 0.91, "grad_norm": 1.091750919560779, "learning_rate": 4.021761548974523e-07, "loss": 0.5559, "step": 11223 }, { "epoch": 0.91, "grad_norm": 0.9724075012713982, "learning_rate": 4.014374338901206e-07, "loss": 0.4748, "step": 11224 }, { "epoch": 0.91, "grad_norm": 1.006268228578613, "learning_rate": 4.0069937805466084e-07, "loss": 0.4083, "step": 11225 }, { "epoch": 0.91, "grad_norm": 0.9153690149268117, "learning_rate": 3.999619874422178e-07, "loss": 0.4788, "step": 11226 }, { "epoch": 0.91, "grad_norm": 0.9338564214866565, "learning_rate": 3.99225262103895e-07, "loss": 0.4906, "step": 11227 }, { "epoch": 0.91, "grad_norm": 1.0277728033158247, "learning_rate": 3.9848920209074606e-07, "loss": 0.5594, "step": 11228 }, { "epoch": 0.91, "grad_norm": 0.9886771525159102, "learning_rate": 3.977538074537779e-07, "loss": 0.494, "step": 11229 }, { "epoch": 0.91, "grad_norm": 0.9988823203761128, "learning_rate": 3.9701907824395426e-07, "loss": 0.4573, "step": 11230 }, { "epoch": 0.91, "grad_norm": 0.9668732903658148, "learning_rate": 3.9628501451219106e-07, "loss": 0.5663, "step": 11231 }, { "epoch": 0.91, "grad_norm": 1.0333664090017052, "learning_rate": 3.9555161630935756e-07, "loss": 0.5688, "step": 11232 }, { "epoch": 0.91, "grad_norm": 0.870329084161462, "learning_rate": 3.9481888368627764e-07, "loss": 0.4793, "step": 11233 }, { "epoch": 0.91, "grad_norm": 0.926284708098634, "learning_rate": 3.940868166937295e-07, "loss": 0.4408, "step": 11234 }, { "epoch": 0.91, "grad_norm": 0.9604043534785126, "learning_rate": 3.933554153824437e-07, "loss": 0.5178, "step": 11235 }, { "epoch": 0.91, "grad_norm": 0.9889579675663253, "learning_rate": 3.9262467980310747e-07, "loss": 0.4689, "step": 11236 }, { "epoch": 0.91, "grad_norm": 0.899283890650655, "learning_rate": 3.918946100063603e-07, "loss": 0.4952, "step": 11237 }, { "epoch": 0.91, "grad_norm": 1.0361637174713196, "learning_rate": 3.9116520604279285e-07, "loss": 0.4996, "step": 11238 }, { "epoch": 0.91, "grad_norm": 0.9260930406702779, "learning_rate": 3.904364679629535e-07, "loss": 0.442, "step": 11239 }, { "epoch": 0.91, "grad_norm": 0.9405497553842672, "learning_rate": 3.897083958173431e-07, "loss": 0.4688, "step": 11240 }, { "epoch": 0.91, "grad_norm": 1.039178970007914, "learning_rate": 3.889809896564167e-07, "loss": 0.5402, "step": 11241 }, { "epoch": 0.91, "grad_norm": 0.8742429461856939, "learning_rate": 3.88254249530583e-07, "loss": 0.5077, "step": 11242 }, { "epoch": 0.91, "grad_norm": 0.9717294505105695, "learning_rate": 3.8752817549020494e-07, "loss": 0.4859, "step": 11243 }, { "epoch": 0.91, "grad_norm": 0.9651825344259987, "learning_rate": 3.868027675855968e-07, "loss": 0.5584, "step": 11244 }, { "epoch": 0.91, "grad_norm": 0.949190123858509, "learning_rate": 3.8607802586703045e-07, "loss": 0.479, "step": 11245 }, { "epoch": 0.91, "grad_norm": 1.0633748862786825, "learning_rate": 3.853539503847292e-07, "loss": 0.5105, "step": 11246 }, { "epoch": 0.91, "grad_norm": 0.907406015353476, "learning_rate": 3.8463054118887064e-07, "loss": 0.4661, "step": 11247 }, { "epoch": 0.91, "grad_norm": 0.9235434459247656, "learning_rate": 3.8390779832958804e-07, "loss": 0.4984, "step": 11248 }, { "epoch": 0.91, "grad_norm": 0.871393155044762, "learning_rate": 3.831857218569646e-07, "loss": 0.4764, "step": 11249 }, { "epoch": 0.91, "grad_norm": 1.002044778848067, "learning_rate": 3.824643118210403e-07, "loss": 0.4984, "step": 11250 }, { "epoch": 0.91, "grad_norm": 0.8642745302575283, "learning_rate": 3.817435682718096e-07, "loss": 0.4246, "step": 11251 }, { "epoch": 0.91, "grad_norm": 0.8914467494927035, "learning_rate": 3.810234912592181e-07, "loss": 0.4409, "step": 11252 }, { "epoch": 0.91, "grad_norm": 0.916483430239664, "learning_rate": 3.803040808331659e-07, "loss": 0.481, "step": 11253 }, { "epoch": 0.91, "grad_norm": 0.898092244042881, "learning_rate": 3.7958533704350763e-07, "loss": 0.4732, "step": 11254 }, { "epoch": 0.91, "grad_norm": 0.9126761753612629, "learning_rate": 3.788672599400534e-07, "loss": 0.4555, "step": 11255 }, { "epoch": 0.91, "grad_norm": 0.8453746843953241, "learning_rate": 3.7814984957256327e-07, "loss": 0.4511, "step": 11256 }, { "epoch": 0.91, "grad_norm": 0.9432585840238175, "learning_rate": 3.7743310599075543e-07, "loss": 0.5324, "step": 11257 }, { "epoch": 0.91, "grad_norm": 0.8631479672202498, "learning_rate": 3.7671702924429677e-07, "loss": 0.4059, "step": 11258 }, { "epoch": 0.92, "grad_norm": 1.003445796403605, "learning_rate": 3.7600161938281196e-07, "loss": 0.4981, "step": 11259 }, { "epoch": 0.92, "grad_norm": 0.8847485019144521, "learning_rate": 3.7528687645587924e-07, "loss": 0.4643, "step": 11260 }, { "epoch": 0.92, "grad_norm": 0.9521515676316561, "learning_rate": 3.745728005130289e-07, "loss": 0.4825, "step": 11261 }, { "epoch": 0.92, "grad_norm": 0.9943019297322592, "learning_rate": 3.7385939160374476e-07, "loss": 0.4789, "step": 11262 }, { "epoch": 0.92, "grad_norm": 1.0443854704039524, "learning_rate": 3.731466497774683e-07, "loss": 0.5328, "step": 11263 }, { "epoch": 0.92, "grad_norm": 0.9224820193567518, "learning_rate": 3.7243457508358784e-07, "loss": 0.4358, "step": 11264 }, { "epoch": 0.92, "grad_norm": 0.9718348345724628, "learning_rate": 3.717231675714539e-07, "loss": 0.5314, "step": 11265 }, { "epoch": 0.92, "grad_norm": 0.9328422043792436, "learning_rate": 3.710124272903626e-07, "loss": 0.5085, "step": 11266 }, { "epoch": 0.92, "grad_norm": 0.8573445232214344, "learning_rate": 3.7030235428956895e-07, "loss": 0.4688, "step": 11267 }, { "epoch": 0.92, "grad_norm": 0.8719481532054565, "learning_rate": 3.6959294861828145e-07, "loss": 0.464, "step": 11268 }, { "epoch": 0.92, "grad_norm": 1.022972115265359, "learning_rate": 3.688842103256607e-07, "loss": 0.5125, "step": 11269 }, { "epoch": 0.92, "grad_norm": 0.907652029442627, "learning_rate": 3.681761394608197e-07, "loss": 0.4468, "step": 11270 }, { "epoch": 0.92, "grad_norm": 0.9742138500191758, "learning_rate": 3.674687360728313e-07, "loss": 0.4637, "step": 11271 }, { "epoch": 0.92, "grad_norm": 1.0138898185358978, "learning_rate": 3.667620002107142e-07, "loss": 0.4617, "step": 11272 }, { "epoch": 0.92, "grad_norm": 0.9212464426233573, "learning_rate": 3.660559319234447e-07, "loss": 0.4477, "step": 11273 }, { "epoch": 0.92, "grad_norm": 0.9109639141362548, "learning_rate": 3.653505312599548e-07, "loss": 0.4575, "step": 11274 }, { "epoch": 0.92, "grad_norm": 0.9821477248340027, "learning_rate": 3.646457982691287e-07, "loss": 0.5487, "step": 11275 }, { "epoch": 0.92, "grad_norm": 0.9427187757868164, "learning_rate": 3.639417329997996e-07, "loss": 0.4833, "step": 11276 }, { "epoch": 0.92, "grad_norm": 0.9306861527841653, "learning_rate": 3.632383355007629e-07, "loss": 0.4816, "step": 11277 }, { "epoch": 0.92, "grad_norm": 0.9649502106841656, "learning_rate": 3.6253560582076075e-07, "loss": 0.4746, "step": 11278 }, { "epoch": 0.92, "grad_norm": 0.9670849975896374, "learning_rate": 3.6183354400849304e-07, "loss": 0.5206, "step": 11279 }, { "epoch": 0.92, "grad_norm": 1.0014901452286138, "learning_rate": 3.6113215011261194e-07, "loss": 0.5014, "step": 11280 }, { "epoch": 0.92, "grad_norm": 0.9168158272006056, "learning_rate": 3.604314241817242e-07, "loss": 0.4216, "step": 11281 }, { "epoch": 0.92, "grad_norm": 1.011810912246533, "learning_rate": 3.5973136626438644e-07, "loss": 0.4856, "step": 11282 }, { "epoch": 0.92, "grad_norm": 1.070538404968705, "learning_rate": 3.5903197640911546e-07, "loss": 0.5257, "step": 11283 }, { "epoch": 0.92, "grad_norm": 0.9009972646160386, "learning_rate": 3.5833325466437697e-07, "loss": 0.4777, "step": 11284 }, { "epoch": 0.92, "grad_norm": 0.8799512675526913, "learning_rate": 3.576352010785911e-07, "loss": 0.4593, "step": 11285 }, { "epoch": 0.92, "grad_norm": 0.9844801336103844, "learning_rate": 3.5693781570013243e-07, "loss": 0.4648, "step": 11286 }, { "epoch": 0.92, "grad_norm": 0.9068441044013625, "learning_rate": 3.5624109857733234e-07, "loss": 0.4397, "step": 11287 }, { "epoch": 0.92, "grad_norm": 1.049042656285058, "learning_rate": 3.555450497584667e-07, "loss": 0.4826, "step": 11288 }, { "epoch": 0.92, "grad_norm": 0.9362852634910507, "learning_rate": 3.548496692917769e-07, "loss": 0.4907, "step": 11289 }, { "epoch": 0.92, "grad_norm": 0.9580446529346488, "learning_rate": 3.541549572254488e-07, "loss": 0.4849, "step": 11290 }, { "epoch": 0.92, "grad_norm": 0.9345954545496868, "learning_rate": 3.5346091360762615e-07, "loss": 0.513, "step": 11291 }, { "epoch": 0.92, "grad_norm": 0.9650252641059298, "learning_rate": 3.52767538486406e-07, "loss": 0.4729, "step": 11292 }, { "epoch": 0.92, "grad_norm": 0.9106790175650638, "learning_rate": 3.520748319098399e-07, "loss": 0.4588, "step": 11293 }, { "epoch": 0.92, "grad_norm": 0.8666227556153863, "learning_rate": 3.513827939259273e-07, "loss": 0.446, "step": 11294 }, { "epoch": 0.92, "grad_norm": 0.8980952689189213, "learning_rate": 3.5069142458263093e-07, "loss": 0.4815, "step": 11295 }, { "epoch": 0.92, "grad_norm": 1.0014655996836292, "learning_rate": 3.500007239278591e-07, "loss": 0.506, "step": 11296 }, { "epoch": 0.92, "grad_norm": 0.985359380024748, "learning_rate": 3.49310692009478e-07, "loss": 0.5175, "step": 11297 }, { "epoch": 0.92, "grad_norm": 0.9953536043633036, "learning_rate": 3.4862132887530485e-07, "loss": 0.5532, "step": 11298 }, { "epoch": 0.92, "grad_norm": 0.8713096817640018, "learning_rate": 3.4793263457311487e-07, "loss": 0.4345, "step": 11299 }, { "epoch": 0.92, "grad_norm": 0.9010164983973311, "learning_rate": 3.4724460915062874e-07, "loss": 0.4683, "step": 11300 }, { "epoch": 0.92, "grad_norm": 0.8829612221593353, "learning_rate": 3.4655725265553276e-07, "loss": 0.4497, "step": 11301 }, { "epoch": 0.92, "grad_norm": 0.9468093798201783, "learning_rate": 3.458705651354544e-07, "loss": 0.4913, "step": 11302 }, { "epoch": 0.92, "grad_norm": 0.8907019359980703, "learning_rate": 3.451845466379833e-07, "loss": 0.4794, "step": 11303 }, { "epoch": 0.92, "grad_norm": 0.9557898281434373, "learning_rate": 3.4449919721065815e-07, "loss": 0.4513, "step": 11304 }, { "epoch": 0.92, "grad_norm": 0.8546099421797825, "learning_rate": 3.4381451690097653e-07, "loss": 0.4572, "step": 11305 }, { "epoch": 0.92, "grad_norm": 0.8776209132717654, "learning_rate": 3.4313050575638164e-07, "loss": 0.4674, "step": 11306 }, { "epoch": 0.92, "grad_norm": 1.0620298487621678, "learning_rate": 3.4244716382427876e-07, "loss": 0.4877, "step": 11307 }, { "epoch": 0.92, "grad_norm": 0.9600773262046887, "learning_rate": 3.417644911520202e-07, "loss": 0.4858, "step": 11308 }, { "epoch": 0.92, "grad_norm": 1.0172245807626483, "learning_rate": 3.410824877869157e-07, "loss": 0.5105, "step": 11309 }, { "epoch": 0.92, "grad_norm": 1.0641696939999097, "learning_rate": 3.404011537762275e-07, "loss": 0.5477, "step": 11310 }, { "epoch": 0.92, "grad_norm": 0.9414882028257976, "learning_rate": 3.3972048916717127e-07, "loss": 0.4423, "step": 11311 }, { "epoch": 0.92, "grad_norm": 0.9345099908540642, "learning_rate": 3.3904049400691585e-07, "loss": 0.5102, "step": 11312 }, { "epoch": 0.92, "grad_norm": 1.0032701922871918, "learning_rate": 3.3836116834258583e-07, "loss": 0.491, "step": 11313 }, { "epoch": 0.92, "grad_norm": 1.2780296973011878, "learning_rate": 3.376825122212568e-07, "loss": 0.5556, "step": 11314 }, { "epoch": 0.92, "grad_norm": 1.0067969180363423, "learning_rate": 3.37004525689959e-07, "loss": 0.4622, "step": 11315 }, { "epoch": 0.92, "grad_norm": 0.9048930187275969, "learning_rate": 3.3632720879567594e-07, "loss": 0.5202, "step": 11316 }, { "epoch": 0.92, "grad_norm": 0.9190783750855306, "learning_rate": 3.356505615853478e-07, "loss": 0.4788, "step": 11317 }, { "epoch": 0.92, "grad_norm": 0.8700597041027842, "learning_rate": 3.349745841058605e-07, "loss": 0.4796, "step": 11318 }, { "epoch": 0.92, "grad_norm": 0.9765173966674195, "learning_rate": 3.3429927640406425e-07, "loss": 0.4858, "step": 11319 }, { "epoch": 0.92, "grad_norm": 1.0247910175502006, "learning_rate": 3.336246385267528e-07, "loss": 0.4862, "step": 11320 }, { "epoch": 0.92, "grad_norm": 0.8672758790970835, "learning_rate": 3.3295067052068086e-07, "loss": 0.4239, "step": 11321 }, { "epoch": 0.92, "grad_norm": 0.9803320123619911, "learning_rate": 3.322773724325523e-07, "loss": 0.4698, "step": 11322 }, { "epoch": 0.92, "grad_norm": 0.8993180142347001, "learning_rate": 3.3160474430902756e-07, "loss": 0.4318, "step": 11323 }, { "epoch": 0.92, "grad_norm": 1.022883190886677, "learning_rate": 3.30932786196716e-07, "loss": 0.5367, "step": 11324 }, { "epoch": 0.92, "grad_norm": 0.8970949373639794, "learning_rate": 3.302614981421881e-07, "loss": 0.4721, "step": 11325 }, { "epoch": 0.92, "grad_norm": 0.9270048427254719, "learning_rate": 3.2959088019196005e-07, "loss": 0.4475, "step": 11326 }, { "epoch": 0.92, "grad_norm": 1.0993593442790337, "learning_rate": 3.2892093239250686e-07, "loss": 0.5155, "step": 11327 }, { "epoch": 0.92, "grad_norm": 0.9603091910994954, "learning_rate": 3.282516547902548e-07, "loss": 0.4498, "step": 11328 }, { "epoch": 0.92, "grad_norm": 0.9606031793318405, "learning_rate": 3.2758304743158554e-07, "loss": 0.4966, "step": 11329 }, { "epoch": 0.92, "grad_norm": 0.990904867623026, "learning_rate": 3.2691511036282875e-07, "loss": 0.4707, "step": 11330 }, { "epoch": 0.92, "grad_norm": 0.9704452825773399, "learning_rate": 3.262478436302774e-07, "loss": 0.4255, "step": 11331 }, { "epoch": 0.92, "grad_norm": 0.9581276075878125, "learning_rate": 3.255812472801689e-07, "loss": 0.5424, "step": 11332 }, { "epoch": 0.92, "grad_norm": 0.9665561902448118, "learning_rate": 3.2491532135869865e-07, "loss": 0.5122, "step": 11333 }, { "epoch": 0.92, "grad_norm": 0.8347893223320174, "learning_rate": 3.24250065912014e-07, "loss": 0.4394, "step": 11334 }, { "epoch": 0.92, "grad_norm": 0.8701345936737639, "learning_rate": 3.235854809862193e-07, "loss": 0.4746, "step": 11335 }, { "epoch": 0.92, "grad_norm": 0.982254247110148, "learning_rate": 3.2292156662736554e-07, "loss": 0.4859, "step": 11336 }, { "epoch": 0.92, "grad_norm": 1.4109891620559623, "learning_rate": 3.2225832288146577e-07, "loss": 0.4721, "step": 11337 }, { "epoch": 0.92, "grad_norm": 0.8886097420198746, "learning_rate": 3.2159574979447996e-07, "loss": 0.4585, "step": 11338 }, { "epoch": 0.92, "grad_norm": 1.0076090882264177, "learning_rate": 3.209338474123225e-07, "loss": 0.4674, "step": 11339 }, { "epoch": 0.92, "grad_norm": 0.8590134875014034, "learning_rate": 3.2027261578086443e-07, "loss": 0.4567, "step": 11340 }, { "epoch": 0.92, "grad_norm": 1.041771758544706, "learning_rate": 3.1961205494593027e-07, "loss": 0.467, "step": 11341 }, { "epoch": 0.92, "grad_norm": 0.9674539778793504, "learning_rate": 3.1895216495329116e-07, "loss": 0.4917, "step": 11342 }, { "epoch": 0.92, "grad_norm": 0.9700115735131218, "learning_rate": 3.1829294584868166e-07, "loss": 0.4876, "step": 11343 }, { "epoch": 0.92, "grad_norm": 0.9355995499943076, "learning_rate": 3.1763439767778293e-07, "loss": 0.4811, "step": 11344 }, { "epoch": 0.92, "grad_norm": 0.9956711944594656, "learning_rate": 3.1697652048623185e-07, "loss": 0.5596, "step": 11345 }, { "epoch": 0.92, "grad_norm": 0.8720294164515175, "learning_rate": 3.163193143196197e-07, "loss": 0.3914, "step": 11346 }, { "epoch": 0.92, "grad_norm": 0.9699170744496788, "learning_rate": 3.156627792234901e-07, "loss": 0.4809, "step": 11347 }, { "epoch": 0.92, "grad_norm": 0.922892098617445, "learning_rate": 3.150069152433377e-07, "loss": 0.4439, "step": 11348 }, { "epoch": 0.92, "grad_norm": 0.9107802722920333, "learning_rate": 3.143517224246184e-07, "loss": 0.4472, "step": 11349 }, { "epoch": 0.92, "grad_norm": 0.8922634635118099, "learning_rate": 3.1369720081273147e-07, "loss": 0.4981, "step": 11350 }, { "epoch": 0.92, "grad_norm": 0.9435902052682902, "learning_rate": 3.1304335045303724e-07, "loss": 0.4766, "step": 11351 }, { "epoch": 0.92, "grad_norm": 0.8859616279603214, "learning_rate": 3.1239017139084725e-07, "loss": 0.4254, "step": 11352 }, { "epoch": 0.92, "grad_norm": 0.8330376400127653, "learning_rate": 3.1173766367142534e-07, "loss": 0.4531, "step": 11353 }, { "epoch": 0.92, "grad_norm": 0.936319051976937, "learning_rate": 3.110858273399886e-07, "loss": 0.4438, "step": 11354 }, { "epoch": 0.92, "grad_norm": 0.9737792784501018, "learning_rate": 3.1043466244171204e-07, "loss": 0.5205, "step": 11355 }, { "epoch": 0.92, "grad_norm": 0.9971314327878192, "learning_rate": 3.097841690217174e-07, "loss": 0.5359, "step": 11356 }, { "epoch": 0.92, "grad_norm": 0.938727495682044, "learning_rate": 3.0913434712508406e-07, "loss": 0.4843, "step": 11357 }, { "epoch": 0.92, "grad_norm": 0.9856444957341504, "learning_rate": 3.0848519679684606e-07, "loss": 0.5354, "step": 11358 }, { "epoch": 0.92, "grad_norm": 0.8538657256871016, "learning_rate": 3.078367180819863e-07, "loss": 0.4759, "step": 11359 }, { "epoch": 0.92, "grad_norm": 0.956572623856092, "learning_rate": 3.0718891102544556e-07, "loss": 0.5073, "step": 11360 }, { "epoch": 0.92, "grad_norm": 0.8913295893657242, "learning_rate": 3.0654177567211675e-07, "loss": 0.4138, "step": 11361 }, { "epoch": 0.92, "grad_norm": 0.9338857544751735, "learning_rate": 3.0589531206684397e-07, "loss": 0.4947, "step": 11362 }, { "epoch": 0.92, "grad_norm": 0.9580188168889444, "learning_rate": 3.05249520254427e-07, "loss": 0.4949, "step": 11363 }, { "epoch": 0.92, "grad_norm": 0.9029820242373844, "learning_rate": 3.046044002796189e-07, "loss": 0.4515, "step": 11364 }, { "epoch": 0.92, "grad_norm": 1.0361265416089567, "learning_rate": 3.039599521871273e-07, "loss": 0.5229, "step": 11365 }, { "epoch": 0.92, "grad_norm": 0.9361873322359499, "learning_rate": 3.0331617602160965e-07, "loss": 0.509, "step": 11366 }, { "epoch": 0.92, "grad_norm": 0.9951052754963993, "learning_rate": 3.026730718276805e-07, "loss": 0.5192, "step": 11367 }, { "epoch": 0.92, "grad_norm": 0.9165161306116084, "learning_rate": 3.020306396499062e-07, "loss": 0.4799, "step": 11368 }, { "epoch": 0.92, "grad_norm": 0.9247733833045126, "learning_rate": 3.0138887953280573e-07, "loss": 0.4666, "step": 11369 }, { "epoch": 0.92, "grad_norm": 1.0017284450473003, "learning_rate": 3.0074779152085345e-07, "loss": 0.4935, "step": 11370 }, { "epoch": 0.92, "grad_norm": 0.8499168034190486, "learning_rate": 3.00107375658476e-07, "loss": 0.4418, "step": 11371 }, { "epoch": 0.92, "grad_norm": 0.9375159709601683, "learning_rate": 2.9946763199005356e-07, "loss": 0.5188, "step": 11372 }, { "epoch": 0.92, "grad_norm": 0.9849719113347157, "learning_rate": 2.988285605599206e-07, "loss": 0.4752, "step": 11373 }, { "epoch": 0.92, "grad_norm": 0.9466116530237808, "learning_rate": 2.981901614123617e-07, "loss": 0.513, "step": 11374 }, { "epoch": 0.92, "grad_norm": 0.9249846932225795, "learning_rate": 2.9755243459162144e-07, "loss": 0.475, "step": 11375 }, { "epoch": 0.92, "grad_norm": 1.1001174601861894, "learning_rate": 2.9691538014189005e-07, "loss": 0.5434, "step": 11376 }, { "epoch": 0.92, "grad_norm": 0.8908442139851055, "learning_rate": 2.9627899810731666e-07, "loss": 0.4922, "step": 11377 }, { "epoch": 0.92, "grad_norm": 1.006276155178277, "learning_rate": 2.956432885320004e-07, "loss": 0.5509, "step": 11378 }, { "epoch": 0.92, "grad_norm": 0.958713801071645, "learning_rate": 2.9500825145999723e-07, "loss": 0.4641, "step": 11379 }, { "epoch": 0.92, "grad_norm": 0.925297448457726, "learning_rate": 2.943738869353141e-07, "loss": 0.4948, "step": 11380 }, { "epoch": 0.92, "grad_norm": 0.9610141487552996, "learning_rate": 2.9374019500191255e-07, "loss": 0.5046, "step": 11381 }, { "epoch": 0.93, "grad_norm": 1.0200439829001273, "learning_rate": 2.9310717570370516e-07, "loss": 0.5139, "step": 11382 }, { "epoch": 0.93, "grad_norm": 0.8904018710302055, "learning_rate": 2.9247482908456027e-07, "loss": 0.4644, "step": 11383 }, { "epoch": 0.93, "grad_norm": 0.9344880041590723, "learning_rate": 2.918431551882994e-07, "loss": 0.465, "step": 11384 }, { "epoch": 0.93, "grad_norm": 0.9531718916462352, "learning_rate": 2.9121215405869653e-07, "loss": 0.5138, "step": 11385 }, { "epoch": 0.93, "grad_norm": 0.8398539963659245, "learning_rate": 2.905818257394799e-07, "loss": 0.4241, "step": 11386 }, { "epoch": 0.93, "grad_norm": 0.8422635777860722, "learning_rate": 2.899521702743313e-07, "loss": 0.4459, "step": 11387 }, { "epoch": 0.93, "grad_norm": 0.9634311386263213, "learning_rate": 2.8932318770688364e-07, "loss": 0.5256, "step": 11388 }, { "epoch": 0.93, "grad_norm": 0.9472397347375905, "learning_rate": 2.886948780807253e-07, "loss": 0.4538, "step": 11389 }, { "epoch": 0.93, "grad_norm": 0.9592197845636875, "learning_rate": 2.8806724143939814e-07, "loss": 0.5103, "step": 11390 }, { "epoch": 0.93, "grad_norm": 1.018974190459149, "learning_rate": 2.874402778263974e-07, "loss": 0.476, "step": 11391 }, { "epoch": 0.93, "grad_norm": 1.0259783487545626, "learning_rate": 2.868139872851694e-07, "loss": 0.5273, "step": 11392 }, { "epoch": 0.93, "grad_norm": 0.9251938911299553, "learning_rate": 2.8618836985911837e-07, "loss": 0.4512, "step": 11393 }, { "epoch": 0.93, "grad_norm": 0.9463515132362895, "learning_rate": 2.8556342559159513e-07, "loss": 0.519, "step": 11394 }, { "epoch": 0.93, "grad_norm": 0.9191038400094846, "learning_rate": 2.849391545259106e-07, "loss": 0.4614, "step": 11395 }, { "epoch": 0.93, "grad_norm": 1.0263031963280673, "learning_rate": 2.843155567053246e-07, "loss": 0.5306, "step": 11396 }, { "epoch": 0.93, "grad_norm": 0.8546711846548672, "learning_rate": 2.8369263217305374e-07, "loss": 0.442, "step": 11397 }, { "epoch": 0.93, "grad_norm": 0.9869733608981098, "learning_rate": 2.830703809722646e-07, "loss": 0.45, "step": 11398 }, { "epoch": 0.93, "grad_norm": 0.8344249591406251, "learning_rate": 2.8244880314607924e-07, "loss": 0.3998, "step": 11399 }, { "epoch": 0.93, "grad_norm": 1.027015109040339, "learning_rate": 2.8182789873757334e-07, "loss": 0.4416, "step": 11400 }, { "epoch": 0.93, "grad_norm": 0.9066407958880655, "learning_rate": 2.812076677897735e-07, "loss": 0.4828, "step": 11401 }, { "epoch": 0.93, "grad_norm": 0.9570317674310846, "learning_rate": 2.8058811034566094e-07, "loss": 0.5149, "step": 11402 }, { "epoch": 0.93, "grad_norm": 0.9276312174699367, "learning_rate": 2.7996922644817126e-07, "loss": 0.5011, "step": 11403 }, { "epoch": 0.93, "grad_norm": 0.9758733729187016, "learning_rate": 2.7935101614019354e-07, "loss": 0.5011, "step": 11404 }, { "epoch": 0.93, "grad_norm": 1.0575231778547123, "learning_rate": 2.7873347946456684e-07, "loss": 0.4679, "step": 11405 }, { "epoch": 0.93, "grad_norm": 0.9270474171243843, "learning_rate": 2.7811661646408915e-07, "loss": 0.4469, "step": 11406 }, { "epoch": 0.93, "grad_norm": 0.9410766556350038, "learning_rate": 2.7750042718150514e-07, "loss": 0.4999, "step": 11407 }, { "epoch": 0.93, "grad_norm": 0.8715469726023198, "learning_rate": 2.768849116595185e-07, "loss": 0.434, "step": 11408 }, { "epoch": 0.93, "grad_norm": 0.9514148647505117, "learning_rate": 2.762700699407828e-07, "loss": 0.4589, "step": 11409 }, { "epoch": 0.93, "grad_norm": 0.9448864220490569, "learning_rate": 2.7565590206790613e-07, "loss": 0.4982, "step": 11410 }, { "epoch": 0.93, "grad_norm": 0.9840067038967264, "learning_rate": 2.7504240808344906e-07, "loss": 0.497, "step": 11411 }, { "epoch": 0.93, "grad_norm": 0.9206231070024818, "learning_rate": 2.744295880299297e-07, "loss": 0.451, "step": 11412 }, { "epoch": 0.93, "grad_norm": 0.995856159062488, "learning_rate": 2.7381744194980963e-07, "loss": 0.5632, "step": 11413 }, { "epoch": 0.93, "grad_norm": 0.9625196276014706, "learning_rate": 2.732059698855172e-07, "loss": 0.5134, "step": 11414 }, { "epoch": 0.93, "grad_norm": 0.948921370108036, "learning_rate": 2.7259517187942174e-07, "loss": 0.4777, "step": 11415 }, { "epoch": 0.93, "grad_norm": 0.9561701392693762, "learning_rate": 2.7198504797385286e-07, "loss": 0.4564, "step": 11416 }, { "epoch": 0.93, "grad_norm": 1.013002742298461, "learning_rate": 2.7137559821109104e-07, "loss": 0.5106, "step": 11417 }, { "epoch": 0.93, "grad_norm": 0.9886559129055978, "learning_rate": 2.7076682263337264e-07, "loss": 0.5182, "step": 11418 }, { "epoch": 0.93, "grad_norm": 0.9297366408530876, "learning_rate": 2.701587212828816e-07, "loss": 0.4637, "step": 11419 }, { "epoch": 0.93, "grad_norm": 0.8628281199103401, "learning_rate": 2.6955129420176193e-07, "loss": 0.462, "step": 11420 }, { "epoch": 0.93, "grad_norm": 0.8915919614811436, "learning_rate": 2.689445414321057e-07, "loss": 0.4719, "step": 11421 }, { "epoch": 0.93, "grad_norm": 0.9077066766545857, "learning_rate": 2.6833846301596246e-07, "loss": 0.4627, "step": 11422 }, { "epoch": 0.93, "grad_norm": 0.9649476009742295, "learning_rate": 2.6773305899533084e-07, "loss": 0.5364, "step": 11423 }, { "epoch": 0.93, "grad_norm": 0.9958264945406542, "learning_rate": 2.6712832941216735e-07, "loss": 0.494, "step": 11424 }, { "epoch": 0.93, "grad_norm": 0.9259154281760127, "learning_rate": 2.6652427430837513e-07, "loss": 0.4736, "step": 11425 }, { "epoch": 0.93, "grad_norm": 0.9181811509348105, "learning_rate": 2.659208937258195e-07, "loss": 0.4899, "step": 11426 }, { "epoch": 0.93, "grad_norm": 0.9557135320962066, "learning_rate": 2.653181877063105e-07, "loss": 0.4539, "step": 11427 }, { "epoch": 0.93, "grad_norm": 0.9424434836374508, "learning_rate": 2.6471615629161564e-07, "loss": 0.4835, "step": 11428 }, { "epoch": 0.93, "grad_norm": 1.006653551445918, "learning_rate": 2.641147995234572e-07, "loss": 0.451, "step": 11429 }, { "epoch": 0.93, "grad_norm": 0.972800610895364, "learning_rate": 2.6351411744350853e-07, "loss": 0.4451, "step": 11430 }, { "epoch": 0.93, "grad_norm": 0.9801622688087769, "learning_rate": 2.6291411009339184e-07, "loss": 0.4533, "step": 11431 }, { "epoch": 0.93, "grad_norm": 0.9436460199587007, "learning_rate": 2.623147775146939e-07, "loss": 0.5323, "step": 11432 }, { "epoch": 0.93, "grad_norm": 0.9656819963627655, "learning_rate": 2.617161197489426e-07, "loss": 0.4455, "step": 11433 }, { "epoch": 0.93, "grad_norm": 0.8882390107869854, "learning_rate": 2.61118136837627e-07, "loss": 0.4535, "step": 11434 }, { "epoch": 0.93, "grad_norm": 0.9408173864700979, "learning_rate": 2.605208288221861e-07, "loss": 0.4936, "step": 11435 }, { "epoch": 0.93, "grad_norm": 0.9464672305317218, "learning_rate": 2.599241957440135e-07, "loss": 0.5004, "step": 11436 }, { "epoch": 0.93, "grad_norm": 0.9689635678352339, "learning_rate": 2.593282376444539e-07, "loss": 0.5032, "step": 11437 }, { "epoch": 0.93, "grad_norm": 0.9571939695204263, "learning_rate": 2.587329545648076e-07, "loss": 0.4825, "step": 11438 }, { "epoch": 0.93, "grad_norm": 1.0129788746581043, "learning_rate": 2.581383465463272e-07, "loss": 0.4993, "step": 11439 }, { "epoch": 0.93, "grad_norm": 0.9450247269430384, "learning_rate": 2.5754441363021854e-07, "loss": 0.4955, "step": 11440 }, { "epoch": 0.93, "grad_norm": 0.9431474803716476, "learning_rate": 2.5695115585763985e-07, "loss": 0.4658, "step": 11441 }, { "epoch": 0.93, "grad_norm": 0.9351517255823307, "learning_rate": 2.5635857326970494e-07, "loss": 0.4728, "step": 11442 }, { "epoch": 0.93, "grad_norm": 1.0523809194784852, "learning_rate": 2.5576666590747647e-07, "loss": 0.6033, "step": 11443 }, { "epoch": 0.93, "grad_norm": 0.9171374814882678, "learning_rate": 2.5517543381197715e-07, "loss": 0.4813, "step": 11444 }, { "epoch": 0.93, "grad_norm": 0.9422827978562496, "learning_rate": 2.5458487702417544e-07, "loss": 0.4849, "step": 11445 }, { "epoch": 0.93, "grad_norm": 0.9136321288515592, "learning_rate": 2.539949955849985e-07, "loss": 0.4322, "step": 11446 }, { "epoch": 0.93, "grad_norm": 0.9397003802292838, "learning_rate": 2.5340578953532256e-07, "loss": 0.4441, "step": 11447 }, { "epoch": 0.93, "grad_norm": 1.0308380015724954, "learning_rate": 2.5281725891598166e-07, "loss": 0.5122, "step": 11448 }, { "epoch": 0.93, "grad_norm": 0.8825546477677132, "learning_rate": 2.522294037677564e-07, "loss": 0.4516, "step": 11449 }, { "epoch": 0.93, "grad_norm": 0.8923664008410982, "learning_rate": 2.516422241313898e-07, "loss": 0.4327, "step": 11450 }, { "epoch": 0.93, "grad_norm": 0.9709155618170434, "learning_rate": 2.5105572004756827e-07, "loss": 0.4749, "step": 11451 }, { "epoch": 0.93, "grad_norm": 0.9445659754416634, "learning_rate": 2.504698915569392e-07, "loss": 0.4851, "step": 11452 }, { "epoch": 0.93, "grad_norm": 1.0358932911976195, "learning_rate": 2.49884738700098e-07, "loss": 0.4879, "step": 11453 }, { "epoch": 0.93, "grad_norm": 0.9421156716446982, "learning_rate": 2.493002615175977e-07, "loss": 0.5585, "step": 11454 }, { "epoch": 0.93, "grad_norm": 0.9830053855246939, "learning_rate": 2.487164600499381e-07, "loss": 0.5162, "step": 11455 }, { "epoch": 0.93, "grad_norm": 0.852493323525767, "learning_rate": 2.481333343375802e-07, "loss": 0.466, "step": 11456 }, { "epoch": 0.93, "grad_norm": 0.9097985826103769, "learning_rate": 2.475508844209318e-07, "loss": 0.5315, "step": 11457 }, { "epoch": 0.93, "grad_norm": 0.977593827825391, "learning_rate": 2.469691103403571e-07, "loss": 0.5219, "step": 11458 }, { "epoch": 0.93, "grad_norm": 0.8948840283251344, "learning_rate": 2.463880121361717e-07, "loss": 0.4955, "step": 11459 }, { "epoch": 0.93, "grad_norm": 0.9137935694279657, "learning_rate": 2.4580758984864675e-07, "loss": 0.4488, "step": 11460 }, { "epoch": 0.93, "grad_norm": 0.9623904350660415, "learning_rate": 2.452278435180011e-07, "loss": 0.4811, "step": 11461 }, { "epoch": 0.93, "grad_norm": 0.9768541913562684, "learning_rate": 2.44648773184416e-07, "loss": 0.5311, "step": 11462 }, { "epoch": 0.93, "grad_norm": 0.9811267247772674, "learning_rate": 2.440703788880172e-07, "loss": 0.5502, "step": 11463 }, { "epoch": 0.93, "grad_norm": 0.9739306300978986, "learning_rate": 2.4349266066888697e-07, "loss": 0.4836, "step": 11464 }, { "epoch": 0.93, "grad_norm": 0.885085228057854, "learning_rate": 2.4291561856706224e-07, "loss": 0.4172, "step": 11465 }, { "epoch": 0.93, "grad_norm": 1.0019125523263028, "learning_rate": 2.42339252622531e-07, "loss": 0.4975, "step": 11466 }, { "epoch": 0.93, "grad_norm": 1.0379549781503792, "learning_rate": 2.417635628752324e-07, "loss": 0.5312, "step": 11467 }, { "epoch": 0.93, "grad_norm": 0.9748225775151601, "learning_rate": 2.411885493650656e-07, "loss": 0.5303, "step": 11468 }, { "epoch": 0.93, "grad_norm": 0.9013719123124777, "learning_rate": 2.4061421213187553e-07, "loss": 0.4759, "step": 11469 }, { "epoch": 0.93, "grad_norm": 0.9363216769145684, "learning_rate": 2.4004055121546354e-07, "loss": 0.4897, "step": 11470 }, { "epoch": 0.93, "grad_norm": 0.9190009329597287, "learning_rate": 2.3946756665558457e-07, "loss": 0.5444, "step": 11471 }, { "epoch": 0.93, "grad_norm": 0.9129701848377056, "learning_rate": 2.3889525849194573e-07, "loss": 0.4854, "step": 11472 }, { "epoch": 0.93, "grad_norm": 0.9861834302459811, "learning_rate": 2.383236267642064e-07, "loss": 0.5198, "step": 11473 }, { "epoch": 0.93, "grad_norm": 0.9487541856662814, "learning_rate": 2.3775267151198268e-07, "loss": 0.4662, "step": 11474 }, { "epoch": 0.93, "grad_norm": 0.9313717143200725, "learning_rate": 2.3718239277483957e-07, "loss": 0.4983, "step": 11475 }, { "epoch": 0.93, "grad_norm": 0.9914572028788192, "learning_rate": 2.3661279059229547e-07, "loss": 0.5113, "step": 11476 }, { "epoch": 0.93, "grad_norm": 0.9384206526654694, "learning_rate": 2.3604386500382658e-07, "loss": 0.4799, "step": 11477 }, { "epoch": 0.93, "grad_norm": 0.91476640221212, "learning_rate": 2.3547561604885693e-07, "loss": 0.4808, "step": 11478 }, { "epoch": 0.93, "grad_norm": 0.959476425363997, "learning_rate": 2.34908043766765e-07, "loss": 0.5028, "step": 11479 }, { "epoch": 0.93, "grad_norm": 0.8586439006338363, "learning_rate": 2.3434114819688492e-07, "loss": 0.4727, "step": 11480 }, { "epoch": 0.93, "grad_norm": 0.9686850870111136, "learning_rate": 2.3377492937850077e-07, "loss": 0.4618, "step": 11481 }, { "epoch": 0.93, "grad_norm": 0.9513867379474965, "learning_rate": 2.332093873508512e-07, "loss": 0.4792, "step": 11482 }, { "epoch": 0.93, "grad_norm": 0.9789415893310047, "learning_rate": 2.3264452215312817e-07, "loss": 0.4585, "step": 11483 }, { "epoch": 0.93, "grad_norm": 0.8857061877304372, "learning_rate": 2.3208033382447703e-07, "loss": 0.4137, "step": 11484 }, { "epoch": 0.93, "grad_norm": 1.1075940052741962, "learning_rate": 2.315168224039932e-07, "loss": 0.524, "step": 11485 }, { "epoch": 0.93, "grad_norm": 1.0648445676642262, "learning_rate": 2.3095398793072988e-07, "loss": 0.4903, "step": 11486 }, { "epoch": 0.93, "grad_norm": 0.9380650984956824, "learning_rate": 2.3039183044368918e-07, "loss": 0.4808, "step": 11487 }, { "epoch": 0.93, "grad_norm": 0.9195255391855791, "learning_rate": 2.2983034998182997e-07, "loss": 0.5148, "step": 11488 }, { "epoch": 0.93, "grad_norm": 0.9767623140075157, "learning_rate": 2.292695465840611e-07, "loss": 0.509, "step": 11489 }, { "epoch": 0.93, "grad_norm": 0.9511340960467927, "learning_rate": 2.2870942028924592e-07, "loss": 0.4871, "step": 11490 }, { "epoch": 0.93, "grad_norm": 0.9257448355190214, "learning_rate": 2.2814997113620008e-07, "loss": 0.4549, "step": 11491 }, { "epoch": 0.93, "grad_norm": 0.9765459552883505, "learning_rate": 2.2759119916369475e-07, "loss": 0.4934, "step": 11492 }, { "epoch": 0.93, "grad_norm": 0.9485530275142726, "learning_rate": 2.2703310441045012e-07, "loss": 0.5296, "step": 11493 }, { "epoch": 0.93, "grad_norm": 0.9788948524867847, "learning_rate": 2.264756869151441e-07, "loss": 0.4941, "step": 11494 }, { "epoch": 0.93, "grad_norm": 0.976896256270598, "learning_rate": 2.2591894671640246e-07, "loss": 0.5197, "step": 11495 }, { "epoch": 0.93, "grad_norm": 0.9282784520344509, "learning_rate": 2.253628838528088e-07, "loss": 0.462, "step": 11496 }, { "epoch": 0.93, "grad_norm": 0.978036939703617, "learning_rate": 2.2480749836289672e-07, "loss": 0.5081, "step": 11497 }, { "epoch": 0.93, "grad_norm": 0.9645700321451685, "learning_rate": 2.2425279028515658e-07, "loss": 0.4979, "step": 11498 }, { "epoch": 0.93, "grad_norm": 0.904580208924153, "learning_rate": 2.2369875965802424e-07, "loss": 0.5012, "step": 11499 }, { "epoch": 0.93, "grad_norm": 1.0060872487309571, "learning_rate": 2.231454065198979e-07, "loss": 0.518, "step": 11500 }, { "epoch": 0.93, "grad_norm": 1.0372439651101641, "learning_rate": 2.225927309091225e-07, "loss": 0.545, "step": 11501 }, { "epoch": 0.93, "grad_norm": 0.7751415828819416, "learning_rate": 2.220407328639973e-07, "loss": 0.4071, "step": 11502 }, { "epoch": 0.93, "grad_norm": 0.9450030394120906, "learning_rate": 2.2148941242277732e-07, "loss": 0.45, "step": 11503 }, { "epoch": 0.93, "grad_norm": 0.9381623403458342, "learning_rate": 2.2093876962366755e-07, "loss": 0.4971, "step": 11504 }, { "epoch": 0.94, "grad_norm": 1.0936081725087992, "learning_rate": 2.2038880450482635e-07, "loss": 0.5285, "step": 11505 }, { "epoch": 0.94, "grad_norm": 0.9464753191272031, "learning_rate": 2.1983951710436768e-07, "loss": 0.5045, "step": 11506 }, { "epoch": 0.94, "grad_norm": 0.9439441443903467, "learning_rate": 2.1929090746035442e-07, "loss": 0.4977, "step": 11507 }, { "epoch": 0.94, "grad_norm": 0.9693561399893567, "learning_rate": 2.1874297561080616e-07, "loss": 0.4464, "step": 11508 }, { "epoch": 0.94, "grad_norm": 0.9700127600386009, "learning_rate": 2.1819572159369362e-07, "loss": 0.5189, "step": 11509 }, { "epoch": 0.94, "grad_norm": 0.9969850062131683, "learning_rate": 2.1764914544694203e-07, "loss": 0.5442, "step": 11510 }, { "epoch": 0.94, "grad_norm": 0.9328818708633978, "learning_rate": 2.1710324720842556e-07, "loss": 0.5314, "step": 11511 }, { "epoch": 0.94, "grad_norm": 0.9966265518299623, "learning_rate": 2.1655802691597837e-07, "loss": 0.4564, "step": 11512 }, { "epoch": 0.94, "grad_norm": 1.1030087220932174, "learning_rate": 2.1601348460738136e-07, "loss": 0.5491, "step": 11513 }, { "epoch": 0.94, "grad_norm": 1.0014104948023606, "learning_rate": 2.1546962032037211e-07, "loss": 0.4876, "step": 11514 }, { "epoch": 0.94, "grad_norm": 1.0001591152367717, "learning_rate": 2.1492643409263826e-07, "loss": 0.469, "step": 11515 }, { "epoch": 0.94, "grad_norm": 0.9063598650995034, "learning_rate": 2.1438392596182522e-07, "loss": 0.5024, "step": 11516 }, { "epoch": 0.94, "grad_norm": 0.969634253237918, "learning_rate": 2.1384209596552297e-07, "loss": 0.5086, "step": 11517 }, { "epoch": 0.94, "grad_norm": 0.9050830053343426, "learning_rate": 2.1330094414128588e-07, "loss": 0.4827, "step": 11518 }, { "epoch": 0.94, "grad_norm": 0.9616622729265908, "learning_rate": 2.1276047052661176e-07, "loss": 0.4942, "step": 11519 }, { "epoch": 0.94, "grad_norm": 0.9292719969220156, "learning_rate": 2.1222067515895618e-07, "loss": 0.5031, "step": 11520 }, { "epoch": 0.94, "grad_norm": 0.8583985298289181, "learning_rate": 2.1168155807572476e-07, "loss": 0.4731, "step": 11521 }, { "epoch": 0.94, "grad_norm": 1.0368097064485176, "learning_rate": 2.11143119314281e-07, "loss": 0.4492, "step": 11522 }, { "epoch": 0.94, "grad_norm": 0.9604901260255988, "learning_rate": 2.1060535891193502e-07, "loss": 0.5228, "step": 11523 }, { "epoch": 0.94, "grad_norm": 0.9001466531715809, "learning_rate": 2.1006827690595478e-07, "loss": 0.4525, "step": 11524 }, { "epoch": 0.94, "grad_norm": 0.9317278582374193, "learning_rate": 2.095318733335594e-07, "loss": 0.4114, "step": 11525 }, { "epoch": 0.94, "grad_norm": 0.933645590445262, "learning_rate": 2.089961482319214e-07, "loss": 0.4291, "step": 11526 }, { "epoch": 0.94, "grad_norm": 0.9756302117349039, "learning_rate": 2.0846110163816547e-07, "loss": 0.49, "step": 11527 }, { "epoch": 0.94, "grad_norm": 1.0351340753968052, "learning_rate": 2.0792673358936978e-07, "loss": 0.5204, "step": 11528 }, { "epoch": 0.94, "grad_norm": 0.893173614049143, "learning_rate": 2.0739304412256578e-07, "loss": 0.4897, "step": 11529 }, { "epoch": 0.94, "grad_norm": 0.93363362407905, "learning_rate": 2.0686003327473837e-07, "loss": 0.4362, "step": 11530 }, { "epoch": 0.94, "grad_norm": 0.94204371654142, "learning_rate": 2.0632770108282462e-07, "loss": 0.4555, "step": 11531 }, { "epoch": 0.94, "grad_norm": 0.9595420828187516, "learning_rate": 2.0579604758371286e-07, "loss": 0.496, "step": 11532 }, { "epoch": 0.94, "grad_norm": 0.9189135525666059, "learning_rate": 2.05265072814248e-07, "loss": 0.4318, "step": 11533 }, { "epoch": 0.94, "grad_norm": 0.8494707099918426, "learning_rate": 2.047347768112262e-07, "loss": 0.4793, "step": 11534 }, { "epoch": 0.94, "grad_norm": 1.0353601474519885, "learning_rate": 2.0420515961139475e-07, "loss": 0.4841, "step": 11535 }, { "epoch": 0.94, "grad_norm": 0.8312928039244486, "learning_rate": 2.0367622125145868e-07, "loss": 0.5352, "step": 11536 }, { "epoch": 0.94, "grad_norm": 0.9498648164663277, "learning_rate": 2.0314796176806984e-07, "loss": 0.4937, "step": 11537 }, { "epoch": 0.94, "grad_norm": 0.9609294146325073, "learning_rate": 2.0262038119783778e-07, "loss": 0.5138, "step": 11538 }, { "epoch": 0.94, "grad_norm": 0.9108235617292304, "learning_rate": 2.0209347957732328e-07, "loss": 0.5079, "step": 11539 }, { "epoch": 0.94, "grad_norm": 0.8957003143894978, "learning_rate": 2.0156725694303936e-07, "loss": 0.4517, "step": 11540 }, { "epoch": 0.94, "grad_norm": 0.9433740502699994, "learning_rate": 2.010417133314535e-07, "loss": 0.4947, "step": 11541 }, { "epoch": 0.94, "grad_norm": 1.0002718530207013, "learning_rate": 2.0051684877898547e-07, "loss": 0.4633, "step": 11542 }, { "epoch": 0.94, "grad_norm": 0.9221570895181908, "learning_rate": 1.999926633220084e-07, "loss": 0.4623, "step": 11543 }, { "epoch": 0.94, "grad_norm": 0.9640475017965786, "learning_rate": 1.9946915699684653e-07, "loss": 0.4682, "step": 11544 }, { "epoch": 0.94, "grad_norm": 0.9504884884382505, "learning_rate": 1.9894632983977868e-07, "loss": 0.4907, "step": 11545 }, { "epoch": 0.94, "grad_norm": 0.9108308409096453, "learning_rate": 1.9842418188703694e-07, "loss": 0.4612, "step": 11546 }, { "epoch": 0.94, "grad_norm": 0.967929844407366, "learning_rate": 1.9790271317480458e-07, "loss": 0.5103, "step": 11547 }, { "epoch": 0.94, "grad_norm": 0.9606433023604526, "learning_rate": 1.973819237392205e-07, "loss": 0.5287, "step": 11548 }, { "epoch": 0.94, "grad_norm": 0.8816364193756345, "learning_rate": 1.968618136163747e-07, "loss": 0.4454, "step": 11549 }, { "epoch": 0.94, "grad_norm": 0.977224015587263, "learning_rate": 1.9634238284230945e-07, "loss": 0.4818, "step": 11550 }, { "epoch": 0.94, "grad_norm": 1.1635547112143767, "learning_rate": 1.9582363145302152e-07, "loss": 0.4756, "step": 11551 }, { "epoch": 0.94, "grad_norm": 1.029802951948784, "learning_rate": 1.9530555948445883e-07, "loss": 0.5323, "step": 11552 }, { "epoch": 0.94, "grad_norm": 1.0420832717806676, "learning_rate": 1.947881669725249e-07, "loss": 0.4768, "step": 11553 }, { "epoch": 0.94, "grad_norm": 0.8901052369947996, "learning_rate": 1.9427145395307322e-07, "loss": 0.4326, "step": 11554 }, { "epoch": 0.94, "grad_norm": 1.0468425019933045, "learning_rate": 1.9375542046191297e-07, "loss": 0.5095, "step": 11555 }, { "epoch": 0.94, "grad_norm": 0.8997505720146292, "learning_rate": 1.9324006653480332e-07, "loss": 0.4905, "step": 11556 }, { "epoch": 0.94, "grad_norm": 0.9338220987399636, "learning_rate": 1.927253922074579e-07, "loss": 0.4623, "step": 11557 }, { "epoch": 0.94, "grad_norm": 0.8516812243067089, "learning_rate": 1.9221139751554373e-07, "loss": 0.4504, "step": 11558 }, { "epoch": 0.94, "grad_norm": 0.9289614121327456, "learning_rate": 1.9169808249468125e-07, "loss": 0.5018, "step": 11559 }, { "epoch": 0.94, "grad_norm": 0.9779634493064184, "learning_rate": 1.9118544718044084e-07, "loss": 0.5542, "step": 11560 }, { "epoch": 0.94, "grad_norm": 0.8994294029334284, "learning_rate": 1.906734916083497e-07, "loss": 0.4226, "step": 11561 }, { "epoch": 0.94, "grad_norm": 0.9682669746822895, "learning_rate": 1.9016221581388272e-07, "loss": 0.5273, "step": 11562 }, { "epoch": 0.94, "grad_norm": 0.9761134799360173, "learning_rate": 1.8965161983247494e-07, "loss": 0.5418, "step": 11563 }, { "epoch": 0.94, "grad_norm": 1.0555396355770037, "learning_rate": 1.891417036995069e-07, "loss": 0.5418, "step": 11564 }, { "epoch": 0.94, "grad_norm": 0.9106038553461743, "learning_rate": 1.8863246745031704e-07, "loss": 0.5275, "step": 11565 }, { "epoch": 0.94, "grad_norm": 0.9721292591901912, "learning_rate": 1.881239111201949e-07, "loss": 0.4808, "step": 11566 }, { "epoch": 0.94, "grad_norm": 1.2443193603745144, "learning_rate": 1.876160347443823e-07, "loss": 0.4499, "step": 11567 }, { "epoch": 0.94, "grad_norm": 0.9113977420502493, "learning_rate": 1.8710883835807437e-07, "loss": 0.4509, "step": 11568 }, { "epoch": 0.94, "grad_norm": 0.9157774007045484, "learning_rate": 1.866023219964208e-07, "loss": 0.4312, "step": 11569 }, { "epoch": 0.94, "grad_norm": 0.9780163504554356, "learning_rate": 1.8609648569452132e-07, "loss": 0.4702, "step": 11570 }, { "epoch": 0.94, "grad_norm": 0.9718901158746666, "learning_rate": 1.8559132948743007e-07, "loss": 0.4922, "step": 11571 }, { "epoch": 0.94, "grad_norm": 1.0308700763315264, "learning_rate": 1.8508685341015465e-07, "loss": 0.4986, "step": 11572 }, { "epoch": 0.94, "grad_norm": 0.8754438065720972, "learning_rate": 1.845830574976548e-07, "loss": 0.4633, "step": 11573 }, { "epoch": 0.94, "grad_norm": 0.9917172090708526, "learning_rate": 1.8407994178484155e-07, "loss": 0.4981, "step": 11574 }, { "epoch": 0.94, "grad_norm": 0.897217849065791, "learning_rate": 1.8357750630658367e-07, "loss": 0.4333, "step": 11575 }, { "epoch": 0.94, "grad_norm": 0.8707656123011874, "learning_rate": 1.830757510976966e-07, "loss": 0.4689, "step": 11576 }, { "epoch": 0.94, "grad_norm": 0.9677192816664028, "learning_rate": 1.8257467619295143e-07, "loss": 0.5139, "step": 11577 }, { "epoch": 0.94, "grad_norm": 1.0527355591460235, "learning_rate": 1.8207428162707374e-07, "loss": 0.497, "step": 11578 }, { "epoch": 0.94, "grad_norm": 0.9315945499644502, "learning_rate": 1.8157456743474133e-07, "loss": 0.5082, "step": 11579 }, { "epoch": 0.94, "grad_norm": 0.8556914362013863, "learning_rate": 1.8107553365057983e-07, "loss": 0.4383, "step": 11580 }, { "epoch": 0.94, "grad_norm": 0.9714958630042003, "learning_rate": 1.8057718030917714e-07, "loss": 0.4837, "step": 11581 }, { "epoch": 0.94, "grad_norm": 0.7953942637319843, "learning_rate": 1.8007950744506454e-07, "loss": 0.4541, "step": 11582 }, { "epoch": 0.94, "grad_norm": 1.0624559488720686, "learning_rate": 1.7958251509273106e-07, "loss": 0.5234, "step": 11583 }, { "epoch": 0.94, "grad_norm": 1.0426144132894177, "learning_rate": 1.790862032866203e-07, "loss": 0.4869, "step": 11584 }, { "epoch": 0.94, "grad_norm": 0.8442765483827533, "learning_rate": 1.7859057206112361e-07, "loss": 0.3807, "step": 11585 }, { "epoch": 0.94, "grad_norm": 1.1444035809631279, "learning_rate": 1.78095621450588e-07, "loss": 0.5447, "step": 11586 }, { "epoch": 0.94, "grad_norm": 0.9206041439662114, "learning_rate": 1.776013514893149e-07, "loss": 0.4671, "step": 11587 }, { "epoch": 0.94, "grad_norm": 1.0077606054336101, "learning_rate": 1.7710776221155578e-07, "loss": 0.4941, "step": 11588 }, { "epoch": 0.94, "grad_norm": 0.8720283736829387, "learning_rate": 1.7661485365151553e-07, "loss": 0.4178, "step": 11589 }, { "epoch": 0.94, "grad_norm": 1.0005935424660755, "learning_rate": 1.761226258433524e-07, "loss": 0.5458, "step": 11590 }, { "epoch": 0.94, "grad_norm": 0.9880090308093082, "learning_rate": 1.756310788211779e-07, "loss": 0.4767, "step": 11591 }, { "epoch": 0.94, "grad_norm": 0.9555884247413868, "learning_rate": 1.751402126190549e-07, "loss": 0.5261, "step": 11592 }, { "epoch": 0.94, "grad_norm": 0.9378522243429254, "learning_rate": 1.7465002727100055e-07, "loss": 0.4732, "step": 11593 }, { "epoch": 0.94, "grad_norm": 1.0058916977963035, "learning_rate": 1.741605228109844e-07, "loss": 0.5062, "step": 11594 }, { "epoch": 0.94, "grad_norm": 0.9147081497526133, "learning_rate": 1.7367169927292925e-07, "loss": 0.4483, "step": 11595 }, { "epoch": 0.94, "grad_norm": 0.8592595462485942, "learning_rate": 1.7318355669070807e-07, "loss": 0.4526, "step": 11596 }, { "epoch": 0.94, "grad_norm": 0.831385833810018, "learning_rate": 1.7269609509815156e-07, "loss": 0.43, "step": 11597 }, { "epoch": 0.94, "grad_norm": 0.924732035163592, "learning_rate": 1.7220931452903712e-07, "loss": 0.4925, "step": 11598 }, { "epoch": 0.94, "grad_norm": 0.9140520056972847, "learning_rate": 1.7172321501710109e-07, "loss": 0.4782, "step": 11599 }, { "epoch": 0.94, "grad_norm": 0.975889917886747, "learning_rate": 1.712377965960288e-07, "loss": 0.5251, "step": 11600 }, { "epoch": 0.94, "grad_norm": 0.9777399777316796, "learning_rate": 1.7075305929945775e-07, "loss": 0.4418, "step": 11601 }, { "epoch": 0.94, "grad_norm": 0.9104619502187835, "learning_rate": 1.7026900316098217e-07, "loss": 0.4639, "step": 11602 }, { "epoch": 0.94, "grad_norm": 1.0389219870998976, "learning_rate": 1.6978562821414635e-07, "loss": 0.5252, "step": 11603 }, { "epoch": 0.94, "grad_norm": 0.8893812909710748, "learning_rate": 1.6930293449244573e-07, "loss": 0.4562, "step": 11604 }, { "epoch": 0.94, "grad_norm": 1.0139611356319098, "learning_rate": 1.6882092202933242e-07, "loss": 0.5476, "step": 11605 }, { "epoch": 0.94, "grad_norm": 0.9920808416502068, "learning_rate": 1.683395908582097e-07, "loss": 0.5021, "step": 11606 }, { "epoch": 0.94, "grad_norm": 0.9401684008955127, "learning_rate": 1.6785894101243205e-07, "loss": 0.4315, "step": 11607 }, { "epoch": 0.94, "grad_norm": 1.0326745169755602, "learning_rate": 1.6737897252530832e-07, "loss": 0.4319, "step": 11608 }, { "epoch": 0.94, "grad_norm": 1.4288513899528752, "learning_rate": 1.6689968543010082e-07, "loss": 0.4886, "step": 11609 }, { "epoch": 0.94, "grad_norm": 0.8585485958823579, "learning_rate": 1.6642107976002186e-07, "loss": 0.4083, "step": 11610 }, { "epoch": 0.94, "grad_norm": 0.9069449809793708, "learning_rate": 1.659431555482416e-07, "loss": 0.4786, "step": 11611 }, { "epoch": 0.94, "grad_norm": 1.1631691820206738, "learning_rate": 1.6546591282787683e-07, "loss": 0.5776, "step": 11612 }, { "epoch": 0.94, "grad_norm": 1.0616439313560415, "learning_rate": 1.6498935163200114e-07, "loss": 0.5342, "step": 11613 }, { "epoch": 0.94, "grad_norm": 0.8923630631274349, "learning_rate": 1.6451347199364032e-07, "loss": 0.517, "step": 11614 }, { "epoch": 0.94, "grad_norm": 0.9362423351584785, "learning_rate": 1.6403827394577244e-07, "loss": 0.4897, "step": 11615 }, { "epoch": 0.94, "grad_norm": 0.9151508211734752, "learning_rate": 1.6356375752132558e-07, "loss": 0.4951, "step": 11616 }, { "epoch": 0.94, "grad_norm": 0.8389818007257338, "learning_rate": 1.630899227531868e-07, "loss": 0.4103, "step": 11617 }, { "epoch": 0.94, "grad_norm": 0.9323848955910646, "learning_rate": 1.6261676967419094e-07, "loss": 0.468, "step": 11618 }, { "epoch": 0.94, "grad_norm": 0.8786132211991765, "learning_rate": 1.621442983171262e-07, "loss": 0.4523, "step": 11619 }, { "epoch": 0.94, "grad_norm": 0.8676920931406709, "learning_rate": 1.616725087147364e-07, "loss": 0.49, "step": 11620 }, { "epoch": 0.94, "grad_norm": 0.9795059798461183, "learning_rate": 1.6120140089971536e-07, "loss": 0.5262, "step": 11621 }, { "epoch": 0.94, "grad_norm": 1.0434011389872648, "learning_rate": 1.6073097490470924e-07, "loss": 0.5017, "step": 11622 }, { "epoch": 0.94, "grad_norm": 0.9551902553910484, "learning_rate": 1.602612307623208e-07, "loss": 0.4933, "step": 11623 }, { "epoch": 0.94, "grad_norm": 0.9575849488802687, "learning_rate": 1.5979216850509848e-07, "loss": 0.4581, "step": 11624 }, { "epoch": 0.94, "grad_norm": 0.903798519803686, "learning_rate": 1.5932378816555405e-07, "loss": 0.4304, "step": 11625 }, { "epoch": 0.94, "grad_norm": 0.903238053594976, "learning_rate": 1.5885608977614043e-07, "loss": 0.4708, "step": 11626 }, { "epoch": 0.94, "grad_norm": 1.0195498094816038, "learning_rate": 1.5838907336927055e-07, "loss": 0.493, "step": 11627 }, { "epoch": 0.95, "grad_norm": 0.948691606472617, "learning_rate": 1.5792273897730858e-07, "loss": 0.5152, "step": 11628 }, { "epoch": 0.95, "grad_norm": 0.9576141491811475, "learning_rate": 1.5745708663257199e-07, "loss": 0.4619, "step": 11629 }, { "epoch": 0.95, "grad_norm": 0.9671330738151036, "learning_rate": 1.5699211636732714e-07, "loss": 0.4614, "step": 11630 }, { "epoch": 0.95, "grad_norm": 0.9491379972972037, "learning_rate": 1.5652782821379942e-07, "loss": 0.4687, "step": 11631 }, { "epoch": 0.95, "grad_norm": 0.8516708278279836, "learning_rate": 1.5606422220416196e-07, "loss": 0.4502, "step": 11632 }, { "epoch": 0.95, "grad_norm": 0.922946174629565, "learning_rate": 1.5560129837054127e-07, "loss": 0.4468, "step": 11633 }, { "epoch": 0.95, "grad_norm": 0.893755085344053, "learning_rate": 1.551390567450195e-07, "loss": 0.4817, "step": 11634 }, { "epoch": 0.95, "grad_norm": 1.0082700354791259, "learning_rate": 1.5467749735962878e-07, "loss": 0.4937, "step": 11635 }, { "epoch": 0.95, "grad_norm": 0.9353409160951671, "learning_rate": 1.5421662024635353e-07, "loss": 0.4265, "step": 11636 }, { "epoch": 0.95, "grad_norm": 0.9418824879588475, "learning_rate": 1.5375642543713488e-07, "loss": 0.4844, "step": 11637 }, { "epoch": 0.95, "grad_norm": 0.9828534306732699, "learning_rate": 1.5329691296386174e-07, "loss": 0.4977, "step": 11638 }, { "epoch": 0.95, "grad_norm": 1.044063249037734, "learning_rate": 1.5283808285837754e-07, "loss": 0.5204, "step": 11639 }, { "epoch": 0.95, "grad_norm": 1.027630071859994, "learning_rate": 1.5237993515248017e-07, "loss": 0.4924, "step": 11640 }, { "epoch": 0.95, "grad_norm": 0.9350323418549664, "learning_rate": 1.519224698779198e-07, "loss": 0.5401, "step": 11641 }, { "epoch": 0.95, "grad_norm": 0.9073285796594803, "learning_rate": 1.5146568706639552e-07, "loss": 0.4147, "step": 11642 }, { "epoch": 0.95, "grad_norm": 0.9303432068459472, "learning_rate": 1.5100958674956424e-07, "loss": 0.4919, "step": 11643 }, { "epoch": 0.95, "grad_norm": 0.8080326063767643, "learning_rate": 1.5055416895903284e-07, "loss": 0.413, "step": 11644 }, { "epoch": 0.95, "grad_norm": 0.8638585497713773, "learning_rate": 1.500994337263606e-07, "loss": 0.4604, "step": 11645 }, { "epoch": 0.95, "grad_norm": 1.0138512478569417, "learning_rate": 1.4964538108306004e-07, "loss": 0.4739, "step": 11646 }, { "epoch": 0.95, "grad_norm": 1.0237844054054825, "learning_rate": 1.4919201106059932e-07, "loss": 0.5367, "step": 11647 }, { "epoch": 0.95, "grad_norm": 0.9469968312258004, "learning_rate": 1.4873932369039223e-07, "loss": 0.4471, "step": 11648 }, { "epoch": 0.95, "grad_norm": 0.9348968855520209, "learning_rate": 1.4828731900381366e-07, "loss": 0.4464, "step": 11649 }, { "epoch": 0.95, "grad_norm": 0.9454783504849241, "learning_rate": 1.478359970321852e-07, "loss": 0.4805, "step": 11650 }, { "epoch": 0.95, "grad_norm": 0.9696086869071582, "learning_rate": 1.47385357806783e-07, "loss": 0.4803, "step": 11651 }, { "epoch": 0.95, "grad_norm": 0.9258787941079295, "learning_rate": 1.4693540135883533e-07, "loss": 0.4343, "step": 11652 }, { "epoch": 0.95, "grad_norm": 0.859426434883943, "learning_rate": 1.4648612771952618e-07, "loss": 0.4454, "step": 11653 }, { "epoch": 0.95, "grad_norm": 0.9625867462394057, "learning_rate": 1.4603753691998735e-07, "loss": 0.4783, "step": 11654 }, { "epoch": 0.95, "grad_norm": 0.9656030188852532, "learning_rate": 1.4558962899130724e-07, "loss": 0.5484, "step": 11655 }, { "epoch": 0.95, "grad_norm": 0.9501134464292218, "learning_rate": 1.4514240396452438e-07, "loss": 0.4818, "step": 11656 }, { "epoch": 0.95, "grad_norm": 1.0097149440429334, "learning_rate": 1.4469586187063289e-07, "loss": 0.524, "step": 11657 }, { "epoch": 0.95, "grad_norm": 1.0225104022893932, "learning_rate": 1.4425000274057577e-07, "loss": 0.5142, "step": 11658 }, { "epoch": 0.95, "grad_norm": 0.8977241993375531, "learning_rate": 1.4380482660525164e-07, "loss": 0.4279, "step": 11659 }, { "epoch": 0.95, "grad_norm": 0.9465529505683918, "learning_rate": 1.4336033349550916e-07, "loss": 0.4816, "step": 11660 }, { "epoch": 0.95, "grad_norm": 0.9588676788041502, "learning_rate": 1.429165234421548e-07, "loss": 0.4907, "step": 11661 }, { "epoch": 0.95, "grad_norm": 0.9247383698200462, "learning_rate": 1.424733964759406e-07, "loss": 0.4896, "step": 11662 }, { "epoch": 0.95, "grad_norm": 0.9732645453172724, "learning_rate": 1.420309526275776e-07, "loss": 0.4748, "step": 11663 }, { "epoch": 0.95, "grad_norm": 0.8902318164715665, "learning_rate": 1.4158919192772458e-07, "loss": 0.4787, "step": 11664 }, { "epoch": 0.95, "grad_norm": 0.9836209649754226, "learning_rate": 1.4114811440699706e-07, "loss": 0.4465, "step": 11665 }, { "epoch": 0.95, "grad_norm": 0.9865694640029874, "learning_rate": 1.4070772009595944e-07, "loss": 0.446, "step": 11666 }, { "epoch": 0.95, "grad_norm": 1.1353592274720519, "learning_rate": 1.4026800902513293e-07, "loss": 0.5372, "step": 11667 }, { "epoch": 0.95, "grad_norm": 0.9894621895910385, "learning_rate": 1.3982898122498755e-07, "loss": 0.4594, "step": 11668 }, { "epoch": 0.95, "grad_norm": 0.8808762541092449, "learning_rate": 1.3939063672594677e-07, "loss": 0.415, "step": 11669 }, { "epoch": 0.95, "grad_norm": 0.9543222001061445, "learning_rate": 1.389529755583885e-07, "loss": 0.5108, "step": 11670 }, { "epoch": 0.95, "grad_norm": 0.9031544153763041, "learning_rate": 1.3851599775264403e-07, "loss": 0.4892, "step": 11671 }, { "epoch": 0.95, "grad_norm": 1.051975022845536, "learning_rate": 1.3807970333899133e-07, "loss": 0.5505, "step": 11672 }, { "epoch": 0.95, "grad_norm": 0.9494252597569944, "learning_rate": 1.3764409234766962e-07, "loss": 0.4922, "step": 11673 }, { "epoch": 0.95, "grad_norm": 0.9844827083958534, "learning_rate": 1.3720916480886359e-07, "loss": 0.5427, "step": 11674 }, { "epoch": 0.95, "grad_norm": 1.0213755760482455, "learning_rate": 1.367749207527147e-07, "loss": 0.4659, "step": 11675 }, { "epoch": 0.95, "grad_norm": 1.1219645031442136, "learning_rate": 1.3634136020931444e-07, "loss": 0.5685, "step": 11676 }, { "epoch": 0.95, "grad_norm": 0.9198137919526792, "learning_rate": 1.3590848320870874e-07, "loss": 0.4975, "step": 11677 }, { "epoch": 0.95, "grad_norm": 0.9814407636972431, "learning_rate": 1.35476289780897e-07, "loss": 0.4589, "step": 11678 }, { "epoch": 0.95, "grad_norm": 0.9265380091276773, "learning_rate": 1.3504477995582744e-07, "loss": 0.4739, "step": 11679 }, { "epoch": 0.95, "grad_norm": 0.9235688603751667, "learning_rate": 1.3461395376340502e-07, "loss": 0.5288, "step": 11680 }, { "epoch": 0.95, "grad_norm": 0.9593894678972065, "learning_rate": 1.3418381123348477e-07, "loss": 0.4638, "step": 11681 }, { "epoch": 0.95, "grad_norm": 0.9690623318955943, "learning_rate": 1.337543523958751e-07, "loss": 0.5005, "step": 11682 }, { "epoch": 0.95, "grad_norm": 0.846608089060443, "learning_rate": 1.333255772803377e-07, "loss": 0.408, "step": 11683 }, { "epoch": 0.95, "grad_norm": 0.969082169503564, "learning_rate": 1.3289748591658546e-07, "loss": 0.4923, "step": 11684 }, { "epoch": 0.95, "grad_norm": 0.8407132879000558, "learning_rate": 1.3247007833428694e-07, "loss": 0.4794, "step": 11685 }, { "epoch": 0.95, "grad_norm": 1.0111315851398217, "learning_rate": 1.320433545630584e-07, "loss": 0.532, "step": 11686 }, { "epoch": 0.95, "grad_norm": 1.0221013452148755, "learning_rate": 1.3161731463247284e-07, "loss": 0.4515, "step": 11687 }, { "epoch": 0.95, "grad_norm": 0.8748986501368735, "learning_rate": 1.3119195857205337e-07, "loss": 0.4978, "step": 11688 }, { "epoch": 0.95, "grad_norm": 0.9497318881951801, "learning_rate": 1.3076728641127857e-07, "loss": 0.478, "step": 11689 }, { "epoch": 0.95, "grad_norm": 0.915779655254726, "learning_rate": 1.3034329817957603e-07, "loss": 0.424, "step": 11690 }, { "epoch": 0.95, "grad_norm": 0.9553825667674619, "learning_rate": 1.2991999390632892e-07, "loss": 0.4347, "step": 11691 }, { "epoch": 0.95, "grad_norm": 0.9041932919429256, "learning_rate": 1.2949737362087156e-07, "loss": 0.4739, "step": 11692 }, { "epoch": 0.95, "grad_norm": 0.8968792184451714, "learning_rate": 1.2907543735249163e-07, "loss": 0.4658, "step": 11693 }, { "epoch": 0.95, "grad_norm": 1.0653933580624912, "learning_rate": 1.286541851304268e-07, "loss": 0.5547, "step": 11694 }, { "epoch": 0.95, "grad_norm": 0.9609039869721445, "learning_rate": 1.282336169838727e-07, "loss": 0.4894, "step": 11695 }, { "epoch": 0.95, "grad_norm": 1.020385805816101, "learning_rate": 1.278137329419715e-07, "loss": 0.5107, "step": 11696 }, { "epoch": 0.95, "grad_norm": 0.8729444977310155, "learning_rate": 1.2739453303382222e-07, "loss": 0.4691, "step": 11697 }, { "epoch": 0.95, "grad_norm": 0.9736436538183029, "learning_rate": 1.2697601728847596e-07, "loss": 0.4704, "step": 11698 }, { "epoch": 0.95, "grad_norm": 1.009484260023586, "learning_rate": 1.2655818573493295e-07, "loss": 0.4842, "step": 11699 }, { "epoch": 0.95, "grad_norm": 0.9396092829306968, "learning_rate": 1.261410384021511e-07, "loss": 0.5107, "step": 11700 }, { "epoch": 0.95, "grad_norm": 0.872628805875551, "learning_rate": 1.2572457531903614e-07, "loss": 0.4342, "step": 11701 }, { "epoch": 0.95, "grad_norm": 0.9414232683288218, "learning_rate": 1.2530879651444949e-07, "loss": 0.4543, "step": 11702 }, { "epoch": 0.95, "grad_norm": 0.864248669810779, "learning_rate": 1.2489370201720473e-07, "loss": 0.4684, "step": 11703 }, { "epoch": 0.95, "grad_norm": 0.9694194096349622, "learning_rate": 1.2447929185606778e-07, "loss": 0.4468, "step": 11704 }, { "epoch": 0.95, "grad_norm": 1.0015075266884892, "learning_rate": 1.2406556605975673e-07, "loss": 0.5852, "step": 11705 }, { "epoch": 0.95, "grad_norm": 0.9396979059242895, "learning_rate": 1.2365252465694088e-07, "loss": 0.4968, "step": 11706 }, { "epoch": 0.95, "grad_norm": 0.9359777283532162, "learning_rate": 1.2324016767624515e-07, "loss": 0.4768, "step": 11707 }, { "epoch": 0.95, "grad_norm": 1.0363612056953866, "learning_rate": 1.228284951462444e-07, "loss": 0.5069, "step": 11708 }, { "epoch": 0.95, "grad_norm": 0.9450225369152239, "learning_rate": 1.2241750709546918e-07, "loss": 0.4555, "step": 11709 }, { "epoch": 0.95, "grad_norm": 0.945398340483411, "learning_rate": 1.2200720355239893e-07, "loss": 0.4209, "step": 11710 }, { "epoch": 0.95, "grad_norm": 0.9641827946918541, "learning_rate": 1.2159758454546643e-07, "loss": 0.465, "step": 11711 }, { "epoch": 0.95, "grad_norm": 1.0174498372544294, "learning_rate": 1.2118865010306124e-07, "loss": 0.5257, "step": 11712 }, { "epoch": 0.95, "grad_norm": 0.8686355378031086, "learning_rate": 1.2078040025351844e-07, "loss": 0.4278, "step": 11713 }, { "epoch": 0.95, "grad_norm": 1.1133168236049928, "learning_rate": 1.2037283502513208e-07, "loss": 0.4626, "step": 11714 }, { "epoch": 0.95, "grad_norm": 0.9425915566814234, "learning_rate": 1.1996595444614511e-07, "loss": 0.4682, "step": 11715 }, { "epoch": 0.95, "grad_norm": 0.8035223312922553, "learning_rate": 1.1955975854475388e-07, "loss": 0.479, "step": 11716 }, { "epoch": 0.95, "grad_norm": 0.8922305833970967, "learning_rate": 1.1915424734910585e-07, "loss": 0.4497, "step": 11717 }, { "epoch": 0.95, "grad_norm": 0.9559911600741078, "learning_rate": 1.1874942088730635e-07, "loss": 0.4816, "step": 11718 }, { "epoch": 0.95, "grad_norm": 0.9758579168957457, "learning_rate": 1.1834527918740624e-07, "loss": 0.5539, "step": 11719 }, { "epoch": 0.95, "grad_norm": 1.0506259051311835, "learning_rate": 1.1794182227741314e-07, "loss": 0.4521, "step": 11720 }, { "epoch": 0.95, "grad_norm": 0.9705461383571655, "learning_rate": 1.1753905018528688e-07, "loss": 0.5648, "step": 11721 }, { "epoch": 0.95, "grad_norm": 1.0157017913842958, "learning_rate": 1.171369629389385e-07, "loss": 0.5057, "step": 11722 }, { "epoch": 0.95, "grad_norm": 0.9483830944456155, "learning_rate": 1.1673556056623237e-07, "loss": 0.4671, "step": 11723 }, { "epoch": 0.95, "grad_norm": 0.9072532108846191, "learning_rate": 1.1633484309498511e-07, "loss": 0.4809, "step": 11724 }, { "epoch": 0.95, "grad_norm": 0.9568152372299135, "learning_rate": 1.1593481055296673e-07, "loss": 0.4797, "step": 11725 }, { "epoch": 0.95, "grad_norm": 0.8821466617527249, "learning_rate": 1.1553546296789952e-07, "loss": 0.4821, "step": 11726 }, { "epoch": 0.95, "grad_norm": 0.9175803795045503, "learning_rate": 1.1513680036745578e-07, "loss": 0.4646, "step": 11727 }, { "epoch": 0.95, "grad_norm": 1.0322661343458974, "learning_rate": 1.1473882277926562e-07, "loss": 0.4446, "step": 11728 }, { "epoch": 0.95, "grad_norm": 1.0408277460676767, "learning_rate": 1.1434153023090589e-07, "loss": 0.5255, "step": 11729 }, { "epoch": 0.95, "grad_norm": 1.027227376134949, "learning_rate": 1.1394492274991009e-07, "loss": 0.5529, "step": 11730 }, { "epoch": 0.95, "grad_norm": 0.9732458654486648, "learning_rate": 1.1354900036376181e-07, "loss": 0.5195, "step": 11731 }, { "epoch": 0.95, "grad_norm": 1.0535313265351196, "learning_rate": 1.13153763099898e-07, "loss": 0.5504, "step": 11732 }, { "epoch": 0.95, "grad_norm": 0.9198569397489443, "learning_rate": 1.1275921098570896e-07, "loss": 0.4748, "step": 11733 }, { "epoch": 0.95, "grad_norm": 0.917712254839116, "learning_rate": 1.1236534404853727e-07, "loss": 0.4632, "step": 11734 }, { "epoch": 0.95, "grad_norm": 0.856909983032463, "learning_rate": 1.1197216231567664e-07, "loss": 0.4514, "step": 11735 }, { "epoch": 0.95, "grad_norm": 0.9469785652163093, "learning_rate": 1.1157966581437419e-07, "loss": 0.4618, "step": 11736 }, { "epoch": 0.95, "grad_norm": 0.9599933656874255, "learning_rate": 1.1118785457183034e-07, "loss": 0.4708, "step": 11737 }, { "epoch": 0.95, "grad_norm": 1.094933152970052, "learning_rate": 1.1079672861519675e-07, "loss": 0.56, "step": 11738 }, { "epoch": 0.95, "grad_norm": 1.073555695579005, "learning_rate": 1.1040628797157727e-07, "loss": 0.4836, "step": 11739 }, { "epoch": 0.95, "grad_norm": 0.9336131282070538, "learning_rate": 1.1001653266803136e-07, "loss": 0.4616, "step": 11740 }, { "epoch": 0.95, "grad_norm": 0.9517994592150936, "learning_rate": 1.0962746273156633e-07, "loss": 0.5279, "step": 11741 }, { "epoch": 0.95, "grad_norm": 0.9092708185894586, "learning_rate": 1.0923907818914614e-07, "loss": 0.4783, "step": 11742 }, { "epoch": 0.95, "grad_norm": 0.9137401074747392, "learning_rate": 1.0885137906768373e-07, "loss": 0.4915, "step": 11743 }, { "epoch": 0.95, "grad_norm": 0.886062036636107, "learning_rate": 1.084643653940487e-07, "loss": 0.41, "step": 11744 }, { "epoch": 0.95, "grad_norm": 1.0168251352297248, "learning_rate": 1.0807803719505849e-07, "loss": 0.4606, "step": 11745 }, { "epoch": 0.95, "grad_norm": 0.8764053557786847, "learning_rate": 1.0769239449748614e-07, "loss": 0.4161, "step": 11746 }, { "epoch": 0.95, "grad_norm": 0.9543498410492781, "learning_rate": 1.0730743732805581e-07, "loss": 0.4976, "step": 11747 }, { "epoch": 0.95, "grad_norm": 0.8988669209801309, "learning_rate": 1.0692316571344619e-07, "loss": 0.4746, "step": 11748 }, { "epoch": 0.95, "grad_norm": 0.9705097304261154, "learning_rate": 1.0653957968028594e-07, "loss": 0.531, "step": 11749 }, { "epoch": 0.95, "grad_norm": 0.8087770163850684, "learning_rate": 1.0615667925515716e-07, "loss": 0.4503, "step": 11750 }, { "epoch": 0.96, "grad_norm": 0.8585770296241498, "learning_rate": 1.0577446446459416e-07, "loss": 0.394, "step": 11751 }, { "epoch": 0.96, "grad_norm": 0.8991131732136636, "learning_rate": 1.0539293533508577e-07, "loss": 0.4604, "step": 11752 }, { "epoch": 0.96, "grad_norm": 1.043978687288304, "learning_rate": 1.0501209189306972e-07, "loss": 0.5689, "step": 11753 }, { "epoch": 0.96, "grad_norm": 1.012373658397403, "learning_rate": 1.0463193416493933e-07, "loss": 0.5616, "step": 11754 }, { "epoch": 0.96, "grad_norm": 0.9632718766958349, "learning_rate": 1.04252462177038e-07, "loss": 0.4913, "step": 11755 }, { "epoch": 0.96, "grad_norm": 0.952867772462053, "learning_rate": 1.0387367595566355e-07, "loss": 0.4671, "step": 11756 }, { "epoch": 0.96, "grad_norm": 0.9312195502524956, "learning_rate": 1.0349557552706613e-07, "loss": 0.5035, "step": 11757 }, { "epoch": 0.96, "grad_norm": 0.9671971360550594, "learning_rate": 1.0311816091744698e-07, "loss": 0.4186, "step": 11758 }, { "epoch": 0.96, "grad_norm": 0.9494946903358517, "learning_rate": 1.0274143215296073e-07, "loss": 0.4798, "step": 11759 }, { "epoch": 0.96, "grad_norm": 0.8579853360947254, "learning_rate": 1.0236538925971429e-07, "loss": 0.4214, "step": 11760 }, { "epoch": 0.96, "grad_norm": 0.9821881441558292, "learning_rate": 1.019900322637668e-07, "loss": 0.4483, "step": 11761 }, { "epoch": 0.96, "grad_norm": 0.919312383287609, "learning_rate": 1.016153611911308e-07, "loss": 0.5016, "step": 11762 }, { "epoch": 0.96, "grad_norm": 0.9690368869195982, "learning_rate": 1.0124137606777107e-07, "loss": 0.4951, "step": 11763 }, { "epoch": 0.96, "grad_norm": 0.9219625016430492, "learning_rate": 1.0086807691960243e-07, "loss": 0.4489, "step": 11764 }, { "epoch": 0.96, "grad_norm": 0.9490039649323291, "learning_rate": 1.0049546377249642e-07, "loss": 0.5141, "step": 11765 }, { "epoch": 0.96, "grad_norm": 1.0328827891759012, "learning_rate": 1.0012353665227458e-07, "loss": 0.4956, "step": 11766 }, { "epoch": 0.96, "grad_norm": 0.8300609810503681, "learning_rate": 9.975229558470967e-08, "loss": 0.4694, "step": 11767 }, { "epoch": 0.96, "grad_norm": 0.9583215834991651, "learning_rate": 9.938174059552885e-08, "loss": 0.4489, "step": 11768 }, { "epoch": 0.96, "grad_norm": 1.1407787654736952, "learning_rate": 9.901187171041271e-08, "loss": 0.5123, "step": 11769 }, { "epoch": 0.96, "grad_norm": 0.9188810440416877, "learning_rate": 9.864268895499074e-08, "loss": 0.475, "step": 11770 }, { "epoch": 0.96, "grad_norm": 0.9768542214641207, "learning_rate": 9.827419235484803e-08, "loss": 0.5083, "step": 11771 }, { "epoch": 0.96, "grad_norm": 0.971834748912786, "learning_rate": 9.790638193552082e-08, "loss": 0.4618, "step": 11772 }, { "epoch": 0.96, "grad_norm": 0.9809466241068123, "learning_rate": 9.753925772249873e-08, "loss": 0.5157, "step": 11773 }, { "epoch": 0.96, "grad_norm": 1.046921175293461, "learning_rate": 9.71728197412225e-08, "loss": 0.5045, "step": 11774 }, { "epoch": 0.96, "grad_norm": 0.9619930134152861, "learning_rate": 9.680706801708517e-08, "loss": 0.4513, "step": 11775 }, { "epoch": 0.96, "grad_norm": 1.0247706813665163, "learning_rate": 9.644200257543534e-08, "loss": 0.5578, "step": 11776 }, { "epoch": 0.96, "grad_norm": 0.9856133551445193, "learning_rate": 9.607762344156946e-08, "loss": 0.5058, "step": 11777 }, { "epoch": 0.96, "grad_norm": 0.9145922251023627, "learning_rate": 9.571393064073953e-08, "loss": 0.4999, "step": 11778 }, { "epoch": 0.96, "grad_norm": 0.9405462474920527, "learning_rate": 9.535092419814873e-08, "loss": 0.4495, "step": 11779 }, { "epoch": 0.96, "grad_norm": 1.0159098070267714, "learning_rate": 9.498860413895472e-08, "loss": 0.5144, "step": 11780 }, { "epoch": 0.96, "grad_norm": 0.8960909471248009, "learning_rate": 9.462697048826408e-08, "loss": 0.4778, "step": 11781 }, { "epoch": 0.96, "grad_norm": 0.9080420987831723, "learning_rate": 9.426602327113788e-08, "loss": 0.4559, "step": 11782 }, { "epoch": 0.96, "grad_norm": 0.9544893878383572, "learning_rate": 9.390576251258943e-08, "loss": 0.4975, "step": 11783 }, { "epoch": 0.96, "grad_norm": 0.9637330708653101, "learning_rate": 9.354618823758654e-08, "loss": 0.4813, "step": 11784 }, { "epoch": 0.96, "grad_norm": 0.9727251888984069, "learning_rate": 9.318730047104484e-08, "loss": 0.4482, "step": 11785 }, { "epoch": 0.96, "grad_norm": 1.0301592271271238, "learning_rate": 9.282909923783557e-08, "loss": 0.4473, "step": 11786 }, { "epoch": 0.96, "grad_norm": 0.9666339201017452, "learning_rate": 9.247158456278327e-08, "loss": 0.4611, "step": 11787 }, { "epoch": 0.96, "grad_norm": 0.9457148871860079, "learning_rate": 9.211475647066148e-08, "loss": 0.4772, "step": 11788 }, { "epoch": 0.96, "grad_norm": 0.9110564036623252, "learning_rate": 9.175861498619821e-08, "loss": 0.4615, "step": 11789 }, { "epoch": 0.96, "grad_norm": 0.9682975620629547, "learning_rate": 9.140316013407479e-08, "loss": 0.4591, "step": 11790 }, { "epoch": 0.96, "grad_norm": 1.0789231927916287, "learning_rate": 9.104839193892379e-08, "loss": 0.4509, "step": 11791 }, { "epoch": 0.96, "grad_norm": 1.07751499957963, "learning_rate": 9.069431042532995e-08, "loss": 0.5149, "step": 11792 }, { "epoch": 0.96, "grad_norm": 0.9618200013702595, "learning_rate": 9.034091561783032e-08, "loss": 0.541, "step": 11793 }, { "epoch": 0.96, "grad_norm": 1.033258536988785, "learning_rate": 8.99882075409153e-08, "loss": 0.5121, "step": 11794 }, { "epoch": 0.96, "grad_norm": 0.7940648088676413, "learning_rate": 8.963618621902759e-08, "loss": 0.4257, "step": 11795 }, { "epoch": 0.96, "grad_norm": 0.9676953589118892, "learning_rate": 8.928485167656208e-08, "loss": 0.482, "step": 11796 }, { "epoch": 0.96, "grad_norm": 0.9316837620156098, "learning_rate": 8.89342039378649e-08, "loss": 0.4366, "step": 11797 }, { "epoch": 0.96, "grad_norm": 0.9919385490875576, "learning_rate": 8.858424302723767e-08, "loss": 0.4911, "step": 11798 }, { "epoch": 0.96, "grad_norm": 0.8620054709348294, "learning_rate": 8.823496896892991e-08, "loss": 0.4312, "step": 11799 }, { "epoch": 0.96, "grad_norm": 0.9563793794175025, "learning_rate": 8.78863817871467e-08, "loss": 0.4966, "step": 11800 }, { "epoch": 0.96, "grad_norm": 0.9019147172069846, "learning_rate": 8.753848150604538e-08, "loss": 0.457, "step": 11801 }, { "epoch": 0.96, "grad_norm": 1.007078630021055, "learning_rate": 8.719126814973556e-08, "loss": 0.491, "step": 11802 }, { "epoch": 0.96, "grad_norm": 0.9815358823107124, "learning_rate": 8.684474174227797e-08, "loss": 0.5185, "step": 11803 }, { "epoch": 0.96, "grad_norm": 0.9684981077551171, "learning_rate": 8.649890230768676e-08, "loss": 0.4662, "step": 11804 }, { "epoch": 0.96, "grad_norm": 0.8355804604535023, "learning_rate": 8.615374986992831e-08, "loss": 0.4454, "step": 11805 }, { "epoch": 0.96, "grad_norm": 0.9829781903898468, "learning_rate": 8.580928445292124e-08, "loss": 0.5163, "step": 11806 }, { "epoch": 0.96, "grad_norm": 0.9820450700426611, "learning_rate": 8.54655060805365e-08, "loss": 0.5061, "step": 11807 }, { "epoch": 0.96, "grad_norm": 0.9170387812859044, "learning_rate": 8.512241477659944e-08, "loss": 0.4526, "step": 11808 }, { "epoch": 0.96, "grad_norm": 0.8588684291904626, "learning_rate": 8.478001056488327e-08, "loss": 0.4278, "step": 11809 }, { "epoch": 0.96, "grad_norm": 1.004476225320024, "learning_rate": 8.443829346911792e-08, "loss": 0.4339, "step": 11810 }, { "epoch": 0.96, "grad_norm": 0.9955030591553591, "learning_rate": 8.409726351298441e-08, "loss": 0.4798, "step": 11811 }, { "epoch": 0.96, "grad_norm": 1.0319475619112393, "learning_rate": 8.375692072011388e-08, "loss": 0.5098, "step": 11812 }, { "epoch": 0.96, "grad_norm": 0.9512626411668296, "learning_rate": 8.341726511409409e-08, "loss": 0.4742, "step": 11813 }, { "epoch": 0.96, "grad_norm": 0.8647860949043975, "learning_rate": 8.307829671846179e-08, "loss": 0.4773, "step": 11814 }, { "epoch": 0.96, "grad_norm": 0.966937294863239, "learning_rate": 8.274001555670597e-08, "loss": 0.5045, "step": 11815 }, { "epoch": 0.96, "grad_norm": 1.0511562530909382, "learning_rate": 8.24024216522723e-08, "loss": 0.5244, "step": 11816 }, { "epoch": 0.96, "grad_norm": 1.0529033296786536, "learning_rate": 8.20655150285521e-08, "loss": 0.4635, "step": 11817 }, { "epoch": 0.96, "grad_norm": 0.9419210827460939, "learning_rate": 8.172929570889553e-08, "loss": 0.4529, "step": 11818 }, { "epoch": 0.96, "grad_norm": 1.057298308091194, "learning_rate": 8.139376371660179e-08, "loss": 0.5259, "step": 11819 }, { "epoch": 0.96, "grad_norm": 0.9911938418309101, "learning_rate": 8.105891907492224e-08, "loss": 0.5373, "step": 11820 }, { "epoch": 0.96, "grad_norm": 0.9799078980237341, "learning_rate": 8.072476180705946e-08, "loss": 0.5207, "step": 11821 }, { "epoch": 0.96, "grad_norm": 0.9169456394142715, "learning_rate": 8.03912919361749e-08, "loss": 0.4771, "step": 11822 }, { "epoch": 0.96, "grad_norm": 0.9701500291611556, "learning_rate": 8.005850948537453e-08, "loss": 0.4812, "step": 11823 }, { "epoch": 0.96, "grad_norm": 0.9376443184836865, "learning_rate": 7.972641447771989e-08, "loss": 0.4849, "step": 11824 }, { "epoch": 0.96, "grad_norm": 0.8836556164802101, "learning_rate": 7.939500693622481e-08, "loss": 0.4832, "step": 11825 }, { "epoch": 0.96, "grad_norm": 1.0238583224039954, "learning_rate": 7.906428688385759e-08, "loss": 0.5001, "step": 11826 }, { "epoch": 0.96, "grad_norm": 0.9368949132542735, "learning_rate": 7.873425434353432e-08, "loss": 0.4871, "step": 11827 }, { "epoch": 0.96, "grad_norm": 0.9951882378946979, "learning_rate": 7.840490933812783e-08, "loss": 0.4845, "step": 11828 }, { "epoch": 0.96, "grad_norm": 0.8502504258314992, "learning_rate": 7.807625189046098e-08, "loss": 0.4095, "step": 11829 }, { "epoch": 0.96, "grad_norm": 0.9569555972363435, "learning_rate": 7.774828202330776e-08, "loss": 0.4657, "step": 11830 }, { "epoch": 0.96, "grad_norm": 0.9069133566490915, "learning_rate": 7.742099975939888e-08, "loss": 0.4634, "step": 11831 }, { "epoch": 0.96, "grad_norm": 0.9656614654654957, "learning_rate": 7.709440512141286e-08, "loss": 0.5422, "step": 11832 }, { "epoch": 0.96, "grad_norm": 0.9992615048473477, "learning_rate": 7.676849813198272e-08, "loss": 0.5122, "step": 11833 }, { "epoch": 0.96, "grad_norm": 0.928341283376421, "learning_rate": 7.644327881369485e-08, "loss": 0.4389, "step": 11834 }, { "epoch": 0.96, "grad_norm": 1.0356863433018246, "learning_rate": 7.611874718908452e-08, "loss": 0.5778, "step": 11835 }, { "epoch": 0.96, "grad_norm": 0.9279200145760254, "learning_rate": 7.579490328064265e-08, "loss": 0.4886, "step": 11836 }, { "epoch": 0.96, "grad_norm": 1.0535959759120828, "learning_rate": 7.547174711081128e-08, "loss": 0.4395, "step": 11837 }, { "epoch": 0.96, "grad_norm": 0.9987245906603993, "learning_rate": 7.514927870198475e-08, "loss": 0.5428, "step": 11838 }, { "epoch": 0.96, "grad_norm": 0.9292302350613081, "learning_rate": 7.482749807650958e-08, "loss": 0.4683, "step": 11839 }, { "epoch": 0.96, "grad_norm": 0.999598281395877, "learning_rate": 7.450640525668573e-08, "loss": 0.5147, "step": 11840 }, { "epoch": 0.96, "grad_norm": 0.9016518616157955, "learning_rate": 7.41860002647643e-08, "loss": 0.4539, "step": 11841 }, { "epoch": 0.96, "grad_norm": 0.9069015870968691, "learning_rate": 7.386628312294863e-08, "loss": 0.3802, "step": 11842 }, { "epoch": 0.96, "grad_norm": 0.9050331418584707, "learning_rate": 7.354725385339546e-08, "loss": 0.4511, "step": 11843 }, { "epoch": 0.96, "grad_norm": 1.083380607724216, "learning_rate": 7.322891247821151e-08, "loss": 0.5082, "step": 11844 }, { "epoch": 0.96, "grad_norm": 0.8287870334091962, "learning_rate": 7.291125901946027e-08, "loss": 0.4287, "step": 11845 }, { "epoch": 0.96, "grad_norm": 0.9519750070137744, "learning_rate": 7.259429349915303e-08, "loss": 0.5054, "step": 11846 }, { "epoch": 0.96, "grad_norm": 0.9186236888951284, "learning_rate": 7.227801593925555e-08, "loss": 0.4396, "step": 11847 }, { "epoch": 0.96, "grad_norm": 1.0156722316213873, "learning_rate": 7.196242636168582e-08, "loss": 0.5054, "step": 11848 }, { "epoch": 0.96, "grad_norm": 0.9044042392774271, "learning_rate": 7.164752478831305e-08, "loss": 0.442, "step": 11849 }, { "epoch": 0.96, "grad_norm": 1.071712246415441, "learning_rate": 7.133331124096087e-08, "loss": 0.5517, "step": 11850 }, { "epoch": 0.96, "grad_norm": 0.9086881899095651, "learning_rate": 7.101978574140411e-08, "loss": 0.5043, "step": 11851 }, { "epoch": 0.96, "grad_norm": 0.9903068859691339, "learning_rate": 7.070694831136871e-08, "loss": 0.5096, "step": 11852 }, { "epoch": 0.96, "grad_norm": 0.7962250008333884, "learning_rate": 7.039479897253509e-08, "loss": 0.4403, "step": 11853 }, { "epoch": 0.96, "grad_norm": 0.9390602910887271, "learning_rate": 7.008333774653376e-08, "loss": 0.4721, "step": 11854 }, { "epoch": 0.96, "grad_norm": 0.9426156451013604, "learning_rate": 6.977256465494853e-08, "loss": 0.4614, "step": 11855 }, { "epoch": 0.96, "grad_norm": 0.9799328098160812, "learning_rate": 6.946247971931774e-08, "loss": 0.474, "step": 11856 }, { "epoch": 0.96, "grad_norm": 0.9250217103653412, "learning_rate": 6.915308296112755e-08, "loss": 0.4106, "step": 11857 }, { "epoch": 0.96, "grad_norm": 0.9025833591773156, "learning_rate": 6.88443744018208e-08, "loss": 0.4388, "step": 11858 }, { "epoch": 0.96, "grad_norm": 0.9340544233362317, "learning_rate": 6.853635406279035e-08, "loss": 0.4985, "step": 11859 }, { "epoch": 0.96, "grad_norm": 0.8905377200903977, "learning_rate": 6.822902196538028e-08, "loss": 0.4445, "step": 11860 }, { "epoch": 0.96, "grad_norm": 0.9160671303363982, "learning_rate": 6.792237813089131e-08, "loss": 0.4647, "step": 11861 }, { "epoch": 0.96, "grad_norm": 1.0165528326770312, "learning_rate": 6.761642258056977e-08, "loss": 0.4915, "step": 11862 }, { "epoch": 0.96, "grad_norm": 0.9790731286567422, "learning_rate": 6.731115533562094e-08, "loss": 0.4872, "step": 11863 }, { "epoch": 0.96, "grad_norm": 0.940150772056775, "learning_rate": 6.7006576417199e-08, "loss": 0.5599, "step": 11864 }, { "epoch": 0.96, "grad_norm": 0.8891355445636352, "learning_rate": 6.670268584641148e-08, "loss": 0.5077, "step": 11865 }, { "epoch": 0.96, "grad_norm": 1.0427807053065226, "learning_rate": 6.639948364431492e-08, "loss": 0.4946, "step": 11866 }, { "epoch": 0.96, "grad_norm": 0.9213346316427382, "learning_rate": 6.60969698319247e-08, "loss": 0.4955, "step": 11867 }, { "epoch": 0.96, "grad_norm": 0.9091893947814887, "learning_rate": 6.579514443020296e-08, "loss": 0.4964, "step": 11868 }, { "epoch": 0.96, "grad_norm": 1.0501205485179521, "learning_rate": 6.549400746006629e-08, "loss": 0.4752, "step": 11869 }, { "epoch": 0.96, "grad_norm": 0.883953055819093, "learning_rate": 6.519355894238245e-08, "loss": 0.4733, "step": 11870 }, { "epoch": 0.96, "grad_norm": 0.9817174202768156, "learning_rate": 6.489379889797254e-08, "loss": 0.4572, "step": 11871 }, { "epoch": 0.96, "grad_norm": 0.9823410939883479, "learning_rate": 6.459472734760997e-08, "loss": 0.521, "step": 11872 }, { "epoch": 0.96, "grad_norm": 1.0098939609336977, "learning_rate": 6.429634431202036e-08, "loss": 0.5193, "step": 11873 }, { "epoch": 0.97, "grad_norm": 1.0662717994985875, "learning_rate": 6.399864981188164e-08, "loss": 0.5038, "step": 11874 }, { "epoch": 0.97, "grad_norm": 0.9839336837767094, "learning_rate": 6.370164386782285e-08, "loss": 0.4969, "step": 11875 }, { "epoch": 0.97, "grad_norm": 0.9035783762848933, "learning_rate": 6.340532650042641e-08, "loss": 0.4771, "step": 11876 }, { "epoch": 0.97, "grad_norm": 0.969875008374207, "learning_rate": 6.310969773022701e-08, "loss": 0.5079, "step": 11877 }, { "epoch": 0.97, "grad_norm": 0.9436460184683367, "learning_rate": 6.281475757771161e-08, "loss": 0.4733, "step": 11878 }, { "epoch": 0.97, "grad_norm": 0.972534101604993, "learning_rate": 6.252050606332049e-08, "loss": 0.501, "step": 11879 }, { "epoch": 0.97, "grad_norm": 0.9578199633119227, "learning_rate": 6.222694320744182e-08, "loss": 0.4724, "step": 11880 }, { "epoch": 0.97, "grad_norm": 0.8301656923384986, "learning_rate": 6.193406903042265e-08, "loss": 0.475, "step": 11881 }, { "epoch": 0.97, "grad_norm": 0.9039558845681417, "learning_rate": 6.164188355255673e-08, "loss": 0.4935, "step": 11882 }, { "epoch": 0.97, "grad_norm": 0.9081915049960225, "learning_rate": 6.135038679409344e-08, "loss": 0.5057, "step": 11883 }, { "epoch": 0.97, "grad_norm": 1.0382548299120893, "learning_rate": 6.105957877523216e-08, "loss": 0.4951, "step": 11884 }, { "epoch": 0.97, "grad_norm": 0.905766580597293, "learning_rate": 6.076945951612678e-08, "loss": 0.4418, "step": 11885 }, { "epoch": 0.97, "grad_norm": 0.8931595039530028, "learning_rate": 6.048002903688121e-08, "loss": 0.4915, "step": 11886 }, { "epoch": 0.97, "grad_norm": 1.0160363862096509, "learning_rate": 6.019128735755386e-08, "loss": 0.5485, "step": 11887 }, { "epoch": 0.97, "grad_norm": 0.8877215619065384, "learning_rate": 5.990323449815316e-08, "loss": 0.4455, "step": 11888 }, { "epoch": 0.97, "grad_norm": 0.9249415508767169, "learning_rate": 5.961587047864204e-08, "loss": 0.4407, "step": 11889 }, { "epoch": 0.97, "grad_norm": 0.8467704348053258, "learning_rate": 5.932919531893344e-08, "loss": 0.4518, "step": 11890 }, { "epoch": 0.97, "grad_norm": 0.928828790733546, "learning_rate": 5.9043209038894825e-08, "loss": 0.4528, "step": 11891 }, { "epoch": 0.97, "grad_norm": 0.9258296897330218, "learning_rate": 5.8757911658343657e-08, "loss": 0.4908, "step": 11892 }, { "epoch": 0.97, "grad_norm": 0.993635545931022, "learning_rate": 5.847330319705191e-08, "loss": 0.5191, "step": 11893 }, { "epoch": 0.97, "grad_norm": 1.0503873247954665, "learning_rate": 5.818938367474159e-08, "loss": 0.4728, "step": 11894 }, { "epoch": 0.97, "grad_norm": 0.909336100675651, "learning_rate": 5.790615311108805e-08, "loss": 0.4634, "step": 11895 }, { "epoch": 0.97, "grad_norm": 1.0197521172435708, "learning_rate": 5.7623611525721155e-08, "loss": 0.5245, "step": 11896 }, { "epoch": 0.97, "grad_norm": 0.8289684625646265, "learning_rate": 5.7341758938217474e-08, "loss": 0.404, "step": 11897 }, { "epoch": 0.97, "grad_norm": 1.0007797587248377, "learning_rate": 5.706059536811137e-08, "loss": 0.4432, "step": 11898 }, { "epoch": 0.97, "grad_norm": 0.8384808151151713, "learning_rate": 5.6780120834887264e-08, "loss": 0.3979, "step": 11899 }, { "epoch": 0.97, "grad_norm": 0.8634048374650863, "learning_rate": 5.650033535798072e-08, "loss": 0.4548, "step": 11900 }, { "epoch": 0.97, "grad_norm": 0.9543880848177454, "learning_rate": 5.6221238956780664e-08, "loss": 0.4442, "step": 11901 }, { "epoch": 0.97, "grad_norm": 0.9048198696439131, "learning_rate": 5.5942831650628303e-08, "loss": 0.4611, "step": 11902 }, { "epoch": 0.97, "grad_norm": 0.9538814286699375, "learning_rate": 5.566511345881931e-08, "loss": 0.5113, "step": 11903 }, { "epoch": 0.97, "grad_norm": 0.9043391175027943, "learning_rate": 5.5388084400594954e-08, "loss": 0.4949, "step": 11904 }, { "epoch": 0.97, "grad_norm": 0.9293652419355056, "learning_rate": 5.511174449515655e-08, "loss": 0.46, "step": 11905 }, { "epoch": 0.97, "grad_norm": 1.0324711413947731, "learning_rate": 5.483609376165322e-08, "loss": 0.5127, "step": 11906 }, { "epoch": 0.97, "grad_norm": 0.8941503163390319, "learning_rate": 5.456113221918746e-08, "loss": 0.4751, "step": 11907 }, { "epoch": 0.97, "grad_norm": 0.919163316555529, "learning_rate": 5.428685988681292e-08, "loss": 0.4502, "step": 11908 }, { "epoch": 0.97, "grad_norm": 0.9342751986032585, "learning_rate": 5.401327678353774e-08, "loss": 0.4755, "step": 11909 }, { "epoch": 0.97, "grad_norm": 0.9684954869778296, "learning_rate": 5.3740382928320065e-08, "loss": 0.4781, "step": 11910 }, { "epoch": 0.97, "grad_norm": 1.0860550064505223, "learning_rate": 5.346817834007145e-08, "loss": 0.4832, "step": 11911 }, { "epoch": 0.97, "grad_norm": 0.915553037036275, "learning_rate": 5.3196663037655695e-08, "loss": 0.5023, "step": 11912 }, { "epoch": 0.97, "grad_norm": 0.9504761418845167, "learning_rate": 5.292583703988885e-08, "loss": 0.4172, "step": 11913 }, { "epoch": 0.97, "grad_norm": 0.9750402476226417, "learning_rate": 5.265570036553813e-08, "loss": 0.5118, "step": 11914 }, { "epoch": 0.97, "grad_norm": 0.950881631306124, "learning_rate": 5.238625303332412e-08, "loss": 0.4694, "step": 11915 }, { "epoch": 0.97, "grad_norm": 0.9596144934035122, "learning_rate": 5.2117495061918544e-08, "loss": 0.4668, "step": 11916 }, { "epoch": 0.97, "grad_norm": 0.9621390065847045, "learning_rate": 5.184942646994762e-08, "loss": 0.5706, "step": 11917 }, { "epoch": 0.97, "grad_norm": 0.9387416367581379, "learning_rate": 5.158204727598759e-08, "loss": 0.4659, "step": 11918 }, { "epoch": 0.97, "grad_norm": 1.0693341512877415, "learning_rate": 5.131535749856698e-08, "loss": 0.4836, "step": 11919 }, { "epoch": 0.97, "grad_norm": 1.0015756147477828, "learning_rate": 5.104935715616766e-08, "loss": 0.5097, "step": 11920 }, { "epoch": 0.97, "grad_norm": 1.272574570392914, "learning_rate": 5.0784046267223775e-08, "loss": 0.4799, "step": 11921 }, { "epoch": 0.97, "grad_norm": 0.8719092888254251, "learning_rate": 5.0519424850119516e-08, "loss": 0.4908, "step": 11922 }, { "epoch": 0.97, "grad_norm": 0.9723855937950857, "learning_rate": 5.025549292319465e-08, "loss": 0.4476, "step": 11923 }, { "epoch": 0.97, "grad_norm": 0.8891720838974949, "learning_rate": 4.999225050473788e-08, "loss": 0.4679, "step": 11924 }, { "epoch": 0.97, "grad_norm": 1.00799732731766, "learning_rate": 4.972969761299351e-08, "loss": 0.4652, "step": 11925 }, { "epoch": 0.97, "grad_norm": 0.9705146089469759, "learning_rate": 4.9467834266154756e-08, "loss": 0.4753, "step": 11926 }, { "epoch": 0.97, "grad_norm": 0.8757512034841, "learning_rate": 4.920666048236933e-08, "loss": 0.438, "step": 11927 }, { "epoch": 0.97, "grad_norm": 0.9998789701294412, "learning_rate": 4.894617627973497e-08, "loss": 0.4694, "step": 11928 }, { "epoch": 0.97, "grad_norm": 0.9252960647533439, "learning_rate": 4.8686381676305015e-08, "loss": 0.4889, "step": 11929 }, { "epoch": 0.97, "grad_norm": 0.9785224715228302, "learning_rate": 4.8427276690081735e-08, "loss": 0.4981, "step": 11930 }, { "epoch": 0.97, "grad_norm": 0.9302323682272238, "learning_rate": 4.8168861339020766e-08, "loss": 0.4768, "step": 11931 }, { "epoch": 0.97, "grad_norm": 0.9128889123665305, "learning_rate": 4.791113564103111e-08, "loss": 0.4583, "step": 11932 }, { "epoch": 0.97, "grad_norm": 0.9159577759229364, "learning_rate": 4.7654099613971825e-08, "loss": 0.4738, "step": 11933 }, { "epoch": 0.97, "grad_norm": 0.9667416898072759, "learning_rate": 4.739775327565532e-08, "loss": 0.5195, "step": 11934 }, { "epoch": 0.97, "grad_norm": 0.9583053889816117, "learning_rate": 4.714209664384739e-08, "loss": 0.4987, "step": 11935 }, { "epoch": 0.97, "grad_norm": 1.0593700920519042, "learning_rate": 4.688712973626386e-08, "loss": 0.4761, "step": 11936 }, { "epoch": 0.97, "grad_norm": 0.9518152352507855, "learning_rate": 4.663285257057393e-08, "loss": 0.4734, "step": 11937 }, { "epoch": 0.97, "grad_norm": 0.9369693183511989, "learning_rate": 4.637926516439795e-08, "loss": 0.4472, "step": 11938 }, { "epoch": 0.97, "grad_norm": 0.9575936036237775, "learning_rate": 4.612636753531075e-08, "loss": 0.4948, "step": 11939 }, { "epoch": 0.97, "grad_norm": 1.0008413774296308, "learning_rate": 4.58741597008372e-08, "loss": 0.4879, "step": 11940 }, { "epoch": 0.97, "grad_norm": 0.9777885044908675, "learning_rate": 4.5622641678454424e-08, "loss": 0.4558, "step": 11941 }, { "epoch": 0.97, "grad_norm": 0.9945116465130329, "learning_rate": 4.537181348559405e-08, "loss": 0.4254, "step": 11942 }, { "epoch": 0.97, "grad_norm": 0.9461087646435123, "learning_rate": 4.512167513963661e-08, "loss": 0.5021, "step": 11943 }, { "epoch": 0.97, "grad_norm": 0.9954162897161014, "learning_rate": 4.487222665791713e-08, "loss": 0.5154, "step": 11944 }, { "epoch": 0.97, "grad_norm": 0.9237004610309977, "learning_rate": 4.4623468057722886e-08, "loss": 0.466, "step": 11945 }, { "epoch": 0.97, "grad_norm": 0.9637871569395646, "learning_rate": 4.437539935629009e-08, "loss": 0.4733, "step": 11946 }, { "epoch": 0.97, "grad_norm": 0.8388679921392534, "learning_rate": 4.412802057081278e-08, "loss": 0.4363, "step": 11947 }, { "epoch": 0.97, "grad_norm": 0.9179034699969338, "learning_rate": 4.388133171843278e-08, "loss": 0.5191, "step": 11948 }, { "epoch": 0.97, "grad_norm": 0.8819156108312363, "learning_rate": 4.3635332816245324e-08, "loss": 0.4188, "step": 11949 }, { "epoch": 0.97, "grad_norm": 0.9320418097418713, "learning_rate": 4.339002388129787e-08, "loss": 0.4578, "step": 11950 }, { "epoch": 0.97, "grad_norm": 0.972402913307539, "learning_rate": 4.3145404930591275e-08, "loss": 0.4979, "step": 11951 }, { "epoch": 0.97, "grad_norm": 0.9727729975849237, "learning_rate": 4.2901475981074195e-08, "loss": 0.5532, "step": 11952 }, { "epoch": 0.97, "grad_norm": 1.035867758045349, "learning_rate": 4.2658237049655325e-08, "loss": 0.4867, "step": 11953 }, { "epoch": 0.97, "grad_norm": 1.0089929989437811, "learning_rate": 4.241568815318675e-08, "loss": 0.4247, "step": 11954 }, { "epoch": 0.97, "grad_norm": 1.0064757121513352, "learning_rate": 4.2173829308479466e-08, "loss": 0.4637, "step": 11955 }, { "epoch": 0.97, "grad_norm": 0.9584510660983068, "learning_rate": 4.193266053229339e-08, "loss": 0.486, "step": 11956 }, { "epoch": 0.97, "grad_norm": 1.0334196207407, "learning_rate": 4.1692181841340716e-08, "loss": 0.4957, "step": 11957 }, { "epoch": 0.97, "grad_norm": 0.971222793955482, "learning_rate": 4.1452393252285894e-08, "loss": 0.4645, "step": 11958 }, { "epoch": 0.97, "grad_norm": 1.0089128046903748, "learning_rate": 4.1213294781748956e-08, "loss": 0.4739, "step": 11959 }, { "epoch": 0.97, "grad_norm": 0.916419587613008, "learning_rate": 4.097488644629555e-08, "loss": 0.4691, "step": 11960 }, { "epoch": 0.97, "grad_norm": 0.9609627617532643, "learning_rate": 4.0737168262450224e-08, "loss": 0.4733, "step": 11961 }, { "epoch": 0.97, "grad_norm": 0.9136837067317344, "learning_rate": 4.050014024668425e-08, "loss": 0.4675, "step": 11962 }, { "epoch": 0.97, "grad_norm": 0.8788214931690304, "learning_rate": 4.02638024154256e-08, "loss": 0.4911, "step": 11963 }, { "epoch": 0.97, "grad_norm": 0.9283939728597378, "learning_rate": 4.002815478505007e-08, "loss": 0.4347, "step": 11964 }, { "epoch": 0.97, "grad_norm": 0.9048435075657899, "learning_rate": 3.9793197371889026e-08, "loss": 0.4434, "step": 11965 }, { "epoch": 0.97, "grad_norm": 1.0281323566018774, "learning_rate": 3.955893019222501e-08, "loss": 0.4806, "step": 11966 }, { "epoch": 0.97, "grad_norm": 0.9526276177157981, "learning_rate": 3.93253532622917e-08, "loss": 0.4953, "step": 11967 }, { "epoch": 0.97, "grad_norm": 0.9364679716036685, "learning_rate": 3.909246659827726e-08, "loss": 0.4233, "step": 11968 }, { "epoch": 0.97, "grad_norm": 0.9841615329388482, "learning_rate": 3.8860270216319885e-08, "loss": 0.5168, "step": 11969 }, { "epoch": 0.97, "grad_norm": 1.047709700000648, "learning_rate": 3.862876413250893e-08, "loss": 0.5589, "step": 11970 }, { "epoch": 0.97, "grad_norm": 0.863210692248217, "learning_rate": 3.839794836288935e-08, "loss": 0.4264, "step": 11971 }, { "epoch": 0.97, "grad_norm": 0.8951007919327821, "learning_rate": 3.816782292345611e-08, "loss": 0.442, "step": 11972 }, { "epoch": 0.97, "grad_norm": 1.0667982781868577, "learning_rate": 3.7938387830156464e-08, "loss": 0.4819, "step": 11973 }, { "epoch": 0.97, "grad_norm": 0.8919483517785504, "learning_rate": 3.7709643098891024e-08, "loss": 0.4757, "step": 11974 }, { "epoch": 0.97, "grad_norm": 1.0189103671289825, "learning_rate": 3.748158874550934e-08, "loss": 0.5292, "step": 11975 }, { "epoch": 0.97, "grad_norm": 0.8422196036000987, "learning_rate": 3.725422478581764e-08, "loss": 0.468, "step": 11976 }, { "epoch": 0.97, "grad_norm": 0.9799552575419327, "learning_rate": 3.702755123557111e-08, "loss": 0.4893, "step": 11977 }, { "epoch": 0.97, "grad_norm": 1.0308217161205298, "learning_rate": 3.6801568110478304e-08, "loss": 0.5068, "step": 11978 }, { "epoch": 0.97, "grad_norm": 0.8779731833204971, "learning_rate": 3.6576275426200014e-08, "loss": 0.4277, "step": 11979 }, { "epoch": 0.97, "grad_norm": 0.9053805546833162, "learning_rate": 3.635167319834709e-08, "loss": 0.465, "step": 11980 }, { "epoch": 0.97, "grad_norm": 0.8666755276899345, "learning_rate": 3.612776144248597e-08, "loss": 0.4476, "step": 11981 }, { "epoch": 0.97, "grad_norm": 0.9849607754708499, "learning_rate": 3.590454017413314e-08, "loss": 0.5167, "step": 11982 }, { "epoch": 0.97, "grad_norm": 0.857166337628939, "learning_rate": 3.568200940875732e-08, "loss": 0.442, "step": 11983 }, { "epoch": 0.97, "grad_norm": 0.9616493391315466, "learning_rate": 3.546016916178063e-08, "loss": 0.5472, "step": 11984 }, { "epoch": 0.97, "grad_norm": 0.9561335160933164, "learning_rate": 3.523901944857522e-08, "loss": 0.4597, "step": 11985 }, { "epoch": 0.97, "grad_norm": 1.0152458863216052, "learning_rate": 3.5018560284466595e-08, "loss": 0.5377, "step": 11986 }, { "epoch": 0.97, "grad_norm": 0.9116349271646171, "learning_rate": 3.4798791684733655e-08, "loss": 0.4812, "step": 11987 }, { "epoch": 0.97, "grad_norm": 0.9578005502526648, "learning_rate": 3.457971366460422e-08, "loss": 0.4809, "step": 11988 }, { "epoch": 0.97, "grad_norm": 0.9950944460120628, "learning_rate": 3.436132623926169e-08, "loss": 0.5159, "step": 11989 }, { "epoch": 0.97, "grad_norm": 1.0323696694058275, "learning_rate": 3.414362942384064e-08, "loss": 0.473, "step": 11990 }, { "epoch": 0.97, "grad_norm": 0.8910655305456949, "learning_rate": 3.392662323342566e-08, "loss": 0.5181, "step": 11991 }, { "epoch": 0.97, "grad_norm": 1.0142482538193889, "learning_rate": 3.371030768305583e-08, "loss": 0.4883, "step": 11992 }, { "epoch": 0.97, "grad_norm": 0.8941388462250626, "learning_rate": 3.349468278772139e-08, "loss": 0.4608, "step": 11993 }, { "epoch": 0.97, "grad_norm": 0.9806960085628176, "learning_rate": 3.3279748562364824e-08, "loss": 0.4906, "step": 11994 }, { "epoch": 0.97, "grad_norm": 0.9284249097360684, "learning_rate": 3.3065505021881995e-08, "loss": 0.5173, "step": 11995 }, { "epoch": 0.97, "grad_norm": 0.962827598357366, "learning_rate": 3.2851952181118805e-08, "loss": 0.4712, "step": 11996 }, { "epoch": 0.98, "grad_norm": 0.9785884863012381, "learning_rate": 3.2639090054874534e-08, "loss": 0.4511, "step": 11997 }, { "epoch": 0.98, "grad_norm": 0.9825408225097995, "learning_rate": 3.242691865790071e-08, "loss": 0.5059, "step": 11998 }, { "epoch": 0.98, "grad_norm": 1.0175010878886457, "learning_rate": 3.22154380048989e-08, "loss": 0.551, "step": 11999 }, { "epoch": 0.98, "grad_norm": 0.8098232963770842, "learning_rate": 3.200464811052628e-08, "loss": 0.4347, "step": 12000 }, { "epoch": 0.98, "grad_norm": 0.943884107957791, "learning_rate": 3.1794548989391163e-08, "loss": 0.4533, "step": 12001 }, { "epoch": 0.98, "grad_norm": 0.9370564155146761, "learning_rate": 3.158514065605078e-08, "loss": 0.4661, "step": 12002 }, { "epoch": 0.98, "grad_norm": 0.8380704807977243, "learning_rate": 3.1376423125019093e-08, "loss": 0.4335, "step": 12003 }, { "epoch": 0.98, "grad_norm": 0.9139190205140328, "learning_rate": 3.116839641075786e-08, "loss": 0.4962, "step": 12004 }, { "epoch": 0.98, "grad_norm": 1.018881269398722, "learning_rate": 3.0961060527685546e-08, "loss": 0.5046, "step": 12005 }, { "epoch": 0.98, "grad_norm": 0.9367199554553238, "learning_rate": 3.0754415490168446e-08, "loss": 0.4828, "step": 12006 }, { "epoch": 0.98, "grad_norm": 0.8418975025483183, "learning_rate": 3.054846131252731e-08, "loss": 0.4576, "step": 12007 }, { "epoch": 0.98, "grad_norm": 0.9456790269529668, "learning_rate": 3.034319800903629e-08, "loss": 0.4752, "step": 12008 }, { "epoch": 0.98, "grad_norm": 0.9483389987156057, "learning_rate": 3.013862559391734e-08, "loss": 0.4741, "step": 12009 }, { "epoch": 0.98, "grad_norm": 0.9600136688183358, "learning_rate": 2.993474408134911e-08, "loss": 0.4849, "step": 12010 }, { "epoch": 0.98, "grad_norm": 0.9480643663449927, "learning_rate": 2.9731553485459197e-08, "loss": 0.4928, "step": 12011 }, { "epoch": 0.98, "grad_norm": 0.8911982744195088, "learning_rate": 2.9529053820329667e-08, "loss": 0.4711, "step": 12012 }, { "epoch": 0.98, "grad_norm": 0.97406248259707, "learning_rate": 2.932724509999263e-08, "loss": 0.4866, "step": 12013 }, { "epoch": 0.98, "grad_norm": 0.9693131231472446, "learning_rate": 2.9126127338432454e-08, "loss": 0.5288, "step": 12014 }, { "epoch": 0.98, "grad_norm": 0.8760148975933071, "learning_rate": 2.8925700549589096e-08, "loss": 0.4298, "step": 12015 }, { "epoch": 0.98, "grad_norm": 0.9383849892653244, "learning_rate": 2.8725964747350342e-08, "loss": 0.4584, "step": 12016 }, { "epoch": 0.98, "grad_norm": 0.9725559701311824, "learning_rate": 2.852691994555623e-08, "loss": 0.4955, "step": 12017 }, { "epoch": 0.98, "grad_norm": 1.0068941623928347, "learning_rate": 2.8328566158002392e-08, "loss": 0.4362, "step": 12018 }, { "epoch": 0.98, "grad_norm": 0.9606479146924113, "learning_rate": 2.8130903398434496e-08, "loss": 0.4623, "step": 12019 }, { "epoch": 0.98, "grad_norm": 0.9535023751808042, "learning_rate": 2.7933931680550476e-08, "loss": 0.5133, "step": 12020 }, { "epoch": 0.98, "grad_norm": 0.8856677096306654, "learning_rate": 2.7737651017998303e-08, "loss": 0.4567, "step": 12021 }, { "epoch": 0.98, "grad_norm": 0.8725884384204944, "learning_rate": 2.754206142438265e-08, "loss": 0.4795, "step": 12022 }, { "epoch": 0.98, "grad_norm": 1.0567535458362625, "learning_rate": 2.734716291325712e-08, "loss": 0.5445, "step": 12023 }, { "epoch": 0.98, "grad_norm": 0.9246990984926732, "learning_rate": 2.7152955498126465e-08, "loss": 0.4927, "step": 12024 }, { "epoch": 0.98, "grad_norm": 0.9147568261561264, "learning_rate": 2.695943919244992e-08, "loss": 0.4427, "step": 12025 }, { "epoch": 0.98, "grad_norm": 0.8809789552613674, "learning_rate": 2.676661400963898e-08, "loss": 0.496, "step": 12026 }, { "epoch": 0.98, "grad_norm": 0.9264478746652443, "learning_rate": 2.6574479963054068e-08, "loss": 0.4888, "step": 12027 }, { "epoch": 0.98, "grad_norm": 1.0732755625344113, "learning_rate": 2.6383037066013417e-08, "loss": 0.4947, "step": 12028 }, { "epoch": 0.98, "grad_norm": 0.9045542732560331, "learning_rate": 2.6192285331779754e-08, "loss": 0.4218, "step": 12029 }, { "epoch": 0.98, "grad_norm": 0.926681609482207, "learning_rate": 2.6002224773574725e-08, "loss": 0.4675, "step": 12030 }, { "epoch": 0.98, "grad_norm": 0.9571897718833516, "learning_rate": 2.5812855404568903e-08, "loss": 0.5279, "step": 12031 }, { "epoch": 0.98, "grad_norm": 0.8379559114592033, "learning_rate": 2.5624177237884017e-08, "loss": 0.4349, "step": 12032 }, { "epoch": 0.98, "grad_norm": 0.9385323324140011, "learning_rate": 2.5436190286597384e-08, "loss": 0.5041, "step": 12033 }, { "epoch": 0.98, "grad_norm": 0.9581379073190894, "learning_rate": 2.524889456373525e-08, "loss": 0.5283, "step": 12034 }, { "epoch": 0.98, "grad_norm": 0.9226299394723871, "learning_rate": 2.506229008227723e-08, "loss": 0.4763, "step": 12035 }, { "epoch": 0.98, "grad_norm": 1.0014369490594117, "learning_rate": 2.4876376855154095e-08, "loss": 0.5233, "step": 12036 }, { "epoch": 0.98, "grad_norm": 0.9786997303191647, "learning_rate": 2.469115489525109e-08, "loss": 0.4833, "step": 12037 }, { "epoch": 0.98, "grad_norm": 1.0225673016820687, "learning_rate": 2.4506624215402396e-08, "loss": 0.4294, "step": 12038 }, { "epoch": 0.98, "grad_norm": 1.0178203672748507, "learning_rate": 2.4322784828395562e-08, "loss": 0.5122, "step": 12039 }, { "epoch": 0.98, "grad_norm": 0.960735625727089, "learning_rate": 2.4139636746972617e-08, "loss": 0.4957, "step": 12040 }, { "epoch": 0.98, "grad_norm": 0.9003834052306827, "learning_rate": 2.395717998382341e-08, "loss": 0.4717, "step": 12041 }, { "epoch": 0.98, "grad_norm": 1.0236375174607777, "learning_rate": 2.377541455159338e-08, "loss": 0.4855, "step": 12042 }, { "epoch": 0.98, "grad_norm": 0.9208499654286599, "learning_rate": 2.3594340462878007e-08, "loss": 0.4828, "step": 12043 }, { "epoch": 0.98, "grad_norm": 0.9747632253006461, "learning_rate": 2.3413957730226144e-08, "loss": 0.5228, "step": 12044 }, { "epoch": 0.98, "grad_norm": 0.8512407956116504, "learning_rate": 2.3234266366137794e-08, "loss": 0.486, "step": 12045 }, { "epoch": 0.98, "grad_norm": 0.9528617655477253, "learning_rate": 2.305526638306521e-08, "loss": 0.4579, "step": 12046 }, { "epoch": 0.98, "grad_norm": 0.9290253967263423, "learning_rate": 2.2876957793412923e-08, "loss": 0.4823, "step": 12047 }, { "epoch": 0.98, "grad_norm": 0.7917551407534277, "learning_rate": 2.2699340609537713e-08, "loss": 0.3943, "step": 12048 }, { "epoch": 0.98, "grad_norm": 0.969076220647739, "learning_rate": 2.2522414843748618e-08, "loss": 0.4756, "step": 12049 }, { "epoch": 0.98, "grad_norm": 0.9774628839866263, "learning_rate": 2.2346180508305836e-08, "loss": 0.4676, "step": 12050 }, { "epoch": 0.98, "grad_norm": 0.9611488238609713, "learning_rate": 2.217063761542293e-08, "loss": 0.4263, "step": 12051 }, { "epoch": 0.98, "grad_norm": 1.0969373743056006, "learning_rate": 2.1995786177264612e-08, "loss": 0.4796, "step": 12052 }, { "epoch": 0.98, "grad_norm": 0.9629598594205779, "learning_rate": 2.1821626205947854e-08, "loss": 0.5476, "step": 12053 }, { "epoch": 0.98, "grad_norm": 0.9234136061403723, "learning_rate": 2.1648157713540786e-08, "loss": 0.4845, "step": 12054 }, { "epoch": 0.98, "grad_norm": 1.0456502401405645, "learning_rate": 2.147538071206712e-08, "loss": 0.4983, "step": 12055 }, { "epoch": 0.98, "grad_norm": 0.9620817812734163, "learning_rate": 2.130329521349728e-08, "loss": 0.5425, "step": 12056 }, { "epoch": 0.98, "grad_norm": 0.9936463735647684, "learning_rate": 2.113190122975839e-08, "loss": 0.4874, "step": 12057 }, { "epoch": 0.98, "grad_norm": 0.9191229093038062, "learning_rate": 2.096119877272873e-08, "loss": 0.4658, "step": 12058 }, { "epoch": 0.98, "grad_norm": 1.0217672518961693, "learning_rate": 2.0791187854234396e-08, "loss": 0.5153, "step": 12059 }, { "epoch": 0.98, "grad_norm": 1.0039471938425795, "learning_rate": 2.0621868486060402e-08, "loss": 0.5135, "step": 12060 }, { "epoch": 0.98, "grad_norm": 0.9101474014009246, "learning_rate": 2.045324067993959e-08, "loss": 0.4847, "step": 12061 }, { "epoch": 0.98, "grad_norm": 0.9451141667951678, "learning_rate": 2.0285304447557052e-08, "loss": 0.4758, "step": 12062 }, { "epoch": 0.98, "grad_norm": 1.1071361661435415, "learning_rate": 2.011805980055015e-08, "loss": 0.5314, "step": 12063 }, { "epoch": 0.98, "grad_norm": 0.9478821974145435, "learning_rate": 1.9951506750510718e-08, "loss": 0.5055, "step": 12064 }, { "epoch": 0.98, "grad_norm": 0.9473836420133486, "learning_rate": 1.9785645308978417e-08, "loss": 0.4738, "step": 12065 }, { "epoch": 0.98, "grad_norm": 1.047067744675001, "learning_rate": 1.962047548744961e-08, "loss": 0.4963, "step": 12066 }, { "epoch": 0.98, "grad_norm": 0.9487194859396089, "learning_rate": 1.9455997297368467e-08, "loss": 0.4543, "step": 12067 }, { "epoch": 0.98, "grad_norm": 0.9020631622345036, "learning_rate": 1.9292210750134766e-08, "loss": 0.4766, "step": 12068 }, { "epoch": 0.98, "grad_norm": 0.9240780684461876, "learning_rate": 1.9129115857097203e-08, "loss": 0.4414, "step": 12069 }, { "epoch": 0.98, "grad_norm": 1.0183690019040301, "learning_rate": 1.896671262955896e-08, "loss": 0.4414, "step": 12070 }, { "epoch": 0.98, "grad_norm": 0.9719576260291167, "learning_rate": 1.8805001078774364e-08, "loss": 0.4942, "step": 12071 }, { "epoch": 0.98, "grad_norm": 0.8229267596156057, "learning_rate": 1.8643981215951125e-08, "loss": 0.4737, "step": 12072 }, { "epoch": 0.98, "grad_norm": 0.926850956701825, "learning_rate": 1.8483653052244754e-08, "loss": 0.4606, "step": 12073 }, { "epoch": 0.98, "grad_norm": 0.9709028631688023, "learning_rate": 1.832401659876859e-08, "loss": 0.4355, "step": 12074 }, { "epoch": 0.98, "grad_norm": 1.0114948920634896, "learning_rate": 1.8165071866583785e-08, "loss": 0.4659, "step": 12075 }, { "epoch": 0.98, "grad_norm": 0.996860587805961, "learning_rate": 1.8006818866705968e-08, "loss": 0.5256, "step": 12076 }, { "epoch": 0.98, "grad_norm": 0.8907455895141424, "learning_rate": 1.7849257610101923e-08, "loss": 0.431, "step": 12077 }, { "epoch": 0.98, "grad_norm": 0.9866829023109346, "learning_rate": 1.7692388107689584e-08, "loss": 0.4946, "step": 12078 }, { "epoch": 0.98, "grad_norm": 0.9840949466758064, "learning_rate": 1.7536210370341366e-08, "loss": 0.4973, "step": 12079 }, { "epoch": 0.98, "grad_norm": 0.934940510323555, "learning_rate": 1.7380724408878613e-08, "loss": 0.4776, "step": 12080 }, { "epoch": 0.98, "grad_norm": 0.9316063795807219, "learning_rate": 1.7225930234077147e-08, "loss": 0.448, "step": 12081 }, { "epoch": 0.98, "grad_norm": 0.9043191059114086, "learning_rate": 1.7071827856663947e-08, "loss": 0.4002, "step": 12082 }, { "epoch": 0.98, "grad_norm": 1.0040959642431642, "learning_rate": 1.6918417287318245e-08, "loss": 0.4702, "step": 12083 }, { "epoch": 0.98, "grad_norm": 1.0118369320866807, "learning_rate": 1.6765698536671538e-08, "loss": 0.5233, "step": 12084 }, { "epoch": 0.98, "grad_norm": 0.9343418206195916, "learning_rate": 1.661367161530647e-08, "loss": 0.4925, "step": 12085 }, { "epoch": 0.98, "grad_norm": 0.9724808816874202, "learning_rate": 1.646233653375795e-08, "loss": 0.5342, "step": 12086 }, { "epoch": 0.98, "grad_norm": 0.9234714441172094, "learning_rate": 1.6311693302515364e-08, "loss": 0.4269, "step": 12087 }, { "epoch": 0.98, "grad_norm": 0.9908340940280683, "learning_rate": 1.6161741932017026e-08, "loss": 0.5122, "step": 12088 }, { "epoch": 0.98, "grad_norm": 1.0136407090289303, "learning_rate": 1.601248243265352e-08, "loss": 0.555, "step": 12089 }, { "epoch": 0.98, "grad_norm": 0.8792657315259043, "learning_rate": 1.586391481476879e-08, "loss": 0.497, "step": 12090 }, { "epoch": 0.98, "grad_norm": 0.9361415339097172, "learning_rate": 1.5716039088660152e-08, "loss": 0.4718, "step": 12091 }, { "epoch": 0.98, "grad_norm": 1.051550955427514, "learning_rate": 1.5568855264572745e-08, "loss": 0.4684, "step": 12092 }, { "epoch": 0.98, "grad_norm": 1.0455735739038166, "learning_rate": 1.5422363352708414e-08, "loss": 0.4771, "step": 12093 }, { "epoch": 0.98, "grad_norm": 0.9894632078153739, "learning_rate": 1.5276563363217923e-08, "loss": 0.5236, "step": 12094 }, { "epoch": 0.98, "grad_norm": 0.9306248195163555, "learning_rate": 1.51314553062043e-08, "loss": 0.5081, "step": 12095 }, { "epoch": 0.98, "grad_norm": 1.030265257408779, "learning_rate": 1.498703919172506e-08, "loss": 0.5067, "step": 12096 }, { "epoch": 0.98, "grad_norm": 0.9951251961523201, "learning_rate": 1.4843315029786642e-08, "loss": 0.4647, "step": 12097 }, { "epoch": 0.98, "grad_norm": 0.9202627425313655, "learning_rate": 1.4700282830351077e-08, "loss": 0.4568, "step": 12098 }, { "epoch": 0.98, "grad_norm": 1.0646728167615809, "learning_rate": 1.4557942603327103e-08, "loss": 0.5333, "step": 12099 }, { "epoch": 0.98, "grad_norm": 0.9165375441309248, "learning_rate": 1.4416294358582383e-08, "loss": 0.474, "step": 12100 }, { "epoch": 0.98, "grad_norm": 0.9989449749096093, "learning_rate": 1.4275338105930181e-08, "loss": 0.5541, "step": 12101 }, { "epoch": 0.98, "grad_norm": 0.8842241995287015, "learning_rate": 1.4135073855139348e-08, "loss": 0.4333, "step": 12102 }, { "epoch": 0.98, "grad_norm": 1.0020455888538515, "learning_rate": 1.3995501615930996e-08, "loss": 0.5113, "step": 12103 }, { "epoch": 0.98, "grad_norm": 0.9392848855909458, "learning_rate": 1.3856621397977388e-08, "loss": 0.4491, "step": 12104 }, { "epoch": 0.98, "grad_norm": 0.8645236971420108, "learning_rate": 1.3718433210901938e-08, "loss": 0.4684, "step": 12105 }, { "epoch": 0.98, "grad_norm": 0.9476125835258605, "learning_rate": 1.358093706428032e-08, "loss": 0.4455, "step": 12106 }, { "epoch": 0.98, "grad_norm": 0.9120247324159362, "learning_rate": 1.3444132967642687e-08, "loss": 0.4567, "step": 12107 }, { "epoch": 0.98, "grad_norm": 0.9439904162223841, "learning_rate": 1.3308020930468123e-08, "loss": 0.4762, "step": 12108 }, { "epoch": 0.98, "grad_norm": 0.8476734435812402, "learning_rate": 1.3172600962190196e-08, "loss": 0.3775, "step": 12109 }, { "epoch": 0.98, "grad_norm": 1.0085984962827712, "learning_rate": 1.3037873072192509e-08, "loss": 0.5417, "step": 12110 }, { "epoch": 0.98, "grad_norm": 0.9458556391105006, "learning_rate": 1.2903837269810926e-08, "loss": 0.4422, "step": 12111 }, { "epoch": 0.98, "grad_norm": 0.927148332515927, "learning_rate": 1.2770493564335795e-08, "loss": 0.4492, "step": 12112 }, { "epoch": 0.98, "grad_norm": 0.8648138309308088, "learning_rate": 1.2637841965006392e-08, "loss": 0.4853, "step": 12113 }, { "epoch": 0.98, "grad_norm": 0.9085857938849952, "learning_rate": 1.2505882481016473e-08, "loss": 0.4589, "step": 12114 }, { "epoch": 0.98, "grad_norm": 0.9850194006674354, "learning_rate": 1.2374615121508726e-08, "loss": 0.5077, "step": 12115 }, { "epoch": 0.98, "grad_norm": 0.8683619860038134, "learning_rate": 1.2244039895582538e-08, "loss": 0.4737, "step": 12116 }, { "epoch": 0.98, "grad_norm": 0.986180421412581, "learning_rate": 1.2114156812284006e-08, "loss": 0.4238, "step": 12117 }, { "epoch": 0.98, "grad_norm": 0.8630904059814098, "learning_rate": 1.1984965880615929e-08, "loss": 0.4534, "step": 12118 }, { "epoch": 0.98, "grad_norm": 0.9567877247689517, "learning_rate": 1.1856467109530034e-08, "loss": 0.4496, "step": 12119 }, { "epoch": 0.99, "grad_norm": 0.9244238624328863, "learning_rate": 1.1728660507931423e-08, "loss": 0.4996, "step": 12120 }, { "epoch": 0.99, "grad_norm": 0.9355603986596689, "learning_rate": 1.1601546084677451e-08, "loss": 0.4538, "step": 12121 }, { "epoch": 0.99, "grad_norm": 1.0998083959992173, "learning_rate": 1.147512384857663e-08, "loss": 0.5296, "step": 12122 }, { "epoch": 0.99, "grad_norm": 1.0029466284947572, "learning_rate": 1.134939380838973e-08, "loss": 0.5109, "step": 12123 }, { "epoch": 0.99, "grad_norm": 1.018903049451067, "learning_rate": 1.1224355972829782e-08, "loss": 0.53, "step": 12124 }, { "epoch": 0.99, "grad_norm": 0.9814802302583996, "learning_rate": 1.1100010350562073e-08, "loss": 0.4953, "step": 12125 }, { "epoch": 0.99, "grad_norm": 0.9378454731447133, "learning_rate": 1.0976356950203049e-08, "loss": 0.4791, "step": 12126 }, { "epoch": 0.99, "grad_norm": 0.8459287341705666, "learning_rate": 1.0853395780322518e-08, "loss": 0.4792, "step": 12127 }, { "epoch": 0.99, "grad_norm": 0.9874219066355951, "learning_rate": 1.0731126849441442e-08, "loss": 0.5082, "step": 12128 }, { "epoch": 0.99, "grad_norm": 0.9790531177343795, "learning_rate": 1.0609550166033045e-08, "loss": 0.4642, "step": 12129 }, { "epoch": 0.99, "grad_norm": 0.8633723815748132, "learning_rate": 1.0488665738521697e-08, "loss": 0.4535, "step": 12130 }, { "epoch": 0.99, "grad_norm": 0.9307003256715729, "learning_rate": 1.0368473575285143e-08, "loss": 0.491, "step": 12131 }, { "epoch": 0.99, "grad_norm": 0.9499840617739655, "learning_rate": 1.0248973684653385e-08, "loss": 0.4717, "step": 12132 }, { "epoch": 0.99, "grad_norm": 0.9656425326858633, "learning_rate": 1.0130166074906467e-08, "loss": 0.4833, "step": 12133 }, { "epoch": 0.99, "grad_norm": 0.969248297391733, "learning_rate": 1.0012050754277802e-08, "loss": 0.4377, "step": 12134 }, { "epoch": 0.99, "grad_norm": 1.0964434122566502, "learning_rate": 9.894627730953066e-09, "loss": 0.4803, "step": 12135 }, { "epoch": 0.99, "grad_norm": 0.9802224309989128, "learning_rate": 9.777897013069082e-09, "loss": 0.5238, "step": 12136 }, { "epoch": 0.99, "grad_norm": 0.8730273662548644, "learning_rate": 9.661858608716045e-09, "loss": 0.4808, "step": 12137 }, { "epoch": 0.99, "grad_norm": 0.9342879322938532, "learning_rate": 9.546512525934193e-09, "loss": 0.4506, "step": 12138 }, { "epoch": 0.99, "grad_norm": 0.8701452487731054, "learning_rate": 9.43185877271824e-09, "loss": 0.4778, "step": 12139 }, { "epoch": 0.99, "grad_norm": 1.0066801314510307, "learning_rate": 9.317897357011829e-09, "loss": 0.4871, "step": 12140 }, { "epoch": 0.99, "grad_norm": 0.9434565414798493, "learning_rate": 9.204628286714202e-09, "loss": 0.5085, "step": 12141 }, { "epoch": 0.99, "grad_norm": 0.960720693115298, "learning_rate": 9.092051569674632e-09, "loss": 0.5068, "step": 12142 }, { "epoch": 0.99, "grad_norm": 1.0266753458833535, "learning_rate": 8.980167213692437e-09, "loss": 0.4912, "step": 12143 }, { "epoch": 0.99, "grad_norm": 0.8768551645097515, "learning_rate": 8.868975226523634e-09, "loss": 0.4614, "step": 12144 }, { "epoch": 0.99, "grad_norm": 0.9258059202902077, "learning_rate": 8.758475615872065e-09, "loss": 0.4769, "step": 12145 }, { "epoch": 0.99, "grad_norm": 0.8688950692394761, "learning_rate": 8.648668389397153e-09, "loss": 0.4356, "step": 12146 }, { "epoch": 0.99, "grad_norm": 0.9116665627644456, "learning_rate": 8.539553554706148e-09, "loss": 0.479, "step": 12147 }, { "epoch": 0.99, "grad_norm": 0.9170045332553114, "learning_rate": 8.431131119361891e-09, "loss": 0.4745, "step": 12148 }, { "epoch": 0.99, "grad_norm": 0.9571233551136623, "learning_rate": 8.323401090877258e-09, "loss": 0.4227, "step": 12149 }, { "epoch": 0.99, "grad_norm": 0.9635271757833536, "learning_rate": 8.216363476718503e-09, "loss": 0.4495, "step": 12150 }, { "epoch": 0.99, "grad_norm": 0.972196287676129, "learning_rate": 8.110018284304132e-09, "loss": 0.5253, "step": 12151 }, { "epoch": 0.99, "grad_norm": 0.9668650890442178, "learning_rate": 8.004365521001589e-09, "loss": 0.468, "step": 12152 }, { "epoch": 0.99, "grad_norm": 0.9220578685108054, "learning_rate": 7.899405194133902e-09, "loss": 0.4399, "step": 12153 }, { "epoch": 0.99, "grad_norm": 1.109967136629721, "learning_rate": 7.795137310974143e-09, "loss": 0.5623, "step": 12154 }, { "epoch": 0.99, "grad_norm": 0.9759862425878394, "learning_rate": 7.691561878748755e-09, "loss": 0.4836, "step": 12155 }, { "epoch": 0.99, "grad_norm": 0.9096238591337812, "learning_rate": 7.588678904635328e-09, "loss": 0.4849, "step": 12156 }, { "epoch": 0.99, "grad_norm": 1.0074088319601273, "learning_rate": 7.486488395762604e-09, "loss": 0.556, "step": 12157 }, { "epoch": 0.99, "grad_norm": 0.9484713726196886, "learning_rate": 7.384990359212696e-09, "loss": 0.4814, "step": 12158 }, { "epoch": 0.99, "grad_norm": 0.9423528640935501, "learning_rate": 7.284184802019978e-09, "loss": 0.4615, "step": 12159 }, { "epoch": 0.99, "grad_norm": 1.0101292494710938, "learning_rate": 7.1840717311688625e-09, "loss": 0.5216, "step": 12160 }, { "epoch": 0.99, "grad_norm": 0.9788949263707301, "learning_rate": 7.084651153599353e-09, "loss": 0.4846, "step": 12161 }, { "epoch": 0.99, "grad_norm": 0.8823697123892938, "learning_rate": 6.985923076199275e-09, "loss": 0.4516, "step": 12162 }, { "epoch": 0.99, "grad_norm": 0.9332411928489063, "learning_rate": 6.88788750580982e-09, "loss": 0.4917, "step": 12163 }, { "epoch": 0.99, "grad_norm": 0.8939196205465503, "learning_rate": 6.790544449227776e-09, "loss": 0.5001, "step": 12164 }, { "epoch": 0.99, "grad_norm": 0.915649796606784, "learning_rate": 6.693893913195526e-09, "loss": 0.4448, "step": 12165 }, { "epoch": 0.99, "grad_norm": 0.8985794749672081, "learning_rate": 6.597935904413267e-09, "loss": 0.4557, "step": 12166 }, { "epoch": 0.99, "grad_norm": 0.8898060428434278, "learning_rate": 6.502670429529012e-09, "loss": 0.4438, "step": 12167 }, { "epoch": 0.99, "grad_norm": 0.911361729983376, "learning_rate": 6.40809749514637e-09, "loss": 0.4731, "step": 12168 }, { "epoch": 0.99, "grad_norm": 0.9853357071007716, "learning_rate": 6.314217107817877e-09, "loss": 0.5042, "step": 12169 }, { "epoch": 0.99, "grad_norm": 0.9595718090401706, "learning_rate": 6.221029274049439e-09, "loss": 0.4843, "step": 12170 }, { "epoch": 0.99, "grad_norm": 0.9264152524313263, "learning_rate": 6.1285340003003346e-09, "loss": 0.4648, "step": 12171 }, { "epoch": 0.99, "grad_norm": 1.0823810828783662, "learning_rate": 6.03673129297877e-09, "loss": 0.5243, "step": 12172 }, { "epoch": 0.99, "grad_norm": 1.0166043314385933, "learning_rate": 5.945621158446324e-09, "loss": 0.4803, "step": 12173 }, { "epoch": 0.99, "grad_norm": 1.0495585663202873, "learning_rate": 5.855203603017945e-09, "loss": 0.4966, "step": 12174 }, { "epoch": 0.99, "grad_norm": 0.9321136127621876, "learning_rate": 5.765478632959731e-09, "loss": 0.4823, "step": 12175 }, { "epoch": 0.99, "grad_norm": 0.89851925044116, "learning_rate": 5.676446254488932e-09, "loss": 0.4719, "step": 12176 }, { "epoch": 0.99, "grad_norm": 1.0085946206564493, "learning_rate": 5.588106473775057e-09, "loss": 0.5553, "step": 12177 }, { "epoch": 0.99, "grad_norm": 0.8649896612244649, "learning_rate": 5.500459296939875e-09, "loss": 0.4842, "step": 12178 }, { "epoch": 0.99, "grad_norm": 0.9366458571588505, "learning_rate": 5.413504730058527e-09, "loss": 0.4386, "step": 12179 }, { "epoch": 0.99, "grad_norm": 0.8616063953962477, "learning_rate": 5.327242779156194e-09, "loss": 0.485, "step": 12180 }, { "epoch": 0.99, "grad_norm": 0.947643360116589, "learning_rate": 5.2416734502103165e-09, "loss": 0.4673, "step": 12181 }, { "epoch": 0.99, "grad_norm": 1.5494313108108435, "learning_rate": 5.156796749150595e-09, "loss": 0.458, "step": 12182 }, { "epoch": 0.99, "grad_norm": 0.9505065847530575, "learning_rate": 5.0726126818601e-09, "loss": 0.456, "step": 12183 }, { "epoch": 0.99, "grad_norm": 0.9026091368438374, "learning_rate": 4.989121254171947e-09, "loss": 0.4619, "step": 12184 }, { "epoch": 0.99, "grad_norm": 0.9139503054391478, "learning_rate": 4.9063224718726154e-09, "loss": 0.4758, "step": 12185 }, { "epoch": 0.99, "grad_norm": 1.0371267800898043, "learning_rate": 4.824216340698629e-09, "loss": 0.5083, "step": 12186 }, { "epoch": 0.99, "grad_norm": 0.9140285052535965, "learning_rate": 4.74280286634099e-09, "loss": 0.4906, "step": 12187 }, { "epoch": 0.99, "grad_norm": 1.0079408181761793, "learning_rate": 4.662082054441852e-09, "loss": 0.5653, "step": 12188 }, { "epoch": 0.99, "grad_norm": 0.9830808916635849, "learning_rate": 4.582053910594519e-09, "loss": 0.4774, "step": 12189 }, { "epoch": 0.99, "grad_norm": 0.9077023704770761, "learning_rate": 4.502718440344556e-09, "loss": 0.4558, "step": 12190 }, { "epoch": 0.99, "grad_norm": 0.7935871680265658, "learning_rate": 4.4240756491897854e-09, "loss": 0.4481, "step": 12191 }, { "epoch": 0.99, "grad_norm": 0.8963596426210041, "learning_rate": 4.346125542581403e-09, "loss": 0.4413, "step": 12192 }, { "epoch": 0.99, "grad_norm": 0.9176413396869422, "learning_rate": 4.268868125919534e-09, "loss": 0.4847, "step": 12193 }, { "epoch": 0.99, "grad_norm": 0.8930937176633225, "learning_rate": 4.192303404559894e-09, "loss": 0.4818, "step": 12194 }, { "epoch": 0.99, "grad_norm": 0.9774095396399091, "learning_rate": 4.1164313838060184e-09, "loss": 0.4618, "step": 12195 }, { "epoch": 0.99, "grad_norm": 0.9304736319360605, "learning_rate": 4.041252068918145e-09, "loss": 0.469, "step": 12196 }, { "epoch": 0.99, "grad_norm": 1.0330209884154204, "learning_rate": 3.966765465105438e-09, "loss": 0.4876, "step": 12197 }, { "epoch": 0.99, "grad_norm": 0.8581916788578621, "learning_rate": 3.892971577528215e-09, "loss": 0.4076, "step": 12198 }, { "epoch": 0.99, "grad_norm": 1.018750336762057, "learning_rate": 3.819870411302385e-09, "loss": 0.5133, "step": 12199 }, { "epoch": 0.99, "grad_norm": 0.9301758200353073, "learning_rate": 3.747461971492783e-09, "loss": 0.4803, "step": 12200 }, { "epoch": 0.99, "grad_norm": 0.9524198887721386, "learning_rate": 3.6757462631176186e-09, "loss": 0.5102, "step": 12201 }, { "epoch": 0.99, "grad_norm": 0.9998380514117441, "learning_rate": 3.6047232911462506e-09, "loss": 0.4627, "step": 12202 }, { "epoch": 0.99, "grad_norm": 0.9303895789749116, "learning_rate": 3.5343930605002964e-09, "loss": 0.4983, "step": 12203 }, { "epoch": 0.99, "grad_norm": 0.9814676213506741, "learning_rate": 3.4647555760547456e-09, "loss": 0.5019, "step": 12204 }, { "epoch": 0.99, "grad_norm": 0.8899742237254751, "learning_rate": 3.3958108426346283e-09, "loss": 0.4679, "step": 12205 }, { "epoch": 0.99, "grad_norm": 0.9854215413848316, "learning_rate": 3.327558865017233e-09, "loss": 0.4697, "step": 12206 }, { "epoch": 0.99, "grad_norm": 0.9949671671252079, "learning_rate": 3.259999647933221e-09, "loss": 0.4586, "step": 12207 }, { "epoch": 0.99, "grad_norm": 0.9767718153567564, "learning_rate": 3.193133196064402e-09, "loss": 0.5054, "step": 12208 }, { "epoch": 0.99, "grad_norm": 1.0785275131904468, "learning_rate": 3.126959514043737e-09, "loss": 0.5084, "step": 12209 }, { "epoch": 0.99, "grad_norm": 0.8674149722017763, "learning_rate": 3.0614786064586676e-09, "loss": 0.4421, "step": 12210 }, { "epoch": 0.99, "grad_norm": 0.9315561827978559, "learning_rate": 2.996690477844455e-09, "loss": 0.4992, "step": 12211 }, { "epoch": 0.99, "grad_norm": 0.9269373342663652, "learning_rate": 2.9325951326930614e-09, "loss": 0.5106, "step": 12212 }, { "epoch": 0.99, "grad_norm": 1.0034662424592324, "learning_rate": 2.8691925754453785e-09, "loss": 0.5164, "step": 12213 }, { "epoch": 0.99, "grad_norm": 0.9529358562010914, "learning_rate": 2.8064828104956697e-09, "loss": 0.4973, "step": 12214 }, { "epoch": 0.99, "grad_norm": 1.0100316056822873, "learning_rate": 2.7444658421882375e-09, "loss": 0.516, "step": 12215 }, { "epoch": 0.99, "grad_norm": 0.947131493385448, "learning_rate": 2.6831416748229755e-09, "loss": 0.4978, "step": 12216 }, { "epoch": 0.99, "grad_norm": 1.013465954899412, "learning_rate": 2.622510312647597e-09, "loss": 0.4745, "step": 12217 }, { "epoch": 0.99, "grad_norm": 1.0572863965813828, "learning_rate": 2.5625717598642962e-09, "loss": 0.5839, "step": 12218 }, { "epoch": 0.99, "grad_norm": 0.8450663723277408, "learning_rate": 2.5033260206275277e-09, "loss": 0.4414, "step": 12219 }, { "epoch": 0.99, "grad_norm": 0.9349043179917564, "learning_rate": 2.4447730990428964e-09, "loss": 0.4565, "step": 12220 }, { "epoch": 0.99, "grad_norm": 0.8921858303213847, "learning_rate": 2.386912999167157e-09, "loss": 0.4308, "step": 12221 }, { "epoch": 0.99, "grad_norm": 1.1106808138466342, "learning_rate": 2.329745725010435e-09, "loss": 0.5236, "step": 12222 }, { "epoch": 0.99, "grad_norm": 0.9107858977787934, "learning_rate": 2.273271280534006e-09, "loss": 0.4344, "step": 12223 }, { "epoch": 0.99, "grad_norm": 1.0104185102825205, "learning_rate": 2.217489669652517e-09, "loss": 0.5316, "step": 12224 }, { "epoch": 0.99, "grad_norm": 0.9485027710249507, "learning_rate": 2.1624008962306543e-09, "loss": 0.4754, "step": 12225 }, { "epoch": 0.99, "grad_norm": 0.886480688086442, "learning_rate": 2.108004964086474e-09, "loss": 0.4888, "step": 12226 }, { "epoch": 0.99, "grad_norm": 0.937058241545652, "learning_rate": 2.0543018769902946e-09, "loss": 0.4611, "step": 12227 }, { "epoch": 0.99, "grad_norm": 0.9598546147109381, "learning_rate": 2.0012916386613625e-09, "loss": 0.4744, "step": 12228 }, { "epoch": 0.99, "grad_norm": 0.974531542912316, "learning_rate": 1.9489742527756263e-09, "loss": 0.5349, "step": 12229 }, { "epoch": 0.99, "grad_norm": 0.9519705243013669, "learning_rate": 1.8973497229568537e-09, "loss": 0.4761, "step": 12230 }, { "epoch": 0.99, "grad_norm": 0.9238988769473946, "learning_rate": 1.8464180527844044e-09, "loss": 0.4833, "step": 12231 }, { "epoch": 0.99, "grad_norm": 1.0111873319625468, "learning_rate": 1.7961792457865668e-09, "loss": 0.5243, "step": 12232 }, { "epoch": 0.99, "grad_norm": 0.967548228009322, "learning_rate": 1.7466333054450001e-09, "loss": 0.5089, "step": 12233 }, { "epoch": 0.99, "grad_norm": 0.918909362153841, "learning_rate": 1.6977802351936246e-09, "loss": 0.4543, "step": 12234 }, { "epoch": 0.99, "grad_norm": 0.7785482161984433, "learning_rate": 1.6496200384163996e-09, "loss": 0.3861, "step": 12235 }, { "epoch": 0.99, "grad_norm": 0.9384527289121802, "learning_rate": 1.6021527184528761e-09, "loss": 0.4751, "step": 12236 }, { "epoch": 0.99, "grad_norm": 0.9505856087988792, "learning_rate": 1.555378278591535e-09, "loss": 0.4957, "step": 12237 }, { "epoch": 0.99, "grad_norm": 0.9896863520416174, "learning_rate": 1.5092967220742272e-09, "loss": 0.4905, "step": 12238 }, { "epoch": 0.99, "grad_norm": 1.0768453855263935, "learning_rate": 1.4639080520939541e-09, "loss": 0.5181, "step": 12239 }, { "epoch": 0.99, "grad_norm": 1.0133044427198161, "learning_rate": 1.4192122717959777e-09, "loss": 0.5047, "step": 12240 }, { "epoch": 0.99, "grad_norm": 0.9404999214974434, "learning_rate": 1.3752093842778204e-09, "loss": 0.4799, "step": 12241 }, { "epoch": 0.99, "grad_norm": 1.0552931054831358, "learning_rate": 1.3318993925881541e-09, "loss": 0.5162, "step": 12242 }, { "epoch": 1.0, "grad_norm": 1.0552380665620096, "learning_rate": 1.2892822997301324e-09, "loss": 0.5035, "step": 12243 }, { "epoch": 1.0, "grad_norm": 0.9391633731274371, "learning_rate": 1.2473581086558383e-09, "loss": 0.4921, "step": 12244 }, { "epoch": 1.0, "grad_norm": 1.0052164661643428, "learning_rate": 1.2061268222707257e-09, "loss": 0.4663, "step": 12245 }, { "epoch": 1.0, "grad_norm": 0.9722915792156718, "learning_rate": 1.165588443431398e-09, "loss": 0.4943, "step": 12246 }, { "epoch": 1.0, "grad_norm": 0.9102927236377634, "learning_rate": 1.12574297494783e-09, "loss": 0.4471, "step": 12247 }, { "epoch": 1.0, "grad_norm": 0.9398310361694413, "learning_rate": 1.0865904195822563e-09, "loss": 0.4707, "step": 12248 }, { "epoch": 1.0, "grad_norm": 0.808454573445147, "learning_rate": 1.048130780046952e-09, "loss": 0.398, "step": 12249 }, { "epoch": 1.0, "grad_norm": 0.9746782424320476, "learning_rate": 1.0103640590064524e-09, "loss": 0.5442, "step": 12250 }, { "epoch": 1.0, "grad_norm": 0.9649576296411937, "learning_rate": 9.73290259078663e-10, "loss": 0.5274, "step": 12251 }, { "epoch": 1.0, "grad_norm": 0.8107375447971296, "learning_rate": 9.369093828326403e-10, "loss": 0.431, "step": 12252 }, { "epoch": 1.0, "grad_norm": 1.0699241121892324, "learning_rate": 9.012214327897006e-10, "loss": 0.5015, "step": 12253 }, { "epoch": 1.0, "grad_norm": 0.905378821625332, "learning_rate": 8.662264114234209e-10, "loss": 0.4383, "step": 12254 }, { "epoch": 1.0, "grad_norm": 1.0107933303670742, "learning_rate": 8.319243211585281e-10, "loss": 0.4994, "step": 12255 }, { "epoch": 1.0, "grad_norm": 0.8628225975921563, "learning_rate": 7.983151643708997e-10, "loss": 0.4456, "step": 12256 }, { "epoch": 1.0, "grad_norm": 1.0056042550071769, "learning_rate": 7.653989433920039e-10, "loss": 0.5566, "step": 12257 }, { "epoch": 1.0, "grad_norm": 1.0487775395495176, "learning_rate": 7.331756605011286e-10, "loss": 0.511, "step": 12258 }, { "epoch": 1.0, "grad_norm": 0.9034474611671817, "learning_rate": 7.016453179320426e-10, "loss": 0.4756, "step": 12259 }, { "epoch": 1.0, "grad_norm": 0.860531555843401, "learning_rate": 6.708079178685545e-10, "loss": 0.4613, "step": 12260 }, { "epoch": 1.0, "grad_norm": 0.894604087497659, "learning_rate": 6.40663462450064e-10, "loss": 0.4818, "step": 12261 }, { "epoch": 1.0, "grad_norm": 0.956638263621099, "learning_rate": 6.112119537637906e-10, "loss": 0.4995, "step": 12262 }, { "epoch": 1.0, "grad_norm": 0.950141944738411, "learning_rate": 5.82453393850324e-10, "loss": 0.4644, "step": 12263 }, { "epoch": 1.0, "grad_norm": 0.9197142556995642, "learning_rate": 5.54387784703625e-10, "loss": 0.4667, "step": 12264 }, { "epoch": 1.0, "grad_norm": 0.9409690029323907, "learning_rate": 5.270151282688041e-10, "loss": 0.4442, "step": 12265 }, { "epoch": 1.0, "grad_norm": 0.9224085642149458, "learning_rate": 5.003354264421223e-10, "loss": 0.4995, "step": 12266 }, { "epoch": 1.0, "grad_norm": 0.9039475339596145, "learning_rate": 4.743486810732111e-10, "loss": 0.4782, "step": 12267 }, { "epoch": 1.0, "grad_norm": 0.9500554852385881, "learning_rate": 4.490548939617423e-10, "loss": 0.5374, "step": 12268 }, { "epoch": 1.0, "grad_norm": 1.021632409812602, "learning_rate": 4.2445406686075776e-10, "loss": 0.5037, "step": 12269 }, { "epoch": 1.0, "grad_norm": 0.9019301685248371, "learning_rate": 4.005462014766703e-10, "loss": 0.4847, "step": 12270 }, { "epoch": 1.0, "grad_norm": 1.0448459387319498, "learning_rate": 3.7733129946371237e-10, "loss": 0.5018, "step": 12271 }, { "epoch": 1.0, "grad_norm": 0.9567017288240108, "learning_rate": 3.548093624328175e-10, "loss": 0.5002, "step": 12272 }, { "epoch": 1.0, "grad_norm": 0.9393370125389402, "learning_rate": 3.32980391943849e-10, "loss": 0.5022, "step": 12273 }, { "epoch": 1.0, "grad_norm": 1.0039457861354868, "learning_rate": 3.118443895100409e-10, "loss": 0.4833, "step": 12274 }, { "epoch": 1.0, "grad_norm": 0.876708231374574, "learning_rate": 2.914013565957774e-10, "loss": 0.4747, "step": 12275 }, { "epoch": 1.0, "grad_norm": 1.0483753373478573, "learning_rate": 2.716512946165928e-10, "loss": 0.5458, "step": 12276 }, { "epoch": 1.0, "grad_norm": 5.854264871286635, "learning_rate": 2.525942049436125e-10, "loss": 0.4194, "step": 12277 }, { "epoch": 1.0, "grad_norm": 0.9330203416041252, "learning_rate": 2.3423008889467134e-10, "loss": 0.4675, "step": 12278 }, { "epoch": 1.0, "grad_norm": 0.928285603265314, "learning_rate": 2.165589477443053e-10, "loss": 0.4496, "step": 12279 }, { "epoch": 1.0, "grad_norm": 0.896315861070505, "learning_rate": 1.9958078271709037e-10, "loss": 0.5164, "step": 12280 }, { "epoch": 1.0, "grad_norm": 0.9970984789464427, "learning_rate": 1.8329559498875272e-10, "loss": 0.4903, "step": 12281 }, { "epoch": 1.0, "grad_norm": 0.9862019097831741, "learning_rate": 1.6770338568838918e-10, "loss": 0.4294, "step": 12282 }, { "epoch": 1.0, "grad_norm": 0.8625424437572193, "learning_rate": 1.5280415589624676e-10, "loss": 0.4841, "step": 12283 }, { "epoch": 1.0, "grad_norm": 2.6996509177567276, "learning_rate": 1.3859790664483287e-10, "loss": 0.5431, "step": 12284 }, { "epoch": 1.0, "grad_norm": 0.983653006981926, "learning_rate": 1.250846389189153e-10, "loss": 0.4933, "step": 12285 }, { "epoch": 1.0, "grad_norm": 0.9691698910333484, "learning_rate": 1.1226435365441212e-10, "loss": 0.478, "step": 12286 }, { "epoch": 1.0, "grad_norm": 0.8999462139403965, "learning_rate": 1.0013705174061195e-10, "loss": 0.4665, "step": 12287 }, { "epoch": 1.0, "grad_norm": 0.9701615848783587, "learning_rate": 8.870273401684338e-11, "loss": 0.469, "step": 12288 }, { "epoch": 1.0, "grad_norm": 0.9788648850371204, "learning_rate": 7.796140127691587e-11, "loss": 0.4688, "step": 12289 }, { "epoch": 1.0, "grad_norm": 0.9041305602197512, "learning_rate": 6.791305426356865e-11, "loss": 0.4562, "step": 12290 }, { "epoch": 1.0, "grad_norm": 0.9908625759964073, "learning_rate": 5.855769367402176e-11, "loss": 0.5208, "step": 12291 }, { "epoch": 1.0, "grad_norm": 0.9195748995284373, "learning_rate": 4.9895320156645445e-11, "loss": 0.407, "step": 12292 }, { "epoch": 1.0, "grad_norm": 1.0408210431502338, "learning_rate": 4.192593431096015e-11, "loss": 0.5351, "step": 12293 }, { "epoch": 1.0, "grad_norm": 0.9092443947587706, "learning_rate": 3.4649536690967154e-11, "loss": 0.4844, "step": 12294 }, { "epoch": 1.0, "grad_norm": 0.9725729468080113, "learning_rate": 2.8066127798487274e-11, "loss": 0.5036, "step": 12295 }, { "epoch": 1.0, "grad_norm": 0.9842735837618908, "learning_rate": 2.2175708092042615e-11, "loss": 0.5061, "step": 12296 }, { "epoch": 1.0, "grad_norm": 0.9175628272236795, "learning_rate": 1.6978277979085023e-11, "loss": 0.4502, "step": 12297 }, { "epoch": 1.0, "grad_norm": 1.0374820658516282, "learning_rate": 1.2473837819326762e-11, "loss": 0.4847, "step": 12298 }, { "epoch": 1.0, "grad_norm": 0.8909161564068082, "learning_rate": 8.662387924740501e-12, "loss": 0.4672, "step": 12299 }, { "epoch": 1.0, "grad_norm": 0.9415518726489468, "learning_rate": 5.54392855955932e-12, "loss": 0.4625, "step": 12300 }, { "epoch": 1.0, "grad_norm": 0.9500390955938162, "learning_rate": 3.118459941386931e-12, "loss": 0.4868, "step": 12301 }, { "epoch": 1.0, "grad_norm": 0.922814982524896, "learning_rate": 1.3859822356465657e-12, "loss": 0.5076, "step": 12302 }, { "epoch": 1.0, "grad_norm": 1.0370158785381784, "learning_rate": 3.4649556446275656e-13, "loss": 0.5309, "step": 12303 }, { "epoch": 1.0, "grad_norm": 0.844434207744529, "learning_rate": 0.0, "loss": 0.4437, "step": 12304 }, { "epoch": 1.0, "step": 12304, "total_flos": 1.5084962346172416e+16, "train_loss": 0.5391508782323648, "train_runtime": 261842.5175, "train_samples_per_second": 6.015, "train_steps_per_second": 0.047 } ], "logging_steps": 1.0, "max_steps": 12304, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1846, "total_flos": 1.5084962346172416e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }