{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 432, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 7.692307692307694e-07, "loss": 2.065, "step": 1 }, { "epoch": 0.01, "learning_rate": 1.5384615384615387e-06, "loss": 2.0734, "step": 2 }, { "epoch": 0.02, "learning_rate": 2.307692307692308e-06, "loss": 2.0549, "step": 3 }, { "epoch": 0.03, "learning_rate": 3.0769230769230774e-06, "loss": 2.0151, "step": 4 }, { "epoch": 0.03, "learning_rate": 3.846153846153847e-06, "loss": 1.997, "step": 5 }, { "epoch": 0.04, "learning_rate": 4.615384615384616e-06, "loss": 1.9595, "step": 6 }, { "epoch": 0.05, "learning_rate": 5.384615384615385e-06, "loss": 1.9403, "step": 7 }, { "epoch": 0.06, "learning_rate": 6.153846153846155e-06, "loss": 1.9314, "step": 8 }, { "epoch": 0.06, "learning_rate": 6.923076923076923e-06, "loss": 1.9198, "step": 9 }, { "epoch": 0.07, "learning_rate": 7.692307692307694e-06, "loss": 1.9514, "step": 10 }, { "epoch": 0.08, "learning_rate": 8.461538461538462e-06, "loss": 1.9133, "step": 11 }, { "epoch": 0.08, "learning_rate": 9.230769230769232e-06, "loss": 1.9148, "step": 12 }, { "epoch": 0.09, "learning_rate": 1e-05, "loss": 1.896, "step": 13 }, { "epoch": 0.1, "learning_rate": 9.999859456853116e-06, "loss": 1.8821, "step": 14 }, { "epoch": 0.1, "learning_rate": 9.99943783531341e-06, "loss": 1.8483, "step": 15 }, { "epoch": 0.11, "learning_rate": 9.998735159083295e-06, "loss": 1.8198, "step": 16 }, { "epoch": 0.12, "learning_rate": 9.997751467665295e-06, "loss": 1.8479, "step": 17 }, { "epoch": 0.12, "learning_rate": 9.996486816359851e-06, "loss": 1.8472, "step": 18 }, { "epoch": 0.13, "learning_rate": 9.994941276262188e-06, "loss": 1.8071, "step": 19 }, { "epoch": 0.14, "learning_rate": 9.99311493425834e-06, "loss": 1.8517, "step": 20 }, { "epoch": 0.15, "learning_rate": 9.991007893020242e-06, "loss": 1.816, "step": 21 }, { "epoch": 0.15, "learning_rate": 9.98862027099998e-06, "loss": 1.8063, "step": 22 }, { "epoch": 0.16, "learning_rate": 9.985952202423116e-06, "loss": 1.8017, "step": 23 }, { "epoch": 0.17, "learning_rate": 9.983003837281152e-06, "loss": 1.8223, "step": 24 }, { "epoch": 0.17, "learning_rate": 9.979775341323097e-06, "loss": 1.8145, "step": 25 }, { "epoch": 0.18, "learning_rate": 9.976266896046143e-06, "loss": 1.8268, "step": 26 }, { "epoch": 0.19, "learning_rate": 9.972478698685463e-06, "loss": 1.7692, "step": 27 }, { "epoch": 0.19, "learning_rate": 9.968410962203131e-06, "loss": 1.773, "step": 28 }, { "epoch": 0.2, "learning_rate": 9.964063915276141e-06, "loss": 1.7792, "step": 29 }, { "epoch": 0.21, "learning_rate": 9.959437802283552e-06, "loss": 1.7774, "step": 30 }, { "epoch": 0.22, "learning_rate": 9.954532883292761e-06, "loss": 1.7651, "step": 31 }, { "epoch": 0.22, "learning_rate": 9.949349434044862e-06, "loss": 1.8011, "step": 32 }, { "epoch": 0.23, "learning_rate": 9.943887745939164e-06, "loss": 1.7864, "step": 33 }, { "epoch": 0.24, "learning_rate": 9.938148126016805e-06, "loss": 1.7789, "step": 34 }, { "epoch": 0.24, "learning_rate": 9.932130896943477e-06, "loss": 1.7514, "step": 35 }, { "epoch": 0.25, "learning_rate": 9.925836396991309e-06, "loss": 1.7729, "step": 36 }, { "epoch": 0.26, "learning_rate": 9.919264980019829e-06, "loss": 1.7676, "step": 37 }, { "epoch": 0.26, "learning_rate": 9.912417015456088e-06, "loss": 1.7933, "step": 38 }, { "epoch": 0.27, "learning_rate": 9.905292888273883e-06, "loss": 1.7307, "step": 39 }, { "epoch": 0.28, "learning_rate": 9.897892998972113e-06, "loss": 1.7404, "step": 40 }, { "epoch": 0.28, "learning_rate": 9.89021776355227e-06, "loss": 1.7741, "step": 41 }, { "epoch": 0.29, "learning_rate": 9.882267613495049e-06, "loss": 1.7601, "step": 42 }, { "epoch": 0.3, "learning_rate": 9.874042995736095e-06, "loss": 1.7748, "step": 43 }, { "epoch": 0.31, "learning_rate": 9.865544372640872e-06, "loss": 1.7497, "step": 44 }, { "epoch": 0.31, "learning_rate": 9.85677222197867e-06, "loss": 1.7459, "step": 45 }, { "epoch": 0.32, "learning_rate": 9.847727036895759e-06, "loss": 1.7574, "step": 46 }, { "epoch": 0.33, "learning_rate": 9.838409325887643e-06, "loss": 1.7503, "step": 47 }, { "epoch": 0.33, "learning_rate": 9.828819612770497e-06, "loss": 1.7564, "step": 48 }, { "epoch": 0.34, "learning_rate": 9.818958436651704e-06, "loss": 1.7693, "step": 49 }, { "epoch": 0.35, "learning_rate": 9.808826351899551e-06, "loss": 1.7723, "step": 50 }, { "epoch": 0.35, "learning_rate": 9.798423928112071e-06, "loss": 1.7767, "step": 51 }, { "epoch": 0.36, "learning_rate": 9.787751750085014e-06, "loss": 1.7676, "step": 52 }, { "epoch": 0.37, "learning_rate": 9.77681041777897e-06, "loss": 1.7143, "step": 53 }, { "epoch": 0.38, "learning_rate": 9.765600546285654e-06, "loss": 1.7606, "step": 54 }, { "epoch": 0.38, "learning_rate": 9.754122765793306e-06, "loss": 1.7771, "step": 55 }, { "epoch": 0.39, "learning_rate": 9.742377721551286e-06, "loss": 1.7835, "step": 56 }, { "epoch": 0.4, "learning_rate": 9.730366073833785e-06, "loss": 1.7591, "step": 57 }, { "epoch": 0.4, "learning_rate": 9.718088497902709e-06, "loss": 1.7789, "step": 58 }, { "epoch": 0.41, "learning_rate": 9.705545683969722e-06, "loss": 1.7526, "step": 59 }, { "epoch": 0.42, "learning_rate": 9.692738337157441e-06, "loss": 1.7449, "step": 60 }, { "epoch": 0.42, "learning_rate": 9.679667177459794e-06, "loss": 1.7617, "step": 61 }, { "epoch": 0.43, "learning_rate": 9.66633293970155e-06, "loss": 1.7585, "step": 62 }, { "epoch": 0.44, "learning_rate": 9.652736373497001e-06, "loss": 1.764, "step": 63 }, { "epoch": 0.44, "learning_rate": 9.63887824320783e-06, "loss": 1.7151, "step": 64 }, { "epoch": 0.45, "learning_rate": 9.624759327900131e-06, "loss": 1.7299, "step": 65 }, { "epoch": 0.46, "learning_rate": 9.610380421300623e-06, "loss": 1.7642, "step": 66 }, { "epoch": 0.47, "learning_rate": 9.595742331752014e-06, "loss": 1.7522, "step": 67 }, { "epoch": 0.47, "learning_rate": 9.580845882167574e-06, "loss": 1.7303, "step": 68 }, { "epoch": 0.48, "learning_rate": 9.565691909984864e-06, "loss": 1.7423, "step": 69 }, { "epoch": 0.49, "learning_rate": 9.550281267118659e-06, "loss": 1.7434, "step": 70 }, { "epoch": 0.49, "learning_rate": 9.534614819913056e-06, "loss": 1.6931, "step": 71 }, { "epoch": 0.5, "learning_rate": 9.518693449092772e-06, "loss": 1.7438, "step": 72 }, { "epoch": 0.51, "learning_rate": 9.502518049713633e-06, "loss": 1.6983, "step": 73 }, { "epoch": 0.51, "learning_rate": 9.486089531112247e-06, "loss": 1.7422, "step": 74 }, { "epoch": 0.52, "learning_rate": 9.469408816854898e-06, "loss": 1.7321, "step": 75 }, { "epoch": 0.53, "learning_rate": 9.452476844685611e-06, "loss": 1.7252, "step": 76 }, { "epoch": 0.53, "learning_rate": 9.435294566473453e-06, "loss": 1.6962, "step": 77 }, { "epoch": 0.54, "learning_rate": 9.417862948158997e-06, "loss": 1.7193, "step": 78 }, { "epoch": 0.55, "learning_rate": 9.40018296970005e-06, "loss": 1.697, "step": 79 }, { "epoch": 0.56, "learning_rate": 9.382255625016527e-06, "loss": 1.7596, "step": 80 }, { "epoch": 0.56, "learning_rate": 9.364081921934607e-06, "loss": 1.7381, "step": 81 }, { "epoch": 0.57, "learning_rate": 9.345662882130056e-06, "loss": 1.7395, "step": 82 }, { "epoch": 0.58, "learning_rate": 9.326999541070804e-06, "loss": 1.7373, "step": 83 }, { "epoch": 0.58, "learning_rate": 9.308092947958725e-06, "loss": 1.7659, "step": 84 }, { "epoch": 0.59, "learning_rate": 9.288944165670651e-06, "loss": 1.7416, "step": 85 }, { "epoch": 0.6, "learning_rate": 9.269554270698636e-06, "loss": 1.7444, "step": 86 }, { "epoch": 0.6, "learning_rate": 9.24992435308942e-06, "loss": 1.7693, "step": 87 }, { "epoch": 0.61, "learning_rate": 9.23005551638316e-06, "loss": 1.7481, "step": 88 }, { "epoch": 0.62, "learning_rate": 9.209948877551393e-06, "loss": 1.7063, "step": 89 }, { "epoch": 0.62, "learning_rate": 9.189605566934235e-06, "loss": 1.7348, "step": 90 }, { "epoch": 0.63, "learning_rate": 9.169026728176845e-06, "loss": 1.756, "step": 91 }, { "epoch": 0.64, "learning_rate": 9.148213518165121e-06, "loss": 1.7177, "step": 92 }, { "epoch": 0.65, "learning_rate": 9.127167106960682e-06, "loss": 1.7138, "step": 93 }, { "epoch": 0.65, "learning_rate": 9.105888677735069e-06, "loss": 1.7358, "step": 94 }, { "epoch": 0.66, "learning_rate": 9.084379426703245e-06, "loss": 1.7359, "step": 95 }, { "epoch": 0.67, "learning_rate": 9.062640563056339e-06, "loss": 1.6954, "step": 96 }, { "epoch": 0.67, "learning_rate": 9.040673308893677e-06, "loss": 1.7139, "step": 97 }, { "epoch": 0.68, "learning_rate": 9.018478899154068e-06, "loss": 1.7276, "step": 98 }, { "epoch": 0.69, "learning_rate": 8.996058581546386e-06, "loss": 1.7083, "step": 99 }, { "epoch": 0.69, "learning_rate": 8.973413616479429e-06, "loss": 1.6832, "step": 100 }, { "epoch": 0.7, "learning_rate": 8.95054527699106e-06, "loss": 1.7406, "step": 101 }, { "epoch": 0.71, "learning_rate": 8.927454848676633e-06, "loss": 1.7201, "step": 102 }, { "epoch": 0.72, "learning_rate": 8.904143629616735e-06, "loss": 1.7369, "step": 103 }, { "epoch": 0.72, "learning_rate": 8.880612930304196e-06, "loss": 1.6823, "step": 104 }, { "epoch": 0.73, "learning_rate": 8.856864073570429e-06, "loss": 1.7197, "step": 105 }, { "epoch": 0.74, "learning_rate": 8.83289839451106e-06, "loss": 1.6927, "step": 106 }, { "epoch": 0.74, "learning_rate": 8.80871724041087e-06, "loss": 1.7568, "step": 107 }, { "epoch": 0.75, "learning_rate": 8.784321970668054e-06, "loss": 1.7227, "step": 108 }, { "epoch": 0.76, "learning_rate": 8.759713956717804e-06, "loss": 1.7387, "step": 109 }, { "epoch": 0.76, "learning_rate": 8.734894581955208e-06, "loss": 1.7293, "step": 110 }, { "epoch": 0.77, "learning_rate": 8.70986524165748e-06, "loss": 1.7002, "step": 111 }, { "epoch": 0.78, "learning_rate": 8.684627342905519e-06, "loss": 1.715, "step": 112 }, { "epoch": 0.78, "learning_rate": 8.65918230450481e-06, "loss": 1.7531, "step": 113 }, { "epoch": 0.79, "learning_rate": 8.63353155690566e-06, "loss": 1.6997, "step": 114 }, { "epoch": 0.8, "learning_rate": 8.607676542122782e-06, "loss": 1.7396, "step": 115 }, { "epoch": 0.81, "learning_rate": 8.581618713654239e-06, "loss": 1.7285, "step": 116 }, { "epoch": 0.81, "learning_rate": 8.55535953639971e-06, "loss": 1.741, "step": 117 }, { "epoch": 0.82, "learning_rate": 8.528900486578158e-06, "loss": 1.7045, "step": 118 }, { "epoch": 0.83, "learning_rate": 8.502243051644838e-06, "loss": 1.6933, "step": 119 }, { "epoch": 0.83, "learning_rate": 8.475388730207662e-06, "loss": 1.7161, "step": 120 }, { "epoch": 0.84, "learning_rate": 8.44833903194297e-06, "loss": 1.7186, "step": 121 }, { "epoch": 0.85, "learning_rate": 8.421095477510648e-06, "loss": 1.706, "step": 122 }, { "epoch": 0.85, "learning_rate": 8.393659598468644e-06, "loss": 1.7607, "step": 123 }, { "epoch": 0.86, "learning_rate": 8.366032937186869e-06, "loss": 1.7455, "step": 124 }, { "epoch": 0.87, "learning_rate": 8.33821704676049e-06, "loss": 1.7347, "step": 125 }, { "epoch": 0.88, "learning_rate": 8.310213490922616e-06, "loss": 1.6941, "step": 126 }, { "epoch": 0.88, "learning_rate": 8.282023843956392e-06, "loss": 1.7156, "step": 127 }, { "epoch": 0.89, "learning_rate": 8.253649690606495e-06, "loss": 1.7068, "step": 128 }, { "epoch": 0.9, "learning_rate": 8.225092625990047e-06, "loss": 1.731, "step": 129 }, { "epoch": 0.9, "learning_rate": 8.196354255506937e-06, "loss": 1.6977, "step": 130 }, { "epoch": 0.91, "learning_rate": 8.167436194749576e-06, "loss": 1.7639, "step": 131 }, { "epoch": 0.92, "learning_rate": 8.138340069412069e-06, "loss": 1.7265, "step": 132 }, { "epoch": 0.92, "learning_rate": 8.109067515198822e-06, "loss": 1.7195, "step": 133 }, { "epoch": 0.93, "learning_rate": 8.079620177732587e-06, "loss": 1.7243, "step": 134 }, { "epoch": 0.94, "learning_rate": 8.049999712461956e-06, "loss": 1.7081, "step": 135 }, { "epoch": 0.94, "learning_rate": 8.020207784568293e-06, "loss": 1.7023, "step": 136 }, { "epoch": 0.95, "learning_rate": 7.990246068872111e-06, "loss": 1.7047, "step": 137 }, { "epoch": 0.96, "learning_rate": 7.960116249738939e-06, "loss": 1.688, "step": 138 }, { "epoch": 0.97, "learning_rate": 7.92982002098461e-06, "loss": 1.7059, "step": 139 }, { "epoch": 0.97, "learning_rate": 7.899359085780062e-06, "loss": 1.6971, "step": 140 }, { "epoch": 0.98, "learning_rate": 7.868735156555567e-06, "loss": 1.6947, "step": 141 }, { "epoch": 0.99, "learning_rate": 7.83794995490448e-06, "loss": 1.7479, "step": 142 }, { "epoch": 0.99, "learning_rate": 7.807005211486445e-06, "loss": 1.7464, "step": 143 }, { "epoch": 1.0, "learning_rate": 7.775902665930114e-06, "loss": 1.7386, "step": 144 }, { "epoch": 1.01, "learning_rate": 7.744644066735335e-06, "loss": 1.6369, "step": 145 }, { "epoch": 1.01, "learning_rate": 7.713231171174868e-06, "loss": 1.6396, "step": 146 }, { "epoch": 1.02, "learning_rate": 7.681665745195593e-06, "loss": 1.6174, "step": 147 }, { "epoch": 1.03, "learning_rate": 7.649949563319228e-06, "loss": 1.5872, "step": 148 }, { "epoch": 1.03, "learning_rate": 7.618084408542576e-06, "loss": 1.5716, "step": 149 }, { "epoch": 1.04, "learning_rate": 7.586072072237291e-06, "loss": 1.5423, "step": 150 }, { "epoch": 1.05, "learning_rate": 7.5539143540491635e-06, "loss": 1.5086, "step": 151 }, { "epoch": 1.06, "learning_rate": 7.521613061796957e-06, "loss": 1.5367, "step": 152 }, { "epoch": 1.06, "learning_rate": 7.48917001137078e-06, "loss": 1.5399, "step": 153 }, { "epoch": 1.07, "learning_rate": 7.456587026629991e-06, "loss": 1.594, "step": 154 }, { "epoch": 1.08, "learning_rate": 7.423865939300674e-06, "loss": 1.5454, "step": 155 }, { "epoch": 1.08, "learning_rate": 7.391008588872661e-06, "loss": 1.5158, "step": 156 }, { "epoch": 1.09, "learning_rate": 7.358016822496126e-06, "loss": 1.4813, "step": 157 }, { "epoch": 1.1, "learning_rate": 7.324892494877734e-06, "loss": 1.4816, "step": 158 }, { "epoch": 1.1, "learning_rate": 7.29163746817638e-06, "loss": 1.4641, "step": 159 }, { "epoch": 1.11, "learning_rate": 7.258253611898509e-06, "loss": 1.374, "step": 160 }, { "epoch": 1.12, "learning_rate": 7.224742802793005e-06, "loss": 1.4205, "step": 161 }, { "epoch": 1.12, "learning_rate": 7.191106924745695e-06, "loss": 1.4606, "step": 162 }, { "epoch": 1.13, "learning_rate": 7.157347868673441e-06, "loss": 1.4101, "step": 163 }, { "epoch": 1.14, "learning_rate": 7.1234675324178295e-06, "loss": 1.4522, "step": 164 }, { "epoch": 1.15, "learning_rate": 7.089467820638491e-06, "loss": 1.4079, "step": 165 }, { "epoch": 1.15, "learning_rate": 7.055350644706023e-06, "loss": 1.4163, "step": 166 }, { "epoch": 1.16, "learning_rate": 7.021117922594532e-06, "loss": 1.3857, "step": 167 }, { "epoch": 1.17, "learning_rate": 6.986771578773812e-06, "loss": 1.4118, "step": 168 }, { "epoch": 1.17, "learning_rate": 6.952313544101165e-06, "loss": 1.4108, "step": 169 }, { "epoch": 1.18, "learning_rate": 6.917745755712839e-06, "loss": 1.4318, "step": 170 }, { "epoch": 1.19, "learning_rate": 6.8830701569151394e-06, "loss": 1.3488, "step": 171 }, { "epoch": 1.19, "learning_rate": 6.8482886970751785e-06, "loss": 1.3547, "step": 172 }, { "epoch": 1.2, "learning_rate": 6.81340333151128e-06, "loss": 1.3479, "step": 173 }, { "epoch": 1.21, "learning_rate": 6.7784160213830696e-06, "loss": 1.3658, "step": 174 }, { "epoch": 1.22, "learning_rate": 6.743328733581211e-06, "loss": 1.3601, "step": 175 }, { "epoch": 1.22, "learning_rate": 6.708143440616845e-06, "loss": 1.4016, "step": 176 }, { "epoch": 1.23, "learning_rate": 6.672862120510688e-06, "loss": 1.3709, "step": 177 }, { "epoch": 1.24, "learning_rate": 6.637486756681843e-06, "loss": 1.3934, "step": 178 }, { "epoch": 1.24, "learning_rate": 6.602019337836291e-06, "loss": 1.3734, "step": 179 }, { "epoch": 1.25, "learning_rate": 6.566461857855096e-06, "loss": 1.3656, "step": 180 }, { "epoch": 1.26, "learning_rate": 6.5308163156823064e-06, "loss": 1.3864, "step": 181 }, { "epoch": 1.26, "learning_rate": 6.495084715212597e-06, "loss": 1.4066, "step": 182 }, { "epoch": 1.27, "learning_rate": 6.459269065178592e-06, "loss": 1.3613, "step": 183 }, { "epoch": 1.28, "learning_rate": 6.423371379037957e-06, "loss": 1.3644, "step": 184 }, { "epoch": 1.28, "learning_rate": 6.387393674860205e-06, "loss": 1.4032, "step": 185 }, { "epoch": 1.29, "learning_rate": 6.351337975213239e-06, "loss": 1.3797, "step": 186 }, { "epoch": 1.3, "learning_rate": 6.315206307049656e-06, "loss": 1.4222, "step": 187 }, { "epoch": 1.31, "learning_rate": 6.2790007015927946e-06, "loss": 1.3548, "step": 188 }, { "epoch": 1.31, "learning_rate": 6.242723194222546e-06, "loss": 1.3757, "step": 189 }, { "epoch": 1.32, "learning_rate": 6.2063758243609275e-06, "loss": 1.3396, "step": 190 }, { "epoch": 1.33, "learning_rate": 6.169960635357437e-06, "loss": 1.3686, "step": 191 }, { "epoch": 1.33, "learning_rate": 6.133479674374176e-06, "loss": 1.3789, "step": 192 }, { "epoch": 1.34, "learning_rate": 6.0969349922707675e-06, "loss": 1.4203, "step": 193 }, { "epoch": 1.35, "learning_rate": 6.060328643489064e-06, "loss": 1.3963, "step": 194 }, { "epoch": 1.35, "learning_rate": 6.023662685937643e-06, "loss": 1.4282, "step": 195 }, { "epoch": 1.36, "learning_rate": 5.9869391808761315e-06, "loss": 1.3772, "step": 196 }, { "epoch": 1.37, "learning_rate": 5.9501601927993135e-06, "loss": 1.3101, "step": 197 }, { "epoch": 1.38, "learning_rate": 5.9133277893210785e-06, "loss": 1.3972, "step": 198 }, { "epoch": 1.38, "learning_rate": 5.8764440410581846e-06, "loss": 1.4262, "step": 199 }, { "epoch": 1.39, "learning_rate": 5.839511021513853e-06, "loss": 1.3973, "step": 200 }, { "epoch": 1.4, "learning_rate": 5.802530806961195e-06, "loss": 1.3769, "step": 201 }, { "epoch": 1.4, "learning_rate": 5.765505476326505e-06, "loss": 1.4184, "step": 202 }, { "epoch": 1.41, "learning_rate": 5.728437111072376e-06, "loss": 1.3802, "step": 203 }, { "epoch": 1.42, "learning_rate": 5.691327795080685e-06, "loss": 1.3814, "step": 204 }, { "epoch": 1.42, "learning_rate": 5.654179614535457e-06, "loss": 1.393, "step": 205 }, { "epoch": 1.43, "learning_rate": 5.616994657805566e-06, "loss": 1.3777, "step": 206 }, { "epoch": 1.44, "learning_rate": 5.579775015327347e-06, "loss": 1.4092, "step": 207 }, { "epoch": 1.44, "learning_rate": 5.5425227794870715e-06, "loss": 1.3382, "step": 208 }, { "epoch": 1.45, "learning_rate": 5.505240044503324e-06, "loss": 1.3584, "step": 209 }, { "epoch": 1.46, "learning_rate": 5.46792890630926e-06, "loss": 1.4156, "step": 210 }, { "epoch": 1.47, "learning_rate": 5.430591462434792e-06, "loss": 1.37, "step": 211 }, { "epoch": 1.47, "learning_rate": 5.393229811888663e-06, "loss": 1.36, "step": 212 }, { "epoch": 1.48, "learning_rate": 5.355846055040449e-06, "loss": 1.4008, "step": 213 }, { "epoch": 1.49, "learning_rate": 5.318442293502482e-06, "loss": 1.3849, "step": 214 }, { "epoch": 1.49, "learning_rate": 5.281020630011703e-06, "loss": 1.3328, "step": 215 }, { "epoch": 1.5, "learning_rate": 5.2435831683114515e-06, "loss": 1.3804, "step": 216 }, { "epoch": 1.51, "learning_rate": 5.206132013033199e-06, "loss": 1.3161, "step": 217 }, { "epoch": 1.51, "learning_rate": 5.1686692695782325e-06, "loss": 1.3887, "step": 218 }, { "epoch": 1.52, "learning_rate": 5.131197043999294e-06, "loss": 1.4188, "step": 219 }, { "epoch": 1.53, "learning_rate": 5.093717442882185e-06, "loss": 1.3621, "step": 220 }, { "epoch": 1.53, "learning_rate": 5.0562325732273405e-06, "loss": 1.3492, "step": 221 }, { "epoch": 1.54, "learning_rate": 5.018744542331376e-06, "loss": 1.3909, "step": 222 }, { "epoch": 1.55, "learning_rate": 4.981255457668625e-06, "loss": 1.3613, "step": 223 }, { "epoch": 1.56, "learning_rate": 4.94376742677266e-06, "loss": 1.4051, "step": 224 }, { "epoch": 1.56, "learning_rate": 4.906282557117817e-06, "loss": 1.385, "step": 225 }, { "epoch": 1.57, "learning_rate": 4.8688029560007064e-06, "loss": 1.3837, "step": 226 }, { "epoch": 1.58, "learning_rate": 4.831330730421769e-06, "loss": 1.4267, "step": 227 }, { "epoch": 1.58, "learning_rate": 4.793867986966802e-06, "loss": 1.3869, "step": 228 }, { "epoch": 1.59, "learning_rate": 4.756416831688549e-06, "loss": 1.3975, "step": 229 }, { "epoch": 1.6, "learning_rate": 4.718979369988299e-06, "loss": 1.4298, "step": 230 }, { "epoch": 1.6, "learning_rate": 4.681557706497519e-06, "loss": 1.4423, "step": 231 }, { "epoch": 1.61, "learning_rate": 4.644153944959553e-06, "loss": 1.3994, "step": 232 }, { "epoch": 1.62, "learning_rate": 4.606770188111339e-06, "loss": 1.3517, "step": 233 }, { "epoch": 1.62, "learning_rate": 4.5694085375652105e-06, "loss": 1.3984, "step": 234 }, { "epoch": 1.63, "learning_rate": 4.532071093690741e-06, "loss": 1.4148, "step": 235 }, { "epoch": 1.64, "learning_rate": 4.494759955496678e-06, "loss": 1.3898, "step": 236 }, { "epoch": 1.65, "learning_rate": 4.457477220512929e-06, "loss": 1.3715, "step": 237 }, { "epoch": 1.65, "learning_rate": 4.420224984672654e-06, "loss": 1.3819, "step": 238 }, { "epoch": 1.66, "learning_rate": 4.383005342194436e-06, "loss": 1.4257, "step": 239 }, { "epoch": 1.67, "learning_rate": 4.345820385464543e-06, "loss": 1.3365, "step": 240 }, { "epoch": 1.67, "learning_rate": 4.308672204919316e-06, "loss": 1.3555, "step": 241 }, { "epoch": 1.68, "learning_rate": 4.271562888927626e-06, "loss": 1.3831, "step": 242 }, { "epoch": 1.69, "learning_rate": 4.234494523673497e-06, "loss": 1.3866, "step": 243 }, { "epoch": 1.69, "learning_rate": 4.1974691930388055e-06, "loss": 1.3544, "step": 244 }, { "epoch": 1.7, "learning_rate": 4.16048897848615e-06, "loss": 1.3899, "step": 245 }, { "epoch": 1.71, "learning_rate": 4.123555958941817e-06, "loss": 1.3751, "step": 246 }, { "epoch": 1.72, "learning_rate": 4.0866722106789214e-06, "loss": 1.38, "step": 247 }, { "epoch": 1.72, "learning_rate": 4.049839807200688e-06, "loss": 1.3596, "step": 248 }, { "epoch": 1.73, "learning_rate": 4.013060819123869e-06, "loss": 1.3698, "step": 249 }, { "epoch": 1.74, "learning_rate": 3.976337314062358e-06, "loss": 1.3362, "step": 250 }, { "epoch": 1.74, "learning_rate": 3.9396713565109375e-06, "loss": 1.4218, "step": 251 }, { "epoch": 1.75, "learning_rate": 3.903065007729234e-06, "loss": 1.3871, "step": 252 }, { "epoch": 1.76, "learning_rate": 3.866520325625825e-06, "loss": 1.399, "step": 253 }, { "epoch": 1.76, "learning_rate": 3.830039364642566e-06, "loss": 1.4025, "step": 254 }, { "epoch": 1.77, "learning_rate": 3.7936241756390746e-06, "loss": 1.3634, "step": 255 }, { "epoch": 1.78, "learning_rate": 3.7572768057774543e-06, "loss": 1.3397, "step": 256 }, { "epoch": 1.78, "learning_rate": 3.7209992984072062e-06, "loss": 1.4236, "step": 257 }, { "epoch": 1.79, "learning_rate": 3.6847936929503446e-06, "loss": 1.387, "step": 258 }, { "epoch": 1.8, "learning_rate": 3.6486620247867625e-06, "loss": 1.4546, "step": 259 }, { "epoch": 1.81, "learning_rate": 3.6126063251397968e-06, "loss": 1.4263, "step": 260 }, { "epoch": 1.81, "learning_rate": 3.576628620962045e-06, "loss": 1.4271, "step": 261 }, { "epoch": 1.82, "learning_rate": 3.5407309348214094e-06, "loss": 1.3993, "step": 262 }, { "epoch": 1.83, "learning_rate": 3.5049152847874053e-06, "loss": 1.3685, "step": 263 }, { "epoch": 1.83, "learning_rate": 3.469183684317694e-06, "loss": 1.3821, "step": 264 }, { "epoch": 1.84, "learning_rate": 3.4335381421449056e-06, "loss": 1.3968, "step": 265 }, { "epoch": 1.85, "learning_rate": 3.39798066216371e-06, "loss": 1.3889, "step": 266 }, { "epoch": 1.85, "learning_rate": 3.3625132433181573e-06, "loss": 1.439, "step": 267 }, { "epoch": 1.86, "learning_rate": 3.327137879489313e-06, "loss": 1.4536, "step": 268 }, { "epoch": 1.87, "learning_rate": 3.2918565593831565e-06, "loss": 1.3946, "step": 269 }, { "epoch": 1.88, "learning_rate": 3.2566712664187907e-06, "loss": 1.3681, "step": 270 }, { "epoch": 1.88, "learning_rate": 3.221583978616932e-06, "loss": 1.395, "step": 271 }, { "epoch": 1.89, "learning_rate": 3.1865966684887222e-06, "loss": 1.3861, "step": 272 }, { "epoch": 1.9, "learning_rate": 3.1517113029248236e-06, "loss": 1.4264, "step": 273 }, { "epoch": 1.9, "learning_rate": 3.1169298430848605e-06, "loss": 1.372, "step": 274 }, { "epoch": 1.91, "learning_rate": 3.082254244287163e-06, "loss": 1.4616, "step": 275 }, { "epoch": 1.92, "learning_rate": 3.0476864558988364e-06, "loss": 1.4242, "step": 276 }, { "epoch": 1.92, "learning_rate": 3.0132284212261886e-06, "loss": 1.4027, "step": 277 }, { "epoch": 1.93, "learning_rate": 2.97888207740547e-06, "loss": 1.4085, "step": 278 }, { "epoch": 1.94, "learning_rate": 2.944649355293979e-06, "loss": 1.3835, "step": 279 }, { "epoch": 1.94, "learning_rate": 2.9105321793615106e-06, "loss": 1.3953, "step": 280 }, { "epoch": 1.95, "learning_rate": 2.876532467582174e-06, "loss": 1.4009, "step": 281 }, { "epoch": 1.96, "learning_rate": 2.842652131326562e-06, "loss": 1.3675, "step": 282 }, { "epoch": 1.97, "learning_rate": 2.8088930752543063e-06, "loss": 1.3713, "step": 283 }, { "epoch": 1.97, "learning_rate": 2.775257197206996e-06, "loss": 1.4089, "step": 284 }, { "epoch": 1.98, "learning_rate": 2.741746388101493e-06, "loss": 1.4123, "step": 285 }, { "epoch": 1.99, "learning_rate": 2.7083625318236213e-06, "loss": 1.4307, "step": 286 }, { "epoch": 1.99, "learning_rate": 2.6751075051222684e-06, "loss": 1.4278, "step": 287 }, { "epoch": 2.0, "learning_rate": 2.6419831775038763e-06, "loss": 1.472, "step": 288 }, { "epoch": 2.01, "learning_rate": 2.6089914111273398e-06, "loss": 1.3458, "step": 289 }, { "epoch": 2.01, "learning_rate": 2.576134060699328e-06, "loss": 1.3607, "step": 290 }, { "epoch": 2.02, "learning_rate": 2.54341297337001e-06, "loss": 1.3164, "step": 291 }, { "epoch": 2.03, "learning_rate": 2.510829988629222e-06, "loss": 1.2873, "step": 292 }, { "epoch": 2.03, "learning_rate": 2.478386938203043e-06, "loss": 1.2711, "step": 293 }, { "epoch": 2.04, "learning_rate": 2.4460856459508374e-06, "loss": 1.2507, "step": 294 }, { "epoch": 2.05, "learning_rate": 2.4139279277627113e-06, "loss": 1.201, "step": 295 }, { "epoch": 2.06, "learning_rate": 2.381915591457424e-06, "loss": 1.1929, "step": 296 }, { "epoch": 2.06, "learning_rate": 2.3500504366807743e-06, "loss": 1.2074, "step": 297 }, { "epoch": 2.07, "learning_rate": 2.3183342548044067e-06, "loss": 1.2939, "step": 298 }, { "epoch": 2.08, "learning_rate": 2.286768828825133e-06, "loss": 1.2518, "step": 299 }, { "epoch": 2.08, "learning_rate": 2.2553559332646675e-06, "loss": 1.2253, "step": 300 }, { "epoch": 2.09, "learning_rate": 2.2240973340698886e-06, "loss": 1.1937, "step": 301 }, { "epoch": 2.1, "learning_rate": 2.1929947885135567e-06, "loss": 1.1734, "step": 302 }, { "epoch": 2.1, "learning_rate": 2.1620500450955224e-06, "loss": 1.1867, "step": 303 }, { "epoch": 2.11, "learning_rate": 2.1312648434444342e-06, "loss": 1.08, "step": 304 }, { "epoch": 2.12, "learning_rate": 2.100640914219939e-06, "loss": 1.1372, "step": 305 }, { "epoch": 2.12, "learning_rate": 2.0701799790153897e-06, "loss": 1.178, "step": 306 }, { "epoch": 2.13, "learning_rate": 2.039883750261063e-06, "loss": 1.0955, "step": 307 }, { "epoch": 2.14, "learning_rate": 2.00975393112789e-06, "loss": 1.1424, "step": 308 }, { "epoch": 2.15, "learning_rate": 1.979792215431709e-06, "loss": 1.0761, "step": 309 }, { "epoch": 2.15, "learning_rate": 1.9500002875380458e-06, "loss": 1.1093, "step": 310 }, { "epoch": 2.16, "learning_rate": 1.920379822267414e-06, "loss": 1.0658, "step": 311 }, { "epoch": 2.17, "learning_rate": 1.8909324848011802e-06, "loss": 1.0853, "step": 312 }, { "epoch": 2.17, "learning_rate": 1.8616599305879334e-06, "loss": 1.0628, "step": 313 }, { "epoch": 2.18, "learning_rate": 1.8325638052504235e-06, "loss": 1.103, "step": 314 }, { "epoch": 2.19, "learning_rate": 1.8036457444930643e-06, "loss": 1.0065, "step": 315 }, { "epoch": 2.19, "learning_rate": 1.774907374009953e-06, "loss": 1.0206, "step": 316 }, { "epoch": 2.2, "learning_rate": 1.7463503093935063e-06, "loss": 1.0387, "step": 317 }, { "epoch": 2.21, "learning_rate": 1.7179761560436097e-06, "loss": 1.0256, "step": 318 }, { "epoch": 2.22, "learning_rate": 1.6897865090773858e-06, "loss": 1.0667, "step": 319 }, { "epoch": 2.22, "learning_rate": 1.661782953239512e-06, "loss": 1.1087, "step": 320 }, { "epoch": 2.23, "learning_rate": 1.6339670628131327e-06, "loss": 1.077, "step": 321 }, { "epoch": 2.24, "learning_rate": 1.6063404015313583e-06, "loss": 1.1042, "step": 322 }, { "epoch": 2.24, "learning_rate": 1.578904522489354e-06, "loss": 1.0646, "step": 323 }, { "epoch": 2.25, "learning_rate": 1.5516609680570316e-06, "loss": 1.0694, "step": 324 }, { "epoch": 2.26, "learning_rate": 1.5246112697923389e-06, "loss": 1.0964, "step": 325 }, { "epoch": 2.26, "learning_rate": 1.4977569483551634e-06, "loss": 1.0993, "step": 326 }, { "epoch": 2.27, "learning_rate": 1.471099513421842e-06, "loss": 1.0485, "step": 327 }, { "epoch": 2.28, "learning_rate": 1.4446404636002931e-06, "loss": 1.0584, "step": 328 }, { "epoch": 2.28, "learning_rate": 1.4183812863457624e-06, "loss": 1.1169, "step": 329 }, { "epoch": 2.29, "learning_rate": 1.3923234578772177e-06, "loss": 1.087, "step": 330 }, { "epoch": 2.3, "learning_rate": 1.3664684430943431e-06, "loss": 1.1103, "step": 331 }, { "epoch": 2.31, "learning_rate": 1.3408176954951912e-06, "loss": 1.0431, "step": 332 }, { "epoch": 2.31, "learning_rate": 1.315372657094483e-06, "loss": 1.0704, "step": 333 }, { "epoch": 2.32, "learning_rate": 1.29013475834252e-06, "loss": 1.0333, "step": 334 }, { "epoch": 2.33, "learning_rate": 1.265105418044793e-06, "loss": 1.0854, "step": 335 }, { "epoch": 2.33, "learning_rate": 1.2402860432821972e-06, "loss": 1.0929, "step": 336 }, { "epoch": 2.34, "learning_rate": 1.2156780293319476e-06, "loss": 1.1359, "step": 337 }, { "epoch": 2.35, "learning_rate": 1.1912827595891313e-06, "loss": 1.1184, "step": 338 }, { "epoch": 2.35, "learning_rate": 1.1671016054889407e-06, "loss": 1.1367, "step": 339 }, { "epoch": 2.36, "learning_rate": 1.1431359264295717e-06, "loss": 1.0994, "step": 340 }, { "epoch": 2.37, "learning_rate": 1.1193870696958058e-06, "loss": 1.0315, "step": 341 }, { "epoch": 2.38, "learning_rate": 1.0958563703832675e-06, "loss": 1.1019, "step": 342 }, { "epoch": 2.38, "learning_rate": 1.0725451513233676e-06, "loss": 1.142, "step": 343 }, { "epoch": 2.39, "learning_rate": 1.0494547230089413e-06, "loss": 1.1241, "step": 344 }, { "epoch": 2.4, "learning_rate": 1.0265863835205709e-06, "loss": 1.0894, "step": 345 }, { "epoch": 2.4, "learning_rate": 1.0039414184536161e-06, "loss": 1.16, "step": 346 }, { "epoch": 2.41, "learning_rate": 9.815211008459336e-07, "loss": 1.1217, "step": 347 }, { "epoch": 2.42, "learning_rate": 9.593266911063253e-07, "loss": 1.1184, "step": 348 }, { "epoch": 2.42, "learning_rate": 9.373594369436611e-07, "loss": 1.0941, "step": 349 }, { "epoch": 2.43, "learning_rate": 9.15620573296756e-07, "loss": 1.0911, "step": 350 }, { "epoch": 2.44, "learning_rate": 8.941113222649328e-07, "loss": 1.12, "step": 351 }, { "epoch": 2.44, "learning_rate": 8.728328930393188e-07, "loss": 1.0704, "step": 352 }, { "epoch": 2.45, "learning_rate": 8.517864818348804e-07, "loss": 1.0672, "step": 353 }, { "epoch": 2.46, "learning_rate": 8.309732718231578e-07, "loss": 1.1475, "step": 354 }, { "epoch": 2.47, "learning_rate": 8.103944330657665e-07, "loss": 1.0936, "step": 355 }, { "epoch": 2.47, "learning_rate": 7.900511224486085e-07, "loss": 1.0835, "step": 356 }, { "epoch": 2.48, "learning_rate": 7.699444836168413e-07, "loss": 1.1453, "step": 357 }, { "epoch": 2.49, "learning_rate": 7.500756469105819e-07, "loss": 1.1123, "step": 358 }, { "epoch": 2.49, "learning_rate": 7.304457293013656e-07, "loss": 1.0686, "step": 359 }, { "epoch": 2.5, "learning_rate": 7.1105583432935e-07, "loss": 1.1315, "step": 360 }, { "epoch": 2.51, "learning_rate": 6.919070520412768e-07, "loss": 1.0504, "step": 361 }, { "epoch": 2.51, "learning_rate": 6.730004589291961e-07, "loss": 1.1282, "step": 362 }, { "epoch": 2.52, "learning_rate": 6.543371178699442e-07, "loss": 1.164, "step": 363 }, { "epoch": 2.53, "learning_rate": 6.359180780653957e-07, "loss": 1.0805, "step": 364 }, { "epoch": 2.53, "learning_rate": 6.177443749834743e-07, "loss": 1.1122, "step": 365 }, { "epoch": 2.54, "learning_rate": 5.998170302999529e-07, "loss": 1.1417, "step": 366 }, { "epoch": 2.55, "learning_rate": 5.821370518410019e-07, "loss": 1.1123, "step": 367 }, { "epoch": 2.56, "learning_rate": 5.647054335265489e-07, "loss": 1.156, "step": 368 }, { "epoch": 2.56, "learning_rate": 5.475231553143906e-07, "loss": 1.1302, "step": 369 }, { "epoch": 2.57, "learning_rate": 5.305911831451044e-07, "loss": 1.1281, "step": 370 }, { "epoch": 2.58, "learning_rate": 5.139104688877549e-07, "loss": 1.1888, "step": 371 }, { "epoch": 2.58, "learning_rate": 4.974819502863687e-07, "loss": 1.1384, "step": 372 }, { "epoch": 2.59, "learning_rate": 4.813065509072279e-07, "loss": 1.1435, "step": 373 }, { "epoch": 2.6, "learning_rate": 4.6538518008694465e-07, "loss": 1.1718, "step": 374 }, { "epoch": 2.6, "learning_rate": 4.4971873288134237e-07, "loss": 1.2111, "step": 375 }, { "epoch": 2.61, "learning_rate": 4.343080900151375e-07, "loss": 1.1525, "step": 376 }, { "epoch": 2.62, "learning_rate": 4.1915411783242766e-07, "loss": 1.1026, "step": 377 }, { "epoch": 2.62, "learning_rate": 4.0425766824798817e-07, "loss": 1.1428, "step": 378 }, { "epoch": 2.63, "learning_rate": 3.8961957869937893e-07, "loss": 1.1742, "step": 379 }, { "epoch": 2.64, "learning_rate": 3.752406720998691e-07, "loss": 1.1439, "step": 380 }, { "epoch": 2.65, "learning_rate": 3.61121756792171e-07, "loss": 1.1181, "step": 381 }, { "epoch": 2.65, "learning_rate": 3.472636265030005e-07, "loss": 1.1227, "step": 382 }, { "epoch": 2.66, "learning_rate": 3.3366706029845097e-07, "loss": 1.1738, "step": 383 }, { "epoch": 2.67, "learning_rate": 3.2033282254020747e-07, "loss": 1.0632, "step": 384 }, { "epoch": 2.67, "learning_rate": 3.072616628425601e-07, "loss": 1.0974, "step": 385 }, { "epoch": 2.68, "learning_rate": 2.9445431603027876e-07, "loss": 1.1317, "step": 386 }, { "epoch": 2.69, "learning_rate": 2.8191150209729233e-07, "loss": 1.1371, "step": 387 }, { "epoch": 2.69, "learning_rate": 2.696339261662156e-07, "loss": 1.1154, "step": 388 }, { "epoch": 2.7, "learning_rate": 2.576222784487148e-07, "loss": 1.1395, "step": 389 }, { "epoch": 2.71, "learning_rate": 2.45877234206694e-07, "loss": 1.1419, "step": 390 }, { "epoch": 2.72, "learning_rate": 2.3439945371434792e-07, "loss": 1.135, "step": 391 }, { "epoch": 2.72, "learning_rate": 2.2318958222103004e-07, "loss": 1.1152, "step": 392 }, { "epoch": 2.73, "learning_rate": 2.1224824991498695e-07, "loss": 1.1253, "step": 393 }, { "epoch": 2.74, "learning_rate": 2.0157607188792894e-07, "loss": 1.1046, "step": 394 }, { "epoch": 2.74, "learning_rate": 1.911736481004489e-07, "loss": 1.1955, "step": 395 }, { "epoch": 2.75, "learning_rate": 1.8104156334829703e-07, "loss": 1.1563, "step": 396 }, { "epoch": 2.76, "learning_rate": 1.7118038722950313e-07, "loss": 1.1846, "step": 397 }, { "epoch": 2.76, "learning_rate": 1.615906741123574e-07, "loss": 1.1909, "step": 398 }, { "epoch": 2.77, "learning_rate": 1.5227296310424244e-07, "loss": 1.1598, "step": 399 }, { "epoch": 2.78, "learning_rate": 1.432277780213298e-07, "loss": 1.118, "step": 400 }, { "epoch": 2.78, "learning_rate": 1.3445562735912965e-07, "loss": 1.2047, "step": 401 }, { "epoch": 2.79, "learning_rate": 1.2595700426390633e-07, "loss": 1.1622, "step": 402 }, { "epoch": 2.8, "learning_rate": 1.1773238650495122e-07, "loss": 1.2584, "step": 403 }, { "epoch": 2.81, "learning_rate": 1.0978223644773134e-07, "loss": 1.2419, "step": 404 }, { "epoch": 2.81, "learning_rate": 1.0210700102788796e-07, "loss": 1.2292, "step": 405 }, { "epoch": 2.82, "learning_rate": 9.470711172611724e-08, "loss": 1.2044, "step": 406 }, { "epoch": 2.83, "learning_rate": 8.75829845439119e-08, "loss": 1.1776, "step": 407 }, { "epoch": 2.83, "learning_rate": 8.073501998017152e-08, "loss": 1.1786, "step": 408 }, { "epoch": 2.84, "learning_rate": 7.416360300869285e-08, "loss": 1.2012, "step": 409 }, { "epoch": 2.85, "learning_rate": 6.786910305652373e-08, "loss": 1.1945, "step": 410 }, { "epoch": 2.85, "learning_rate": 6.185187398319691e-08, "loss": 1.2414, "step": 411 }, { "epoch": 2.86, "learning_rate": 5.611225406083609e-08, "loss": 1.2703, "step": 412 }, { "epoch": 2.87, "learning_rate": 5.065056595513984e-08, "loss": 1.1913, "step": 413 }, { "epoch": 2.88, "learning_rate": 4.546711670724124e-08, "loss": 1.1705, "step": 414 }, { "epoch": 2.88, "learning_rate": 4.0562197716448316e-08, "loss": 1.2055, "step": 415 }, { "epoch": 2.89, "learning_rate": 3.593608472386045e-08, "loss": 1.1861, "step": 416 }, { "epoch": 2.9, "learning_rate": 3.1589037796869725e-08, "loss": 1.2427, "step": 417 }, { "epoch": 2.9, "learning_rate": 2.7521301314537564e-08, "loss": 1.1897, "step": 418 }, { "epoch": 2.91, "learning_rate": 2.373310395385797e-08, "loss": 1.2836, "step": 419 }, { "epoch": 2.92, "learning_rate": 2.022465867690282e-08, "loss": 1.2451, "step": 420 }, { "epoch": 2.92, "learning_rate": 1.699616271884752e-08, "loss": 1.2135, "step": 421 }, { "epoch": 2.93, "learning_rate": 1.4047797576885458e-08, "loss": 1.2279, "step": 422 }, { "epoch": 2.94, "learning_rate": 1.1379729000021711e-08, "loss": 1.2035, "step": 423 }, { "epoch": 2.94, "learning_rate": 8.99210697975883e-09, "loss": 1.2248, "step": 424 }, { "epoch": 2.95, "learning_rate": 6.885065741661367e-09, "loss": 1.2341, "step": 425 }, { "epoch": 2.96, "learning_rate": 5.058723737811355e-09, "loss": 1.1999, "step": 426 }, { "epoch": 2.97, "learning_rate": 3.5131836401502972e-09, "loss": 1.1994, "step": 427 }, { "epoch": 2.97, "learning_rate": 2.2485323347054555e-09, "loss": 1.2527, "step": 428 }, { "epoch": 2.98, "learning_rate": 1.2648409167070886e-09, "loss": 1.2552, "step": 429 }, { "epoch": 2.99, "learning_rate": 5.621646865899832e-10, "loss": 1.2782, "step": 430 }, { "epoch": 2.99, "learning_rate": 1.405431468848306e-10, "loss": 1.2717, "step": 431 }, { "epoch": 3.0, "learning_rate": 0.0, "loss": 1.3355, "step": 432 }, { "epoch": 3.0, "step": 432, "total_flos": 0.0, "train_loss": 1.441655464746334, "train_runtime": 12001.223, "train_samples_per_second": 4.04, "train_steps_per_second": 0.036 } ], "logging_steps": 1.0, "max_steps": 432, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }