{ "best_metric": 0.5836298932384342, "best_model_checkpoint": "wav2vec2-5Class-Validation-Mobil/checkpoint-773", "epoch": 276.9230769230769, "eval_steps": 500, "global_step": 900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.92, "eval_accuracy": 0.3202846975088968, "eval_loss": 1.602386713027954, "eval_runtime": 4.3468, "eval_samples_per_second": 64.645, "eval_steps_per_second": 0.69, "step": 3 }, { "epoch": 1.85, "eval_accuracy": 0.3167259786476868, "eval_loss": 1.6022303104400635, "eval_runtime": 3.573, "eval_samples_per_second": 78.645, "eval_steps_per_second": 0.84, "step": 6 }, { "epoch": 2.77, "eval_accuracy": 0.3167259786476868, "eval_loss": 1.601974368095398, "eval_runtime": 4.6151, "eval_samples_per_second": 60.887, "eval_steps_per_second": 0.65, "step": 9 }, { "epoch": 4.0, "eval_accuracy": 0.3167259786476868, "eval_loss": 1.6014597415924072, "eval_runtime": 5.3659, "eval_samples_per_second": 52.368, "eval_steps_per_second": 0.559, "step": 13 }, { "epoch": 4.92, "eval_accuracy": 0.3167259786476868, "eval_loss": 1.6009386777877808, "eval_runtime": 3.4504, "eval_samples_per_second": 81.439, "eval_steps_per_second": 0.869, "step": 16 }, { "epoch": 5.85, "eval_accuracy": 0.31316725978647686, "eval_loss": 1.6003268957138062, "eval_runtime": 4.2937, "eval_samples_per_second": 65.445, "eval_steps_per_second": 0.699, "step": 19 }, { "epoch": 6.77, "eval_accuracy": 0.30604982206405695, "eval_loss": 1.5995941162109375, "eval_runtime": 3.7057, "eval_samples_per_second": 75.828, "eval_steps_per_second": 0.81, "step": 22 }, { "epoch": 8.0, "eval_accuracy": 0.298932384341637, "eval_loss": 1.5984183549880981, "eval_runtime": 4.6458, "eval_samples_per_second": 60.484, "eval_steps_per_second": 0.646, "step": 26 }, { "epoch": 8.92, "eval_accuracy": 0.2918149466192171, "eval_loss": 1.5974235534667969, "eval_runtime": 5.0303, "eval_samples_per_second": 55.861, "eval_steps_per_second": 0.596, "step": 29 }, { "epoch": 9.85, "eval_accuracy": 0.27402135231316727, "eval_loss": 1.596360445022583, "eval_runtime": 3.3268, "eval_samples_per_second": 84.465, "eval_steps_per_second": 0.902, "step": 32 }, { "epoch": 10.77, "eval_accuracy": 0.2597864768683274, "eval_loss": 1.5951836109161377, "eval_runtime": 3.1882, "eval_samples_per_second": 88.138, "eval_steps_per_second": 0.941, "step": 35 }, { "epoch": 12.0, "eval_accuracy": 0.26334519572953735, "eval_loss": 1.593432903289795, "eval_runtime": 4.2078, "eval_samples_per_second": 66.78, "eval_steps_per_second": 0.713, "step": 39 }, { "epoch": 12.92, "eval_accuracy": 0.27402135231316727, "eval_loss": 1.5920255184173584, "eval_runtime": 4.9074, "eval_samples_per_second": 57.261, "eval_steps_per_second": 0.611, "step": 42 }, { "epoch": 13.85, "eval_accuracy": 0.298932384341637, "eval_loss": 1.5904992818832397, "eval_runtime": 5.4737, "eval_samples_per_second": 51.336, "eval_steps_per_second": 0.548, "step": 45 }, { "epoch": 14.77, "eval_accuracy": 0.298932384341637, "eval_loss": 1.5889027118682861, "eval_runtime": 5.4844, "eval_samples_per_second": 51.236, "eval_steps_per_second": 0.547, "step": 48 }, { "epoch": 16.0, "eval_accuracy": 0.2846975088967972, "eval_loss": 1.5867795944213867, "eval_runtime": 4.8027, "eval_samples_per_second": 58.508, "eval_steps_per_second": 0.625, "step": 52 }, { "epoch": 16.92, "eval_accuracy": 0.2846975088967972, "eval_loss": 1.5850844383239746, "eval_runtime": 4.5938, "eval_samples_per_second": 61.169, "eval_steps_per_second": 0.653, "step": 55 }, { "epoch": 17.85, "eval_accuracy": 0.2846975088967972, "eval_loss": 1.5833449363708496, "eval_runtime": 3.4722, "eval_samples_per_second": 80.929, "eval_steps_per_second": 0.864, "step": 58 }, { "epoch": 18.77, "eval_accuracy": 0.26334519572953735, "eval_loss": 1.58156418800354, "eval_runtime": 3.9515, "eval_samples_per_second": 71.112, "eval_steps_per_second": 0.759, "step": 61 }, { "epoch": 20.0, "eval_accuracy": 0.24555160142348753, "eval_loss": 1.579047441482544, "eval_runtime": 4.2125, "eval_samples_per_second": 66.707, "eval_steps_per_second": 0.712, "step": 65 }, { "epoch": 20.92, "eval_accuracy": 0.24199288256227758, "eval_loss": 1.576985478401184, "eval_runtime": 4.6275, "eval_samples_per_second": 60.724, "eval_steps_per_second": 0.648, "step": 68 }, { "epoch": 21.85, "eval_accuracy": 0.23487544483985764, "eval_loss": 1.574812650680542, "eval_runtime": 4.9061, "eval_samples_per_second": 57.275, "eval_steps_per_second": 0.611, "step": 71 }, { "epoch": 22.77, "eval_accuracy": 0.2313167259786477, "eval_loss": 1.5727591514587402, "eval_runtime": 5.6003, "eval_samples_per_second": 50.176, "eval_steps_per_second": 0.536, "step": 74 }, { "epoch": 24.0, "eval_accuracy": 0.2277580071174377, "eval_loss": 1.5699430704116821, "eval_runtime": 4.5057, "eval_samples_per_second": 62.365, "eval_steps_per_second": 0.666, "step": 78 }, { "epoch": 24.92, "eval_accuracy": 0.2313167259786477, "eval_loss": 1.567823052406311, "eval_runtime": 4.5731, "eval_samples_per_second": 61.446, "eval_steps_per_second": 0.656, "step": 81 }, { "epoch": 25.85, "eval_accuracy": 0.2313167259786477, "eval_loss": 1.5657496452331543, "eval_runtime": 4.3556, "eval_samples_per_second": 64.515, "eval_steps_per_second": 0.689, "step": 84 }, { "epoch": 26.77, "eval_accuracy": 0.2313167259786477, "eval_loss": 1.5637929439544678, "eval_runtime": 5.9441, "eval_samples_per_second": 47.274, "eval_steps_per_second": 0.505, "step": 87 }, { "epoch": 28.0, "eval_accuracy": 0.2313167259786477, "eval_loss": 1.5613017082214355, "eval_runtime": 4.5762, "eval_samples_per_second": 61.404, "eval_steps_per_second": 0.656, "step": 91 }, { "epoch": 28.92, "eval_accuracy": 0.2313167259786477, "eval_loss": 1.5597190856933594, "eval_runtime": 4.1813, "eval_samples_per_second": 67.204, "eval_steps_per_second": 0.717, "step": 94 }, { "epoch": 29.85, "eval_accuracy": 0.2313167259786477, "eval_loss": 1.5587605237960815, "eval_runtime": 4.6749, "eval_samples_per_second": 60.108, "eval_steps_per_second": 0.642, "step": 97 }, { "epoch": 30.77, "grad_norm": 66708.1953125, "learning_rate": 2.962962962962963e-05, "loss": 1.561, "step": 100 }, { "epoch": 30.77, "eval_accuracy": 0.2313167259786477, "eval_loss": 1.5586402416229248, "eval_runtime": 5.2059, "eval_samples_per_second": 53.977, "eval_steps_per_second": 0.576, "step": 100 }, { "epoch": 32.0, "eval_accuracy": 0.2313167259786477, "eval_loss": 1.5596789121627808, "eval_runtime": 4.428, "eval_samples_per_second": 63.46, "eval_steps_per_second": 0.678, "step": 104 }, { "epoch": 32.92, "eval_accuracy": 0.2313167259786477, "eval_loss": 1.5619100332260132, "eval_runtime": 3.3009, "eval_samples_per_second": 85.128, "eval_steps_per_second": 0.909, "step": 107 }, { "epoch": 33.85, "eval_accuracy": 0.2313167259786477, "eval_loss": 1.5660569667816162, "eval_runtime": 3.371, "eval_samples_per_second": 83.357, "eval_steps_per_second": 0.89, "step": 110 }, { "epoch": 34.77, "eval_accuracy": 0.2313167259786477, "eval_loss": 1.5720349550247192, "eval_runtime": 3.9013, "eval_samples_per_second": 72.028, "eval_steps_per_second": 0.769, "step": 113 }, { "epoch": 36.0, "eval_accuracy": 0.2313167259786477, "eval_loss": 1.5833308696746826, "eval_runtime": 4.7161, "eval_samples_per_second": 59.583, "eval_steps_per_second": 0.636, "step": 117 }, { "epoch": 36.92, "eval_accuracy": 0.2313167259786477, "eval_loss": 1.5957212448120117, "eval_runtime": 4.1977, "eval_samples_per_second": 66.942, "eval_steps_per_second": 0.715, "step": 120 }, { "epoch": 37.85, "eval_accuracy": 0.2313167259786477, "eval_loss": 1.6119521856307983, "eval_runtime": 3.034, "eval_samples_per_second": 92.618, "eval_steps_per_second": 0.989, "step": 123 }, { "epoch": 38.77, "eval_accuracy": 0.2313167259786477, "eval_loss": 1.631814956665039, "eval_runtime": 3.0252, "eval_samples_per_second": 92.887, "eval_steps_per_second": 0.992, "step": 126 }, { "epoch": 40.0, "eval_accuracy": 0.2313167259786477, "eval_loss": 1.663757085800171, "eval_runtime": 3.243, "eval_samples_per_second": 86.648, "eval_steps_per_second": 0.925, "step": 130 }, { "epoch": 40.92, "eval_accuracy": 0.2313167259786477, "eval_loss": 1.6904593706130981, "eval_runtime": 3.1943, "eval_samples_per_second": 87.97, "eval_steps_per_second": 0.939, "step": 133 }, { "epoch": 41.85, "eval_accuracy": 0.2313167259786477, "eval_loss": 1.7196571826934814, "eval_runtime": 3.4764, "eval_samples_per_second": 80.832, "eval_steps_per_second": 0.863, "step": 136 }, { "epoch": 42.77, "eval_accuracy": 0.2313167259786477, "eval_loss": 1.750288724899292, "eval_runtime": 3.415, "eval_samples_per_second": 82.283, "eval_steps_per_second": 0.878, "step": 139 }, { "epoch": 44.0, "eval_accuracy": 0.2313167259786477, "eval_loss": 1.7802847623825073, "eval_runtime": 3.0779, "eval_samples_per_second": 91.295, "eval_steps_per_second": 0.975, "step": 143 }, { "epoch": 44.92, "eval_accuracy": 0.2313167259786477, "eval_loss": 1.7917312383651733, "eval_runtime": 3.6229, "eval_samples_per_second": 77.562, "eval_steps_per_second": 0.828, "step": 146 }, { "epoch": 45.85, "eval_accuracy": 0.2313167259786477, "eval_loss": 1.7919948101043701, "eval_runtime": 3.2733, "eval_samples_per_second": 85.845, "eval_steps_per_second": 0.916, "step": 149 }, { "epoch": 46.77, "eval_accuracy": 0.2313167259786477, "eval_loss": 1.7869282960891724, "eval_runtime": 3.1081, "eval_samples_per_second": 90.408, "eval_steps_per_second": 0.965, "step": 152 }, { "epoch": 48.0, "eval_accuracy": 0.2597864768683274, "eval_loss": 1.7699986696243286, "eval_runtime": 3.2526, "eval_samples_per_second": 86.392, "eval_steps_per_second": 0.922, "step": 156 }, { "epoch": 48.92, "eval_accuracy": 0.27402135231316727, "eval_loss": 1.7525370121002197, "eval_runtime": 2.789, "eval_samples_per_second": 100.754, "eval_steps_per_second": 1.076, "step": 159 }, { "epoch": 49.85, "eval_accuracy": 0.2775800711743772, "eval_loss": 1.7406829595565796, "eval_runtime": 3.5203, "eval_samples_per_second": 79.822, "eval_steps_per_second": 0.852, "step": 162 }, { "epoch": 50.77, "eval_accuracy": 0.2918149466192171, "eval_loss": 1.7306878566741943, "eval_runtime": 3.4092, "eval_samples_per_second": 82.424, "eval_steps_per_second": 0.88, "step": 165 }, { "epoch": 52.0, "eval_accuracy": 0.3096085409252669, "eval_loss": 1.7241473197937012, "eval_runtime": 3.4771, "eval_samples_per_second": 80.815, "eval_steps_per_second": 0.863, "step": 169 }, { "epoch": 52.92, "eval_accuracy": 0.3167259786476868, "eval_loss": 1.7242671251296997, "eval_runtime": 3.338, "eval_samples_per_second": 84.182, "eval_steps_per_second": 0.899, "step": 172 }, { "epoch": 53.85, "eval_accuracy": 0.3167259786476868, "eval_loss": 1.7253814935684204, "eval_runtime": 3.037, "eval_samples_per_second": 92.524, "eval_steps_per_second": 0.988, "step": 175 }, { "epoch": 54.77, "eval_accuracy": 0.3238434163701068, "eval_loss": 1.7232733964920044, "eval_runtime": 3.3453, "eval_samples_per_second": 84.0, "eval_steps_per_second": 0.897, "step": 178 }, { "epoch": 56.0, "eval_accuracy": 0.3238434163701068, "eval_loss": 1.7224737405776978, "eval_runtime": 4.1856, "eval_samples_per_second": 67.135, "eval_steps_per_second": 0.717, "step": 182 }, { "epoch": 56.92, "eval_accuracy": 0.3274021352313167, "eval_loss": 1.7187089920043945, "eval_runtime": 4.0825, "eval_samples_per_second": 68.831, "eval_steps_per_second": 0.735, "step": 185 }, { "epoch": 57.85, "eval_accuracy": 0.3274021352313167, "eval_loss": 1.7172435522079468, "eval_runtime": 4.3988, "eval_samples_per_second": 63.881, "eval_steps_per_second": 0.682, "step": 188 }, { "epoch": 58.77, "eval_accuracy": 0.33451957295373663, "eval_loss": 1.7145518064498901, "eval_runtime": 3.5886, "eval_samples_per_second": 78.303, "eval_steps_per_second": 0.836, "step": 191 }, { "epoch": 60.0, "eval_accuracy": 0.3487544483985765, "eval_loss": 1.711957573890686, "eval_runtime": 3.0988, "eval_samples_per_second": 90.681, "eval_steps_per_second": 0.968, "step": 195 }, { "epoch": 60.92, "eval_accuracy": 0.35587188612099646, "eval_loss": 1.7048858404159546, "eval_runtime": 3.3244, "eval_samples_per_second": 84.526, "eval_steps_per_second": 0.902, "step": 198 }, { "epoch": 61.54, "grad_norm": 26972.24609375, "learning_rate": 2.5925925925925925e-05, "loss": 1.3094, "step": 200 }, { "epoch": 61.85, "eval_accuracy": 0.3594306049822064, "eval_loss": 1.702221155166626, "eval_runtime": 2.9103, "eval_samples_per_second": 96.553, "eval_steps_per_second": 1.031, "step": 201 }, { "epoch": 62.77, "eval_accuracy": 0.3736654804270463, "eval_loss": 1.6912201642990112, "eval_runtime": 3.4935, "eval_samples_per_second": 80.435, "eval_steps_per_second": 0.859, "step": 204 }, { "epoch": 64.0, "eval_accuracy": 0.37722419928825623, "eval_loss": 1.6797984838485718, "eval_runtime": 3.0757, "eval_samples_per_second": 91.361, "eval_steps_per_second": 0.975, "step": 208 }, { "epoch": 64.92, "eval_accuracy": 0.3807829181494662, "eval_loss": 1.6687328815460205, "eval_runtime": 3.281, "eval_samples_per_second": 85.645, "eval_steps_per_second": 0.914, "step": 211 }, { "epoch": 65.85, "eval_accuracy": 0.38434163701067614, "eval_loss": 1.6568727493286133, "eval_runtime": 3.0158, "eval_samples_per_second": 93.174, "eval_steps_per_second": 0.995, "step": 214 }, { "epoch": 66.77, "eval_accuracy": 0.3914590747330961, "eval_loss": 1.642698049545288, "eval_runtime": 2.9377, "eval_samples_per_second": 95.654, "eval_steps_per_second": 1.021, "step": 217 }, { "epoch": 68.0, "eval_accuracy": 0.3914590747330961, "eval_loss": 1.6301021575927734, "eval_runtime": 2.9188, "eval_samples_per_second": 96.272, "eval_steps_per_second": 1.028, "step": 221 }, { "epoch": 68.92, "eval_accuracy": 0.39501779359430605, "eval_loss": 1.6217372417449951, "eval_runtime": 3.1297, "eval_samples_per_second": 89.784, "eval_steps_per_second": 0.959, "step": 224 }, { "epoch": 69.85, "eval_accuracy": 0.39501779359430605, "eval_loss": 1.6203086376190186, "eval_runtime": 3.3261, "eval_samples_per_second": 84.482, "eval_steps_per_second": 0.902, "step": 227 }, { "epoch": 70.77, "eval_accuracy": 0.39501779359430605, "eval_loss": 1.6257439851760864, "eval_runtime": 3.1941, "eval_samples_per_second": 87.974, "eval_steps_per_second": 0.939, "step": 230 }, { "epoch": 72.0, "eval_accuracy": 0.40213523131672596, "eval_loss": 1.6192444562911987, "eval_runtime": 2.8716, "eval_samples_per_second": 97.855, "eval_steps_per_second": 1.045, "step": 234 }, { "epoch": 72.92, "eval_accuracy": 0.4092526690391459, "eval_loss": 1.6044347286224365, "eval_runtime": 3.3231, "eval_samples_per_second": 84.559, "eval_steps_per_second": 0.903, "step": 237 }, { "epoch": 73.85, "eval_accuracy": 0.4306049822064057, "eval_loss": 1.5868154764175415, "eval_runtime": 3.0078, "eval_samples_per_second": 93.422, "eval_steps_per_second": 0.997, "step": 240 }, { "epoch": 74.77, "eval_accuracy": 0.4377224199288256, "eval_loss": 1.5786783695220947, "eval_runtime": 3.1108, "eval_samples_per_second": 90.332, "eval_steps_per_second": 0.964, "step": 243 }, { "epoch": 76.0, "eval_accuracy": 0.43416370106761565, "eval_loss": 1.5762073993682861, "eval_runtime": 4.8033, "eval_samples_per_second": 58.501, "eval_steps_per_second": 0.625, "step": 247 }, { "epoch": 76.92, "eval_accuracy": 0.4377224199288256, "eval_loss": 1.5717052221298218, "eval_runtime": 4.9388, "eval_samples_per_second": 56.896, "eval_steps_per_second": 0.607, "step": 250 }, { "epoch": 77.85, "eval_accuracy": 0.43416370106761565, "eval_loss": 1.5673516988754272, "eval_runtime": 3.5439, "eval_samples_per_second": 79.29, "eval_steps_per_second": 0.847, "step": 253 }, { "epoch": 78.77, "eval_accuracy": 0.42704626334519574, "eval_loss": 1.5683715343475342, "eval_runtime": 2.9479, "eval_samples_per_second": 95.323, "eval_steps_per_second": 1.018, "step": 256 }, { "epoch": 80.0, "eval_accuracy": 0.42704626334519574, "eval_loss": 1.5619009733200073, "eval_runtime": 3.2494, "eval_samples_per_second": 86.478, "eval_steps_per_second": 0.923, "step": 260 }, { "epoch": 80.92, "eval_accuracy": 0.4306049822064057, "eval_loss": 1.5554527044296265, "eval_runtime": 3.0649, "eval_samples_per_second": 91.683, "eval_steps_per_second": 0.979, "step": 263 }, { "epoch": 81.85, "eval_accuracy": 0.43416370106761565, "eval_loss": 1.550489068031311, "eval_runtime": 3.1587, "eval_samples_per_second": 88.96, "eval_steps_per_second": 0.95, "step": 266 }, { "epoch": 82.77, "eval_accuracy": 0.4412811387900356, "eval_loss": 1.5385645627975464, "eval_runtime": 3.1715, "eval_samples_per_second": 88.601, "eval_steps_per_second": 0.946, "step": 269 }, { "epoch": 84.0, "eval_accuracy": 0.4377224199288256, "eval_loss": 1.536201000213623, "eval_runtime": 3.2602, "eval_samples_per_second": 86.191, "eval_steps_per_second": 0.92, "step": 273 }, { "epoch": 84.92, "eval_accuracy": 0.43416370106761565, "eval_loss": 1.5410619974136353, "eval_runtime": 2.9845, "eval_samples_per_second": 94.153, "eval_steps_per_second": 1.005, "step": 276 }, { "epoch": 85.85, "eval_accuracy": 0.43416370106761565, "eval_loss": 1.5452691316604614, "eval_runtime": 3.4013, "eval_samples_per_second": 82.616, "eval_steps_per_second": 0.882, "step": 279 }, { "epoch": 86.77, "eval_accuracy": 0.42704626334519574, "eval_loss": 1.5611252784729004, "eval_runtime": 2.9135, "eval_samples_per_second": 96.447, "eval_steps_per_second": 1.03, "step": 282 }, { "epoch": 88.0, "eval_accuracy": 0.4199288256227758, "eval_loss": 1.5766078233718872, "eval_runtime": 2.8634, "eval_samples_per_second": 98.135, "eval_steps_per_second": 1.048, "step": 286 }, { "epoch": 88.92, "eval_accuracy": 0.4199288256227758, "eval_loss": 1.5781065225601196, "eval_runtime": 3.1014, "eval_samples_per_second": 90.606, "eval_steps_per_second": 0.967, "step": 289 }, { "epoch": 89.85, "eval_accuracy": 0.4234875444839858, "eval_loss": 1.5674538612365723, "eval_runtime": 3.5418, "eval_samples_per_second": 79.339, "eval_steps_per_second": 0.847, "step": 292 }, { "epoch": 90.77, "eval_accuracy": 0.42704626334519574, "eval_loss": 1.558840036392212, "eval_runtime": 4.5717, "eval_samples_per_second": 61.464, "eval_steps_per_second": 0.656, "step": 295 }, { "epoch": 92.0, "eval_accuracy": 0.42704626334519574, "eval_loss": 1.5495978593826294, "eval_runtime": 2.971, "eval_samples_per_second": 94.581, "eval_steps_per_second": 1.01, "step": 299 }, { "epoch": 92.31, "grad_norm": 27984.919921875, "learning_rate": 2.222222222222222e-05, "loss": 1.0538, "step": 300 }, { "epoch": 92.92, "eval_accuracy": 0.42704626334519574, "eval_loss": 1.5492929220199585, "eval_runtime": 3.229, "eval_samples_per_second": 87.023, "eval_steps_per_second": 0.929, "step": 302 }, { "epoch": 93.85, "eval_accuracy": 0.4234875444839858, "eval_loss": 1.5539740324020386, "eval_runtime": 2.993, "eval_samples_per_second": 93.886, "eval_steps_per_second": 1.002, "step": 305 }, { "epoch": 94.77, "eval_accuracy": 0.41637010676156583, "eval_loss": 1.5620365142822266, "eval_runtime": 3.5102, "eval_samples_per_second": 80.052, "eval_steps_per_second": 0.855, "step": 308 }, { "epoch": 96.0, "eval_accuracy": 0.41637010676156583, "eval_loss": 1.564751148223877, "eval_runtime": 3.7132, "eval_samples_per_second": 75.677, "eval_steps_per_second": 0.808, "step": 312 }, { "epoch": 96.92, "eval_accuracy": 0.41637010676156583, "eval_loss": 1.561686396598816, "eval_runtime": 4.9316, "eval_samples_per_second": 56.98, "eval_steps_per_second": 0.608, "step": 315 }, { "epoch": 97.85, "eval_accuracy": 0.4234875444839858, "eval_loss": 1.5461145639419556, "eval_runtime": 3.1512, "eval_samples_per_second": 89.173, "eval_steps_per_second": 0.952, "step": 318 }, { "epoch": 98.77, "eval_accuracy": 0.4306049822064057, "eval_loss": 1.5348182916641235, "eval_runtime": 4.3294, "eval_samples_per_second": 64.906, "eval_steps_per_second": 0.693, "step": 321 }, { "epoch": 100.0, "eval_accuracy": 0.4306049822064057, "eval_loss": 1.5345805883407593, "eval_runtime": 3.3762, "eval_samples_per_second": 83.23, "eval_steps_per_second": 0.889, "step": 325 }, { "epoch": 100.92, "eval_accuracy": 0.41637010676156583, "eval_loss": 1.5465843677520752, "eval_runtime": 3.8288, "eval_samples_per_second": 73.391, "eval_steps_per_second": 0.784, "step": 328 }, { "epoch": 101.85, "eval_accuracy": 0.4128113879003559, "eval_loss": 1.5547189712524414, "eval_runtime": 4.3332, "eval_samples_per_second": 64.848, "eval_steps_per_second": 0.692, "step": 331 }, { "epoch": 102.77, "eval_accuracy": 0.4128113879003559, "eval_loss": 1.5559605360031128, "eval_runtime": 3.2588, "eval_samples_per_second": 86.229, "eval_steps_per_second": 0.921, "step": 334 }, { "epoch": 104.0, "eval_accuracy": 0.4306049822064057, "eval_loss": 1.5315039157867432, "eval_runtime": 4.5744, "eval_samples_per_second": 61.429, "eval_steps_per_second": 0.656, "step": 338 }, { "epoch": 104.92, "eval_accuracy": 0.44483985765124556, "eval_loss": 1.5124022960662842, "eval_runtime": 3.3067, "eval_samples_per_second": 84.979, "eval_steps_per_second": 0.907, "step": 341 }, { "epoch": 105.85, "eval_accuracy": 0.44483985765124556, "eval_loss": 1.5044087171554565, "eval_runtime": 3.9949, "eval_samples_per_second": 70.341, "eval_steps_per_second": 0.751, "step": 344 }, { "epoch": 106.77, "eval_accuracy": 0.4483985765124555, "eval_loss": 1.5010027885437012, "eval_runtime": 3.5698, "eval_samples_per_second": 78.716, "eval_steps_per_second": 0.84, "step": 347 }, { "epoch": 108.0, "eval_accuracy": 0.44483985765124556, "eval_loss": 1.5004721879959106, "eval_runtime": 2.9807, "eval_samples_per_second": 94.273, "eval_steps_per_second": 1.006, "step": 351 }, { "epoch": 108.92, "eval_accuracy": 0.44483985765124556, "eval_loss": 1.499153971672058, "eval_runtime": 2.8868, "eval_samples_per_second": 97.339, "eval_steps_per_second": 1.039, "step": 354 }, { "epoch": 109.85, "eval_accuracy": 0.4483985765124555, "eval_loss": 1.4993938207626343, "eval_runtime": 3.2052, "eval_samples_per_second": 87.67, "eval_steps_per_second": 0.936, "step": 357 }, { "epoch": 110.77, "eval_accuracy": 0.45195729537366547, "eval_loss": 1.4987653493881226, "eval_runtime": 3.3473, "eval_samples_per_second": 83.949, "eval_steps_per_second": 0.896, "step": 360 }, { "epoch": 112.0, "eval_accuracy": 0.46619217081850534, "eval_loss": 1.5004514455795288, "eval_runtime": 2.8714, "eval_samples_per_second": 97.862, "eval_steps_per_second": 1.045, "step": 364 }, { "epoch": 112.92, "eval_accuracy": 0.47330960854092524, "eval_loss": 1.5010361671447754, "eval_runtime": 3.6886, "eval_samples_per_second": 76.182, "eval_steps_per_second": 0.813, "step": 367 }, { "epoch": 113.85, "eval_accuracy": 0.4697508896797153, "eval_loss": 1.4968541860580444, "eval_runtime": 3.5621, "eval_samples_per_second": 78.886, "eval_steps_per_second": 0.842, "step": 370 }, { "epoch": 114.77, "eval_accuracy": 0.47330960854092524, "eval_loss": 1.4775702953338623, "eval_runtime": 4.3842, "eval_samples_per_second": 64.093, "eval_steps_per_second": 0.684, "step": 373 }, { "epoch": 116.0, "eval_accuracy": 0.47686832740213525, "eval_loss": 1.4527899026870728, "eval_runtime": 4.7808, "eval_samples_per_second": 58.777, "eval_steps_per_second": 0.628, "step": 377 }, { "epoch": 116.92, "eval_accuracy": 0.49466192170818507, "eval_loss": 1.4394866228103638, "eval_runtime": 5.0753, "eval_samples_per_second": 55.366, "eval_steps_per_second": 0.591, "step": 380 }, { "epoch": 117.85, "eval_accuracy": 0.498220640569395, "eval_loss": 1.4310173988342285, "eval_runtime": 4.758, "eval_samples_per_second": 59.058, "eval_steps_per_second": 0.631, "step": 383 }, { "epoch": 118.77, "eval_accuracy": 0.49466192170818507, "eval_loss": 1.4314603805541992, "eval_runtime": 3.9673, "eval_samples_per_second": 70.829, "eval_steps_per_second": 0.756, "step": 386 }, { "epoch": 120.0, "eval_accuracy": 0.49466192170818507, "eval_loss": 1.4388599395751953, "eval_runtime": 4.1069, "eval_samples_per_second": 68.422, "eval_steps_per_second": 0.73, "step": 390 }, { "epoch": 120.92, "eval_accuracy": 0.498220640569395, "eval_loss": 1.4374699592590332, "eval_runtime": 5.1154, "eval_samples_per_second": 54.933, "eval_steps_per_second": 0.586, "step": 393 }, { "epoch": 121.85, "eval_accuracy": 0.498220640569395, "eval_loss": 1.4381343126296997, "eval_runtime": 4.1133, "eval_samples_per_second": 68.315, "eval_steps_per_second": 0.729, "step": 396 }, { "epoch": 122.77, "eval_accuracy": 0.498220640569395, "eval_loss": 1.4246776103973389, "eval_runtime": 3.9833, "eval_samples_per_second": 70.544, "eval_steps_per_second": 0.753, "step": 399 }, { "epoch": 123.08, "grad_norm": 31388.482421875, "learning_rate": 1.8518518518518518e-05, "loss": 0.8509, "step": 400 }, { "epoch": 124.0, "eval_accuracy": 0.498220640569395, "eval_loss": 1.4195659160614014, "eval_runtime": 4.1654, "eval_samples_per_second": 67.461, "eval_steps_per_second": 0.72, "step": 403 }, { "epoch": 124.92, "eval_accuracy": 0.505338078291815, "eval_loss": 1.4178649187088013, "eval_runtime": 5.0869, "eval_samples_per_second": 55.239, "eval_steps_per_second": 0.59, "step": 406 }, { "epoch": 125.85, "eval_accuracy": 0.505338078291815, "eval_loss": 1.40910804271698, "eval_runtime": 4.5242, "eval_samples_per_second": 62.11, "eval_steps_per_second": 0.663, "step": 409 }, { "epoch": 126.77, "eval_accuracy": 0.505338078291815, "eval_loss": 1.3957635164260864, "eval_runtime": 4.5377, "eval_samples_per_second": 61.926, "eval_steps_per_second": 0.661, "step": 412 }, { "epoch": 128.0, "eval_accuracy": 0.5088967971530249, "eval_loss": 1.3736003637313843, "eval_runtime": 3.6994, "eval_samples_per_second": 75.958, "eval_steps_per_second": 0.811, "step": 416 }, { "epoch": 128.92, "eval_accuracy": 0.5088967971530249, "eval_loss": 1.3661431074142456, "eval_runtime": 4.0248, "eval_samples_per_second": 69.817, "eval_steps_per_second": 0.745, "step": 419 }, { "epoch": 129.85, "eval_accuracy": 0.5124555160142349, "eval_loss": 1.369443416595459, "eval_runtime": 4.9876, "eval_samples_per_second": 56.34, "eval_steps_per_second": 0.601, "step": 422 }, { "epoch": 130.77, "eval_accuracy": 0.5124555160142349, "eval_loss": 1.3807623386383057, "eval_runtime": 3.5494, "eval_samples_per_second": 79.169, "eval_steps_per_second": 0.845, "step": 425 }, { "epoch": 132.0, "eval_accuracy": 0.5124555160142349, "eval_loss": 1.3818711042404175, "eval_runtime": 3.9503, "eval_samples_per_second": 71.134, "eval_steps_per_second": 0.759, "step": 429 }, { "epoch": 132.92, "eval_accuracy": 0.5124555160142349, "eval_loss": 1.3859163522720337, "eval_runtime": 4.2041, "eval_samples_per_second": 66.84, "eval_steps_per_second": 0.714, "step": 432 }, { "epoch": 133.85, "eval_accuracy": 0.5231316725978647, "eval_loss": 1.378004789352417, "eval_runtime": 3.8384, "eval_samples_per_second": 73.208, "eval_steps_per_second": 0.782, "step": 435 }, { "epoch": 134.77, "eval_accuracy": 0.5231316725978647, "eval_loss": 1.3696413040161133, "eval_runtime": 4.6334, "eval_samples_per_second": 60.646, "eval_steps_per_second": 0.647, "step": 438 }, { "epoch": 136.0, "eval_accuracy": 0.5302491103202847, "eval_loss": 1.3564013242721558, "eval_runtime": 4.002, "eval_samples_per_second": 70.215, "eval_steps_per_second": 0.75, "step": 442 }, { "epoch": 136.92, "eval_accuracy": 0.5338078291814946, "eval_loss": 1.3421210050582886, "eval_runtime": 4.0161, "eval_samples_per_second": 69.968, "eval_steps_per_second": 0.747, "step": 445 }, { "epoch": 137.85, "eval_accuracy": 0.5373665480427047, "eval_loss": 1.325627326965332, "eval_runtime": 4.156, "eval_samples_per_second": 67.613, "eval_steps_per_second": 0.722, "step": 448 }, { "epoch": 138.77, "eval_accuracy": 0.5373665480427047, "eval_loss": 1.3274290561676025, "eval_runtime": 3.9911, "eval_samples_per_second": 70.407, "eval_steps_per_second": 0.752, "step": 451 }, { "epoch": 140.0, "eval_accuracy": 0.5409252669039146, "eval_loss": 1.3401566743850708, "eval_runtime": 4.4088, "eval_samples_per_second": 63.736, "eval_steps_per_second": 0.68, "step": 455 }, { "epoch": 140.92, "eval_accuracy": 0.5409252669039146, "eval_loss": 1.351689338684082, "eval_runtime": 4.4409, "eval_samples_per_second": 63.276, "eval_steps_per_second": 0.676, "step": 458 }, { "epoch": 141.85, "eval_accuracy": 0.5409252669039146, "eval_loss": 1.3585495948791504, "eval_runtime": 3.7955, "eval_samples_per_second": 74.035, "eval_steps_per_second": 0.79, "step": 461 }, { "epoch": 142.77, "eval_accuracy": 0.5373665480427047, "eval_loss": 1.3592112064361572, "eval_runtime": 3.3552, "eval_samples_per_second": 83.75, "eval_steps_per_second": 0.894, "step": 464 }, { "epoch": 144.0, "eval_accuracy": 0.5480427046263345, "eval_loss": 1.3329293727874756, "eval_runtime": 5.3044, "eval_samples_per_second": 52.975, "eval_steps_per_second": 0.566, "step": 468 }, { "epoch": 144.92, "eval_accuracy": 0.5480427046263345, "eval_loss": 1.312560796737671, "eval_runtime": 4.319, "eval_samples_per_second": 65.061, "eval_steps_per_second": 0.695, "step": 471 }, { "epoch": 145.85, "eval_accuracy": 0.5444839857651246, "eval_loss": 1.3075566291809082, "eval_runtime": 3.9528, "eval_samples_per_second": 71.09, "eval_steps_per_second": 0.759, "step": 474 }, { "epoch": 146.77, "eval_accuracy": 0.5480427046263345, "eval_loss": 1.3146412372589111, "eval_runtime": 4.3249, "eval_samples_per_second": 64.973, "eval_steps_per_second": 0.694, "step": 477 }, { "epoch": 148.0, "eval_accuracy": 0.5444839857651246, "eval_loss": 1.3345069885253906, "eval_runtime": 3.9127, "eval_samples_per_second": 71.817, "eval_steps_per_second": 0.767, "step": 481 }, { "epoch": 148.92, "eval_accuracy": 0.5444839857651246, "eval_loss": 1.3408929109573364, "eval_runtime": 4.1463, "eval_samples_per_second": 67.771, "eval_steps_per_second": 0.724, "step": 484 }, { "epoch": 149.85, "eval_accuracy": 0.5444839857651246, "eval_loss": 1.3374032974243164, "eval_runtime": 4.2345, "eval_samples_per_second": 66.359, "eval_steps_per_second": 0.708, "step": 487 }, { "epoch": 150.77, "eval_accuracy": 0.5480427046263345, "eval_loss": 1.3227189779281616, "eval_runtime": 4.3006, "eval_samples_per_second": 65.339, "eval_steps_per_second": 0.698, "step": 490 }, { "epoch": 152.0, "eval_accuracy": 0.5444839857651246, "eval_loss": 1.3200651407241821, "eval_runtime": 4.4216, "eval_samples_per_second": 63.551, "eval_steps_per_second": 0.678, "step": 494 }, { "epoch": 152.92, "eval_accuracy": 0.5444839857651246, "eval_loss": 1.3174102306365967, "eval_runtime": 4.4898, "eval_samples_per_second": 62.586, "eval_steps_per_second": 0.668, "step": 497 }, { "epoch": 153.85, "grad_norm": 24984.7734375, "learning_rate": 1.4814814814814815e-05, "loss": 0.7118, "step": 500 }, { "epoch": 153.85, "eval_accuracy": 0.5444839857651246, "eval_loss": 1.3073471784591675, "eval_runtime": 4.2385, "eval_samples_per_second": 66.297, "eval_steps_per_second": 0.708, "step": 500 }, { "epoch": 154.77, "eval_accuracy": 0.5551601423487544, "eval_loss": 1.2983657121658325, "eval_runtime": 3.4569, "eval_samples_per_second": 81.286, "eval_steps_per_second": 0.868, "step": 503 }, { "epoch": 156.0, "eval_accuracy": 0.5516014234875445, "eval_loss": 1.2974605560302734, "eval_runtime": 4.3467, "eval_samples_per_second": 64.647, "eval_steps_per_second": 0.69, "step": 507 }, { "epoch": 156.92, "eval_accuracy": 0.5516014234875445, "eval_loss": 1.3027478456497192, "eval_runtime": 4.5106, "eval_samples_per_second": 62.297, "eval_steps_per_second": 0.665, "step": 510 }, { "epoch": 157.85, "eval_accuracy": 0.5480427046263345, "eval_loss": 1.3088507652282715, "eval_runtime": 4.2508, "eval_samples_per_second": 66.105, "eval_steps_per_second": 0.706, "step": 513 }, { "epoch": 158.77, "eval_accuracy": 0.5480427046263345, "eval_loss": 1.3138750791549683, "eval_runtime": 4.4205, "eval_samples_per_second": 63.567, "eval_steps_per_second": 0.679, "step": 516 }, { "epoch": 160.0, "eval_accuracy": 0.5551601423487544, "eval_loss": 1.3067928552627563, "eval_runtime": 4.2488, "eval_samples_per_second": 66.136, "eval_steps_per_second": 0.706, "step": 520 }, { "epoch": 160.92, "eval_accuracy": 0.5551601423487544, "eval_loss": 1.3011025190353394, "eval_runtime": 4.3025, "eval_samples_per_second": 65.31, "eval_steps_per_second": 0.697, "step": 523 }, { "epoch": 161.85, "eval_accuracy": 0.5551601423487544, "eval_loss": 1.2957364320755005, "eval_runtime": 4.3812, "eval_samples_per_second": 64.137, "eval_steps_per_second": 0.685, "step": 526 }, { "epoch": 162.77, "eval_accuracy": 0.5551601423487544, "eval_loss": 1.296021819114685, "eval_runtime": 4.6921, "eval_samples_per_second": 59.887, "eval_steps_per_second": 0.639, "step": 529 }, { "epoch": 164.0, "eval_accuracy": 0.5516014234875445, "eval_loss": 1.3158953189849854, "eval_runtime": 4.7452, "eval_samples_per_second": 59.218, "eval_steps_per_second": 0.632, "step": 533 }, { "epoch": 164.92, "eval_accuracy": 0.5516014234875445, "eval_loss": 1.3257168531417847, "eval_runtime": 4.4128, "eval_samples_per_second": 63.678, "eval_steps_per_second": 0.68, "step": 536 }, { "epoch": 165.85, "eval_accuracy": 0.5516014234875445, "eval_loss": 1.3312301635742188, "eval_runtime": 3.6447, "eval_samples_per_second": 77.099, "eval_steps_per_second": 0.823, "step": 539 }, { "epoch": 166.77, "eval_accuracy": 0.5516014234875445, "eval_loss": 1.322218418121338, "eval_runtime": 4.2773, "eval_samples_per_second": 65.695, "eval_steps_per_second": 0.701, "step": 542 }, { "epoch": 168.0, "eval_accuracy": 0.5551601423487544, "eval_loss": 1.298622488975525, "eval_runtime": 4.5789, "eval_samples_per_second": 61.369, "eval_steps_per_second": 0.655, "step": 546 }, { "epoch": 168.92, "eval_accuracy": 0.5587188612099644, "eval_loss": 1.289797306060791, "eval_runtime": 4.5328, "eval_samples_per_second": 61.993, "eval_steps_per_second": 0.662, "step": 549 }, { "epoch": 169.85, "eval_accuracy": 0.5551601423487544, "eval_loss": 1.2937852144241333, "eval_runtime": 3.7509, "eval_samples_per_second": 74.915, "eval_steps_per_second": 0.8, "step": 552 }, { "epoch": 170.77, "eval_accuracy": 0.5551601423487544, "eval_loss": 1.290231704711914, "eval_runtime": 4.1153, "eval_samples_per_second": 68.282, "eval_steps_per_second": 0.729, "step": 555 }, { "epoch": 172.0, "eval_accuracy": 0.5658362989323843, "eval_loss": 1.287913203239441, "eval_runtime": 4.7912, "eval_samples_per_second": 58.649, "eval_steps_per_second": 0.626, "step": 559 }, { "epoch": 172.92, "eval_accuracy": 0.5658362989323843, "eval_loss": 1.283803939819336, "eval_runtime": 4.5456, "eval_samples_per_second": 61.818, "eval_steps_per_second": 0.66, "step": 562 }, { "epoch": 173.85, "eval_accuracy": 0.5658362989323843, "eval_loss": 1.2811965942382812, "eval_runtime": 4.4869, "eval_samples_per_second": 62.627, "eval_steps_per_second": 0.669, "step": 565 }, { "epoch": 174.77, "eval_accuracy": 0.5658362989323843, "eval_loss": 1.2863661050796509, "eval_runtime": 4.2715, "eval_samples_per_second": 65.785, "eval_steps_per_second": 0.702, "step": 568 }, { "epoch": 176.0, "eval_accuracy": 0.5551601423487544, "eval_loss": 1.2934131622314453, "eval_runtime": 4.643, "eval_samples_per_second": 60.522, "eval_steps_per_second": 0.646, "step": 572 }, { "epoch": 176.92, "eval_accuracy": 0.5587188612099644, "eval_loss": 1.2940202951431274, "eval_runtime": 4.2681, "eval_samples_per_second": 65.837, "eval_steps_per_second": 0.703, "step": 575 }, { "epoch": 177.85, "eval_accuracy": 0.5587188612099644, "eval_loss": 1.298832654953003, "eval_runtime": 4.2991, "eval_samples_per_second": 65.363, "eval_steps_per_second": 0.698, "step": 578 }, { "epoch": 178.77, "eval_accuracy": 0.5622775800711743, "eval_loss": 1.295286774635315, "eval_runtime": 4.1989, "eval_samples_per_second": 66.922, "eval_steps_per_second": 0.714, "step": 581 }, { "epoch": 180.0, "eval_accuracy": 0.5587188612099644, "eval_loss": 1.2971975803375244, "eval_runtime": 4.7188, "eval_samples_per_second": 59.549, "eval_steps_per_second": 0.636, "step": 585 }, { "epoch": 180.92, "eval_accuracy": 0.5658362989323843, "eval_loss": 1.2936004400253296, "eval_runtime": 4.813, "eval_samples_per_second": 58.383, "eval_steps_per_second": 0.623, "step": 588 }, { "epoch": 181.85, "eval_accuracy": 0.5658362989323843, "eval_loss": 1.2928047180175781, "eval_runtime": 4.1735, "eval_samples_per_second": 67.33, "eval_steps_per_second": 0.719, "step": 591 }, { "epoch": 182.77, "eval_accuracy": 0.5658362989323843, "eval_loss": 1.291295051574707, "eval_runtime": 4.4694, "eval_samples_per_second": 62.872, "eval_steps_per_second": 0.671, "step": 594 }, { "epoch": 184.0, "eval_accuracy": 0.5658362989323843, "eval_loss": 1.2824889421463013, "eval_runtime": 4.0765, "eval_samples_per_second": 68.932, "eval_steps_per_second": 0.736, "step": 598 }, { "epoch": 184.62, "grad_norm": 29892.71484375, "learning_rate": 1.111111111111111e-05, "loss": 0.6473, "step": 600 }, { "epoch": 184.92, "eval_accuracy": 0.5693950177935944, "eval_loss": 1.2735832929611206, "eval_runtime": 4.6704, "eval_samples_per_second": 60.166, "eval_steps_per_second": 0.642, "step": 601 }, { "epoch": 185.85, "eval_accuracy": 0.5693950177935944, "eval_loss": 1.2714898586273193, "eval_runtime": 4.6432, "eval_samples_per_second": 60.519, "eval_steps_per_second": 0.646, "step": 604 }, { "epoch": 186.77, "eval_accuracy": 0.5693950177935944, "eval_loss": 1.2703534364700317, "eval_runtime": 4.1853, "eval_samples_per_second": 67.139, "eval_steps_per_second": 0.717, "step": 607 }, { "epoch": 188.0, "eval_accuracy": 0.5693950177935944, "eval_loss": 1.2716755867004395, "eval_runtime": 4.1775, "eval_samples_per_second": 67.265, "eval_steps_per_second": 0.718, "step": 611 }, { "epoch": 188.92, "eval_accuracy": 0.5658362989323843, "eval_loss": 1.2724348306655884, "eval_runtime": 4.9312, "eval_samples_per_second": 56.984, "eval_steps_per_second": 0.608, "step": 614 }, { "epoch": 189.85, "eval_accuracy": 0.5658362989323843, "eval_loss": 1.2763242721557617, "eval_runtime": 3.5712, "eval_samples_per_second": 78.685, "eval_steps_per_second": 0.84, "step": 617 }, { "epoch": 190.77, "eval_accuracy": 0.5658362989323843, "eval_loss": 1.2811599969863892, "eval_runtime": 4.2324, "eval_samples_per_second": 66.393, "eval_steps_per_second": 0.709, "step": 620 }, { "epoch": 192.0, "eval_accuracy": 0.5658362989323843, "eval_loss": 1.2791301012039185, "eval_runtime": 4.4625, "eval_samples_per_second": 62.97, "eval_steps_per_second": 0.672, "step": 624 }, { "epoch": 192.92, "eval_accuracy": 0.5693950177935944, "eval_loss": 1.2697654962539673, "eval_runtime": 4.2766, "eval_samples_per_second": 65.707, "eval_steps_per_second": 0.701, "step": 627 }, { "epoch": 193.85, "eval_accuracy": 0.5693950177935944, "eval_loss": 1.269476294517517, "eval_runtime": 4.2862, "eval_samples_per_second": 65.56, "eval_steps_per_second": 0.7, "step": 630 }, { "epoch": 194.77, "eval_accuracy": 0.5693950177935944, "eval_loss": 1.2703962326049805, "eval_runtime": 4.2135, "eval_samples_per_second": 66.69, "eval_steps_per_second": 0.712, "step": 633 }, { "epoch": 196.0, "eval_accuracy": 0.5658362989323843, "eval_loss": 1.2736749649047852, "eval_runtime": 4.1666, "eval_samples_per_second": 67.441, "eval_steps_per_second": 0.72, "step": 637 }, { "epoch": 196.92, "eval_accuracy": 0.5658362989323843, "eval_loss": 1.2782082557678223, "eval_runtime": 4.3682, "eval_samples_per_second": 64.329, "eval_steps_per_second": 0.687, "step": 640 }, { "epoch": 197.85, "eval_accuracy": 0.5622775800711743, "eval_loss": 1.2813825607299805, "eval_runtime": 5.6488, "eval_samples_per_second": 49.745, "eval_steps_per_second": 0.531, "step": 643 }, { "epoch": 198.77, "eval_accuracy": 0.5622775800711743, "eval_loss": 1.2819089889526367, "eval_runtime": 5.1916, "eval_samples_per_second": 54.126, "eval_steps_per_second": 0.578, "step": 646 }, { "epoch": 200.0, "eval_accuracy": 0.5658362989323843, "eval_loss": 1.274595022201538, "eval_runtime": 4.4378, "eval_samples_per_second": 63.32, "eval_steps_per_second": 0.676, "step": 650 }, { "epoch": 200.92, "eval_accuracy": 0.5658362989323843, "eval_loss": 1.2694467306137085, "eval_runtime": 4.797, "eval_samples_per_second": 58.579, "eval_steps_per_second": 0.625, "step": 653 }, { "epoch": 201.85, "eval_accuracy": 0.5765124555160143, "eval_loss": 1.262547254562378, "eval_runtime": 4.6991, "eval_samples_per_second": 59.798, "eval_steps_per_second": 0.638, "step": 656 }, { "epoch": 202.77, "eval_accuracy": 0.5800711743772242, "eval_loss": 1.2575123310089111, "eval_runtime": 4.9663, "eval_samples_per_second": 56.582, "eval_steps_per_second": 0.604, "step": 659 }, { "epoch": 204.0, "eval_accuracy": 0.5800711743772242, "eval_loss": 1.2548755407333374, "eval_runtime": 5.2012, "eval_samples_per_second": 54.026, "eval_steps_per_second": 0.577, "step": 663 }, { "epoch": 204.92, "eval_accuracy": 0.5729537366548043, "eval_loss": 1.2623133659362793, "eval_runtime": 4.5347, "eval_samples_per_second": 61.967, "eval_steps_per_second": 0.662, "step": 666 }, { "epoch": 205.85, "eval_accuracy": 0.5658362989323843, "eval_loss": 1.2665455341339111, "eval_runtime": 3.1603, "eval_samples_per_second": 88.917, "eval_steps_per_second": 0.949, "step": 669 }, { "epoch": 206.77, "eval_accuracy": 0.5658362989323843, "eval_loss": 1.2684026956558228, "eval_runtime": 4.2009, "eval_samples_per_second": 66.89, "eval_steps_per_second": 0.714, "step": 672 }, { "epoch": 208.0, "eval_accuracy": 0.5622775800711743, "eval_loss": 1.277047038078308, "eval_runtime": 4.3489, "eval_samples_per_second": 64.613, "eval_steps_per_second": 0.69, "step": 676 }, { "epoch": 208.92, "eval_accuracy": 0.5622775800711743, "eval_loss": 1.2807551622390747, "eval_runtime": 3.8563, "eval_samples_per_second": 72.867, "eval_steps_per_second": 0.778, "step": 679 }, { "epoch": 209.85, "eval_accuracy": 0.5729537366548043, "eval_loss": 1.2761532068252563, "eval_runtime": 4.7161, "eval_samples_per_second": 59.583, "eval_steps_per_second": 0.636, "step": 682 }, { "epoch": 210.77, "eval_accuracy": 0.5729537366548043, "eval_loss": 1.2759194374084473, "eval_runtime": 5.0376, "eval_samples_per_second": 55.781, "eval_steps_per_second": 0.596, "step": 685 }, { "epoch": 212.0, "eval_accuracy": 0.5729537366548043, "eval_loss": 1.2752187252044678, "eval_runtime": 4.5842, "eval_samples_per_second": 61.297, "eval_steps_per_second": 0.654, "step": 689 }, { "epoch": 212.92, "eval_accuracy": 0.5729537366548043, "eval_loss": 1.275394082069397, "eval_runtime": 4.2209, "eval_samples_per_second": 66.573, "eval_steps_per_second": 0.711, "step": 692 }, { "epoch": 213.85, "eval_accuracy": 0.5765124555160143, "eval_loss": 1.272161602973938, "eval_runtime": 4.7348, "eval_samples_per_second": 59.347, "eval_steps_per_second": 0.634, "step": 695 }, { "epoch": 214.77, "eval_accuracy": 0.5765124555160143, "eval_loss": 1.273858904838562, "eval_runtime": 4.0254, "eval_samples_per_second": 69.808, "eval_steps_per_second": 0.745, "step": 698 }, { "epoch": 215.38, "grad_norm": 28098.056640625, "learning_rate": 7.4074074074074075e-06, "loss": 0.613, "step": 700 }, { "epoch": 216.0, "eval_accuracy": 0.5765124555160143, "eval_loss": 1.2782981395721436, "eval_runtime": 4.6723, "eval_samples_per_second": 60.142, "eval_steps_per_second": 0.642, "step": 702 }, { "epoch": 216.92, "eval_accuracy": 0.5765124555160143, "eval_loss": 1.2774933576583862, "eval_runtime": 4.576, "eval_samples_per_second": 61.407, "eval_steps_per_second": 0.656, "step": 705 }, { "epoch": 217.85, "eval_accuracy": 0.5765124555160143, "eval_loss": 1.2740654945373535, "eval_runtime": 4.8253, "eval_samples_per_second": 58.234, "eval_steps_per_second": 0.622, "step": 708 }, { "epoch": 218.77, "eval_accuracy": 0.5765124555160143, "eval_loss": 1.2705509662628174, "eval_runtime": 4.386, "eval_samples_per_second": 64.067, "eval_steps_per_second": 0.684, "step": 711 }, { "epoch": 220.0, "eval_accuracy": 0.5765124555160143, "eval_loss": 1.2627956867218018, "eval_runtime": 4.2817, "eval_samples_per_second": 65.628, "eval_steps_per_second": 0.701, "step": 715 }, { "epoch": 220.92, "eval_accuracy": 0.5800711743772242, "eval_loss": 1.2580970525741577, "eval_runtime": 3.9386, "eval_samples_per_second": 71.344, "eval_steps_per_second": 0.762, "step": 718 }, { "epoch": 221.85, "eval_accuracy": 0.5765124555160143, "eval_loss": 1.2567566633224487, "eval_runtime": 4.3353, "eval_samples_per_second": 64.817, "eval_steps_per_second": 0.692, "step": 721 }, { "epoch": 222.77, "eval_accuracy": 0.5729537366548043, "eval_loss": 1.2558982372283936, "eval_runtime": 3.7135, "eval_samples_per_second": 75.67, "eval_steps_per_second": 0.808, "step": 724 }, { "epoch": 224.0, "eval_accuracy": 0.5765124555160143, "eval_loss": 1.2502700090408325, "eval_runtime": 5.0636, "eval_samples_per_second": 55.494, "eval_steps_per_second": 0.592, "step": 728 }, { "epoch": 224.92, "eval_accuracy": 0.5765124555160143, "eval_loss": 1.2497973442077637, "eval_runtime": 4.7669, "eval_samples_per_second": 58.948, "eval_steps_per_second": 0.629, "step": 731 }, { "epoch": 225.85, "eval_accuracy": 0.5765124555160143, "eval_loss": 1.2500195503234863, "eval_runtime": 3.9522, "eval_samples_per_second": 71.099, "eval_steps_per_second": 0.759, "step": 734 }, { "epoch": 226.77, "eval_accuracy": 0.5765124555160143, "eval_loss": 1.2490234375, "eval_runtime": 4.1869, "eval_samples_per_second": 67.114, "eval_steps_per_second": 0.717, "step": 737 }, { "epoch": 228.0, "eval_accuracy": 0.5765124555160143, "eval_loss": 1.2531741857528687, "eval_runtime": 3.9865, "eval_samples_per_second": 70.489, "eval_steps_per_second": 0.753, "step": 741 }, { "epoch": 228.92, "eval_accuracy": 0.5765124555160143, "eval_loss": 1.2572293281555176, "eval_runtime": 5.2241, "eval_samples_per_second": 53.789, "eval_steps_per_second": 0.574, "step": 744 }, { "epoch": 229.85, "eval_accuracy": 0.5765124555160143, "eval_loss": 1.2598803043365479, "eval_runtime": 4.1402, "eval_samples_per_second": 67.87, "eval_steps_per_second": 0.725, "step": 747 }, { "epoch": 230.77, "eval_accuracy": 0.5729537366548043, "eval_loss": 1.2600898742675781, "eval_runtime": 3.9785, "eval_samples_per_second": 70.63, "eval_steps_per_second": 0.754, "step": 750 }, { "epoch": 232.0, "eval_accuracy": 0.5729537366548043, "eval_loss": 1.2625129222869873, "eval_runtime": 4.1458, "eval_samples_per_second": 67.779, "eval_steps_per_second": 0.724, "step": 754 }, { "epoch": 232.92, "eval_accuracy": 0.5765124555160143, "eval_loss": 1.2635974884033203, "eval_runtime": 4.5032, "eval_samples_per_second": 62.401, "eval_steps_per_second": 0.666, "step": 757 }, { "epoch": 233.85, "eval_accuracy": 0.5765124555160143, "eval_loss": 1.2629433870315552, "eval_runtime": 4.1399, "eval_samples_per_second": 67.876, "eval_steps_per_second": 0.725, "step": 760 }, { "epoch": 234.77, "eval_accuracy": 0.5765124555160143, "eval_loss": 1.2600425481796265, "eval_runtime": 4.2407, "eval_samples_per_second": 66.263, "eval_steps_per_second": 0.707, "step": 763 }, { "epoch": 236.0, "eval_accuracy": 0.5800711743772242, "eval_loss": 1.2558783292770386, "eval_runtime": 4.1208, "eval_samples_per_second": 68.19, "eval_steps_per_second": 0.728, "step": 767 }, { "epoch": 236.92, "eval_accuracy": 0.5800711743772242, "eval_loss": 1.2534478902816772, "eval_runtime": 3.8139, "eval_samples_per_second": 73.678, "eval_steps_per_second": 0.787, "step": 770 }, { "epoch": 237.85, "eval_accuracy": 0.5836298932384342, "eval_loss": 1.2513927221298218, "eval_runtime": 4.6813, "eval_samples_per_second": 60.026, "eval_steps_per_second": 0.641, "step": 773 }, { "epoch": 238.77, "eval_accuracy": 0.5836298932384342, "eval_loss": 1.2508091926574707, "eval_runtime": 4.2671, "eval_samples_per_second": 65.852, "eval_steps_per_second": 0.703, "step": 776 }, { "epoch": 240.0, "eval_accuracy": 0.5836298932384342, "eval_loss": 1.2487518787384033, "eval_runtime": 3.7642, "eval_samples_per_second": 74.651, "eval_steps_per_second": 0.797, "step": 780 }, { "epoch": 240.92, "eval_accuracy": 0.5836298932384342, "eval_loss": 1.2483351230621338, "eval_runtime": 4.8941, "eval_samples_per_second": 57.416, "eval_steps_per_second": 0.613, "step": 783 }, { "epoch": 241.85, "eval_accuracy": 0.5836298932384342, "eval_loss": 1.2500139474868774, "eval_runtime": 4.274, "eval_samples_per_second": 65.746, "eval_steps_per_second": 0.702, "step": 786 }, { "epoch": 242.77, "eval_accuracy": 0.5800711743772242, "eval_loss": 1.2503968477249146, "eval_runtime": 4.4982, "eval_samples_per_second": 62.469, "eval_steps_per_second": 0.667, "step": 789 }, { "epoch": 244.0, "eval_accuracy": 0.5800711743772242, "eval_loss": 1.2521419525146484, "eval_runtime": 4.0413, "eval_samples_per_second": 69.532, "eval_steps_per_second": 0.742, "step": 793 }, { "epoch": 244.92, "eval_accuracy": 0.5800711743772242, "eval_loss": 1.2532862424850464, "eval_runtime": 4.1262, "eval_samples_per_second": 68.101, "eval_steps_per_second": 0.727, "step": 796 }, { "epoch": 245.85, "eval_accuracy": 0.5800711743772242, "eval_loss": 1.251287817955017, "eval_runtime": 3.9321, "eval_samples_per_second": 71.463, "eval_steps_per_second": 0.763, "step": 799 }, { "epoch": 246.15, "grad_norm": 63046.29296875, "learning_rate": 3.7037037037037037e-06, "loss": 0.5946, "step": 800 }, { "epoch": 246.77, "eval_accuracy": 0.5800711743772242, "eval_loss": 1.2513457536697388, "eval_runtime": 4.1155, "eval_samples_per_second": 68.279, "eval_steps_per_second": 0.729, "step": 802 }, { "epoch": 248.0, "eval_accuracy": 0.5800711743772242, "eval_loss": 1.2507133483886719, "eval_runtime": 4.3807, "eval_samples_per_second": 64.145, "eval_steps_per_second": 0.685, "step": 806 }, { "epoch": 248.92, "eval_accuracy": 0.5836298932384342, "eval_loss": 1.2491704225540161, "eval_runtime": 4.0611, "eval_samples_per_second": 69.193, "eval_steps_per_second": 0.739, "step": 809 }, { "epoch": 249.85, "eval_accuracy": 0.5800711743772242, "eval_loss": 1.2499818801879883, "eval_runtime": 3.9673, "eval_samples_per_second": 70.828, "eval_steps_per_second": 0.756, "step": 812 }, { "epoch": 250.77, "eval_accuracy": 0.5800711743772242, "eval_loss": 1.2505466938018799, "eval_runtime": 4.5211, "eval_samples_per_second": 62.153, "eval_steps_per_second": 0.664, "step": 815 }, { "epoch": 252.0, "eval_accuracy": 0.5800711743772242, "eval_loss": 1.2519145011901855, "eval_runtime": 5.2859, "eval_samples_per_second": 53.16, "eval_steps_per_second": 0.568, "step": 819 }, { "epoch": 252.92, "eval_accuracy": 0.5800711743772242, "eval_loss": 1.253113865852356, "eval_runtime": 4.0658, "eval_samples_per_second": 69.113, "eval_steps_per_second": 0.738, "step": 822 }, { "epoch": 253.85, "eval_accuracy": 0.5800711743772242, "eval_loss": 1.2538248300552368, "eval_runtime": 4.1084, "eval_samples_per_second": 68.396, "eval_steps_per_second": 0.73, "step": 825 }, { "epoch": 254.77, "eval_accuracy": 0.5800711743772242, "eval_loss": 1.2532281875610352, "eval_runtime": 4.0615, "eval_samples_per_second": 69.186, "eval_steps_per_second": 0.739, "step": 828 }, { "epoch": 256.0, "eval_accuracy": 0.5800711743772242, "eval_loss": 1.2527676820755005, "eval_runtime": 4.6892, "eval_samples_per_second": 59.925, "eval_steps_per_second": 0.64, "step": 832 }, { "epoch": 256.92, "eval_accuracy": 0.5800711743772242, "eval_loss": 1.252835988998413, "eval_runtime": 3.7759, "eval_samples_per_second": 74.42, "eval_steps_per_second": 0.795, "step": 835 }, { "epoch": 257.85, "eval_accuracy": 0.5836298932384342, "eval_loss": 1.2521347999572754, "eval_runtime": 3.5788, "eval_samples_per_second": 78.519, "eval_steps_per_second": 0.838, "step": 838 }, { "epoch": 258.77, "eval_accuracy": 0.5836298932384342, "eval_loss": 1.252551555633545, "eval_runtime": 3.9885, "eval_samples_per_second": 70.452, "eval_steps_per_second": 0.752, "step": 841 }, { "epoch": 260.0, "eval_accuracy": 0.5836298932384342, "eval_loss": 1.2527978420257568, "eval_runtime": 3.7855, "eval_samples_per_second": 74.231, "eval_steps_per_second": 0.792, "step": 845 }, { "epoch": 260.92, "eval_accuracy": 0.5836298932384342, "eval_loss": 1.2529038190841675, "eval_runtime": 3.8445, "eval_samples_per_second": 73.091, "eval_steps_per_second": 0.78, "step": 848 }, { "epoch": 261.85, "eval_accuracy": 0.5836298932384342, "eval_loss": 1.2528387308120728, "eval_runtime": 3.7891, "eval_samples_per_second": 74.16, "eval_steps_per_second": 0.792, "step": 851 }, { "epoch": 262.77, "eval_accuracy": 0.5836298932384342, "eval_loss": 1.2516640424728394, "eval_runtime": 4.2868, "eval_samples_per_second": 65.55, "eval_steps_per_second": 0.7, "step": 854 }, { "epoch": 264.0, "eval_accuracy": 0.5836298932384342, "eval_loss": 1.251232385635376, "eval_runtime": 3.7886, "eval_samples_per_second": 74.169, "eval_steps_per_second": 0.792, "step": 858 }, { "epoch": 264.92, "eval_accuracy": 0.5836298932384342, "eval_loss": 1.251160979270935, "eval_runtime": 4.4131, "eval_samples_per_second": 63.674, "eval_steps_per_second": 0.68, "step": 861 }, { "epoch": 265.85, "eval_accuracy": 0.5836298932384342, "eval_loss": 1.2503511905670166, "eval_runtime": 3.8431, "eval_samples_per_second": 73.118, "eval_steps_per_second": 0.781, "step": 864 }, { "epoch": 266.77, "eval_accuracy": 0.5836298932384342, "eval_loss": 1.2499034404754639, "eval_runtime": 3.7109, "eval_samples_per_second": 75.723, "eval_steps_per_second": 0.808, "step": 867 }, { "epoch": 268.0, "eval_accuracy": 0.5836298932384342, "eval_loss": 1.2496285438537598, "eval_runtime": 4.0777, "eval_samples_per_second": 68.912, "eval_steps_per_second": 0.736, "step": 871 }, { "epoch": 268.92, "eval_accuracy": 0.5836298932384342, "eval_loss": 1.2497419118881226, "eval_runtime": 4.4854, "eval_samples_per_second": 62.648, "eval_steps_per_second": 0.669, "step": 874 }, { "epoch": 269.85, "eval_accuracy": 0.5836298932384342, "eval_loss": 1.2500321865081787, "eval_runtime": 5.2487, "eval_samples_per_second": 53.537, "eval_steps_per_second": 0.572, "step": 877 }, { "epoch": 270.77, "eval_accuracy": 0.5836298932384342, "eval_loss": 1.250011682510376, "eval_runtime": 4.3951, "eval_samples_per_second": 63.935, "eval_steps_per_second": 0.683, "step": 880 }, { "epoch": 272.0, "eval_accuracy": 0.5836298932384342, "eval_loss": 1.2498865127563477, "eval_runtime": 4.1755, "eval_samples_per_second": 67.297, "eval_steps_per_second": 0.718, "step": 884 }, { "epoch": 272.92, "eval_accuracy": 0.5836298932384342, "eval_loss": 1.2500803470611572, "eval_runtime": 4.6562, "eval_samples_per_second": 60.349, "eval_steps_per_second": 0.644, "step": 887 }, { "epoch": 273.85, "eval_accuracy": 0.5836298932384342, "eval_loss": 1.2503583431243896, "eval_runtime": 4.5464, "eval_samples_per_second": 61.807, "eval_steps_per_second": 0.66, "step": 890 }, { "epoch": 274.77, "eval_accuracy": 0.5836298932384342, "eval_loss": 1.2506159543991089, "eval_runtime": 3.9624, "eval_samples_per_second": 70.917, "eval_steps_per_second": 0.757, "step": 893 }, { "epoch": 276.0, "eval_accuracy": 0.5836298932384342, "eval_loss": 1.2505924701690674, "eval_runtime": 4.0033, "eval_samples_per_second": 70.192, "eval_steps_per_second": 0.749, "step": 897 }, { "epoch": 276.92, "grad_norm": 30700.47265625, "learning_rate": 0.0, "loss": 0.588, "step": 900 }, { "epoch": 276.92, "eval_accuracy": 0.5836298932384342, "eval_loss": 1.2505559921264648, "eval_runtime": 4.7936, "eval_samples_per_second": 58.62, "eval_steps_per_second": 0.626, "step": 900 }, { "epoch": 276.92, "step": 900, "total_flos": 3.755576946691584e+18, "train_loss": 0.8810926691691081, "train_runtime": 3759.0782, "train_samples_per_second": 123.541, "train_steps_per_second": 0.239 } ], "logging_steps": 100, "max_steps": 900, "num_input_tokens_seen": 0, "num_train_epochs": 300, "save_steps": 500, "total_flos": 3.755576946691584e+18, "train_batch_size": 128, "trial_name": null, "trial_params": null }