{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.999983677999577, "global_step": 612660, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "learning_rate": 9.951082166291254e-05, "loss": 4.9783, "step": 3000 }, { "epoch": 0.2, "learning_rate": 9.902115365782e-05, "loss": 3.1296, "step": 6000 }, { "epoch": 0.29, "learning_rate": 9.853148565272745e-05, "loss": 2.7383, "step": 9000 }, { "epoch": 0.39, "learning_rate": 9.804181764763491e-05, "loss": 2.5447, "step": 12000 }, { "epoch": 0.49, "learning_rate": 9.755231286521073e-05, "loss": 2.4175, "step": 15000 }, { "epoch": 0.59, "learning_rate": 9.706280808278654e-05, "loss": 2.3285, "step": 18000 }, { "epoch": 0.69, "learning_rate": 9.657346652303072e-05, "loss": 2.2597, "step": 21000 }, { "epoch": 0.78, "learning_rate": 9.608396174060654e-05, "loss": 2.2072, "step": 24000 }, { "epoch": 0.88, "learning_rate": 9.559462018085072e-05, "loss": 2.1601, "step": 27000 }, { "epoch": 0.98, "learning_rate": 9.510511539842654e-05, "loss": 2.125, "step": 30000 }, { "epoch": 1.0, "eval_loss": 2.072047233581543, "eval_runtime": 19.2095, "eval_samples_per_second": 1536.842, "eval_steps_per_second": 9.631, "step": 30633 }, { "epoch": 1.08, "learning_rate": 9.461577383867072e-05, "loss": 2.0771, "step": 33000 }, { "epoch": 1.18, "learning_rate": 9.412626905624655e-05, "loss": 2.0514, "step": 36000 }, { "epoch": 1.27, "learning_rate": 9.363692749649071e-05, "loss": 2.0297, "step": 39000 }, { "epoch": 1.37, "learning_rate": 9.314742271406653e-05, "loss": 2.0123, "step": 42000 }, { "epoch": 1.47, "learning_rate": 9.265791793164235e-05, "loss": 1.9955, "step": 45000 }, { "epoch": 1.57, "learning_rate": 9.216857637188653e-05, "loss": 1.9803, "step": 48000 }, { "epoch": 1.66, "learning_rate": 9.167907158946235e-05, "loss": 1.9667, "step": 51000 }, { "epoch": 1.76, "learning_rate": 9.118973002970654e-05, "loss": 1.9505, "step": 54000 }, { "epoch": 1.86, "learning_rate": 9.070022524728234e-05, "loss": 1.9381, "step": 57000 }, { "epoch": 1.96, "learning_rate": 9.021088368752652e-05, "loss": 1.9288, "step": 60000 }, { "epoch": 2.0, "eval_loss": 1.9032506942749023, "eval_runtime": 19.1781, "eval_samples_per_second": 1539.357, "eval_steps_per_second": 9.646, "step": 61266 }, { "epoch": 2.06, "learning_rate": 8.972137890510235e-05, "loss": 1.9011, "step": 63000 }, { "epoch": 2.15, "learning_rate": 8.923203734534653e-05, "loss": 1.8834, "step": 66000 }, { "epoch": 2.25, "learning_rate": 8.874253256292235e-05, "loss": 1.8807, "step": 69000 }, { "epoch": 2.35, "learning_rate": 8.825319100316653e-05, "loss": 1.8737, "step": 72000 }, { "epoch": 2.45, "learning_rate": 8.776368622074233e-05, "loss": 1.8686, "step": 75000 }, { "epoch": 2.55, "learning_rate": 8.727434466098652e-05, "loss": 1.861, "step": 78000 }, { "epoch": 2.64, "learning_rate": 8.678483987856234e-05, "loss": 1.8549, "step": 81000 }, { "epoch": 2.74, "learning_rate": 8.629549831880652e-05, "loss": 1.8503, "step": 84000 }, { "epoch": 2.84, "learning_rate": 8.580599353638234e-05, "loss": 1.8446, "step": 87000 }, { "epoch": 2.94, "learning_rate": 8.531665197662652e-05, "loss": 1.8387, "step": 90000 }, { "epoch": 3.0, "eval_loss": 1.833019733428955, "eval_runtime": 19.0959, "eval_samples_per_second": 1545.988, "eval_steps_per_second": 9.688, "step": 91899 }, { "epoch": 3.04, "learning_rate": 8.482714719420234e-05, "loss": 1.8222, "step": 93000 }, { "epoch": 3.13, "learning_rate": 8.433764241177815e-05, "loss": 1.8037, "step": 96000 }, { "epoch": 3.23, "learning_rate": 8.384830085202233e-05, "loss": 1.8017, "step": 99000 }, { "epoch": 3.33, "learning_rate": 8.335879606959815e-05, "loss": 1.8018, "step": 102000 }, { "epoch": 3.43, "learning_rate": 8.286945450984233e-05, "loss": 1.7984, "step": 105000 }, { "epoch": 3.53, "learning_rate": 8.237994972741815e-05, "loss": 1.796, "step": 108000 }, { "epoch": 3.62, "learning_rate": 8.189060816766233e-05, "loss": 1.7942, "step": 111000 }, { "epoch": 3.72, "learning_rate": 8.140110338523814e-05, "loss": 1.7905, "step": 114000 }, { "epoch": 3.82, "learning_rate": 8.091176182548232e-05, "loss": 1.7885, "step": 117000 }, { "epoch": 3.92, "learning_rate": 8.042225704305814e-05, "loss": 1.7832, "step": 120000 }, { "epoch": 4.0, "eval_loss": 1.7864413261413574, "eval_runtime": 19.3546, "eval_samples_per_second": 1525.321, "eval_steps_per_second": 9.558, "step": 122532 }, { "epoch": 4.02, "learning_rate": 7.993291548330233e-05, "loss": 1.7754, "step": 123000 }, { "epoch": 4.11, "learning_rate": 7.944341070087814e-05, "loss": 1.7507, "step": 126000 }, { "epoch": 4.21, "learning_rate": 7.895406914112233e-05, "loss": 1.7526, "step": 129000 }, { "epoch": 4.31, "learning_rate": 7.846456435869815e-05, "loss": 1.7541, "step": 132000 }, { "epoch": 4.41, "learning_rate": 7.797522279894231e-05, "loss": 1.7537, "step": 135000 }, { "epoch": 4.5, "learning_rate": 7.748571801651814e-05, "loss": 1.7541, "step": 138000 }, { "epoch": 4.6, "learning_rate": 7.699621323409395e-05, "loss": 1.7505, "step": 141000 }, { "epoch": 4.7, "learning_rate": 7.650687167433813e-05, "loss": 1.7475, "step": 144000 }, { "epoch": 4.8, "learning_rate": 7.601736689191396e-05, "loss": 1.7477, "step": 147000 }, { "epoch": 4.9, "learning_rate": 7.552802533215814e-05, "loss": 1.7461, "step": 150000 }, { "epoch": 4.99, "learning_rate": 7.503852054973394e-05, "loss": 1.7445, "step": 153000 }, { "epoch": 5.0, "eval_loss": 1.7591967582702637, "eval_runtime": 19.1359, "eval_samples_per_second": 1542.756, "eval_steps_per_second": 9.668, "step": 153165 }, { "epoch": 5.09, "learning_rate": 7.454917898997813e-05, "loss": 1.7143, "step": 156000 }, { "epoch": 5.19, "learning_rate": 7.405967420755395e-05, "loss": 1.7177, "step": 159000 }, { "epoch": 5.29, "learning_rate": 7.357033264779813e-05, "loss": 1.7188, "step": 162000 }, { "epoch": 5.39, "learning_rate": 7.308082786537395e-05, "loss": 1.7198, "step": 165000 }, { "epoch": 5.48, "learning_rate": 7.259148630561813e-05, "loss": 1.7202, "step": 168000 }, { "epoch": 5.58, "learning_rate": 7.210198152319395e-05, "loss": 1.7184, "step": 171000 }, { "epoch": 5.68, "learning_rate": 7.161247674076976e-05, "loss": 1.719, "step": 174000 }, { "epoch": 5.78, "learning_rate": 7.112313518101394e-05, "loss": 1.7173, "step": 177000 }, { "epoch": 5.88, "learning_rate": 7.063363039858976e-05, "loss": 1.7176, "step": 180000 }, { "epoch": 5.97, "learning_rate": 7.014428883883394e-05, "loss": 1.7152, "step": 183000 }, { "epoch": 6.0, "eval_loss": 1.740378975868225, "eval_runtime": 19.1537, "eval_samples_per_second": 1541.325, "eval_steps_per_second": 9.659, "step": 183798 }, { "epoch": 6.07, "learning_rate": 6.965478405640976e-05, "loss": 1.6926, "step": 186000 }, { "epoch": 6.17, "learning_rate": 6.916544249665395e-05, "loss": 1.6889, "step": 189000 }, { "epoch": 6.27, "learning_rate": 6.867593771422975e-05, "loss": 1.6923, "step": 192000 }, { "epoch": 6.37, "learning_rate": 6.818659615447393e-05, "loss": 1.693, "step": 195000 }, { "epoch": 6.46, "learning_rate": 6.769709137204976e-05, "loss": 1.694, "step": 198000 }, { "epoch": 6.56, "learning_rate": 6.720774981229393e-05, "loss": 1.6948, "step": 201000 }, { "epoch": 6.66, "learning_rate": 6.671824502986975e-05, "loss": 1.6944, "step": 204000 }, { "epoch": 6.76, "learning_rate": 6.622874024744557e-05, "loss": 1.6934, "step": 207000 }, { "epoch": 6.86, "learning_rate": 6.573939868768974e-05, "loss": 1.6926, "step": 210000 }, { "epoch": 6.95, "learning_rate": 6.524989390526556e-05, "loss": 1.6933, "step": 213000 }, { "epoch": 7.0, "eval_loss": 1.7208322286605835, "eval_runtime": 19.2921, "eval_samples_per_second": 1530.262, "eval_steps_per_second": 9.589, "step": 214431 }, { "epoch": 7.05, "learning_rate": 6.476055234550975e-05, "loss": 1.6773, "step": 216000 }, { "epoch": 7.15, "learning_rate": 6.427104756308556e-05, "loss": 1.6671, "step": 219000 }, { "epoch": 7.25, "learning_rate": 6.378170600332975e-05, "loss": 1.6695, "step": 222000 }, { "epoch": 7.35, "learning_rate": 6.329220122090557e-05, "loss": 1.6707, "step": 225000 }, { "epoch": 7.44, "learning_rate": 6.280285966114975e-05, "loss": 1.674, "step": 228000 }, { "epoch": 7.54, "learning_rate": 6.231335487872557e-05, "loss": 1.6726, "step": 231000 }, { "epoch": 7.64, "learning_rate": 6.182401331896974e-05, "loss": 1.6739, "step": 234000 }, { "epoch": 7.74, "learning_rate": 6.133450853654555e-05, "loss": 1.6755, "step": 237000 }, { "epoch": 7.83, "learning_rate": 6.084516697678973e-05, "loss": 1.6726, "step": 240000 }, { "epoch": 7.93, "learning_rate": 6.035566219436556e-05, "loss": 1.6743, "step": 243000 }, { "epoch": 8.0, "eval_loss": 1.7004761695861816, "eval_runtime": 19.351, "eval_samples_per_second": 1525.608, "eval_steps_per_second": 9.56, "step": 245064 }, { "epoch": 8.03, "learning_rate": 5.986632063460974e-05, "loss": 1.6642, "step": 246000 }, { "epoch": 8.13, "learning_rate": 5.9376815852185555e-05, "loss": 1.6475, "step": 249000 }, { "epoch": 8.23, "learning_rate": 5.888747429242973e-05, "loss": 1.6525, "step": 252000 }, { "epoch": 8.32, "learning_rate": 5.839796951000556e-05, "loss": 1.653, "step": 255000 }, { "epoch": 8.42, "learning_rate": 5.7908627950249736e-05, "loss": 1.6556, "step": 258000 }, { "epoch": 8.52, "learning_rate": 5.741912316782555e-05, "loss": 1.6556, "step": 261000 }, { "epoch": 8.62, "learning_rate": 5.692961838540136e-05, "loss": 1.6565, "step": 264000 }, { "epoch": 8.72, "learning_rate": 5.6440276825645545e-05, "loss": 1.6567, "step": 267000 }, { "epoch": 8.81, "learning_rate": 5.5950772043221364e-05, "loss": 1.6574, "step": 270000 }, { "epoch": 8.91, "learning_rate": 5.546143048346555e-05, "loss": 1.6561, "step": 273000 }, { "epoch": 9.0, "eval_loss": 1.6906808614730835, "eval_runtime": 19.2999, "eval_samples_per_second": 1529.642, "eval_steps_per_second": 9.586, "step": 275697 }, { "epoch": 9.01, "learning_rate": 5.497192570104136e-05, "loss": 1.6546, "step": 276000 }, { "epoch": 9.11, "learning_rate": 5.4482584141285545e-05, "loss": 1.6302, "step": 279000 }, { "epoch": 9.21, "learning_rate": 5.3993079358861364e-05, "loss": 1.6356, "step": 282000 }, { "epoch": 9.3, "learning_rate": 5.3503574576437175e-05, "loss": 1.6375, "step": 285000 }, { "epoch": 9.4, "learning_rate": 5.301423301668136e-05, "loss": 1.6399, "step": 288000 }, { "epoch": 9.5, "learning_rate": 5.252472823425718e-05, "loss": 1.6404, "step": 291000 }, { "epoch": 9.6, "learning_rate": 5.203538667450135e-05, "loss": 1.642, "step": 294000 }, { "epoch": 9.7, "learning_rate": 5.1545881892077175e-05, "loss": 1.642, "step": 297000 }, { "epoch": 9.79, "learning_rate": 5.105654033232135e-05, "loss": 1.6421, "step": 300000 }, { "epoch": 9.89, "learning_rate": 5.0567035549897165e-05, "loss": 1.6417, "step": 303000 }, { "epoch": 9.99, "learning_rate": 5.007753076747299e-05, "loss": 1.6431, "step": 306000 }, { "epoch": 10.0, "eval_loss": 1.690254807472229, "eval_runtime": 19.1786, "eval_samples_per_second": 1539.322, "eval_steps_per_second": 9.646, "step": 306330 }, { "epoch": 10.09, "learning_rate": 4.9588189207717175e-05, "loss": 1.6191, "step": 309000 }, { "epoch": 10.19, "learning_rate": 4.909868442529299e-05, "loss": 1.6215, "step": 312000 }, { "epoch": 10.28, "learning_rate": 4.8609342865537165e-05, "loss": 1.6247, "step": 315000 }, { "epoch": 10.38, "learning_rate": 4.8119838083112984e-05, "loss": 1.6244, "step": 318000 }, { "epoch": 10.48, "learning_rate": 4.76303333006888e-05, "loss": 1.6261, "step": 321000 }, { "epoch": 10.58, "learning_rate": 4.714099174093298e-05, "loss": 1.6288, "step": 324000 }, { "epoch": 10.67, "learning_rate": 4.66514869585088e-05, "loss": 1.6289, "step": 327000 }, { "epoch": 10.77, "learning_rate": 4.6162145398752984e-05, "loss": 1.6295, "step": 330000 }, { "epoch": 10.87, "learning_rate": 4.5672640616328796e-05, "loss": 1.6295, "step": 333000 }, { "epoch": 10.97, "learning_rate": 4.5183135833904614e-05, "loss": 1.6282, "step": 336000 }, { "epoch": 11.0, "eval_loss": 1.6800603866577148, "eval_runtime": 19.1041, "eval_samples_per_second": 1545.321, "eval_steps_per_second": 9.684, "step": 336963 }, { "epoch": 11.07, "learning_rate": 4.46937942741488e-05, "loss": 1.6134, "step": 339000 }, { "epoch": 11.16, "learning_rate": 4.420428949172462e-05, "loss": 1.6072, "step": 342000 }, { "epoch": 11.26, "learning_rate": 4.371494793196879e-05, "loss": 1.6099, "step": 345000 }, { "epoch": 11.36, "learning_rate": 4.322544314954461e-05, "loss": 1.6137, "step": 348000 }, { "epoch": 11.46, "learning_rate": 4.273610158978879e-05, "loss": 1.6136, "step": 351000 }, { "epoch": 11.56, "learning_rate": 4.224659680736461e-05, "loss": 1.6151, "step": 354000 }, { "epoch": 11.65, "learning_rate": 4.175725524760879e-05, "loss": 1.6166, "step": 357000 }, { "epoch": 11.75, "learning_rate": 4.126775046518461e-05, "loss": 1.6179, "step": 360000 }, { "epoch": 11.85, "learning_rate": 4.077840890542879e-05, "loss": 1.6174, "step": 363000 }, { "epoch": 11.95, "learning_rate": 4.0288904123004604e-05, "loss": 1.6173, "step": 366000 }, { "epoch": 12.0, "eval_loss": 1.6714136600494385, "eval_runtime": 19.2107, "eval_samples_per_second": 1536.747, "eval_steps_per_second": 9.63, "step": 367596 }, { "epoch": 12.05, "learning_rate": 3.979939934058042e-05, "loss": 1.6063, "step": 369000 }, { "epoch": 12.14, "learning_rate": 3.931005778082461e-05, "loss": 1.5969, "step": 372000 }, { "epoch": 12.24, "learning_rate": 3.882055299840042e-05, "loss": 1.5998, "step": 375000 }, { "epoch": 12.34, "learning_rate": 3.83312114386446e-05, "loss": 1.6011, "step": 378000 }, { "epoch": 12.44, "learning_rate": 3.784154343355205e-05, "loss": 1.6034, "step": 381000 }, { "epoch": 12.54, "learning_rate": 3.7352201873796235e-05, "loss": 1.6035, "step": 384000 }, { "epoch": 12.63, "learning_rate": 3.686269709137205e-05, "loss": 1.6054, "step": 387000 }, { "epoch": 12.73, "learning_rate": 3.637335553161623e-05, "loss": 1.6055, "step": 390000 }, { "epoch": 12.83, "learning_rate": 3.588385074919205e-05, "loss": 1.6057, "step": 393000 }, { "epoch": 12.93, "learning_rate": 3.539434596676787e-05, "loss": 1.6061, "step": 396000 }, { "epoch": 13.0, "eval_loss": 1.6634231805801392, "eval_runtime": 19.238, "eval_samples_per_second": 1534.564, "eval_steps_per_second": 9.616, "step": 398229 }, { "epoch": 13.03, "learning_rate": 3.4905004407012046e-05, "loss": 1.5995, "step": 399000 }, { "epoch": 13.12, "learning_rate": 3.4415499624587865e-05, "loss": 1.5849, "step": 402000 }, { "epoch": 13.22, "learning_rate": 3.392615806483205e-05, "loss": 1.5894, "step": 405000 }, { "epoch": 13.32, "learning_rate": 3.343665328240786e-05, "loss": 1.5914, "step": 408000 }, { "epoch": 13.42, "learning_rate": 3.294731172265204e-05, "loss": 1.5917, "step": 411000 }, { "epoch": 13.51, "learning_rate": 3.245780694022786e-05, "loss": 1.5933, "step": 414000 }, { "epoch": 13.61, "learning_rate": 3.196830215780368e-05, "loss": 1.5926, "step": 417000 }, { "epoch": 13.71, "learning_rate": 3.1478960598047855e-05, "loss": 1.5956, "step": 420000 }, { "epoch": 13.81, "learning_rate": 3.0989455815623674e-05, "loss": 1.5953, "step": 423000 }, { "epoch": 13.91, "learning_rate": 3.0500114255867855e-05, "loss": 1.5971, "step": 426000 }, { "epoch": 14.0, "eval_loss": 1.6542909145355225, "eval_runtime": 19.1669, "eval_samples_per_second": 1540.259, "eval_steps_per_second": 9.652, "step": 428862 }, { "epoch": 14.0, "learning_rate": 3.0010609473443674e-05, "loss": 1.5959, "step": 429000 }, { "epoch": 14.1, "learning_rate": 2.9521267913687855e-05, "loss": 1.5752, "step": 432000 }, { "epoch": 14.2, "learning_rate": 2.9031763131263674e-05, "loss": 1.5775, "step": 435000 }, { "epoch": 14.3, "learning_rate": 2.854225834883949e-05, "loss": 1.5814, "step": 438000 }, { "epoch": 14.4, "learning_rate": 2.8052916789083673e-05, "loss": 1.5814, "step": 441000 }, { "epoch": 14.49, "learning_rate": 2.7563412006659482e-05, "loss": 1.5837, "step": 444000 }, { "epoch": 14.59, "learning_rate": 2.7074070446903667e-05, "loss": 1.585, "step": 447000 }, { "epoch": 14.69, "learning_rate": 2.6584565664479482e-05, "loss": 1.5868, "step": 450000 }, { "epoch": 14.79, "learning_rate": 2.60950608820553e-05, "loss": 1.5854, "step": 453000 }, { "epoch": 14.89, "learning_rate": 2.5605719322299482e-05, "loss": 1.5864, "step": 456000 }, { "epoch": 14.98, "learning_rate": 2.51162145398753e-05, "loss": 1.5867, "step": 459000 }, { "epoch": 15.0, "eval_loss": 1.6488285064697266, "eval_runtime": 19.1177, "eval_samples_per_second": 1544.224, "eval_steps_per_second": 9.677, "step": 459495 }, { "epoch": 15.08, "learning_rate": 2.4626709757451116e-05, "loss": 1.5695, "step": 462000 }, { "epoch": 15.18, "learning_rate": 2.4137368197695297e-05, "loss": 1.5714, "step": 465000 }, { "epoch": 15.28, "learning_rate": 2.3647863415271113e-05, "loss": 1.5721, "step": 468000 }, { "epoch": 15.38, "learning_rate": 2.315835863284693e-05, "loss": 1.5729, "step": 471000 }, { "epoch": 15.47, "learning_rate": 2.2669017073091113e-05, "loss": 1.5728, "step": 474000 }, { "epoch": 15.57, "learning_rate": 2.2179512290666928e-05, "loss": 1.5739, "step": 477000 }, { "epoch": 15.67, "learning_rate": 2.1690007508242746e-05, "loss": 1.5756, "step": 480000 }, { "epoch": 15.77, "learning_rate": 2.1200665948486928e-05, "loss": 1.5774, "step": 483000 }, { "epoch": 15.87, "learning_rate": 2.0711161166062743e-05, "loss": 1.5772, "step": 486000 }, { "epoch": 15.96, "learning_rate": 2.022165638363856e-05, "loss": 1.5781, "step": 489000 }, { "epoch": 16.0, "eval_loss": 1.6446890830993652, "eval_runtime": 19.0262, "eval_samples_per_second": 1551.652, "eval_steps_per_second": 9.723, "step": 490128 }, { "epoch": 16.06, "learning_rate": 1.9732314823882743e-05, "loss": 1.5652, "step": 492000 }, { "epoch": 16.16, "learning_rate": 1.924281004145856e-05, "loss": 1.5612, "step": 495000 }, { "epoch": 16.26, "learning_rate": 1.8753305259034377e-05, "loss": 1.5634, "step": 498000 }, { "epoch": 16.35, "learning_rate": 1.8263800476610192e-05, "loss": 1.5648, "step": 501000 }, { "epoch": 16.45, "learning_rate": 1.7774458916854374e-05, "loss": 1.5664, "step": 504000 }, { "epoch": 16.55, "learning_rate": 1.728495413443019e-05, "loss": 1.5656, "step": 507000 }, { "epoch": 16.65, "learning_rate": 1.679561257467437e-05, "loss": 1.5676, "step": 510000 }, { "epoch": 16.75, "learning_rate": 1.630610779225019e-05, "loss": 1.566, "step": 513000 }, { "epoch": 16.84, "learning_rate": 1.5816603009826008e-05, "loss": 1.5691, "step": 516000 }, { "epoch": 16.94, "learning_rate": 1.5327098227401823e-05, "loss": 1.5684, "step": 519000 }, { "epoch": 17.0, "eval_loss": 1.6387931108474731, "eval_runtime": 19.1639, "eval_samples_per_second": 1540.501, "eval_steps_per_second": 9.654, "step": 520761 }, { "epoch": 17.04, "learning_rate": 1.4837756667646002e-05, "loss": 1.5616, "step": 522000 }, { "epoch": 17.14, "learning_rate": 1.434825188522182e-05, "loss": 1.5545, "step": 525000 }, { "epoch": 17.24, "learning_rate": 1.3858747102797636e-05, "loss": 1.5551, "step": 528000 }, { "epoch": 17.33, "learning_rate": 1.3369405543041818e-05, "loss": 1.5558, "step": 531000 }, { "epoch": 17.43, "learning_rate": 1.2879900760617636e-05, "loss": 1.5587, "step": 534000 }, { "epoch": 17.53, "learning_rate": 1.2390559200861816e-05, "loss": 1.5585, "step": 537000 }, { "epoch": 17.63, "learning_rate": 1.1901054418437633e-05, "loss": 1.5579, "step": 540000 }, { "epoch": 17.73, "learning_rate": 1.141154963601345e-05, "loss": 1.5586, "step": 543000 }, { "epoch": 17.82, "learning_rate": 1.0922208076257631e-05, "loss": 1.559, "step": 546000 }, { "epoch": 17.92, "learning_rate": 1.0432703293833448e-05, "loss": 1.5597, "step": 549000 }, { "epoch": 18.0, "eval_loss": 1.6415975093841553, "eval_runtime": 19.1825, "eval_samples_per_second": 1539.008, "eval_steps_per_second": 9.644, "step": 551394 }, { "epoch": 18.02, "learning_rate": 9.94336173407763e-06, "loss": 1.5579, "step": 552000 }, { "epoch": 18.12, "learning_rate": 9.453856951653447e-06, "loss": 1.5465, "step": 555000 }, { "epoch": 18.22, "learning_rate": 8.964352169229264e-06, "loss": 1.5491, "step": 558000 }, { "epoch": 18.31, "learning_rate": 8.475010609473443e-06, "loss": 1.5495, "step": 561000 }, { "epoch": 18.41, "learning_rate": 7.985505827049262e-06, "loss": 1.5498, "step": 564000 }, { "epoch": 18.51, "learning_rate": 7.496001044625078e-06, "loss": 1.5514, "step": 567000 }, { "epoch": 18.61, "learning_rate": 7.006659484869258e-06, "loss": 1.5508, "step": 570000 }, { "epoch": 18.71, "learning_rate": 6.517154702445076e-06, "loss": 1.5516, "step": 573000 }, { "epoch": 18.8, "learning_rate": 6.027649920020892e-06, "loss": 1.5508, "step": 576000 }, { "epoch": 18.9, "learning_rate": 5.538308360265074e-06, "loss": 1.5515, "step": 579000 }, { "epoch": 19.0, "learning_rate": 5.048803577840891e-06, "loss": 1.5521, "step": 582000 }, { "epoch": 19.0, "eval_loss": 1.6370400190353394, "eval_runtime": 19.1163, "eval_samples_per_second": 1544.337, "eval_steps_per_second": 9.678, "step": 582027 }, { "epoch": 19.1, "learning_rate": 4.559298795416708e-06, "loss": 1.5413, "step": 585000 }, { "epoch": 19.19, "learning_rate": 4.069957235660889e-06, "loss": 1.5435, "step": 588000 }, { "epoch": 19.29, "learning_rate": 3.580452453236706e-06, "loss": 1.5432, "step": 591000 }, { "epoch": 19.39, "learning_rate": 3.091110893480887e-06, "loss": 1.5437, "step": 594000 }, { "epoch": 19.49, "learning_rate": 2.6016061110567034e-06, "loss": 1.5431, "step": 597000 }, { "epoch": 19.59, "learning_rate": 2.1121013286325204e-06, "loss": 1.5431, "step": 600000 }, { "epoch": 19.68, "learning_rate": 1.6225965462083374e-06, "loss": 1.5441, "step": 603000 }, { "epoch": 19.78, "learning_rate": 1.1332549864525185e-06, "loss": 1.544, "step": 606000 }, { "epoch": 19.88, "learning_rate": 6.437502040283355e-07, "loss": 1.5469, "step": 609000 }, { "epoch": 19.98, "learning_rate": 1.5440864427251657e-07, "loss": 1.5438, "step": 612000 }, { "epoch": 20.0, "eval_loss": 1.636548638343811, "eval_runtime": 19.1335, "eval_samples_per_second": 1542.949, "eval_steps_per_second": 9.669, "step": 612660 }, { "epoch": 20.0, "step": 612660, "total_flos": 3.3229272051886326e+18, "train_loss": 1.7127611959194204, "train_runtime": 370998.644, "train_samples_per_second": 528.445, "train_steps_per_second": 1.651 } ], "max_steps": 612660, "num_train_epochs": 20, "total_flos": 3.3229272051886326e+18, "trial_name": null, "trial_params": null }