{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 200.0,
  "global_step": 4800,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 4.17,
      "learning_rate": 7.35e-05,
      "loss": 11.2783,
      "step": 100
    },
    {
      "epoch": 4.17,
      "eval_loss": 4.640867233276367,
      "eval_runtime": 5.3814,
      "eval_samples_per_second": 26.387,
      "eval_steps_per_second": 3.345,
      "eval_wer": 1.0,
      "step": 100
    },
    {
      "epoch": 8.33,
      "learning_rate": 7.343617021276595e-05,
      "loss": 3.5578,
      "step": 200
    },
    {
      "epoch": 8.33,
      "eval_loss": 3.164858341217041,
      "eval_runtime": 5.2371,
      "eval_samples_per_second": 27.114,
      "eval_steps_per_second": 3.437,
      "eval_wer": 1.0,
      "step": 200
    },
    {
      "epoch": 12.5,
      "learning_rate": 7.184042553191488e-05,
      "loss": 3.1279,
      "step": 300
    },
    {
      "epoch": 12.5,
      "eval_loss": 3.0335276126861572,
      "eval_runtime": 6.7986,
      "eval_samples_per_second": 20.887,
      "eval_steps_per_second": 2.648,
      "eval_wer": 1.0,
      "step": 300
    },
    {
      "epoch": 16.67,
      "learning_rate": 7.024468085106383e-05,
      "loss": 2.9944,
      "step": 400
    },
    {
      "epoch": 16.67,
      "eval_loss": 2.952620267868042,
      "eval_runtime": 7.0283,
      "eval_samples_per_second": 20.204,
      "eval_steps_per_second": 2.561,
      "eval_wer": 0.998256320836966,
      "step": 400
    },
    {
      "epoch": 20.83,
      "learning_rate": 6.864893617021276e-05,
      "loss": 2.9275,
      "step": 500
    },
    {
      "epoch": 20.83,
      "eval_loss": 2.929126501083374,
      "eval_runtime": 6.972,
      "eval_samples_per_second": 20.367,
      "eval_steps_per_second": 2.582,
      "eval_wer": 1.000871839581517,
      "step": 500
    },
    {
      "epoch": 25.0,
      "learning_rate": 6.70531914893617e-05,
      "loss": 2.8077,
      "step": 600
    },
    {
      "epoch": 25.0,
      "eval_loss": 2.563281536102295,
      "eval_runtime": 7.1264,
      "eval_samples_per_second": 19.926,
      "eval_steps_per_second": 2.526,
      "eval_wer": 0.9895379250217959,
      "step": 600
    },
    {
      "epoch": 29.17,
      "learning_rate": 6.545744680851063e-05,
      "loss": 2.4438,
      "step": 700
    },
    {
      "epoch": 29.17,
      "eval_loss": 1.904543399810791,
      "eval_runtime": 6.7668,
      "eval_samples_per_second": 20.985,
      "eval_steps_per_second": 2.66,
      "eval_wer": 0.95640802092415,
      "step": 700
    },
    {
      "epoch": 33.33,
      "learning_rate": 6.386170212765957e-05,
      "loss": 1.9659,
      "step": 800
    },
    {
      "epoch": 33.33,
      "eval_loss": 1.4114454984664917,
      "eval_runtime": 6.9861,
      "eval_samples_per_second": 20.326,
      "eval_steps_per_second": 2.577,
      "eval_wer": 0.7959895379250218,
      "step": 800
    },
    {
      "epoch": 37.5,
      "learning_rate": 6.226595744680851e-05,
      "loss": 1.7092,
      "step": 900
    },
    {
      "epoch": 37.5,
      "eval_loss": 1.2583694458007812,
      "eval_runtime": 6.989,
      "eval_samples_per_second": 20.318,
      "eval_steps_per_second": 2.575,
      "eval_wer": 0.7637314734088928,
      "step": 900
    },
    {
      "epoch": 41.67,
      "learning_rate": 6.067021276595744e-05,
      "loss": 1.517,
      "step": 1000
    },
    {
      "epoch": 41.67,
      "eval_loss": 1.2040127515792847,
      "eval_runtime": 5.4494,
      "eval_samples_per_second": 26.058,
      "eval_steps_per_second": 3.303,
      "eval_wer": 0.7506538796861377,
      "step": 1000
    },
    {
      "epoch": 45.83,
      "learning_rate": 5.907446808510638e-05,
      "loss": 1.3966,
      "step": 1100
    },
    {
      "epoch": 45.83,
      "eval_loss": 1.127307415008545,
      "eval_runtime": 6.9676,
      "eval_samples_per_second": 20.38,
      "eval_steps_per_second": 2.583,
      "eval_wer": 0.7462946817785527,
      "step": 1100
    },
    {
      "epoch": 50.0,
      "learning_rate": 5.747872340425531e-05,
      "loss": 1.3197,
      "step": 1200
    },
    {
      "epoch": 50.0,
      "eval_loss": 1.10543692111969,
      "eval_runtime": 6.7578,
      "eval_samples_per_second": 21.013,
      "eval_steps_per_second": 2.664,
      "eval_wer": 0.6957279860505667,
      "step": 1200
    },
    {
      "epoch": 54.17,
      "learning_rate": 5.588297872340425e-05,
      "loss": 1.2476,
      "step": 1300
    },
    {
      "epoch": 54.17,
      "eval_loss": 1.1034547090530396,
      "eval_runtime": 6.9962,
      "eval_samples_per_second": 20.297,
      "eval_steps_per_second": 2.573,
      "eval_wer": 0.7000871839581517,
      "step": 1300
    },
    {
      "epoch": 58.33,
      "learning_rate": 5.428723404255319e-05,
      "loss": 1.1796,
      "step": 1400
    },
    {
      "epoch": 58.33,
      "eval_loss": 1.0890159606933594,
      "eval_runtime": 6.8836,
      "eval_samples_per_second": 20.629,
      "eval_steps_per_second": 2.615,
      "eval_wer": 0.7096774193548387,
      "step": 1400
    },
    {
      "epoch": 62.5,
      "learning_rate": 5.269148936170212e-05,
      "loss": 1.1237,
      "step": 1500
    },
    {
      "epoch": 62.5,
      "eval_loss": 1.0882998704910278,
      "eval_runtime": 7.0292,
      "eval_samples_per_second": 20.202,
      "eval_steps_per_second": 2.561,
      "eval_wer": 0.7166521360069747,
      "step": 1500
    },
    {
      "epoch": 66.67,
      "learning_rate": 5.109574468085105e-05,
      "loss": 1.0777,
      "step": 1600
    },
    {
      "epoch": 66.67,
      "eval_loss": 1.106709599494934,
      "eval_runtime": 6.9652,
      "eval_samples_per_second": 20.387,
      "eval_steps_per_second": 2.584,
      "eval_wer": 0.7218831734960767,
      "step": 1600
    },
    {
      "epoch": 70.83,
      "learning_rate": 4.95e-05,
      "loss": 1.0051,
      "step": 1700
    },
    {
      "epoch": 70.83,
      "eval_loss": 1.111539363861084,
      "eval_runtime": 5.3056,
      "eval_samples_per_second": 26.764,
      "eval_steps_per_second": 3.393,
      "eval_wer": 0.7236268526591108,
      "step": 1700
    },
    {
      "epoch": 75.0,
      "learning_rate": 4.7904255319148935e-05,
      "loss": 0.9521,
      "step": 1800
    },
    {
      "epoch": 75.0,
      "eval_loss": 1.0866659879684448,
      "eval_runtime": 7.6166,
      "eval_samples_per_second": 18.644,
      "eval_steps_per_second": 2.363,
      "eval_wer": 0.7131647776809067,
      "step": 1800
    },
    {
      "epoch": 79.17,
      "learning_rate": 4.6308510638297865e-05,
      "loss": 0.9147,
      "step": 1900
    },
    {
      "epoch": 79.17,
      "eval_loss": 1.0851967334747314,
      "eval_runtime": 6.7698,
      "eval_samples_per_second": 20.975,
      "eval_steps_per_second": 2.659,
      "eval_wer": 0.7210113339145597,
      "step": 1900
    },
    {
      "epoch": 83.33,
      "learning_rate": 4.471276595744681e-05,
      "loss": 0.8798,
      "step": 2000
    },
    {
      "epoch": 83.33,
      "eval_loss": 1.1411497592926025,
      "eval_runtime": 6.711,
      "eval_samples_per_second": 21.159,
      "eval_steps_per_second": 2.682,
      "eval_wer": 0.7096774193548387,
      "step": 2000
    },
    {
      "epoch": 87.5,
      "learning_rate": 4.311702127659574e-05,
      "loss": 0.8317,
      "step": 2100
    },
    {
      "epoch": 87.5,
      "eval_loss": 1.1634019613265991,
      "eval_runtime": 6.8272,
      "eval_samples_per_second": 20.799,
      "eval_steps_per_second": 2.637,
      "eval_wer": 0.7018308631211857,
      "step": 2100
    },
    {
      "epoch": 91.67,
      "learning_rate": 4.152127659574468e-05,
      "loss": 0.7946,
      "step": 2200
    },
    {
      "epoch": 91.67,
      "eval_loss": 1.1620630025863647,
      "eval_runtime": 7.1289,
      "eval_samples_per_second": 19.919,
      "eval_steps_per_second": 2.525,
      "eval_wer": 0.7201394943330427,
      "step": 2200
    },
    {
      "epoch": 95.83,
      "learning_rate": 3.992553191489361e-05,
      "loss": 0.7594,
      "step": 2300
    },
    {
      "epoch": 95.83,
      "eval_loss": 1.1481679677963257,
      "eval_runtime": 7.0324,
      "eval_samples_per_second": 20.192,
      "eval_steps_per_second": 2.56,
      "eval_wer": 0.7035745422842197,
      "step": 2300
    },
    {
      "epoch": 100.0,
      "learning_rate": 3.834574468085106e-05,
      "loss": 0.729,
      "step": 2400
    },
    {
      "epoch": 100.0,
      "eval_loss": 1.1493021249771118,
      "eval_runtime": 6.9652,
      "eval_samples_per_second": 20.387,
      "eval_steps_per_second": 2.584,
      "eval_wer": 0.7061900610287707,
      "step": 2400
    },
    {
      "epoch": 104.17,
      "learning_rate": 3.675e-05,
      "loss": 0.7055,
      "step": 2500
    },
    {
      "epoch": 104.17,
      "eval_loss": 1.1725823879241943,
      "eval_runtime": 7.0084,
      "eval_samples_per_second": 20.261,
      "eval_steps_per_second": 2.568,
      "eval_wer": 0.6931124673060157,
      "step": 2500
    },
    {
      "epoch": 108.33,
      "learning_rate": 3.5154255319148936e-05,
      "loss": 0.6622,
      "step": 2600
    },
    {
      "epoch": 108.33,
      "eval_loss": 1.1937670707702637,
      "eval_runtime": 7.4493,
      "eval_samples_per_second": 19.062,
      "eval_steps_per_second": 2.416,
      "eval_wer": 0.7000871839581517,
      "step": 2600
    },
    {
      "epoch": 112.5,
      "learning_rate": 3.355851063829787e-05,
      "loss": 0.6583,
      "step": 2700
    },
    {
      "epoch": 112.5,
      "eval_loss": 1.1832083463668823,
      "eval_runtime": 6.9743,
      "eval_samples_per_second": 20.361,
      "eval_steps_per_second": 2.581,
      "eval_wer": 0.7149084568439407,
      "step": 2700
    },
    {
      "epoch": 116.67,
      "learning_rate": 3.1962765957446805e-05,
      "loss": 0.6299,
      "step": 2800
    },
    {
      "epoch": 116.67,
      "eval_loss": 1.1996266841888428,
      "eval_runtime": 7.2192,
      "eval_samples_per_second": 19.67,
      "eval_steps_per_second": 2.493,
      "eval_wer": 0.7175239755884917,
      "step": 2800
    },
    {
      "epoch": 120.83,
      "learning_rate": 3.036702127659574e-05,
      "loss": 0.5903,
      "step": 2900
    },
    {
      "epoch": 120.83,
      "eval_loss": 1.1986336708068848,
      "eval_runtime": 7.2606,
      "eval_samples_per_second": 19.558,
      "eval_steps_per_second": 2.479,
      "eval_wer": 0.7131647776809067,
      "step": 2900
    },
    {
      "epoch": 125.0,
      "learning_rate": 2.877127659574468e-05,
      "loss": 0.5816,
      "step": 3000
    },
    {
      "epoch": 125.0,
      "eval_loss": 1.1909323930740356,
      "eval_runtime": 6.9272,
      "eval_samples_per_second": 20.499,
      "eval_steps_per_second": 2.598,
      "eval_wer": 0.7009590235396687,
      "step": 3000
    },
    {
      "epoch": 129.17,
      "learning_rate": 2.7175531914893614e-05,
      "loss": 0.5583,
      "step": 3100
    },
    {
      "epoch": 129.17,
      "eval_loss": 1.207918405532837,
      "eval_runtime": 6.9368,
      "eval_samples_per_second": 20.471,
      "eval_steps_per_second": 2.595,
      "eval_wer": 0.6870095902353966,
      "step": 3100
    },
    {
      "epoch": 133.33,
      "learning_rate": 2.5579787234042552e-05,
      "loss": 0.5392,
      "step": 3200
    },
    {
      "epoch": 133.33,
      "eval_loss": 1.2108745574951172,
      "eval_runtime": 5.9814,
      "eval_samples_per_second": 23.74,
      "eval_steps_per_second": 3.009,
      "eval_wer": 0.7227550130775937,
      "step": 3200
    },
    {
      "epoch": 137.5,
      "learning_rate": 2.398404255319149e-05,
      "loss": 0.5412,
      "step": 3300
    },
    {
      "epoch": 137.5,
      "eval_loss": 1.235259771347046,
      "eval_runtime": 6.7958,
      "eval_samples_per_second": 20.895,
      "eval_steps_per_second": 2.649,
      "eval_wer": 0.7244986922406277,
      "step": 3300
    },
    {
      "epoch": 141.67,
      "learning_rate": 2.2388297872340424e-05,
      "loss": 0.5136,
      "step": 3400
    },
    {
      "epoch": 141.67,
      "eval_loss": 1.2390460968017578,
      "eval_runtime": 6.7486,
      "eval_samples_per_second": 21.042,
      "eval_steps_per_second": 2.667,
      "eval_wer": 0.7253705318221447,
      "step": 3400
    },
    {
      "epoch": 145.83,
      "learning_rate": 2.079255319148936e-05,
      "loss": 0.5007,
      "step": 3500
    },
    {
      "epoch": 145.83,
      "eval_loss": 1.227264165878296,
      "eval_runtime": 6.7331,
      "eval_samples_per_second": 21.09,
      "eval_steps_per_second": 2.673,
      "eval_wer": 0.7122929380993898,
      "step": 3500
    },
    {
      "epoch": 150.0,
      "learning_rate": 1.9196808510638296e-05,
      "loss": 0.4883,
      "step": 3600
    },
    {
      "epoch": 150.0,
      "eval_loss": 1.2772815227508545,
      "eval_runtime": 6.6223,
      "eval_samples_per_second": 21.443,
      "eval_steps_per_second": 2.718,
      "eval_wer": 0.7288578901482128,
      "step": 3600
    },
    {
      "epoch": 154.17,
      "learning_rate": 1.7601063829787233e-05,
      "loss": 0.4835,
      "step": 3700
    },
    {
      "epoch": 154.17,
      "eval_loss": 1.2677749395370483,
      "eval_runtime": 6.8418,
      "eval_samples_per_second": 20.755,
      "eval_steps_per_second": 2.631,
      "eval_wer": 0.7288578901482128,
      "step": 3700
    },
    {
      "epoch": 158.33,
      "learning_rate": 1.600531914893617e-05,
      "loss": 0.4568,
      "step": 3800
    },
    {
      "epoch": 158.33,
      "eval_loss": 1.2592484951019287,
      "eval_runtime": 6.8949,
      "eval_samples_per_second": 20.595,
      "eval_steps_per_second": 2.611,
      "eval_wer": 0.7349607672188317,
      "step": 3800
    },
    {
      "epoch": 162.5,
      "learning_rate": 1.4409574468085105e-05,
      "loss": 0.4525,
      "step": 3900
    },
    {
      "epoch": 162.5,
      "eval_loss": 1.270469069480896,
      "eval_runtime": 5.5893,
      "eval_samples_per_second": 25.406,
      "eval_steps_per_second": 3.22,
      "eval_wer": 0.7253705318221447,
      "step": 3900
    },
    {
      "epoch": 166.67,
      "learning_rate": 1.2813829787234041e-05,
      "loss": 0.4379,
      "step": 4000
    },
    {
      "epoch": 166.67,
      "eval_loss": 1.2717314958572388,
      "eval_runtime": 6.6834,
      "eval_samples_per_second": 21.247,
      "eval_steps_per_second": 2.693,
      "eval_wer": 0.7306015693112468,
      "step": 4000
    },
    {
      "epoch": 170.83,
      "learning_rate": 1.1218085106382979e-05,
      "loss": 0.4198,
      "step": 4100
    },
    {
      "epoch": 170.83,
      "eval_loss": 1.2617682218551636,
      "eval_runtime": 7.1514,
      "eval_samples_per_second": 19.856,
      "eval_steps_per_second": 2.517,
      "eval_wer": 0.7218831734960767,
      "step": 4100
    },
    {
      "epoch": 175.0,
      "learning_rate": 9.622340425531914e-06,
      "loss": 0.4216,
      "step": 4200
    },
    {
      "epoch": 175.0,
      "eval_loss": 1.2908614873886108,
      "eval_runtime": 7.5161,
      "eval_samples_per_second": 18.893,
      "eval_steps_per_second": 2.395,
      "eval_wer": 0.7157802964254577,
      "step": 4200
    },
    {
      "epoch": 179.17,
      "learning_rate": 8.02659574468085e-06,
      "loss": 0.4305,
      "step": 4300
    },
    {
      "epoch": 179.17,
      "eval_loss": 1.2808016538619995,
      "eval_runtime": 7.0468,
      "eval_samples_per_second": 20.151,
      "eval_steps_per_second": 2.554,
      "eval_wer": 0.7166521360069747,
      "step": 4300
    },
    {
      "epoch": 183.33,
      "learning_rate": 6.446808510638297e-06,
      "loss": 0.399,
      "step": 4400
    },
    {
      "epoch": 183.33,
      "eval_loss": 1.2750086784362793,
      "eval_runtime": 6.8372,
      "eval_samples_per_second": 20.769,
      "eval_steps_per_second": 2.633,
      "eval_wer": 0.7192676547515258,
      "step": 4400
    },
    {
      "epoch": 187.5,
      "learning_rate": 4.851063829787233e-06,
      "loss": 0.3937,
      "step": 4500
    },
    {
      "epoch": 187.5,
      "eval_loss": 1.271910309791565,
      "eval_runtime": 7.0869,
      "eval_samples_per_second": 20.037,
      "eval_steps_per_second": 2.54,
      "eval_wer": 0.7149084568439407,
      "step": 4500
    },
    {
      "epoch": 191.67,
      "learning_rate": 3.25531914893617e-06,
      "loss": 0.3905,
      "step": 4600
    },
    {
      "epoch": 191.67,
      "eval_loss": 1.2815755605697632,
      "eval_runtime": 5.4594,
      "eval_samples_per_second": 26.01,
      "eval_steps_per_second": 3.297,
      "eval_wer": 0.7157802964254577,
      "step": 4600
    },
    {
      "epoch": 195.83,
      "learning_rate": 1.6595744680851062e-06,
      "loss": 0.3892,
      "step": 4700
    },
    {
      "epoch": 195.83,
      "eval_loss": 1.295116901397705,
      "eval_runtime": 7.0588,
      "eval_samples_per_second": 20.117,
      "eval_steps_per_second": 2.55,
      "eval_wer": 0.7210113339145597,
      "step": 4700
    },
    {
      "epoch": 200.0,
      "learning_rate": 6.382978723404255e-08,
      "loss": 0.3932,
      "step": 4800
    },
    {
      "epoch": 200.0,
      "eval_loss": 1.2923693656921387,
      "eval_runtime": 7.179,
      "eval_samples_per_second": 19.78,
      "eval_steps_per_second": 2.507,
      "eval_wer": 0.7201394943330427,
      "step": 4800
    },
    {
      "epoch": 200.0,
      "step": 4800,
      "total_flos": 1.0000910465788367e+19,
      "train_loss": 1.2620406293869018,
      "train_runtime": 6643.3653,
      "train_samples_per_second": 11.38,
      "train_steps_per_second": 0.723
    }
  ],
  "max_steps": 4800,
  "num_train_epochs": 200,
  "total_flos": 1.0000910465788367e+19,
  "trial_name": null,
  "trial_params": null
}
|