|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 50.0,
  "global_step": 5750,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.87,
      "learning_rate": 5.7166666666666664e-06,
      "loss": 13.1691,
      "step": 100
    },
    {
      "epoch": 1.74,
      "learning_rate": 1.155e-05,
      "loss": 5.7916,
      "step": 200
    },
    {
      "epoch": 2.61,
      "learning_rate": 1.738333333333333e-05,
      "loss": 3.9554,
      "step": 300
    },
    {
      "epoch": 2.61,
      "eval_loss": 3.8166096210479736,
      "eval_runtime": 76.9069,
      "eval_samples_per_second": 22.105,
      "eval_steps_per_second": 1.391,
      "eval_wer": 1.0,
      "step": 300
    },
    {
      "epoch": 3.48,
      "learning_rate": 2.3216666666666664e-05,
      "loss": 3.4177,
      "step": 400
    },
    {
      "epoch": 4.35,
      "learning_rate": 2.9049999999999995e-05,
      "loss": 3.1088,
      "step": 500
    },
    {
      "epoch": 5.22,
      "learning_rate": 3.488333333333333e-05,
      "loss": 2.9853,
      "step": 600
    },
    {
      "epoch": 5.22,
      "eval_loss": 2.979321002960205,
      "eval_runtime": 76.5995,
      "eval_samples_per_second": 22.193,
      "eval_steps_per_second": 1.397,
      "eval_wer": 1.0,
      "step": 600
    },
    {
      "epoch": 6.09,
      "learning_rate": 4.071666666666666e-05,
      "loss": 2.6633,
      "step": 700
    },
    {
      "epoch": 6.96,
      "learning_rate": 4.655e-05,
      "loss": 1.3761,
      "step": 800
    },
    {
      "epoch": 7.83,
      "learning_rate": 5.2383333333333324e-05,
      "loss": 0.6939,
      "step": 900
    },
    {
      "epoch": 7.83,
      "eval_loss": 0.972253143787384,
      "eval_runtime": 77.2431,
      "eval_samples_per_second": 22.008,
      "eval_steps_per_second": 1.385,
      "eval_wer": 0.9149368374277915,
      "step": 900
    },
    {
      "epoch": 8.7,
      "learning_rate": 5.821666666666666e-05,
      "loss": 0.5141,
      "step": 1000
    },
    {
      "epoch": 9.57,
      "learning_rate": 6.405e-05,
      "loss": 0.4189,
      "step": 1100
    },
    {
      "epoch": 10.43,
      "learning_rate": 6.988333333333333e-05,
      "loss": 0.352,
      "step": 1200
    },
    {
      "epoch": 10.43,
      "eval_loss": 0.6556435823440552,
      "eval_runtime": 76.3866,
      "eval_samples_per_second": 22.255,
      "eval_steps_per_second": 1.401,
      "eval_wer": 0.7296387989589285,
      "step": 1200
    },
    {
      "epoch": 11.3,
      "learning_rate": 6.849230769230769e-05,
      "loss": 0.307,
      "step": 1300
    },
    {
      "epoch": 12.17,
      "learning_rate": 6.695384615384615e-05,
      "loss": 0.2658,
      "step": 1400
    },
    {
      "epoch": 13.04,
      "learning_rate": 6.541538461538461e-05,
      "loss": 0.2369,
      "step": 1500
    },
    {
      "epoch": 13.04,
      "eval_loss": 0.6314664483070374,
      "eval_runtime": 76.2318,
      "eval_samples_per_second": 22.3,
      "eval_steps_per_second": 1.404,
      "eval_wer": 0.6927569351869485,
      "step": 1500
    },
    {
      "epoch": 13.91,
      "learning_rate": 6.387692307692307e-05,
      "loss": 0.2084,
      "step": 1600
    },
    {
      "epoch": 14.78,
      "learning_rate": 6.233846153846153e-05,
      "loss": 0.1904,
      "step": 1700
    },
    {
      "epoch": 15.65,
      "learning_rate": 6.0799999999999994e-05,
      "loss": 0.1757,
      "step": 1800
    },
    {
      "epoch": 15.65,
      "eval_loss": 0.6387060284614563,
      "eval_runtime": 76.1573,
      "eval_samples_per_second": 22.322,
      "eval_steps_per_second": 1.405,
      "eval_wer": 0.6723798641528598,
      "step": 1800
    },
    {
      "epoch": 16.52,
      "learning_rate": 5.9261538461538453e-05,
      "loss": 0.1693,
      "step": 1900
    },
    {
      "epoch": 17.39,
      "learning_rate": 5.772307692307692e-05,
      "loss": 0.1572,
      "step": 2000
    },
    {
      "epoch": 18.26,
      "learning_rate": 5.618461538461538e-05,
      "loss": 0.1517,
      "step": 2100
    },
    {
      "epoch": 18.26,
      "eval_loss": 0.6607237458229065,
      "eval_runtime": 77.027,
      "eval_samples_per_second": 22.07,
      "eval_steps_per_second": 1.389,
      "eval_wer": 0.6265473243191773,
      "step": 2100
    },
    {
      "epoch": 19.13,
      "learning_rate": 5.464615384615384e-05,
      "loss": 0.1403,
      "step": 2200
    },
    {
      "epoch": 20.0,
      "learning_rate": 5.3107692307692305e-05,
      "loss": 0.1382,
      "step": 2300
    },
    {
      "epoch": 20.87,
      "learning_rate": 5.1569230769230765e-05,
      "loss": 0.1229,
      "step": 2400
    },
    {
      "epoch": 20.87,
      "eval_loss": 0.6519985198974609,
      "eval_runtime": 76.1295,
      "eval_samples_per_second": 22.33,
      "eval_steps_per_second": 1.405,
      "eval_wer": 0.6182949279502317,
      "step": 2400
    },
    {
      "epoch": 21.74,
      "learning_rate": 5.0030769230769225e-05,
      "loss": 0.129,
      "step": 2500
    },
    {
      "epoch": 22.61,
      "learning_rate": 4.849230769230769e-05,
      "loss": 0.1172,
      "step": 2600
    },
    {
      "epoch": 23.48,
      "learning_rate": 4.695384615384615e-05,
      "loss": 0.1201,
      "step": 2700
    },
    {
      "epoch": 23.48,
      "eval_loss": 0.67499840259552,
      "eval_runtime": 76.4187,
      "eval_samples_per_second": 22.246,
      "eval_steps_per_second": 1.4,
      "eval_wer": 0.6115660509109376,
      "step": 2700
    },
    {
      "epoch": 24.35,
      "learning_rate": 4.541538461538461e-05,
      "loss": 0.1112,
      "step": 2800
    },
    {
      "epoch": 25.22,
      "learning_rate": 4.387692307692307e-05,
      "loss": 0.1131,
      "step": 2900
    },
    {
      "epoch": 26.09,
      "learning_rate": 4.2338461538461536e-05,
      "loss": 0.1076,
      "step": 3000
    },
    {
      "epoch": 26.09,
      "eval_loss": 0.6698060035705566,
      "eval_runtime": 75.169,
      "eval_samples_per_second": 22.616,
      "eval_steps_per_second": 1.423,
      "eval_wer": 0.6006474957151019,
      "step": 3000
    },
    {
      "epoch": 26.96,
      "learning_rate": 4.0799999999999996e-05,
      "loss": 0.104,
      "step": 3100
    },
    {
      "epoch": 27.83,
      "learning_rate": 3.9261538461538455e-05,
      "loss": 0.1014,
      "step": 3200
    },
    {
      "epoch": 28.7,
      "learning_rate": 3.772307692307692e-05,
      "loss": 0.1006,
      "step": 3300
    },
    {
      "epoch": 28.7,
      "eval_loss": 0.680385947227478,
      "eval_runtime": 75.8528,
      "eval_samples_per_second": 22.412,
      "eval_steps_per_second": 1.411,
      "eval_wer": 0.5871897416365137,
      "step": 3300
    },
    {
      "epoch": 29.57,
      "learning_rate": 3.618461538461538e-05,
      "loss": 0.1013,
      "step": 3400
    },
    {
      "epoch": 30.43,
      "learning_rate": 3.464615384615384e-05,
      "loss": 0.0972,
      "step": 3500
    },
    {
      "epoch": 31.3,
      "learning_rate": 3.310769230769231e-05,
      "loss": 0.0952,
      "step": 3600
    },
    {
      "epoch": 31.3,
      "eval_loss": 0.7009897828102112,
      "eval_runtime": 74.9836,
      "eval_samples_per_second": 22.672,
      "eval_steps_per_second": 1.427,
      "eval_wer": 0.600266615882689,
      "step": 3600
    },
    {
      "epoch": 32.17,
      "learning_rate": 3.156923076923077e-05,
      "loss": 0.091,
      "step": 3700
    },
    {
      "epoch": 33.04,
      "learning_rate": 3.0030769230769226e-05,
      "loss": 0.0924,
      "step": 3800
    },
    {
      "epoch": 33.91,
      "learning_rate": 2.849230769230769e-05,
      "loss": 0.0894,
      "step": 3900
    },
    {
      "epoch": 33.91,
      "eval_loss": 0.7068695425987244,
      "eval_runtime": 74.6323,
      "eval_samples_per_second": 22.778,
      "eval_steps_per_second": 1.434,
      "eval_wer": 0.584015743033073,
      "step": 3900
    },
    {
      "epoch": 34.78,
      "learning_rate": 2.695384615384615e-05,
      "loss": 0.0869,
      "step": 4000
    },
    {
      "epoch": 35.65,
      "learning_rate": 2.5415384615384612e-05,
      "loss": 0.0848,
      "step": 4100
    },
    {
      "epoch": 36.52,
      "learning_rate": 2.3876923076923075e-05,
      "loss": 0.0873,
      "step": 4200
    },
    {
      "epoch": 36.52,
      "eval_loss": 0.6764819622039795,
      "eval_runtime": 75.5212,
      "eval_samples_per_second": 22.51,
      "eval_steps_per_second": 1.417,
      "eval_wer": 0.5781121056306735,
      "step": 4200
    },
    {
      "epoch": 37.39,
      "learning_rate": 2.2338461538461534e-05,
      "loss": 0.0845,
      "step": 4300
    },
    {
      "epoch": 38.26,
      "learning_rate": 2.0799999999999997e-05,
      "loss": 0.0813,
      "step": 4400
    },
    {
      "epoch": 39.13,
      "learning_rate": 1.9261538461538457e-05,
      "loss": 0.0798,
      "step": 4500
    },
    {
      "epoch": 39.13,
      "eval_loss": 0.6821776032447815,
      "eval_runtime": 76.5806,
      "eval_samples_per_second": 22.199,
      "eval_steps_per_second": 1.397,
      "eval_wer": 0.5617342728369199,
      "step": 4500
    },
    {
      "epoch": 40.0,
      "learning_rate": 1.772307692307692e-05,
      "loss": 0.0783,
      "step": 4600
    },
    {
      "epoch": 40.87,
      "learning_rate": 1.6199999999999997e-05,
      "loss": 0.0753,
      "step": 4700
    },
    {
      "epoch": 41.74,
      "learning_rate": 1.466153846153846e-05,
      "loss": 0.0767,
      "step": 4800
    },
    {
      "epoch": 41.74,
      "eval_loss": 0.7003459334373474,
      "eval_runtime": 74.7077,
      "eval_samples_per_second": 22.755,
      "eval_steps_per_second": 1.432,
      "eval_wer": 0.5637656319431219,
      "step": 4800
    },
    {
      "epoch": 42.61,
      "learning_rate": 1.3123076923076922e-05,
      "loss": 0.0739,
      "step": 4900
    },
    {
      "epoch": 43.48,
      "learning_rate": 1.1584615384615385e-05,
      "loss": 0.0766,
      "step": 5000
    },
    {
      "epoch": 44.35,
      "learning_rate": 1.0046153846153846e-05,
      "loss": 0.0717,
      "step": 5100
    },
    {
      "epoch": 44.35,
      "eval_loss": 0.6969868540763855,
      "eval_runtime": 74.6644,
      "eval_samples_per_second": 22.769,
      "eval_steps_per_second": 1.433,
      "eval_wer": 0.5590681140100299,
      "step": 5100
    },
    {
      "epoch": 45.22,
      "learning_rate": 8.507692307692307e-06,
      "loss": 0.0732,
      "step": 5200
    },
    {
      "epoch": 46.09,
      "learning_rate": 6.9692307692307684e-06,
      "loss": 0.0713,
      "step": 5300
    },
    {
      "epoch": 46.96,
      "learning_rate": 5.4307692307692306e-06,
      "loss": 0.0687,
      "step": 5400
    },
    {
      "epoch": 46.96,
      "eval_loss": 0.7072671055793762,
      "eval_runtime": 74.5796,
      "eval_samples_per_second": 22.794,
      "eval_steps_per_second": 1.435,
      "eval_wer": 0.560337713451406,
      "step": 5400
    },
    {
      "epoch": 47.83,
      "learning_rate": 3.892307692307692e-06,
      "loss": 0.0674,
      "step": 5500
    },
    {
      "epoch": 48.7,
      "learning_rate": 2.3538461538461536e-06,
      "loss": 0.0702,
      "step": 5600
    },
    {
      "epoch": 49.57,
      "learning_rate": 8.153846153846153e-07,
      "loss": 0.0669,
      "step": 5700
    },
    {
      "epoch": 49.57,
      "eval_loss": 0.7059715390205383,
      "eval_runtime": 74.624,
      "eval_samples_per_second": 22.781,
      "eval_steps_per_second": 1.434,
      "eval_wer": 0.5558941154065892,
      "step": 5700
    },
    {
      "epoch": 50.0,
      "step": 5750,
      "total_flos": 2.9609940258263142e+19,
      "train_loss": 0.7598760600297347,
      "train_runtime": 13000.5685,
      "train_samples_per_second": 14.138,
      "train_steps_per_second": 0.442
    }
  ],
  "max_steps": 5750,
  "num_train_epochs": 50,
  "total_flos": 2.9609940258263142e+19,
  "trial_name": null,
  "trial_params": null
}