|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 28.11244979919679, |
|
"global_step": 7000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 4.97e-05, |
|
"loss": 4.8083, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"eval_loss": 2.9298839569091797, |
|
"eval_runtime": 72.494, |
|
"eval_samples_per_second": 23.174, |
|
"eval_steps_per_second": 2.897, |
|
"eval_wer": 1.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 9.970000000000001e-05, |
|
"loss": 1.5432, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"eval_loss": 0.3672347962856293, |
|
"eval_runtime": 72.6811, |
|
"eval_samples_per_second": 23.115, |
|
"eval_steps_per_second": 2.889, |
|
"eval_wer": 0.41520226035421404, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 9.231839258114375e-05, |
|
"loss": 0.305, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"eval_loss": 0.2641327381134033, |
|
"eval_runtime": 74.7155, |
|
"eval_samples_per_second": 22.485, |
|
"eval_steps_per_second": 2.811, |
|
"eval_wer": 0.32127351664254705, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 8.459041731066461e-05, |
|
"loss": 0.1614, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"eval_loss": 0.2683059871196747, |
|
"eval_runtime": 73.5962, |
|
"eval_samples_per_second": 22.827, |
|
"eval_steps_per_second": 2.853, |
|
"eval_wer": 0.2949486596375164, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 10.04, |
|
"learning_rate": 7.686244204018547e-05, |
|
"loss": 0.1096, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.04, |
|
"eval_loss": 0.25842466950416565, |
|
"eval_runtime": 72.9953, |
|
"eval_samples_per_second": 23.015, |
|
"eval_steps_per_second": 2.877, |
|
"eval_wer": 0.2761353455998897, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 12.05, |
|
"learning_rate": 6.913446676970633e-05, |
|
"loss": 0.0849, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 12.05, |
|
"eval_loss": 0.2633412778377533, |
|
"eval_runtime": 73.117, |
|
"eval_samples_per_second": 22.977, |
|
"eval_steps_per_second": 2.872, |
|
"eval_wer": 0.26834814967955345, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 14.06, |
|
"learning_rate": 6.14064914992272e-05, |
|
"loss": 0.0666, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 14.06, |
|
"eval_loss": 0.2584041357040405, |
|
"eval_runtime": 72.8822, |
|
"eval_samples_per_second": 23.051, |
|
"eval_steps_per_second": 2.881, |
|
"eval_wer": 0.2597339949004204, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 16.06, |
|
"learning_rate": 5.3678516228748066e-05, |
|
"loss": 0.0558, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 16.06, |
|
"eval_loss": 0.25760215520858765, |
|
"eval_runtime": 72.4376, |
|
"eval_samples_per_second": 23.192, |
|
"eval_steps_per_second": 2.899, |
|
"eval_wer": 0.24939700916546068, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 18.07, |
|
"learning_rate": 4.5950540958268935e-05, |
|
"loss": 0.0455, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 18.07, |
|
"eval_loss": 0.2717011272907257, |
|
"eval_runtime": 73.3321, |
|
"eval_samples_per_second": 22.909, |
|
"eval_steps_per_second": 2.864, |
|
"eval_wer": 0.2531183240300462, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 20.08, |
|
"learning_rate": 3.8222565687789804e-05, |
|
"loss": 0.0422, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 20.08, |
|
"eval_loss": 0.29290544986724854, |
|
"eval_runtime": 74.3543, |
|
"eval_samples_per_second": 22.595, |
|
"eval_steps_per_second": 2.824, |
|
"eval_wer": 0.2513265798359865, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 22.09, |
|
"learning_rate": 3.0494590417310663e-05, |
|
"loss": 0.0383, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 22.09, |
|
"eval_loss": 0.27808326482772827, |
|
"eval_runtime": 73.2045, |
|
"eval_samples_per_second": 22.949, |
|
"eval_steps_per_second": 2.869, |
|
"eval_wer": 0.2480187444007994, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 24.1, |
|
"learning_rate": 2.2766615146831533e-05, |
|
"loss": 0.033, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 24.1, |
|
"eval_loss": 0.2956600785255432, |
|
"eval_runtime": 73.3873, |
|
"eval_samples_per_second": 22.892, |
|
"eval_steps_per_second": 2.862, |
|
"eval_wer": 0.2503617945007236, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 26.1, |
|
"learning_rate": 1.5038639876352395e-05, |
|
"loss": 0.0314, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 26.1, |
|
"eval_loss": 0.2865420877933502, |
|
"eval_runtime": 73.2994, |
|
"eval_samples_per_second": 22.92, |
|
"eval_steps_per_second": 2.865, |
|
"eval_wer": 0.24863896354489698, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 28.11, |
|
"learning_rate": 7.310664605873262e-06, |
|
"loss": 0.0308, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 28.11, |
|
"eval_loss": 0.29938989877700806, |
|
"eval_runtime": 73.3049, |
|
"eval_samples_per_second": 22.918, |
|
"eval_steps_per_second": 2.865, |
|
"eval_wer": 0.24732961201846876, |
|
"step": 7000 |
|
} |
|
], |
|
"max_steps": 7470, |
|
"num_train_epochs": 30, |
|
"total_flos": 9.6467224304247e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|