|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"global_step": 6250, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.000276096, |
|
"loss": 2.6773, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 1.390709400177002, |
|
"eval_runtime": 221.8176, |
|
"eval_samples_per_second": 22.541, |
|
"eval_steps_per_second": 2.818, |
|
"eval_wer": 0.9864438767290387, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.00025209599999999994, |
|
"loss": 0.9526, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_loss": 0.7760354280471802, |
|
"eval_runtime": 219.0168, |
|
"eval_samples_per_second": 22.829, |
|
"eval_steps_per_second": 2.854, |
|
"eval_wer": 0.644847982692352, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.00022809599999999998, |
|
"loss": 0.6418, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_loss": 0.760543942451477, |
|
"eval_runtime": 217.3821, |
|
"eval_samples_per_second": 23.001, |
|
"eval_steps_per_second": 2.875, |
|
"eval_wer": 0.6193928513889848, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 0.000204096, |
|
"loss": 0.5028, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_loss": 0.6515631675720215, |
|
"eval_runtime": 217.5138, |
|
"eval_samples_per_second": 22.987, |
|
"eval_steps_per_second": 2.873, |
|
"eval_wer": 0.532187161959999, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.00018009599999999998, |
|
"loss": 0.4133, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.630308210849762, |
|
"eval_runtime": 220.9761, |
|
"eval_samples_per_second": 22.627, |
|
"eval_steps_per_second": 2.828, |
|
"eval_wer": 0.5097010287923772, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 0.000156096, |
|
"loss": 0.3285, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"eval_loss": 0.6422050595283508, |
|
"eval_runtime": 216.2337, |
|
"eval_samples_per_second": 23.123, |
|
"eval_steps_per_second": 2.89, |
|
"eval_wer": 0.5062487053787199, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 0.00013209599999999998, |
|
"loss": 0.2764, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"eval_loss": 0.5936103463172913, |
|
"eval_runtime": 221.9715, |
|
"eval_samples_per_second": 22.525, |
|
"eval_steps_per_second": 2.816, |
|
"eval_wer": 0.47476351584616444, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 0.00010809599999999998, |
|
"loss": 0.2361, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"eval_loss": 0.6486021280288696, |
|
"eval_runtime": 217.2436, |
|
"eval_samples_per_second": 23.016, |
|
"eval_steps_per_second": 2.877, |
|
"eval_wer": 0.4683191788073373, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 8.4144e-05, |
|
"loss": 0.2049, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"eval_loss": 0.6320939064025879, |
|
"eval_runtime": 216.2222, |
|
"eval_samples_per_second": 23.124, |
|
"eval_steps_per_second": 2.891, |
|
"eval_wer": 0.453198002255518, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 6.014399999999999e-05, |
|
"loss": 0.176, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.6229676604270935, |
|
"eval_runtime": 217.7281, |
|
"eval_samples_per_second": 22.964, |
|
"eval_steps_per_second": 2.871, |
|
"eval_wer": 0.44822665653985133, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 3.6191999999999995e-05, |
|
"loss": 0.1393, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"eval_loss": 0.6595410108566284, |
|
"eval_runtime": 217.4753, |
|
"eval_samples_per_second": 22.991, |
|
"eval_steps_per_second": 2.874, |
|
"eval_wer": 0.4402863126884393, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 1.2192e-05, |
|
"loss": 0.1141, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"eval_loss": 0.6552333831787109, |
|
"eval_runtime": 216.5487, |
|
"eval_samples_per_second": 23.089, |
|
"eval_steps_per_second": 2.886, |
|
"eval_wer": 0.43480862620543625, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 6250, |
|
"total_flos": 1.0733519285960786e+19, |
|
"train_loss": 0.5376352182006836, |
|
"train_runtime": 6902.5833, |
|
"train_samples_per_second": 14.487, |
|
"train_steps_per_second": 0.905 |
|
} |
|
], |
|
"max_steps": 6250, |
|
"num_train_epochs": 10, |
|
"total_flos": 1.0733519285960786e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|