|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.416856492027335, |
|
"eval_steps": 200, |
|
"global_step": 6000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.11389521640091116, |
|
"eval_loss": 3.107494354248047, |
|
"eval_runtime": 174.2934, |
|
"eval_samples_per_second": 40.288, |
|
"eval_steps_per_second": 5.037, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22779043280182232, |
|
"eval_loss": 1.6248737573623657, |
|
"eval_runtime": 174.1392, |
|
"eval_samples_per_second": 40.324, |
|
"eval_steps_per_second": 5.042, |
|
"eval_wer": 0.8958131253404908, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2847380410022779, |
|
"grad_norm": 3.3784055709838867, |
|
"learning_rate": 0.00029699999999999996, |
|
"loss": 3.6043, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3416856492027335, |
|
"eval_loss": 1.1246436834335327, |
|
"eval_runtime": 174.2087, |
|
"eval_samples_per_second": 40.308, |
|
"eval_steps_per_second": 5.04, |
|
"eval_wer": 0.8675772997249498, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.45558086560364464, |
|
"eval_loss": 0.9142205715179443, |
|
"eval_runtime": 174.1428, |
|
"eval_samples_per_second": 40.323, |
|
"eval_steps_per_second": 5.042, |
|
"eval_wer": 0.7093902390411778, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.5694760820045558, |
|
"grad_norm": 2.1683263778686523, |
|
"learning_rate": 0.0002730545454545454, |
|
"loss": 1.0665, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5694760820045558, |
|
"eval_loss": 0.7569850087165833, |
|
"eval_runtime": 173.8805, |
|
"eval_samples_per_second": 40.384, |
|
"eval_steps_per_second": 5.049, |
|
"eval_wer": 0.5934306860309067, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.683371298405467, |
|
"eval_loss": 0.6979650259017944, |
|
"eval_runtime": 174.537, |
|
"eval_samples_per_second": 40.232, |
|
"eval_steps_per_second": 5.03, |
|
"eval_wer": 0.5284816433914881, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.7972665148063781, |
|
"eval_loss": 0.6941590905189514, |
|
"eval_runtime": 174.9006, |
|
"eval_samples_per_second": 40.149, |
|
"eval_steps_per_second": 5.02, |
|
"eval_wer": 0.5388990021127041, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.8542141230068337, |
|
"grad_norm": 2.244067907333374, |
|
"learning_rate": 0.0002459454545454545, |
|
"loss": 0.8811, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.9111617312072893, |
|
"eval_loss": 0.6152143478393555, |
|
"eval_runtime": 175.8479, |
|
"eval_samples_per_second": 39.932, |
|
"eval_steps_per_second": 4.993, |
|
"eval_wer": 0.4861611235865478, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.0250569476082005, |
|
"eval_loss": 0.5941163301467896, |
|
"eval_runtime": 174.9993, |
|
"eval_samples_per_second": 40.126, |
|
"eval_steps_per_second": 5.017, |
|
"eval_wer": 0.4534607156619142, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.1389521640091116, |
|
"grad_norm": 2.146209478378296, |
|
"learning_rate": 0.00021878181818181817, |
|
"loss": 0.7654, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.1389521640091116, |
|
"eval_loss": 0.5642105340957642, |
|
"eval_runtime": 174.8742, |
|
"eval_samples_per_second": 40.155, |
|
"eval_steps_per_second": 5.021, |
|
"eval_wer": 0.43286517227175486, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.2528473804100229, |
|
"eval_loss": 0.5727251768112183, |
|
"eval_runtime": 175.3644, |
|
"eval_samples_per_second": 40.042, |
|
"eval_steps_per_second": 5.007, |
|
"eval_wer": 0.43133711582667855, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.366742596810934, |
|
"eval_loss": 0.5466997027397156, |
|
"eval_runtime": 175.0585, |
|
"eval_samples_per_second": 40.112, |
|
"eval_steps_per_second": 5.015, |
|
"eval_wer": 0.4316825894577393, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.4236902050113895, |
|
"grad_norm": 1.5887110233306885, |
|
"learning_rate": 0.00019167272727272725, |
|
"loss": 0.6896, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.4806378132118452, |
|
"eval_loss": 0.5658189654350281, |
|
"eval_runtime": 178.0734, |
|
"eval_samples_per_second": 39.433, |
|
"eval_steps_per_second": 4.931, |
|
"eval_wer": 0.43980121978766656, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.5945330296127562, |
|
"eval_loss": 0.6008076071739197, |
|
"eval_runtime": 175.5831, |
|
"eval_samples_per_second": 39.992, |
|
"eval_steps_per_second": 5.0, |
|
"eval_wer": 0.4445182635963805, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.7084282460136673, |
|
"grad_norm": 3.4384372234344482, |
|
"learning_rate": 0.0001645090909090909, |
|
"loss": 0.7558, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.7084282460136673, |
|
"eval_loss": 0.7089642286300659, |
|
"eval_runtime": 175.5809, |
|
"eval_samples_per_second": 39.993, |
|
"eval_steps_per_second": 5.001, |
|
"eval_wer": 0.5020927729573872, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.8223234624145785, |
|
"eval_loss": 0.8049871921539307, |
|
"eval_runtime": 175.5153, |
|
"eval_samples_per_second": 40.008, |
|
"eval_steps_per_second": 5.002, |
|
"eval_wer": 0.5291327283115641, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.9362186788154898, |
|
"eval_loss": 0.824817955493927, |
|
"eval_runtime": 175.9347, |
|
"eval_samples_per_second": 39.913, |
|
"eval_steps_per_second": 4.99, |
|
"eval_wer": 0.5275382346297453, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.9931662870159452, |
|
"grad_norm": 3.6540627479553223, |
|
"learning_rate": 0.00013723636363636361, |
|
"loss": 0.9823, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.050113895216401, |
|
"eval_loss": 0.7944601774215698, |
|
"eval_runtime": 176.7964, |
|
"eval_samples_per_second": 39.718, |
|
"eval_steps_per_second": 4.966, |
|
"eval_wer": 0.5072748774232982, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.164009111617312, |
|
"eval_loss": 0.7690120935440063, |
|
"eval_runtime": 177.7491, |
|
"eval_samples_per_second": 39.505, |
|
"eval_steps_per_second": 4.94, |
|
"eval_wer": 0.4982128383316281, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.277904328018223, |
|
"grad_norm": 12.11134147644043, |
|
"learning_rate": 0.00011001818181818181, |
|
"loss": 0.9181, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.277904328018223, |
|
"eval_loss": 0.8668798804283142, |
|
"eval_runtime": 176.748, |
|
"eval_samples_per_second": 39.729, |
|
"eval_steps_per_second": 4.968, |
|
"eval_wer": 0.6929802415657927, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.3917995444191344, |
|
"eval_loss": 0.88874351978302, |
|
"eval_runtime": 177.153, |
|
"eval_samples_per_second": 39.638, |
|
"eval_steps_per_second": 4.956, |
|
"eval_wer": 0.6311271741585724, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.5056947608200457, |
|
"eval_loss": 1.150854468345642, |
|
"eval_runtime": 176.8956, |
|
"eval_samples_per_second": 39.696, |
|
"eval_steps_per_second": 4.963, |
|
"eval_wer": 0.8436200321556225, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.562642369020501, |
|
"grad_norm": 6.307694911956787, |
|
"learning_rate": 8.285454545454545e-05, |
|
"loss": 1.0805, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.619589977220957, |
|
"eval_loss": 1.2166552543640137, |
|
"eval_runtime": 176.6784, |
|
"eval_samples_per_second": 39.745, |
|
"eval_steps_per_second": 4.969, |
|
"eval_wer": 0.9458935143969492, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.733485193621868, |
|
"eval_loss": 1.2323366403579712, |
|
"eval_runtime": 177.2283, |
|
"eval_samples_per_second": 39.621, |
|
"eval_steps_per_second": 4.954, |
|
"eval_wer": 0.9307059620776252, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.847380410022779, |
|
"grad_norm": 0.8214463591575623, |
|
"learning_rate": 5.569090909090908e-05, |
|
"loss": 1.3977, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.847380410022779, |
|
"eval_loss": 1.65969979763031, |
|
"eval_runtime": 176.9601, |
|
"eval_samples_per_second": 39.681, |
|
"eval_steps_per_second": 4.962, |
|
"eval_wer": 0.9527896995708155, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.9612756264236904, |
|
"eval_loss": 2.022368907928467, |
|
"eval_runtime": 176.5944, |
|
"eval_samples_per_second": 39.763, |
|
"eval_steps_per_second": 4.972, |
|
"eval_wer": 0.9162359319151198, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 3.075170842824601, |
|
"eval_loss": 2.105426788330078, |
|
"eval_runtime": 176.3683, |
|
"eval_samples_per_second": 39.814, |
|
"eval_steps_per_second": 4.978, |
|
"eval_wer": 0.9103761676344357, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 3.132118451025057, |
|
"grad_norm": 0.6184139251708984, |
|
"learning_rate": 2.8472727272727272e-05, |
|
"loss": 2.1028, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 3.1890660592255125, |
|
"eval_loss": 2.248642683029175, |
|
"eval_runtime": 175.7372, |
|
"eval_samples_per_second": 39.957, |
|
"eval_steps_per_second": 4.996, |
|
"eval_wer": 0.9280750475026243, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 3.3029612756264237, |
|
"eval_loss": 2.2446603775024414, |
|
"eval_runtime": 176.6127, |
|
"eval_samples_per_second": 39.759, |
|
"eval_steps_per_second": 4.971, |
|
"eval_wer": 0.9215774857492127, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 3.416856492027335, |
|
"grad_norm": 0.6574464440345764, |
|
"learning_rate": 1.3636363636363634e-06, |
|
"loss": 2.2911, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.416856492027335, |
|
"eval_loss": 2.2309672832489014, |
|
"eval_runtime": 176.1017, |
|
"eval_samples_per_second": 39.875, |
|
"eval_steps_per_second": 4.986, |
|
"eval_wer": 0.9239559388245924, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.416856492027335, |
|
"step": 6000, |
|
"total_flos": 1.116875973325552e+19, |
|
"train_loss": 1.37794411722819, |
|
"train_runtime": 8509.4041, |
|
"train_samples_per_second": 11.282, |
|
"train_steps_per_second": 0.705 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 6000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.116875973325552e+19, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|