{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.013162988772745,
  "eval_steps": 100,
  "global_step": 2600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07742934572202866,
      "eval_loss": 3.5325253009796143,
      "eval_runtime": 173.1277,
      "eval_samples_per_second": 32.67,
      "eval_steps_per_second": 4.084,
      "eval_wer": 1.0,
      "step": 100
    },
    {
      "epoch": 0.1548586914440573,
      "eval_loss": 2.965233325958252,
      "eval_runtime": 171.2442,
      "eval_samples_per_second": 33.029,
      "eval_steps_per_second": 4.129,
      "eval_wer": 1.0,
      "step": 200
    },
    {
      "epoch": 0.23228803716608595,
      "eval_loss": 2.8520941734313965,
      "eval_runtime": 171.3893,
      "eval_samples_per_second": 33.001,
      "eval_steps_per_second": 4.125,
      "eval_wer": 1.0,
      "step": 300
    },
    {
      "epoch": 0.3097173828881146,
      "eval_loss": 1.247292160987854,
      "eval_runtime": 171.2844,
      "eval_samples_per_second": 33.021,
      "eval_steps_per_second": 4.128,
      "eval_wer": 0.8264832854552165,
      "step": 400
    },
    {
      "epoch": 0.38714672861014326,
      "grad_norm": 1.9864516258239746,
      "learning_rate": 0.00029699999999999996,
      "loss": 3.7403,
      "step": 500
    },
    {
      "epoch": 0.38714672861014326,
      "eval_loss": 0.9730328321456909,
      "eval_runtime": 172.0915,
      "eval_samples_per_second": 32.866,
      "eval_steps_per_second": 4.108,
      "eval_wer": 0.7234356694644605,
      "step": 500
    },
    {
      "epoch": 0.4645760743321719,
      "eval_loss": 0.8328432440757751,
      "eval_runtime": 172.9073,
      "eval_samples_per_second": 32.711,
      "eval_steps_per_second": 4.089,
      "eval_wer": 0.6177560944295549,
      "step": 600
    },
    {
      "epoch": 0.5420054200542005,
      "eval_loss": 0.7426055073738098,
      "eval_runtime": 173.8661,
      "eval_samples_per_second": 32.531,
      "eval_steps_per_second": 4.066,
      "eval_wer": 0.5505127505576864,
      "step": 700
    },
    {
      "epoch": 0.6194347657762292,
      "eval_loss": 0.7127000093460083,
      "eval_runtime": 172.8776,
      "eval_samples_per_second": 32.717,
      "eval_steps_per_second": 4.09,
      "eval_wer": 0.5540113302627144,
      "step": 800
    },
    {
      "epoch": 0.6968641114982579,
      "eval_loss": 0.6691900491714478,
      "eval_runtime": 171.858,
      "eval_samples_per_second": 32.911,
      "eval_steps_per_second": 4.114,
      "eval_wer": 0.5079680955208551,
      "step": 900
    },
    {
      "epoch": 0.7742934572202865,
      "grad_norm": 3.1098098754882812,
      "learning_rate": 0.00022928571428571426,
      "loss": 0.7271,
      "step": 1000
    },
    {
      "epoch": 0.7742934572202865,
      "eval_loss": 0.6375711560249329,
      "eval_runtime": 171.5356,
      "eval_samples_per_second": 32.973,
      "eval_steps_per_second": 4.122,
      "eval_wer": 0.5256214793535652,
      "step": 1000
    },
    {
      "epoch": 0.8517228029423152,
      "eval_loss": 0.6119316816329956,
      "eval_runtime": 171.7234,
      "eval_samples_per_second": 32.937,
      "eval_steps_per_second": 4.117,
      "eval_wer": 0.47057501885702363,
      "step": 1100
    },
    {
      "epoch": 0.9291521486643438,
      "eval_loss": 0.5987285375595093,
      "eval_runtime": 171.8326,
      "eval_samples_per_second": 32.916,
      "eval_steps_per_second": 4.114,
      "eval_wer": 0.4651024698688835,
      "step": 1200
    },
    {
      "epoch": 1.0065814943863725,
      "eval_loss": 0.56138676404953,
      "eval_runtime": 173.244,
      "eval_samples_per_second": 32.648,
      "eval_steps_per_second": 4.081,
      "eval_wer": 0.4267304328288745,
      "step": 1300
    },
    {
      "epoch": 1.084010840108401,
      "eval_loss": 0.5463124513626099,
      "eval_runtime": 171.8011,
      "eval_samples_per_second": 32.922,
      "eval_steps_per_second": 4.115,
      "eval_wer": 0.4228948339779493,
      "step": 1400
    },
    {
      "epoch": 1.1614401858304297,
      "grad_norm": 0.41259104013442993,
      "learning_rate": 0.00015799999999999996,
      "loss": 0.5511,
      "step": 1500
    },
    {
      "epoch": 1.1614401858304297,
      "eval_loss": 0.5231888890266418,
      "eval_runtime": 173.7205,
      "eval_samples_per_second": 32.558,
      "eval_steps_per_second": 4.07,
      "eval_wer": 0.40788945772014573,
      "step": 1500
    },
    {
      "epoch": 1.2388695315524583,
      "eval_loss": 0.518454909324646,
      "eval_runtime": 172.0395,
      "eval_samples_per_second": 32.876,
      "eval_steps_per_second": 4.11,
      "eval_wer": 0.4029465102469869,
      "step": 1600
    },
    {
      "epoch": 1.316298877274487,
      "eval_loss": 0.5089535713195801,
      "eval_runtime": 171.996,
      "eval_samples_per_second": 32.884,
      "eval_steps_per_second": 4.111,
      "eval_wer": 0.4042303927075476,
      "step": 1700
    },
    {
      "epoch": 1.3937282229965158,
      "eval_loss": 0.47846707701683044,
      "eval_runtime": 173.133,
      "eval_samples_per_second": 32.669,
      "eval_steps_per_second": 4.084,
      "eval_wer": 0.38505239845292166,
      "step": 1800
    },
    {
      "epoch": 1.4711575687185443,
      "eval_loss": 0.47747060656547546,
      "eval_runtime": 172.0577,
      "eval_samples_per_second": 32.873,
      "eval_steps_per_second": 4.109,
      "eval_wer": 0.3802699362873329,
      "step": 1900
    },
    {
      "epoch": 1.5485869144405728,
      "grad_norm": 0.49539849162101746,
      "learning_rate": 8.685714285714285e-05,
      "loss": 0.4529,
      "step": 2000
    },
    {
      "epoch": 1.5485869144405728,
      "eval_loss": 0.46770602464675903,
      "eval_runtime": 172.1877,
      "eval_samples_per_second": 32.848,
      "eval_steps_per_second": 4.106,
      "eval_wer": 0.37218147678580027,
      "step": 2000
    },
    {
      "epoch": 1.6260162601626016,
      "eval_loss": 0.4573723077774048,
      "eval_runtime": 173.1529,
      "eval_samples_per_second": 32.665,
      "eval_steps_per_second": 4.083,
      "eval_wer": 0.3543997047070341,
      "step": 2100
    },
    {
      "epoch": 1.7034456058846303,
      "eval_loss": 0.4472625255584717,
      "eval_runtime": 173.3523,
      "eval_samples_per_second": 32.627,
      "eval_steps_per_second": 4.078,
      "eval_wer": 0.3561810916210621,
      "step": 2200
    },
    {
      "epoch": 1.7808749516066589,
      "eval_loss": 0.4436591863632202,
      "eval_runtime": 173.0139,
      "eval_samples_per_second": 32.691,
      "eval_steps_per_second": 4.086,
      "eval_wer": 0.34703342908956686,
      "step": 2300
    },
    {
      "epoch": 1.8583042973286876,
      "eval_loss": 0.43528568744659424,
      "eval_runtime": 173.1967,
      "eval_samples_per_second": 32.657,
      "eval_steps_per_second": 4.082,
      "eval_wer": 0.3450113142141837,
      "step": 2400
    },
    {
      "epoch": 1.9357336430507162,
      "grad_norm": 0.6141678094863892,
      "learning_rate": 1.557142857142857e-05,
      "loss": 0.4149,
      "step": 2500
    },
    {
      "epoch": 1.9357336430507162,
      "eval_loss": 0.4299843907356262,
      "eval_runtime": 172.6246,
      "eval_samples_per_second": 32.765,
      "eval_steps_per_second": 4.096,
      "eval_wer": 0.34008441527178185,
      "step": 2500
    },
    {
      "epoch": 2.013162988772745,
      "eval_loss": 0.4289664328098297,
      "eval_runtime": 173.2622,
      "eval_samples_per_second": 32.644,
      "eval_steps_per_second": 4.081,
      "eval_wer": 0.33783762096580056,
      "step": 2600
    },
    {
      "epoch": 2.013162988772745,
      "step": 2600,
      "total_flos": 1.1633941226063049e+19,
      "train_loss": 1.1463891924344576,
      "train_runtime": 8433.2953,
      "train_samples_per_second": 9.866,
      "train_steps_per_second": 0.308
    }
  ],
  "logging_steps": 500,
  "max_steps": 2600,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 400,
  "total_flos": 1.1633941226063049e+19,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}