|
{ |
|
"best_metric": 6.602455701786633, |
|
"best_model_checkpoint": "checkpoint-130000", |
|
"epoch": 98.21512890735669, |
|
"eval_steps": 10000, |
|
"global_step": 130000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 7.56, |
|
"learning_rate": 3.7792894935752085e-05, |
|
"loss": 3.1181, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"eval_cer": 12.22214052888268, |
|
"eval_loss": 0.39288055896759033, |
|
"eval_runtime": 80.6046, |
|
"eval_samples_per_second": 6.178, |
|
"eval_steps_per_second": 3.089, |
|
"eval_wer": 38.847117794486216, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 15.11, |
|
"learning_rate": 7.558578987150417e-05, |
|
"loss": 0.4086, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 15.11, |
|
"eval_cer": 9.646349533122564, |
|
"eval_loss": 0.3199174404144287, |
|
"eval_runtime": 78.777, |
|
"eval_samples_per_second": 6.322, |
|
"eval_steps_per_second": 3.161, |
|
"eval_wer": 30.367585630743527, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 22.67, |
|
"learning_rate": 9.665532879818595e-05, |
|
"loss": 0.3126, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 22.67, |
|
"eval_cer": 9.011592284881013, |
|
"eval_loss": 0.3147233724594116, |
|
"eval_runtime": 77.284, |
|
"eval_samples_per_second": 6.444, |
|
"eval_steps_per_second": 3.222, |
|
"eval_wer": 28.390420495683657, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 30.22, |
|
"learning_rate": 8.720710506424793e-05, |
|
"loss": 0.2509, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 30.22, |
|
"eval_cer": 8.134205818199643, |
|
"eval_loss": 0.3039480447769165, |
|
"eval_runtime": 68.0983, |
|
"eval_samples_per_second": 7.313, |
|
"eval_steps_per_second": 3.656, |
|
"eval_wer": 26.427179058758004, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 37.78, |
|
"learning_rate": 7.77588813303099e-05, |
|
"loss": 0.2084, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 37.78, |
|
"eval_cer": 7.7028649854177385, |
|
"eval_loss": 0.2937542498111725, |
|
"eval_runtime": 66.6192, |
|
"eval_samples_per_second": 7.475, |
|
"eval_steps_per_second": 3.738, |
|
"eval_wer": 25.382901698691175, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 45.33, |
|
"learning_rate": 6.831065759637189e-05, |
|
"loss": 0.1794, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 45.33, |
|
"eval_cer": 7.658750582065044, |
|
"eval_loss": 0.3241848349571228, |
|
"eval_runtime": 65.9465, |
|
"eval_samples_per_second": 7.552, |
|
"eval_steps_per_second": 3.776, |
|
"eval_wer": 24.770258980785297, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 52.89, |
|
"learning_rate": 5.886243386243386e-05, |
|
"loss": 0.1566, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 52.89, |
|
"eval_cer": 7.2984829546847045, |
|
"eval_loss": 0.33441099524497986, |
|
"eval_runtime": 66.4973, |
|
"eval_samples_per_second": 7.489, |
|
"eval_steps_per_second": 3.745, |
|
"eval_wer": 24.33862433862434, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 60.44, |
|
"learning_rate": 4.9414210128495846e-05, |
|
"loss": 0.1381, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 60.44, |
|
"eval_cer": 7.394064161948877, |
|
"eval_loss": 0.3713204860687256, |
|
"eval_runtime": 66.5533, |
|
"eval_samples_per_second": 7.483, |
|
"eval_steps_per_second": 3.741, |
|
"eval_wer": 23.62851573377889, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"learning_rate": 3.9965986394557825e-05, |
|
"loss": 0.1227, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_cer": 7.109771340342622, |
|
"eval_loss": 0.3827340006828308, |
|
"eval_runtime": 66.2381, |
|
"eval_samples_per_second": 7.518, |
|
"eval_steps_per_second": 3.759, |
|
"eval_wer": 22.946254525201894, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 75.55, |
|
"learning_rate": 3.0517762660619804e-05, |
|
"loss": 0.1097, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 75.55, |
|
"eval_cer": 7.124476141460187, |
|
"eval_loss": 0.415243923664093, |
|
"eval_runtime": 149.6844, |
|
"eval_samples_per_second": 3.327, |
|
"eval_steps_per_second": 1.663, |
|
"eval_wer": 22.988025619604567, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 83.11, |
|
"learning_rate": 2.1069538926681782e-05, |
|
"loss": 0.0988, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 83.11, |
|
"eval_cer": 6.854888120971498, |
|
"eval_loss": 0.4487506151199341, |
|
"eval_runtime": 66.6595, |
|
"eval_samples_per_second": 7.471, |
|
"eval_steps_per_second": 3.735, |
|
"eval_wer": 22.570314675577833, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 90.66, |
|
"learning_rate": 1.1621315192743764e-05, |
|
"loss": 0.0896, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 90.66, |
|
"eval_cer": 6.808322917432541, |
|
"eval_loss": 0.44282594323158264, |
|
"eval_runtime": 66.7552, |
|
"eval_samples_per_second": 7.46, |
|
"eval_steps_per_second": 3.73, |
|
"eval_wer": 21.99944305207463, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 98.22, |
|
"learning_rate": 2.1730914588057445e-06, |
|
"loss": 0.0823, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 98.22, |
|
"eval_cer": 6.602455701786633, |
|
"eval_loss": 0.4513276517391205, |
|
"eval_runtime": 66.6171, |
|
"eval_samples_per_second": 7.476, |
|
"eval_steps_per_second": 3.738, |
|
"eval_wer": 21.74881648565859, |
|
"step": 130000 |
|
} |
|
], |
|
"logging_steps": 10000, |
|
"max_steps": 132300, |
|
"num_train_epochs": 100, |
|
"save_steps": 10000, |
|
"total_flos": 4.063479904391249e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|