|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.999854956849663, |
|
"eval_steps": 4000, |
|
"global_step": 124098, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.756372971128695e-05, |
|
"loss": 0.7054, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_cer": 0.02274243901983751, |
|
"eval_loss": 0.10111288726329803, |
|
"eval_runtime": 66.987, |
|
"eval_samples_per_second": 61.131, |
|
"eval_steps_per_second": 3.822, |
|
"eval_wer": 0.08709714849302352, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 9.431509853937514e-05, |
|
"loss": 0.0856, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_cer": 0.020731155298169313, |
|
"eval_loss": 0.09945787489414215, |
|
"eval_runtime": 67.6612, |
|
"eval_samples_per_second": 60.522, |
|
"eval_steps_per_second": 3.784, |
|
"eval_wer": 0.07468163490267746, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 9.106727972834653e-05, |
|
"loss": 0.075, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_cer": 0.018470211068126987, |
|
"eval_loss": 0.08679623156785965, |
|
"eval_runtime": 72.7557, |
|
"eval_samples_per_second": 56.284, |
|
"eval_steps_per_second": 3.519, |
|
"eval_wer": 0.06474632320946828, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 8.781864855643472e-05, |
|
"loss": 0.0694, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_cer": 0.01828354900579119, |
|
"eval_loss": 0.08532032370567322, |
|
"eval_runtime": 71.0749, |
|
"eval_samples_per_second": 57.615, |
|
"eval_steps_per_second": 3.602, |
|
"eval_wer": 0.06190351869579091, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 8.45708297454061e-05, |
|
"loss": 0.0658, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_cer": 0.017056245945933333, |
|
"eval_loss": 0.0778348445892334, |
|
"eval_runtime": 68.8966, |
|
"eval_samples_per_second": 59.437, |
|
"eval_steps_per_second": 3.716, |
|
"eval_wer": 0.057262205204072755, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.13230109343775e-05, |
|
"loss": 0.0589, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_cer": 0.016645589408794584, |
|
"eval_loss": 0.08211962133646011, |
|
"eval_runtime": 71.5048, |
|
"eval_samples_per_second": 57.269, |
|
"eval_steps_per_second": 3.58, |
|
"eval_wer": 0.05463696226031967, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 7.807437976246568e-05, |
|
"loss": 0.0572, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_cer": 0.0169955807756742, |
|
"eval_loss": 0.08269500732421875, |
|
"eval_runtime": 64.9267, |
|
"eval_samples_per_second": 63.071, |
|
"eval_steps_per_second": 3.943, |
|
"eval_wer": 0.05579729063324921, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 7.482656095143708e-05, |
|
"loss": 0.0551, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_cer": 0.016853250953143155, |
|
"eval_loss": 0.08304612338542938, |
|
"eval_runtime": 67.5544, |
|
"eval_samples_per_second": 60.618, |
|
"eval_steps_per_second": 3.79, |
|
"eval_wer": 0.05333159284077394, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 7.157874214040847e-05, |
|
"loss": 0.054, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"eval_cer": 0.016157934770942316, |
|
"eval_loss": 0.0788031816482544, |
|
"eval_runtime": 68.7321, |
|
"eval_samples_per_second": 59.579, |
|
"eval_steps_per_second": 3.725, |
|
"eval_wer": 0.05122849766483915, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 6.832929860761345e-05, |
|
"loss": 0.0524, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"eval_cer": 0.015562949447246968, |
|
"eval_loss": 0.07834739238023758, |
|
"eval_runtime": 66.4802, |
|
"eval_samples_per_second": 61.597, |
|
"eval_steps_per_second": 3.851, |
|
"eval_wer": 0.04893684912830331, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 6.508147979658484e-05, |
|
"loss": 0.048, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"eval_cer": 0.016024938051528062, |
|
"eval_loss": 0.08605939149856567, |
|
"eval_runtime": 68.4968, |
|
"eval_samples_per_second": 59.784, |
|
"eval_steps_per_second": 3.737, |
|
"eval_wer": 0.04918341890755084, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 6.183284862467303e-05, |
|
"loss": 0.046, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"eval_cer": 0.015420619624715923, |
|
"eval_loss": 0.07625599950551987, |
|
"eval_runtime": 73.2954, |
|
"eval_samples_per_second": 55.87, |
|
"eval_steps_per_second": 3.493, |
|
"eval_wer": 0.049400980477475126, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 5.8584217452761215e-05, |
|
"loss": 0.0456, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_cer": 0.015303955835756052, |
|
"eval_loss": 0.08352649956941605, |
|
"eval_runtime": 71.5001, |
|
"eval_samples_per_second": 57.273, |
|
"eval_steps_per_second": 3.58, |
|
"eval_wer": 0.047094827836277664, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.53363986417326e-05, |
|
"loss": 0.0439, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"eval_cer": 0.015152292910108217, |
|
"eval_loss": 0.07897598296403885, |
|
"eval_runtime": 71.5932, |
|
"eval_samples_per_second": 57.198, |
|
"eval_steps_per_second": 3.576, |
|
"eval_wer": 0.04686276216169176, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 5.2088579830704e-05, |
|
"loss": 0.0436, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"eval_cer": 0.0155349501378966, |
|
"eval_loss": 0.08321597427129745, |
|
"eval_runtime": 69.796, |
|
"eval_samples_per_second": 58.671, |
|
"eval_steps_per_second": 3.668, |
|
"eval_wer": 0.047196356568909, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 4.8840761019675384e-05, |
|
"loss": 0.0406, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"eval_cer": 0.014837300679916562, |
|
"eval_loss": 0.08103086799383163, |
|
"eval_runtime": 69.1811, |
|
"eval_samples_per_second": 59.192, |
|
"eval_steps_per_second": 3.7, |
|
"eval_wer": 0.04416499869463058, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 4.559212984776357e-05, |
|
"loss": 0.0386, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"eval_cer": 0.014624972584009594, |
|
"eval_loss": 0.08100830018520355, |
|
"eval_runtime": 72.8594, |
|
"eval_samples_per_second": 56.204, |
|
"eval_steps_per_second": 3.514, |
|
"eval_wer": 0.043628346822150665, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 4.234349867585176e-05, |
|
"loss": 0.038, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"eval_cer": 0.014267981389792385, |
|
"eval_loss": 0.07782719284296036, |
|
"eval_runtime": 73.706, |
|
"eval_samples_per_second": 55.559, |
|
"eval_steps_per_second": 3.473, |
|
"eval_wer": 0.04301917442636266, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 3.909567986482315e-05, |
|
"loss": 0.0373, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"eval_cer": 0.014431310694336206, |
|
"eval_loss": 0.07849407941102982, |
|
"eval_runtime": 74.4035, |
|
"eval_samples_per_second": 55.038, |
|
"eval_steps_per_second": 3.441, |
|
"eval_wer": 0.042975662112377806, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 3.584786105379454e-05, |
|
"loss": 0.0363, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"eval_cer": 0.014375312075635468, |
|
"eval_loss": 0.07884030044078827, |
|
"eval_runtime": 68.8676, |
|
"eval_samples_per_second": 59.462, |
|
"eval_steps_per_second": 3.717, |
|
"eval_wer": 0.042119919937342265, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 3.259922988188273e-05, |
|
"loss": 0.0348, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"eval_cer": 0.01435431259362269, |
|
"eval_loss": 0.08232194930315018, |
|
"eval_runtime": 67.1145, |
|
"eval_samples_per_second": 61.015, |
|
"eval_steps_per_second": 3.814, |
|
"eval_wer": 0.04232297740260494, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 2.935059870997092e-05, |
|
"loss": 0.0323, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"eval_cer": 0.014316980181155532, |
|
"eval_loss": 0.08194055408239365, |
|
"eval_runtime": 76.1231, |
|
"eval_samples_per_second": 53.794, |
|
"eval_steps_per_second": 3.363, |
|
"eval_wer": 0.04068401357584196, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 2.610277989894231e-05, |
|
"loss": 0.0319, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"eval_cer": 0.014167650531286895, |
|
"eval_loss": 0.08085062354803085, |
|
"eval_runtime": 74.2762, |
|
"eval_samples_per_second": 55.132, |
|
"eval_steps_per_second": 3.447, |
|
"eval_wer": 0.041032112087720826, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 2.2854148727030496e-05, |
|
"loss": 0.0314, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"eval_cer": 0.01383399209486166, |
|
"eval_loss": 0.08210451155900955, |
|
"eval_runtime": 73.1195, |
|
"eval_samples_per_second": 56.004, |
|
"eval_steps_per_second": 3.501, |
|
"eval_wer": 0.040045832970730715, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 1.9606329916001885e-05, |
|
"loss": 0.0306, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"eval_cer": 0.013700995375447406, |
|
"eval_loss": 0.08130906522274017, |
|
"eval_runtime": 70.4204, |
|
"eval_samples_per_second": 58.151, |
|
"eval_steps_per_second": 3.635, |
|
"eval_wer": 0.03885649638847794, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 1.6358511104973274e-05, |
|
"loss": 0.0295, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"eval_cer": 0.013115343154868847, |
|
"eval_loss": 0.0819702297449112, |
|
"eval_runtime": 67.1904, |
|
"eval_samples_per_second": 60.946, |
|
"eval_steps_per_second": 3.81, |
|
"eval_wer": 0.037710672120210016, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 1.3109879933061465e-05, |
|
"loss": 0.0275, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"eval_cer": 0.013700995375447406, |
|
"eval_loss": 0.08659966289997101, |
|
"eval_runtime": 75.5877, |
|
"eval_samples_per_second": 54.176, |
|
"eval_steps_per_second": 3.387, |
|
"eval_wer": 0.037826704957502975, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 9.862061122032852e-06, |
|
"loss": 0.0267, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"eval_cer": 0.013351004008567788, |
|
"eval_loss": 0.08306384831666946, |
|
"eval_runtime": 71.1622, |
|
"eval_samples_per_second": 57.545, |
|
"eval_steps_per_second": 3.597, |
|
"eval_wer": 0.03755112696893221, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"learning_rate": 6.6134299501210415e-06, |
|
"loss": 0.0264, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"eval_cer": 0.013194674531361559, |
|
"eval_loss": 0.08454854041337967, |
|
"eval_runtime": 69.2173, |
|
"eval_samples_per_second": 59.162, |
|
"eval_steps_per_second": 3.698, |
|
"eval_wer": 0.03691294636382096, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 3.364798778209232e-06, |
|
"loss": 0.0258, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"eval_cer": 0.013325337974996616, |
|
"eval_loss": 0.08589179813861847, |
|
"eval_runtime": 73.5677, |
|
"eval_samples_per_second": 55.663, |
|
"eval_steps_per_second": 3.48, |
|
"eval_wer": 0.037028979201113914, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 1.1616760629742156e-07, |
|
"loss": 0.0254, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_cer": 0.013243673322724706, |
|
"eval_loss": 0.0846036821603775, |
|
"eval_runtime": 67.6027, |
|
"eval_samples_per_second": 60.575, |
|
"eval_steps_per_second": 3.787, |
|
"eval_wer": 0.03666637658457343, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"step": 124098, |
|
"total_flos": 8.637354948645209e+20, |
|
"train_loss": 0.0650978993577199, |
|
"train_runtime": 137826.0599, |
|
"train_samples_per_second": 28.813, |
|
"train_steps_per_second": 0.9 |
|
} |
|
], |
|
"logging_steps": 4000, |
|
"max_steps": 124098, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 6, |
|
"save_steps": 500, |
|
"total_flos": 8.637354948645209e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|