{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.999854956849663, "eval_steps": 4000, "global_step": 124098, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.19, "learning_rate": 9.756372971128695e-05, "loss": 0.7054, "step": 4000 }, { "epoch": 0.19, "eval_cer": 0.02274243901983751, "eval_loss": 0.10111288726329803, "eval_runtime": 66.987, "eval_samples_per_second": 61.131, "eval_steps_per_second": 3.822, "eval_wer": 0.08709714849302352, "step": 4000 }, { "epoch": 0.39, "learning_rate": 9.431509853937514e-05, "loss": 0.0856, "step": 8000 }, { "epoch": 0.39, "eval_cer": 0.020731155298169313, "eval_loss": 0.09945787489414215, "eval_runtime": 67.6612, "eval_samples_per_second": 60.522, "eval_steps_per_second": 3.784, "eval_wer": 0.07468163490267746, "step": 8000 }, { "epoch": 0.58, "learning_rate": 9.106727972834653e-05, "loss": 0.075, "step": 12000 }, { "epoch": 0.58, "eval_cer": 0.018470211068126987, "eval_loss": 0.08679623156785965, "eval_runtime": 72.7557, "eval_samples_per_second": 56.284, "eval_steps_per_second": 3.519, "eval_wer": 0.06474632320946828, "step": 12000 }, { "epoch": 0.77, "learning_rate": 8.781864855643472e-05, "loss": 0.0694, "step": 16000 }, { "epoch": 0.77, "eval_cer": 0.01828354900579119, "eval_loss": 0.08532032370567322, "eval_runtime": 71.0749, "eval_samples_per_second": 57.615, "eval_steps_per_second": 3.602, "eval_wer": 0.06190351869579091, "step": 16000 }, { "epoch": 0.97, "learning_rate": 8.45708297454061e-05, "loss": 0.0658, "step": 20000 }, { "epoch": 0.97, "eval_cer": 0.017056245945933333, "eval_loss": 0.0778348445892334, "eval_runtime": 68.8966, "eval_samples_per_second": 59.437, "eval_steps_per_second": 3.716, "eval_wer": 0.057262205204072755, "step": 20000 }, { "epoch": 1.16, "learning_rate": 8.13230109343775e-05, "loss": 0.0589, "step": 24000 }, { "epoch": 1.16, "eval_cer": 0.016645589408794584, "eval_loss": 0.08211962133646011, "eval_runtime": 71.5048, "eval_samples_per_second": 57.269, "eval_steps_per_second": 3.58, "eval_wer": 0.05463696226031967, "step": 24000 }, { "epoch": 1.35, "learning_rate": 7.807437976246568e-05, "loss": 0.0572, "step": 28000 }, { "epoch": 1.35, "eval_cer": 0.0169955807756742, "eval_loss": 0.08269500732421875, "eval_runtime": 64.9267, "eval_samples_per_second": 63.071, "eval_steps_per_second": 3.943, "eval_wer": 0.05579729063324921, "step": 28000 }, { "epoch": 1.55, "learning_rate": 7.482656095143708e-05, "loss": 0.0551, "step": 32000 }, { "epoch": 1.55, "eval_cer": 0.016853250953143155, "eval_loss": 0.08304612338542938, "eval_runtime": 67.5544, "eval_samples_per_second": 60.618, "eval_steps_per_second": 3.79, "eval_wer": 0.05333159284077394, "step": 32000 }, { "epoch": 1.74, "learning_rate": 7.157874214040847e-05, "loss": 0.054, "step": 36000 }, { "epoch": 1.74, "eval_cer": 0.016157934770942316, "eval_loss": 0.0788031816482544, "eval_runtime": 68.7321, "eval_samples_per_second": 59.579, "eval_steps_per_second": 3.725, "eval_wer": 0.05122849766483915, "step": 36000 }, { "epoch": 1.93, "learning_rate": 6.832929860761345e-05, "loss": 0.0524, "step": 40000 }, { "epoch": 1.93, "eval_cer": 0.015562949447246968, "eval_loss": 0.07834739238023758, "eval_runtime": 66.4802, "eval_samples_per_second": 61.597, "eval_steps_per_second": 3.851, "eval_wer": 0.04893684912830331, "step": 40000 }, { "epoch": 2.13, "learning_rate": 6.508147979658484e-05, "loss": 0.048, "step": 44000 }, { "epoch": 2.13, "eval_cer": 0.016024938051528062, "eval_loss": 0.08605939149856567, "eval_runtime": 68.4968, "eval_samples_per_second": 59.784, "eval_steps_per_second": 3.737, "eval_wer": 0.04918341890755084, "step": 44000 }, { "epoch": 2.32, "learning_rate": 6.183284862467303e-05, "loss": 0.046, "step": 48000 }, { "epoch": 2.32, "eval_cer": 0.015420619624715923, "eval_loss": 0.07625599950551987, "eval_runtime": 73.2954, "eval_samples_per_second": 55.87, "eval_steps_per_second": 3.493, "eval_wer": 0.049400980477475126, "step": 48000 }, { "epoch": 2.51, "learning_rate": 5.8584217452761215e-05, "loss": 0.0456, "step": 52000 }, { "epoch": 2.51, "eval_cer": 0.015303955835756052, "eval_loss": 0.08352649956941605, "eval_runtime": 71.5001, "eval_samples_per_second": 57.273, "eval_steps_per_second": 3.58, "eval_wer": 0.047094827836277664, "step": 52000 }, { "epoch": 2.71, "learning_rate": 5.53363986417326e-05, "loss": 0.0439, "step": 56000 }, { "epoch": 2.71, "eval_cer": 0.015152292910108217, "eval_loss": 0.07897598296403885, "eval_runtime": 71.5932, "eval_samples_per_second": 57.198, "eval_steps_per_second": 3.576, "eval_wer": 0.04686276216169176, "step": 56000 }, { "epoch": 2.9, "learning_rate": 5.2088579830704e-05, "loss": 0.0436, "step": 60000 }, { "epoch": 2.9, "eval_cer": 0.0155349501378966, "eval_loss": 0.08321597427129745, "eval_runtime": 69.796, "eval_samples_per_second": 58.671, "eval_steps_per_second": 3.668, "eval_wer": 0.047196356568909, "step": 60000 }, { "epoch": 3.09, "learning_rate": 4.8840761019675384e-05, "loss": 0.0406, "step": 64000 }, { "epoch": 3.09, "eval_cer": 0.014837300679916562, "eval_loss": 0.08103086799383163, "eval_runtime": 69.1811, "eval_samples_per_second": 59.192, "eval_steps_per_second": 3.7, "eval_wer": 0.04416499869463058, "step": 64000 }, { "epoch": 3.29, "learning_rate": 4.559212984776357e-05, "loss": 0.0386, "step": 68000 }, { "epoch": 3.29, "eval_cer": 0.014624972584009594, "eval_loss": 0.08100830018520355, "eval_runtime": 72.8594, "eval_samples_per_second": 56.204, "eval_steps_per_second": 3.514, "eval_wer": 0.043628346822150665, "step": 68000 }, { "epoch": 3.48, "learning_rate": 4.234349867585176e-05, "loss": 0.038, "step": 72000 }, { "epoch": 3.48, "eval_cer": 0.014267981389792385, "eval_loss": 0.07782719284296036, "eval_runtime": 73.706, "eval_samples_per_second": 55.559, "eval_steps_per_second": 3.473, "eval_wer": 0.04301917442636266, "step": 72000 }, { "epoch": 3.67, "learning_rate": 3.909567986482315e-05, "loss": 0.0373, "step": 76000 }, { "epoch": 3.67, "eval_cer": 0.014431310694336206, "eval_loss": 0.07849407941102982, "eval_runtime": 74.4035, "eval_samples_per_second": 55.038, "eval_steps_per_second": 3.441, "eval_wer": 0.042975662112377806, "step": 76000 }, { "epoch": 3.87, "learning_rate": 3.584786105379454e-05, "loss": 0.0363, "step": 80000 }, { "epoch": 3.87, "eval_cer": 0.014375312075635468, "eval_loss": 0.07884030044078827, "eval_runtime": 68.8676, "eval_samples_per_second": 59.462, "eval_steps_per_second": 3.717, "eval_wer": 0.042119919937342265, "step": 80000 }, { "epoch": 4.06, "learning_rate": 3.259922988188273e-05, "loss": 0.0348, "step": 84000 }, { "epoch": 4.06, "eval_cer": 0.01435431259362269, "eval_loss": 0.08232194930315018, "eval_runtime": 67.1145, "eval_samples_per_second": 61.015, "eval_steps_per_second": 3.814, "eval_wer": 0.04232297740260494, "step": 84000 }, { "epoch": 4.25, "learning_rate": 2.935059870997092e-05, "loss": 0.0323, "step": 88000 }, { "epoch": 4.25, "eval_cer": 0.014316980181155532, "eval_loss": 0.08194055408239365, "eval_runtime": 76.1231, "eval_samples_per_second": 53.794, "eval_steps_per_second": 3.363, "eval_wer": 0.04068401357584196, "step": 88000 }, { "epoch": 4.45, "learning_rate": 2.610277989894231e-05, "loss": 0.0319, "step": 92000 }, { "epoch": 4.45, "eval_cer": 0.014167650531286895, "eval_loss": 0.08085062354803085, "eval_runtime": 74.2762, "eval_samples_per_second": 55.132, "eval_steps_per_second": 3.447, "eval_wer": 0.041032112087720826, "step": 92000 }, { "epoch": 4.64, "learning_rate": 2.2854148727030496e-05, "loss": 0.0314, "step": 96000 }, { "epoch": 4.64, "eval_cer": 0.01383399209486166, "eval_loss": 0.08210451155900955, "eval_runtime": 73.1195, "eval_samples_per_second": 56.004, "eval_steps_per_second": 3.501, "eval_wer": 0.040045832970730715, "step": 96000 }, { "epoch": 4.83, "learning_rate": 1.9606329916001885e-05, "loss": 0.0306, "step": 100000 }, { "epoch": 4.83, "eval_cer": 0.013700995375447406, "eval_loss": 0.08130906522274017, "eval_runtime": 70.4204, "eval_samples_per_second": 58.151, "eval_steps_per_second": 3.635, "eval_wer": 0.03885649638847794, "step": 100000 }, { "epoch": 5.03, "learning_rate": 1.6358511104973274e-05, "loss": 0.0295, "step": 104000 }, { "epoch": 5.03, "eval_cer": 0.013115343154868847, "eval_loss": 0.0819702297449112, "eval_runtime": 67.1904, "eval_samples_per_second": 60.946, "eval_steps_per_second": 3.81, "eval_wer": 0.037710672120210016, "step": 104000 }, { "epoch": 5.22, "learning_rate": 1.3109879933061465e-05, "loss": 0.0275, "step": 108000 }, { "epoch": 5.22, "eval_cer": 0.013700995375447406, "eval_loss": 0.08659966289997101, "eval_runtime": 75.5877, "eval_samples_per_second": 54.176, "eval_steps_per_second": 3.387, "eval_wer": 0.037826704957502975, "step": 108000 }, { "epoch": 5.41, "learning_rate": 9.862061122032852e-06, "loss": 0.0267, "step": 112000 }, { "epoch": 5.41, "eval_cer": 0.013351004008567788, "eval_loss": 0.08306384831666946, "eval_runtime": 71.1622, "eval_samples_per_second": 57.545, "eval_steps_per_second": 3.597, "eval_wer": 0.03755112696893221, "step": 112000 }, { "epoch": 5.61, "learning_rate": 6.6134299501210415e-06, "loss": 0.0264, "step": 116000 }, { "epoch": 5.61, "eval_cer": 0.013194674531361559, "eval_loss": 0.08454854041337967, "eval_runtime": 69.2173, "eval_samples_per_second": 59.162, "eval_steps_per_second": 3.698, "eval_wer": 0.03691294636382096, "step": 116000 }, { "epoch": 5.8, "learning_rate": 3.364798778209232e-06, "loss": 0.0258, "step": 120000 }, { "epoch": 5.8, "eval_cer": 0.013325337974996616, "eval_loss": 0.08589179813861847, "eval_runtime": 73.5677, "eval_samples_per_second": 55.663, "eval_steps_per_second": 3.48, "eval_wer": 0.037028979201113914, "step": 120000 }, { "epoch": 6.0, "learning_rate": 1.1616760629742156e-07, "loss": 0.0254, "step": 124000 }, { "epoch": 6.0, "eval_cer": 0.013243673322724706, "eval_loss": 0.0846036821603775, "eval_runtime": 67.6027, "eval_samples_per_second": 60.575, "eval_steps_per_second": 3.787, "eval_wer": 0.03666637658457343, "step": 124000 }, { "epoch": 6.0, "step": 124098, "total_flos": 8.637354948645209e+20, "train_loss": 0.0650978993577199, "train_runtime": 137826.0599, "train_samples_per_second": 28.813, "train_steps_per_second": 0.9 } ], "logging_steps": 4000, "max_steps": 124098, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 500, "total_flos": 8.637354948645209e+20, "trial_name": null, "trial_params": null }