{
  "best_metric": 73.80372709831596,
  "best_model_checkpoint": "./whisper-distil-v3/checkpoint-1000",
  "epoch": 0.35561877667140823,
  "eval_steps": 1000,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.007112375533428165, "grad_norm": 5.77730655670166, "learning_rate": 3.4000000000000005e-06, "loss": 8.7115, "step": 20 },
    { "epoch": 0.01422475106685633, "grad_norm": 5.685363292694092, "learning_rate": 7.4e-06, "loss": 8.4512, "step": 40 },
    { "epoch": 0.021337126600284494, "grad_norm": 6.336855888366699, "learning_rate": 1.1400000000000001e-05, "loss": 8.118, "step": 60 },
    { "epoch": 0.02844950213371266, "grad_norm": 5.820839881896973, "learning_rate": 1.54e-05, "loss": 7.5291, "step": 80 },
    { "epoch": 0.03556187766714083, "grad_norm": 6.599063873291016, "learning_rate": 1.94e-05, "loss": 6.4103, "step": 100 },
    { "epoch": 0.04267425320056899, "grad_norm": 3.500046968460083, "learning_rate": 2.3400000000000003e-05, "loss": 5.3304, "step": 120 },
    { "epoch": 0.049786628733997154, "grad_norm": 3.011868476867676, "learning_rate": 2.7400000000000002e-05, "loss": 4.3763, "step": 140 },
    { "epoch": 0.05689900426742532, "grad_norm": 2.6885221004486084, "learning_rate": 3.1400000000000004e-05, "loss": 3.7984, "step": 160 },
    { "epoch": 0.06401137980085349, "grad_norm": 2.7655692100524902, "learning_rate": 3.54e-05, "loss": 3.1392, "step": 180 },
    { "epoch": 0.07112375533428165, "grad_norm": 2.893932580947876, "learning_rate": 3.94e-05, "loss": 2.9927, "step": 200 },
    { "epoch": 0.07823613086770982, "grad_norm": 2.6793711185455322, "learning_rate": 4.3400000000000005e-05, "loss": 2.719, "step": 220 },
    { "epoch": 0.08534850640113797, "grad_norm": 2.8600358963012695, "learning_rate": 4.74e-05, "loss": 2.6649, "step": 240 },
    { "epoch": 0.09246088193456614, "grad_norm": 3.3596479892730713, "learning_rate": 5.14e-05, "loss": 2.6195, "step": 260 },
    { "epoch": 0.09957325746799431, "grad_norm": 3.0968446731567383, "learning_rate": 5.5400000000000005e-05, "loss": 2.2805, "step": 280 },
    { "epoch": 0.10668563300142248, "grad_norm": 4.1708903312683105, "learning_rate": 5.94e-05, "loss": 2.1195, "step": 300 },
    { "epoch": 0.11379800853485064, "grad_norm": 3.3060481548309326, "learning_rate": 6.340000000000001e-05, "loss": 2.2916, "step": 320 },
    { "epoch": 0.12091038406827881, "grad_norm": 3.289092540740967, "learning_rate": 6.740000000000001e-05, "loss": 2.1663, "step": 340 },
    { "epoch": 0.12802275960170698, "grad_norm": 3.3083698749542236, "learning_rate": 7.14e-05, "loss": 2.1373, "step": 360 },
    { "epoch": 0.13513513513513514, "grad_norm": 3.4208426475524902, "learning_rate": 7.54e-05, "loss": 2.0894, "step": 380 },
    { "epoch": 0.1422475106685633, "grad_norm": 3.856208562850952, "learning_rate": 7.94e-05, "loss": 1.9931, "step": 400 },
    { "epoch": 0.14935988620199148, "grad_norm": 3.556122303009033, "learning_rate": 8.34e-05, "loss": 1.9146, "step": 420 },
    { "epoch": 0.15647226173541964, "grad_norm": 3.152322769165039, "learning_rate": 8.740000000000001e-05, "loss": 1.9123, "step": 440 },
    { "epoch": 0.16358463726884778, "grad_norm": 3.6083829402923584, "learning_rate": 9.140000000000001e-05, "loss": 1.8787, "step": 460 },
    { "epoch": 0.17069701280227595, "grad_norm": 3.5843188762664795, "learning_rate": 9.54e-05, "loss": 1.8949, "step": 480 },
    { "epoch": 0.17780938833570412, "grad_norm": 4.271010398864746, "learning_rate": 9.94e-05, "loss": 1.8362, "step": 500 },
    { "epoch": 0.18492176386913228, "grad_norm": 3.9229135513305664, "learning_rate": 0.0001, "loss": 1.81, "step": 520 },
    { "epoch": 0.19203413940256045, "grad_norm": 3.9755284786224365, "learning_rate": 0.0001, "loss": 1.7339, "step": 540 },
    { "epoch": 0.19914651493598862, "grad_norm": 4.1353912353515625, "learning_rate": 0.0001, "loss": 1.5915, "step": 560 },
    { "epoch": 0.20625889046941678, "grad_norm": 3.9695911407470703, "learning_rate": 0.0001, "loss": 1.6895, "step": 580 },
    { "epoch": 0.21337126600284495, "grad_norm": 5.034478664398193, "learning_rate": 0.0001, "loss": 1.6259, "step": 600 },
    { "epoch": 0.22048364153627312, "grad_norm": 3.5285422801971436, "learning_rate": 0.0001, "loss": 1.7478, "step": 620 },
    { "epoch": 0.22759601706970128, "grad_norm": 2.889526605606079, "learning_rate": 0.0001, "loss": 1.6589, "step": 640 },
    { "epoch": 0.23470839260312945, "grad_norm": 3.114544153213501, "learning_rate": 0.0001, "loss": 1.5978, "step": 660 },
    { "epoch": 0.24182076813655762, "grad_norm": 3.4191863536834717, "learning_rate": 0.0001, "loss": 1.7244, "step": 680 },
    { "epoch": 0.24893314366998578, "grad_norm": 3.815669298171997, "learning_rate": 0.0001, "loss": 1.6637, "step": 700 },
    { "epoch": 0.25604551920341395, "grad_norm": 3.644179105758667, "learning_rate": 0.0001, "loss": 1.5496, "step": 720 },
    { "epoch": 0.2631578947368421, "grad_norm": 2.65865159034729, "learning_rate": 0.0001, "loss": 1.5384, "step": 740 },
    { "epoch": 0.2702702702702703, "grad_norm": 3.979856491088867, "learning_rate": 0.0001, "loss": 1.5651, "step": 760 },
    { "epoch": 0.2773826458036984, "grad_norm": 4.229691028594971, "learning_rate": 0.0001, "loss": 1.5942, "step": 780 },
    { "epoch": 0.2844950213371266, "grad_norm": 3.7266907691955566, "learning_rate": 0.0001, "loss": 1.5734, "step": 800 },
    { "epoch": 0.29160739687055476, "grad_norm": 2.8548645973205566, "learning_rate": 0.0001, "loss": 1.4683, "step": 820 },
    { "epoch": 0.29871977240398295, "grad_norm": 3.398973226547241, "learning_rate": 0.0001, "loss": 1.5065, "step": 840 },
    { "epoch": 0.3058321479374111, "grad_norm": 3.1035850048065186, "learning_rate": 0.0001, "loss": 1.5188, "step": 860 },
    { "epoch": 0.3129445234708393, "grad_norm": 3.0334548950195312, "learning_rate": 0.0001, "loss": 1.5774, "step": 880 },
    { "epoch": 0.3200568990042674, "grad_norm": 3.0562374591827393, "learning_rate": 0.0001, "loss": 1.4533, "step": 900 },
    { "epoch": 0.32716927453769556, "grad_norm": 2.881594657897949, "learning_rate": 0.0001, "loss": 1.4867, "step": 920 },
    { "epoch": 0.33428165007112376, "grad_norm": 3.8474974632263184, "learning_rate": 0.0001, "loss": 1.4431, "step": 940 },
    { "epoch": 0.3413940256045519, "grad_norm": 3.3145081996917725, "learning_rate": 0.0001, "loss": 1.588, "step": 960 },
    { "epoch": 0.3485064011379801, "grad_norm": 3.1782026290893555, "learning_rate": 0.0001, "loss": 1.396, "step": 980 },
    { "epoch": 0.35561877667140823, "grad_norm": 2.8277013301849365, "learning_rate": 0.0001, "loss": 1.3896, "step": 1000 },
    { "epoch": 0.35561877667140823, "eval_loss": 1.4457485675811768, "eval_runtime": 4741.188, "eval_samples_per_second": 2.109, "eval_steps_per_second": 0.066, "eval_wer": 73.80372709831596, "step": 1000 }
  ],
  "logging_steps": 20,
  "max_steps": 8436,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}