{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.416856492027335, "eval_steps": 200, "global_step": 6000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11389521640091116, "eval_loss": 3.107494354248047, "eval_runtime": 174.2934, "eval_samples_per_second": 40.288, "eval_steps_per_second": 5.037, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.22779043280182232, "eval_loss": 1.6248737573623657, "eval_runtime": 174.1392, "eval_samples_per_second": 40.324, "eval_steps_per_second": 5.042, "eval_wer": 0.8958131253404908, "step": 400 }, { "epoch": 0.2847380410022779, "grad_norm": 3.3784055709838867, "learning_rate": 0.00029699999999999996, "loss": 3.6043, "step": 500 }, { "epoch": 0.3416856492027335, "eval_loss": 1.1246436834335327, "eval_runtime": 174.2087, "eval_samples_per_second": 40.308, "eval_steps_per_second": 5.04, "eval_wer": 0.8675772997249498, "step": 600 }, { "epoch": 0.45558086560364464, "eval_loss": 0.9142205715179443, "eval_runtime": 174.1428, "eval_samples_per_second": 40.323, "eval_steps_per_second": 5.042, "eval_wer": 0.7093902390411778, "step": 800 }, { "epoch": 0.5694760820045558, "grad_norm": 2.1683263778686523, "learning_rate": 0.0002730545454545454, "loss": 1.0665, "step": 1000 }, { "epoch": 0.5694760820045558, "eval_loss": 0.7569850087165833, "eval_runtime": 173.8805, "eval_samples_per_second": 40.384, "eval_steps_per_second": 5.049, "eval_wer": 0.5934306860309067, "step": 1000 }, { "epoch": 0.683371298405467, "eval_loss": 0.6979650259017944, "eval_runtime": 174.537, "eval_samples_per_second": 40.232, "eval_steps_per_second": 5.03, "eval_wer": 0.5284816433914881, "step": 1200 }, { "epoch": 0.7972665148063781, "eval_loss": 0.6941590905189514, "eval_runtime": 174.9006, "eval_samples_per_second": 40.149, "eval_steps_per_second": 5.02, "eval_wer": 0.5388990021127041, "step": 1400 }, { "epoch": 0.8542141230068337, "grad_norm": 2.244067907333374, "learning_rate": 0.0002459454545454545, "loss": 0.8811, "step": 1500 }, { "epoch": 0.9111617312072893, "eval_loss": 0.6152143478393555, "eval_runtime": 175.8479, "eval_samples_per_second": 39.932, "eval_steps_per_second": 4.993, "eval_wer": 0.4861611235865478, "step": 1600 }, { "epoch": 1.0250569476082005, "eval_loss": 0.5941163301467896, "eval_runtime": 174.9993, "eval_samples_per_second": 40.126, "eval_steps_per_second": 5.017, "eval_wer": 0.4534607156619142, "step": 1800 }, { "epoch": 1.1389521640091116, "grad_norm": 2.146209478378296, "learning_rate": 0.00021878181818181817, "loss": 0.7654, "step": 2000 }, { "epoch": 1.1389521640091116, "eval_loss": 0.5642105340957642, "eval_runtime": 174.8742, "eval_samples_per_second": 40.155, "eval_steps_per_second": 5.021, "eval_wer": 0.43286517227175486, "step": 2000 }, { "epoch": 1.2528473804100229, "eval_loss": 0.5727251768112183, "eval_runtime": 175.3644, "eval_samples_per_second": 40.042, "eval_steps_per_second": 5.007, "eval_wer": 0.43133711582667855, "step": 2200 }, { "epoch": 1.366742596810934, "eval_loss": 0.5466997027397156, "eval_runtime": 175.0585, "eval_samples_per_second": 40.112, "eval_steps_per_second": 5.015, "eval_wer": 0.4316825894577393, "step": 2400 }, { "epoch": 1.4236902050113895, "grad_norm": 1.5887110233306885, "learning_rate": 0.00019167272727272725, "loss": 0.6896, "step": 2500 }, { "epoch": 1.4806378132118452, "eval_loss": 0.5658189654350281, "eval_runtime": 178.0734, "eval_samples_per_second": 39.433, "eval_steps_per_second": 4.931, "eval_wer": 0.43980121978766656, "step": 2600 }, { "epoch": 1.5945330296127562, "eval_loss": 0.6008076071739197, "eval_runtime": 175.5831, "eval_samples_per_second": 39.992, "eval_steps_per_second": 5.0, "eval_wer": 0.4445182635963805, "step": 2800 }, { "epoch": 1.7084282460136673, "grad_norm": 3.4384372234344482, "learning_rate": 0.0001645090909090909, "loss": 0.7558, "step": 3000 }, { "epoch": 1.7084282460136673, "eval_loss": 0.7089642286300659, "eval_runtime": 175.5809, "eval_samples_per_second": 39.993, "eval_steps_per_second": 5.001, "eval_wer": 0.5020927729573872, "step": 3000 }, { "epoch": 1.8223234624145785, "eval_loss": 0.8049871921539307, "eval_runtime": 175.5153, "eval_samples_per_second": 40.008, "eval_steps_per_second": 5.002, "eval_wer": 0.5291327283115641, "step": 3200 }, { "epoch": 1.9362186788154898, "eval_loss": 0.824817955493927, "eval_runtime": 175.9347, "eval_samples_per_second": 39.913, "eval_steps_per_second": 4.99, "eval_wer": 0.5275382346297453, "step": 3400 }, { "epoch": 1.9931662870159452, "grad_norm": 3.6540627479553223, "learning_rate": 0.00013723636363636361, "loss": 0.9823, "step": 3500 }, { "epoch": 2.050113895216401, "eval_loss": 0.7944601774215698, "eval_runtime": 176.7964, "eval_samples_per_second": 39.718, "eval_steps_per_second": 4.966, "eval_wer": 0.5072748774232982, "step": 3600 }, { "epoch": 2.164009111617312, "eval_loss": 0.7690120935440063, "eval_runtime": 177.7491, "eval_samples_per_second": 39.505, "eval_steps_per_second": 4.94, "eval_wer": 0.4982128383316281, "step": 3800 }, { "epoch": 2.277904328018223, "grad_norm": 12.11134147644043, "learning_rate": 0.00011001818181818181, "loss": 0.9181, "step": 4000 }, { "epoch": 2.277904328018223, "eval_loss": 0.8668798804283142, "eval_runtime": 176.748, "eval_samples_per_second": 39.729, "eval_steps_per_second": 4.968, "eval_wer": 0.6929802415657927, "step": 4000 }, { "epoch": 2.3917995444191344, "eval_loss": 0.88874351978302, "eval_runtime": 177.153, "eval_samples_per_second": 39.638, "eval_steps_per_second": 4.956, "eval_wer": 0.6311271741585724, "step": 4200 }, { "epoch": 2.5056947608200457, "eval_loss": 1.150854468345642, "eval_runtime": 176.8956, "eval_samples_per_second": 39.696, "eval_steps_per_second": 4.963, "eval_wer": 0.8436200321556225, "step": 4400 }, { "epoch": 2.562642369020501, "grad_norm": 6.307694911956787, "learning_rate": 8.285454545454545e-05, "loss": 1.0805, "step": 4500 }, { "epoch": 2.619589977220957, "eval_loss": 1.2166552543640137, "eval_runtime": 176.6784, "eval_samples_per_second": 39.745, "eval_steps_per_second": 4.969, "eval_wer": 0.9458935143969492, "step": 4600 }, { "epoch": 2.733485193621868, "eval_loss": 1.2323366403579712, "eval_runtime": 177.2283, "eval_samples_per_second": 39.621, "eval_steps_per_second": 4.954, "eval_wer": 0.9307059620776252, "step": 4800 }, { "epoch": 2.847380410022779, "grad_norm": 0.8214463591575623, "learning_rate": 5.569090909090908e-05, "loss": 1.3977, "step": 5000 }, { "epoch": 2.847380410022779, "eval_loss": 1.65969979763031, "eval_runtime": 176.9601, "eval_samples_per_second": 39.681, "eval_steps_per_second": 4.962, "eval_wer": 0.9527896995708155, "step": 5000 }, { "epoch": 2.9612756264236904, "eval_loss": 2.022368907928467, "eval_runtime": 176.5944, "eval_samples_per_second": 39.763, "eval_steps_per_second": 4.972, "eval_wer": 0.9162359319151198, "step": 5200 }, { "epoch": 3.075170842824601, "eval_loss": 2.105426788330078, "eval_runtime": 176.3683, "eval_samples_per_second": 39.814, "eval_steps_per_second": 4.978, "eval_wer": 0.9103761676344357, "step": 5400 }, { "epoch": 3.132118451025057, "grad_norm": 0.6184139251708984, "learning_rate": 2.8472727272727272e-05, "loss": 2.1028, "step": 5500 }, { "epoch": 3.1890660592255125, "eval_loss": 2.248642683029175, "eval_runtime": 175.7372, "eval_samples_per_second": 39.957, "eval_steps_per_second": 4.996, "eval_wer": 0.9280750475026243, "step": 5600 }, { "epoch": 3.3029612756264237, "eval_loss": 2.2446603775024414, "eval_runtime": 176.6127, "eval_samples_per_second": 39.759, "eval_steps_per_second": 4.971, "eval_wer": 0.9215774857492127, "step": 5800 }, { "epoch": 3.416856492027335, "grad_norm": 0.6574464440345764, "learning_rate": 1.3636363636363634e-06, "loss": 2.2911, "step": 6000 }, { "epoch": 3.416856492027335, "eval_loss": 2.2309672832489014, "eval_runtime": 176.1017, "eval_samples_per_second": 39.875, "eval_steps_per_second": 4.986, "eval_wer": 0.9239559388245924, "step": 6000 }, { "epoch": 3.416856492027335, "step": 6000, "total_flos": 1.116875973325552e+19, "train_loss": 1.37794411722819, "train_runtime": 8509.4041, "train_samples_per_second": 11.282, "train_steps_per_second": 0.705 } ], "logging_steps": 500, "max_steps": 6000, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.116875973325552e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }