{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.5759306677171558, "global_step": 4000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16, "learning_rate": 0.0002388, "loss": 4.5334, "step": 400 }, { "epoch": 0.16, "eval_loss": 2.026416063308716, "eval_runtime": 471.8471, "eval_samples_per_second": 10.758, "eval_steps_per_second": 1.346, "eval_wer": 0.9800369528744665, "step": 400 }, { "epoch": 0.32, "learning_rate": 0.00029881808566895816, "loss": 1.0808, "step": 800 }, { "epoch": 0.32, "eval_loss": 0.7069017887115479, "eval_runtime": 472.368, "eval_samples_per_second": 10.746, "eval_steps_per_second": 1.344, "eval_wer": 0.593412194448574, "step": 800 }, { "epoch": 0.47, "learning_rate": 0.0002972316234796404, "loss": 0.7499, "step": 1200 }, { "epoch": 0.47, "eval_loss": 0.5792935490608215, "eval_runtime": 471.2762, "eval_samples_per_second": 10.771, "eval_steps_per_second": 1.347, "eval_wer": 0.49153694225582434, "step": 1200 }, { "epoch": 0.63, "learning_rate": 0.00029564516129032255, "loss": 0.6397, "step": 1600 }, { "epoch": 0.63, "eval_loss": 0.5181192755699158, "eval_runtime": 470.1504, "eval_samples_per_second": 10.797, "eval_steps_per_second": 1.351, "eval_wer": 0.41618705799902306, "step": 1600 }, { "epoch": 0.79, "learning_rate": 0.0002940586991010047, "loss": 0.597, "step": 2000 }, { "epoch": 0.79, "eval_loss": 0.47279468178749084, "eval_runtime": 470.9283, "eval_samples_per_second": 10.779, "eval_steps_per_second": 1.348, "eval_wer": 0.37248072716461017, "step": 2000 }, { "epoch": 0.95, "learning_rate": 0.00029247223691168694, "loss": 0.5666, "step": 2400 }, { "epoch": 0.95, "eval_loss": 0.4448830783367157, "eval_runtime": 471.0174, "eval_samples_per_second": 10.777, "eval_steps_per_second": 1.348, "eval_wer": 0.3520292225030263, "step": 2400 }, { "epoch": 1.1, "learning_rate": 0.0002908857747223691, "loss": 0.5067, "step": 2800 }, { "epoch": 1.1, "eval_loss": 0.421601802110672, "eval_runtime": 470.0998, "eval_samples_per_second": 10.798, "eval_steps_per_second": 1.351, "eval_wer": 0.3330855650179455, "step": 2800 }, { "epoch": 1.26, "learning_rate": 0.0002892993125330513, "loss": 0.477, "step": 3200 }, { "epoch": 1.26, "eval_loss": 0.39891988039016724, "eval_runtime": 471.7321, "eval_samples_per_second": 10.76, "eval_steps_per_second": 1.346, "eval_wer": 0.30331089260305394, "step": 3200 }, { "epoch": 1.42, "learning_rate": 0.00028771285034373344, "loss": 0.4633, "step": 3600 }, { "epoch": 1.42, "eval_loss": 0.41667959094047546, "eval_runtime": 473.0088, "eval_samples_per_second": 10.731, "eval_steps_per_second": 1.342, "eval_wer": 0.3046276042219721, "step": 3600 }, { "epoch": 1.58, "learning_rate": 0.0002861263881544156, "loss": 0.4606, "step": 4000 }, { "epoch": 1.58, "eval_loss": 0.3892616033554077, "eval_runtime": 473.5469, "eval_samples_per_second": 10.719, "eval_steps_per_second": 1.341, "eval_wer": 0.29747063945462654, "step": 4000 } ], "max_steps": 76140, "num_train_epochs": 30, "total_flos": 5.025867257304392e+18, "trial_name": null, "trial_params": null }