{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.1422044545973729, "eval_steps": 500, "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0713877784123358, "grad_norm": 5.507429599761963, "learning_rate": 0.00018487499999999998, "loss": 4.6973, "step": 500 }, { "epoch": 0.0713877784123358, "eval_loss": Infinity, "eval_runtime": 112.8446, "eval_samples_per_second": 34.57, "eval_steps_per_second": 0.541, "eval_wer": 1.0, "step": 500 }, { "epoch": 0.1427755568246716, "grad_norm": 17.29350471496582, "learning_rate": 0.0002919583333333333, "loss": 1.448, "step": 1000 }, { "epoch": 0.1427755568246716, "eval_loss": Infinity, "eval_runtime": 111.1281, "eval_samples_per_second": 35.104, "eval_steps_per_second": 0.549, "eval_wer": 0.7573781577549148, "step": 1000 }, { "epoch": 0.21416333523700742, "grad_norm": 6.642392158508301, "learning_rate": 0.000271125, "loss": 1.053, "step": 1500 }, { "epoch": 0.21416333523700742, "eval_loss": Infinity, "eval_runtime": 112.2463, "eval_samples_per_second": 34.754, "eval_steps_per_second": 0.543, "eval_wer": 0.6583828430662223, "step": 1500 }, { "epoch": 0.2855511136493432, "grad_norm": 7.0610857009887695, "learning_rate": 0.0002502916666666666, "loss": 0.9304, "step": 2000 }, { "epoch": 0.2855511136493432, "eval_loss": Infinity, "eval_runtime": 112.8186, "eval_samples_per_second": 34.578, "eval_steps_per_second": 0.541, "eval_wer": 0.5963386948751389, "step": 2000 }, { "epoch": 0.35693889206167906, "grad_norm": 8.276483535766602, "learning_rate": 0.0002295, "loss": 0.8755, "step": 2500 }, { "epoch": 0.35693889206167906, "eval_loss": Infinity, "eval_runtime": 111.0309, "eval_samples_per_second": 35.134, "eval_steps_per_second": 0.549, "eval_wer": 0.5945515142732938, "step": 2500 }, { "epoch": 0.42832667047401485, "grad_norm": 7.304489612579346, "learning_rate": 0.00020874999999999998, "loss": 0.8238, "step": 3000 }, { "epoch": 0.42832667047401485, "eval_loss": Infinity, "eval_runtime": 107.1934, "eval_samples_per_second": 36.392, "eval_steps_per_second": 0.569, "eval_wer": 0.5392213688837366, "step": 3000 }, { "epoch": 0.49971444888635064, "grad_norm": 7.1927947998046875, "learning_rate": 0.00018795833333333333, "loss": 0.7819, "step": 3500 }, { "epoch": 0.49971444888635064, "eval_loss": Infinity, "eval_runtime": 107.346, "eval_samples_per_second": 36.34, "eval_steps_per_second": 0.568, "eval_wer": 0.49666714968845094, "step": 3500 }, { "epoch": 0.5711022272986864, "grad_norm": 4.308356761932373, "learning_rate": 0.000167125, "loss": 0.729, "step": 4000 }, { "epoch": 0.5711022272986864, "eval_loss": Infinity, "eval_runtime": 107.5572, "eval_samples_per_second": 36.269, "eval_steps_per_second": 0.567, "eval_wer": 0.4834082017098971, "step": 4000 }, { "epoch": 0.6424900057110223, "grad_norm": 6.571809768676758, "learning_rate": 0.0001463333333333333, "loss": 0.6923, "step": 4500 }, { "epoch": 0.6424900057110223, "eval_loss": Infinity, "eval_runtime": 107.6399, "eval_samples_per_second": 36.241, "eval_steps_per_second": 0.567, "eval_wer": 0.4564314350577211, "step": 4500 }, { "epoch": 0.7138777841233581, "grad_norm": 5.07558536529541, "learning_rate": 0.0001255, "loss": 0.7052, "step": 5000 }, { "epoch": 0.7138777841233581, "eval_loss": Infinity, "eval_runtime": 107.7831, "eval_samples_per_second": 36.193, "eval_steps_per_second": 0.566, "eval_wer": 0.43462300149736754, "step": 5000 }, { "epoch": 0.7852655625356939, "grad_norm": 4.797505855560303, "learning_rate": 0.00010466666666666667, "loss": 0.6675, "step": 5500 }, { "epoch": 0.7852655625356939, "eval_loss": Infinity, "eval_runtime": 108.5624, "eval_samples_per_second": 35.933, "eval_steps_per_second": 0.562, "eval_wer": 0.41626817369463365, "step": 5500 }, { "epoch": 0.8566533409480297, "grad_norm": 4.757002830505371, "learning_rate": 8.383333333333333e-05, "loss": 0.6217, "step": 6000 }, { "epoch": 0.8566533409480297, "eval_loss": Infinity, "eval_runtime": 107.7978, "eval_samples_per_second": 36.188, "eval_steps_per_second": 0.566, "eval_wer": 0.39619861855769695, "step": 6000 }, { "epoch": 0.9280411193603655, "grad_norm": 3.95064640045166, "learning_rate": 6.299999999999999e-05, "loss": 0.5954, "step": 6500 }, { "epoch": 0.9280411193603655, "eval_loss": Infinity, "eval_runtime": 108.005, "eval_samples_per_second": 36.119, "eval_steps_per_second": 0.565, "eval_wer": 0.3882529102062503, "step": 6500 }, { "epoch": 0.9994288977727013, "grad_norm": 5.451465129852295, "learning_rate": 4.216666666666666e-05, "loss": 0.5687, "step": 7000 }, { "epoch": 0.9994288977727013, "eval_loss": Infinity, "eval_runtime": 108.8578, "eval_samples_per_second": 35.836, "eval_steps_per_second": 0.56, "eval_wer": 0.3746075448002705, "step": 7000 }, { "epoch": 1.070816676185037, "grad_norm": 2.5018062591552734, "learning_rate": 2.133333333333333e-05, "loss": 0.477, "step": 7500 }, { "epoch": 1.070816676185037, "eval_loss": Infinity, "eval_runtime": 106.7519, "eval_samples_per_second": 36.543, "eval_steps_per_second": 0.571, "eval_wer": 0.36472974931169394, "step": 7500 }, { "epoch": 1.1422044545973729, "grad_norm": 4.154621124267578, "learning_rate": 5e-07, "loss": 0.4804, "step": 8000 }, { "epoch": 1.1422044545973729, "eval_loss": Infinity, "eval_runtime": 107.9663, "eval_samples_per_second": 36.132, "eval_steps_per_second": 0.565, "eval_wer": 0.3598029271120127, "step": 8000 }, { "epoch": 1.1422044545973729, "step": 8000, "total_flos": 8.368100900404746e+18, "train_loss": 1.0091876525878907, "train_runtime": 5013.5506, "train_samples_per_second": 12.765, "train_steps_per_second": 1.596 } ], "logging_steps": 500, "max_steps": 8000, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.368100900404746e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }