{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.277904328018223, "eval_steps": 200, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.22779043280182232, "eval_loss": 2.9681296348571777, "eval_runtime": 179.5569, "eval_samples_per_second": 39.107, "eval_steps_per_second": 4.89, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.45558086560364464, "eval_loss": 1.3578158617019653, "eval_runtime": 178.971, "eval_samples_per_second": 39.235, "eval_steps_per_second": 4.906, "eval_wer": 0.8299738237287235, "step": 400 }, { "epoch": 0.5694760820045558, "grad_norm": 2.982609748840332, "learning_rate": 0.00029699999999999996, "loss": 3.3875, "step": 500 }, { "epoch": 0.683371298405467, "eval_loss": 0.9213220477104187, "eval_runtime": 178.9628, "eval_samples_per_second": 39.237, "eval_steps_per_second": 4.906, "eval_wer": 0.6513373815756255, "step": 600 }, { "epoch": 0.9111617312072893, "eval_loss": 0.7288344502449036, "eval_runtime": 180.4166, "eval_samples_per_second": 38.921, "eval_steps_per_second": 4.867, "eval_wer": 0.5341155210672478, "step": 800 }, { "epoch": 1.1389521640091116, "grad_norm": 1.1159461736679077, "learning_rate": 0.0002022, "loss": 0.8947, "step": 1000 }, { "epoch": 1.1389521640091116, "eval_loss": 0.6245301961898804, "eval_runtime": 180.7702, "eval_samples_per_second": 38.845, "eval_steps_per_second": 4.857, "eval_wer": 0.46335986393653916, "step": 1000 }, { "epoch": 1.366742596810934, "eval_loss": 0.5951615571975708, "eval_runtime": 177.8647, "eval_samples_per_second": 39.479, "eval_steps_per_second": 4.936, "eval_wer": 0.445833720883881, "step": 1200 }, { "epoch": 1.5945330296127562, "eval_loss": 0.578143835067749, "eval_runtime": 178.6987, "eval_samples_per_second": 39.295, "eval_steps_per_second": 4.913, "eval_wer": 0.4384857625001661, "step": 1400 }, { "epoch": 1.7084282460136673, "grad_norm": 1.4777103662490845, "learning_rate": 0.00010279999999999999, "loss": 0.7523, "step": 1500 }, { "epoch": 1.8223234624145785, "eval_loss": 0.562013566493988, "eval_runtime": 178.104, "eval_samples_per_second": 39.426, "eval_steps_per_second": 4.93, "eval_wer": 0.42830757783122286, "step": 1600 }, { "epoch": 2.050113895216401, "eval_loss": 0.5437431931495667, "eval_runtime": 177.6652, "eval_samples_per_second": 39.524, "eval_steps_per_second": 4.942, "eval_wer": 0.4140368593789447, "step": 1800 }, { "epoch": 2.277904328018223, "grad_norm": 3.888875961303711, "learning_rate": 2.9999999999999997e-06, "loss": 0.6707, "step": 2000 }, { "epoch": 2.277904328018223, "eval_loss": 0.5399738550186157, "eval_runtime": 178.027, "eval_samples_per_second": 39.443, "eval_steps_per_second": 4.932, "eval_wer": 0.40660917631113885, "step": 2000 }, { "epoch": 2.277904328018223, "step": 2000, "total_flos": 7.50335828249239e+18, "train_loss": 1.4263343353271485, "train_runtime": 3812.7792, "train_samples_per_second": 16.786, "train_steps_per_second": 0.525 } ], "logging_steps": 500, "max_steps": 2000, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.50335828249239e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }