{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.7084282460136673, "eval_steps": 200, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11389521640091116, "eval_loss": 0.7147656679153442, "eval_runtime": 189.2611, "eval_samples_per_second": 37.102, "eval_steps_per_second": 0.581, "eval_wer": 0.46031703849373495, "step": 200 }, { "epoch": 0.22779043280182232, "eval_loss": 0.6810471415519714, "eval_runtime": 189.9773, "eval_samples_per_second": 36.962, "eval_steps_per_second": 0.579, "eval_wer": 0.4794509626755604, "step": 400 }, { "epoch": 0.2847380410022779, "grad_norm": 3.062281370162964, "learning_rate": 0.0002781111111111111, "loss": 1.4735, "step": 500 }, { "epoch": 0.3416856492027335, "eval_loss": 0.6035017371177673, "eval_runtime": 190.2908, "eval_samples_per_second": 36.901, "eval_steps_per_second": 0.578, "eval_wer": 0.4485975099323669, "step": 600 }, { "epoch": 0.45558086560364464, "eval_loss": 0.6222513914108276, "eval_runtime": 191.1122, "eval_samples_per_second": 36.743, "eval_steps_per_second": 0.576, "eval_wer": 0.5104638647869357, "step": 800 }, { "epoch": 0.5694760820045558, "grad_norm": 3.101222515106201, "learning_rate": 0.00022266666666666664, "loss": 0.7681, "step": 1000 }, { "epoch": 0.5694760820045558, "eval_loss": 0.5656484365463257, "eval_runtime": 192.373, "eval_samples_per_second": 36.502, "eval_steps_per_second": 0.572, "eval_wer": 0.4336225567706188, "step": 1000 }, { "epoch": 0.683371298405467, "eval_loss": 0.5275253057479858, "eval_runtime": 193.09, "eval_samples_per_second": 36.366, "eval_steps_per_second": 0.57, "eval_wer": 0.4008291367145458, "step": 1200 }, { "epoch": 0.7972665148063781, "eval_loss": 0.5284231305122375, "eval_runtime": 193.9644, "eval_samples_per_second": 36.203, "eval_steps_per_second": 0.567, "eval_wer": 0.40279567892212226, "step": 1400 }, { "epoch": 0.8542141230068337, "grad_norm": 6.068333148956299, "learning_rate": 0.00016744444444444443, "loss": 0.7159, "step": 1500 }, { "epoch": 0.9111617312072893, "eval_loss": 0.4989575147628784, "eval_runtime": 194.0102, "eval_samples_per_second": 36.194, "eval_steps_per_second": 0.567, "eval_wer": 0.39135518675507247, "step": 1600 }, { "epoch": 1.0250569476082005, "eval_loss": 0.4855109453201294, "eval_runtime": 193.5264, "eval_samples_per_second": 36.284, "eval_steps_per_second": 0.568, "eval_wer": 0.37300522196680796, "step": 1800 }, { "epoch": 1.1389521640091116, "grad_norm": 2.826504707336426, "learning_rate": 0.0001121111111111111, "loss": 0.6203, "step": 2000 }, { "epoch": 1.1389521640091116, "eval_loss": 0.47395312786102295, "eval_runtime": 193.5198, "eval_samples_per_second": 36.286, "eval_steps_per_second": 0.568, "eval_wer": 0.3622822519565766, "step": 2000 }, { "epoch": 1.2528473804100229, "eval_loss": 0.45885559916496277, "eval_runtime": 194.4002, "eval_samples_per_second": 36.121, "eval_steps_per_second": 0.566, "eval_wer": 0.3536454111800582, "step": 2200 }, { "epoch": 1.366742596810934, "eval_loss": 0.4538777470588684, "eval_runtime": 194.3098, "eval_samples_per_second": 36.138, "eval_steps_per_second": 0.566, "eval_wer": 0.34074329980467455, "step": 2400 }, { "epoch": 1.4236902050113895, "grad_norm": 1.8585691452026367, "learning_rate": 5.666666666666666e-05, "loss": 0.5447, "step": 2500 }, { "epoch": 1.4806378132118452, "eval_loss": 0.4409582316875458, "eval_runtime": 193.3591, "eval_samples_per_second": 36.316, "eval_steps_per_second": 0.569, "eval_wer": 0.3357339321542938, "step": 2600 }, { "epoch": 1.5945330296127562, "eval_loss": 0.43471136689186096, "eval_runtime": 193.3767, "eval_samples_per_second": 36.313, "eval_steps_per_second": 0.569, "eval_wer": 0.3293426699796702, "step": 2800 }, { "epoch": 1.7084282460136673, "grad_norm": 1.961743712425232, "learning_rate": 1.222222222222222e-06, "loss": 0.5392, "step": 3000 }, { "epoch": 1.7084282460136673, "eval_loss": 0.4313787817955017, "eval_runtime": 192.9526, "eval_samples_per_second": 36.392, "eval_steps_per_second": 0.57, "eval_wer": 0.3285321356914123, "step": 3000 }, { "epoch": 1.7084282460136673, "step": 3000, "total_flos": 5.591115044514249e+18, "train_loss": 0.7769642333984375, "train_runtime": 4540.3637, "train_samples_per_second": 10.572, "train_steps_per_second": 0.661 } ], "logging_steps": 500, "max_steps": 3000, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.591115044514249e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }