{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.7084282460136673, "eval_steps": 200, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11389521640091116, "eval_loss": 0.7527692914009094, "eval_runtime": 198.1592, "eval_samples_per_second": 35.436, "eval_steps_per_second": 0.555, "eval_wer": 0.5048698494532216, "step": 200 }, { "epoch": 0.22779043280182232, "eval_loss": 0.6966450810432434, "eval_runtime": 197.214, "eval_samples_per_second": 35.606, "eval_steps_per_second": 0.558, "eval_wer": 0.5050160113740549, "step": 400 }, { "epoch": 0.2847380410022779, "grad_norm": 4.219395637512207, "learning_rate": 0.00027833333333333334, "loss": 2.117, "step": 500 }, { "epoch": 0.3416856492027335, "eval_loss": 0.6128434538841248, "eval_runtime": 200.109, "eval_samples_per_second": 35.091, "eval_steps_per_second": 0.55, "eval_wer": 0.47608923849639245, "step": 600 }, { "epoch": 0.45558086560364464, "eval_loss": 0.6331803202629089, "eval_runtime": 200.832, "eval_samples_per_second": 34.965, "eval_steps_per_second": 0.548, "eval_wer": 0.5017472993263264, "step": 800 }, { "epoch": 0.5694760820045558, "grad_norm": 3.2030014991760254, "learning_rate": 0.00022288888888888887, "loss": 0.7606, "step": 1000 }, { "epoch": 0.5694760820045558, "eval_loss": 0.5895215272903442, "eval_runtime": 203.9306, "eval_samples_per_second": 34.433, "eval_steps_per_second": 0.539, "eval_wer": 0.457659549024037, "step": 1000 }, { "epoch": 0.683371298405467, "eval_loss": 0.5552608370780945, "eval_runtime": 205.5709, "eval_samples_per_second": 34.159, "eval_steps_per_second": 0.535, "eval_wer": 0.4211057813683413, "step": 1200 }, { "epoch": 0.7972665148063781, "eval_loss": 0.530360996723175, "eval_runtime": 215.1898, "eval_samples_per_second": 32.632, "eval_steps_per_second": 0.511, "eval_wer": 0.419604299817962, "step": 1400 }, { "epoch": 0.8542141230068337, "grad_norm": 2.892026424407959, "learning_rate": 0.00016766666666666666, "loss": 0.7049, "step": 1500 }, { "epoch": 0.9111617312072893, "eval_loss": 0.5060806274414062, "eval_runtime": 208.7711, "eval_samples_per_second": 33.635, "eval_steps_per_second": 0.527, "eval_wer": 0.38730251531378307, "step": 1600 }, { "epoch": 1.0250569476082005, "eval_loss": 0.5090161561965942, "eval_runtime": 207.1232, "eval_samples_per_second": 33.903, "eval_steps_per_second": 0.531, "eval_wer": 0.3959127811956045, "step": 1800 }, { "epoch": 1.1389521640091116, "grad_norm": 2.3204939365386963, "learning_rate": 0.00011233333333333333, "loss": 0.6136, "step": 2000 }, { "epoch": 1.1389521640091116, "eval_loss": 0.4839297831058502, "eval_runtime": 206.3623, "eval_samples_per_second": 34.028, "eval_steps_per_second": 0.533, "eval_wer": 0.3758088733573393, "step": 2000 }, { "epoch": 1.2528473804100229, "eval_loss": 0.46924272179603577, "eval_runtime": 205.266, "eval_samples_per_second": 34.209, "eval_steps_per_second": 0.536, "eval_wer": 0.3658565752933204, "step": 2200 }, { "epoch": 1.366742596810934, "eval_loss": 0.4569305181503296, "eval_runtime": 207.0588, "eval_samples_per_second": 33.913, "eval_steps_per_second": 0.531, "eval_wer": 0.35436293333687663, "step": 2400 }, { "epoch": 1.4236902050113895, "grad_norm": 3.092404365539551, "learning_rate": 5.688888888888888e-05, "loss": 0.5388, "step": 2500 }, { "epoch": 1.4806378132118452, "eval_loss": 0.4487648606300354, "eval_runtime": 205.0091, "eval_samples_per_second": 34.252, "eval_steps_per_second": 0.537, "eval_wer": 0.3484765941614956, "step": 2600 }, { "epoch": 1.5945330296127562, "eval_loss": 0.441054105758667, "eval_runtime": 211.0185, "eval_samples_per_second": 33.277, "eval_steps_per_second": 0.521, "eval_wer": 0.3423112185917963, "step": 2800 }, { "epoch": 1.7084282460136673, "grad_norm": 0.7985823154449463, "learning_rate": 1.4444444444444445e-06, "loss": 0.5275, "step": 3000 }, { "epoch": 1.7084282460136673, "eval_loss": 0.4376124143600464, "eval_runtime": 210.3633, "eval_samples_per_second": 33.38, "eval_steps_per_second": 0.523, "eval_wer": 0.339454417411871, "step": 3000 }, { "epoch": 1.7084282460136673, "step": 3000, "total_flos": 5.591115044514249e+18, "train_loss": 0.8770465799967448, "train_runtime": 4793.5663, "train_samples_per_second": 10.013, "train_steps_per_second": 0.626 } ], "logging_steps": 500, "max_steps": 3000, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.591115044514249e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }