{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.633204633204633, "eval_steps": 200, "global_step": 6000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.15444015444015444, "eval_loss": Infinity, "eval_runtime": 188.4417, "eval_samples_per_second": 37.264, "eval_steps_per_second": 4.659, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.3088803088803089, "eval_loss": Infinity, "eval_runtime": 187.0753, "eval_samples_per_second": 37.536, "eval_steps_per_second": 4.693, "eval_wer": 0.8660615740199706, "step": 400 }, { "epoch": 0.3861003861003861, "grad_norm": 2.7789804935455322, "learning_rate": 0.00029699999999999996, "loss": 3.7305, "step": 500 }, { "epoch": 0.46332046332046334, "eval_loss": Infinity, "eval_runtime": 186.9186, "eval_samples_per_second": 37.567, "eval_steps_per_second": 4.697, "eval_wer": 0.7040110111279914, "step": 600 }, { "epoch": 0.6177606177606177, "eval_loss": Infinity, "eval_runtime": 187.3522, "eval_samples_per_second": 37.48, "eval_steps_per_second": 4.686, "eval_wer": 0.5505953540311376, "step": 800 }, { "epoch": 0.7722007722007722, "grad_norm": 11.684309959411621, "learning_rate": 0.0002731090909090909, "loss": 0.8464, "step": 1000 }, { "epoch": 0.7722007722007722, "eval_loss": Infinity, "eval_runtime": 187.5603, "eval_samples_per_second": 37.439, "eval_steps_per_second": 4.681, "eval_wer": 0.5168088504538195, "step": 1000 }, { "epoch": 0.9266409266409267, "eval_loss": Infinity, "eval_runtime": 187.4249, "eval_samples_per_second": 37.466, "eval_steps_per_second": 4.685, "eval_wer": 0.4824769844052303, "step": 1200 }, { "epoch": 1.0810810810810811, "eval_loss": Infinity, "eval_runtime": 188.7764, "eval_samples_per_second": 37.197, "eval_steps_per_second": 4.651, "eval_wer": 0.46011712308311586, "step": 1400 }, { "epoch": 1.1583011583011582, "grad_norm": 0.6005635261535645, "learning_rate": 0.0002458363636363636, "loss": 0.6629, "step": 1500 }, { "epoch": 1.2355212355212355, "eval_loss": Infinity, "eval_runtime": 188.0077, "eval_samples_per_second": 37.35, "eval_steps_per_second": 4.67, "eval_wer": 0.4445483230104008, "step": 1600 }, { "epoch": 1.3899613899613898, "eval_loss": Infinity, "eval_runtime": 188.204, "eval_samples_per_second": 37.311, "eval_steps_per_second": 4.665, "eval_wer": 0.41425473621336656, "step": 1800 }, { "epoch": 1.5444015444015444, "grad_norm": 0.4440418481826782, "learning_rate": 0.0002186181818181818, "loss": 0.5655, "step": 2000 }, { "epoch": 1.5444015444015444, "eval_loss": Infinity, "eval_runtime": 188.2865, "eval_samples_per_second": 37.294, "eval_steps_per_second": 4.663, "eval_wer": 0.41700751821121107, "step": 2000 }, { "epoch": 1.698841698841699, "eval_loss": Infinity, "eval_runtime": 189.1618, "eval_samples_per_second": 37.122, "eval_steps_per_second": 4.642, "eval_wer": 0.4047108929661226, "step": 2200 }, { "epoch": 1.8532818532818531, "eval_loss": Infinity, "eval_runtime": 188.2544, "eval_samples_per_second": 37.301, "eval_steps_per_second": 4.664, "eval_wer": 0.3966213496422682, "step": 2400 }, { "epoch": 1.9305019305019306, "grad_norm": 0.9740249514579773, "learning_rate": 0.00019150909090909088, "loss": 0.5524, "step": 2500 }, { "epoch": 2.0077220077220077, "eval_loss": Infinity, "eval_runtime": 189.1265, "eval_samples_per_second": 37.129, "eval_steps_per_second": 4.642, "eval_wer": 0.37794917741160583, "step": 2600 }, { "epoch": 2.1621621621621623, "eval_loss": Infinity, "eval_runtime": 188.7855, "eval_samples_per_second": 37.196, "eval_steps_per_second": 4.651, "eval_wer": 0.37366418656590444, "step": 2800 }, { "epoch": 2.3166023166023164, "grad_norm": 0.5873022675514221, "learning_rate": 0.00016429090909090907, "loss": 0.4773, "step": 3000 }, { "epoch": 2.3166023166023164, "eval_loss": Infinity, "eval_runtime": 189.7378, "eval_samples_per_second": 37.009, "eval_steps_per_second": 4.627, "eval_wer": 0.3698336644462623, "step": 3000 }, { "epoch": 2.471042471042471, "eval_loss": Infinity, "eval_runtime": 189.8812, "eval_samples_per_second": 36.981, "eval_steps_per_second": 4.624, "eval_wer": 0.37235271967070493, "step": 3200 }, { "epoch": 2.6254826254826256, "eval_loss": Infinity, "eval_runtime": 189.4769, "eval_samples_per_second": 37.06, "eval_steps_per_second": 4.634, "eval_wer": 0.3583940373703141, "step": 3400 }, { "epoch": 2.7027027027027026, "grad_norm": 1.4725390672683716, "learning_rate": 0.00013712727272727272, "loss": 0.4694, "step": 3500 }, { "epoch": 2.7799227799227797, "eval_loss": Infinity, "eval_runtime": 189.6126, "eval_samples_per_second": 37.033, "eval_steps_per_second": 4.63, "eval_wer": 0.3820783504083726, "step": 3600 }, { "epoch": 2.9343629343629343, "eval_loss": Infinity, "eval_runtime": 190.0131, "eval_samples_per_second": 36.955, "eval_steps_per_second": 4.621, "eval_wer": 0.4729850804409645, "step": 3800 }, { "epoch": 3.088803088803089, "grad_norm": 14.039852142333984, "learning_rate": 0.0001099090909090909, "loss": 0.6537, "step": 4000 }, { "epoch": 3.088803088803089, "eval_loss": Infinity, "eval_runtime": 189.7145, "eval_samples_per_second": 37.014, "eval_steps_per_second": 4.628, "eval_wer": 0.4753613026372171, "step": 4000 }, { "epoch": 3.2432432432432434, "eval_loss": Infinity, "eval_runtime": 189.6267, "eval_samples_per_second": 37.031, "eval_steps_per_second": 4.63, "eval_wer": 0.5899263760663784, "step": 4200 }, { "epoch": 3.3976833976833976, "eval_loss": Infinity, "eval_runtime": 189.2072, "eval_samples_per_second": 37.113, "eval_steps_per_second": 4.64, "eval_wer": 0.5957565605806812, "step": 4400 }, { "epoch": 3.474903474903475, "grad_norm": 15.850507736206055, "learning_rate": 8.269090909090907e-05, "loss": 0.8238, "step": 4500 }, { "epoch": 3.552123552123552, "eval_loss": Infinity, "eval_runtime": 188.9868, "eval_samples_per_second": 37.156, "eval_steps_per_second": 4.646, "eval_wer": 0.633646267513277, "step": 4600 }, { "epoch": 3.7065637065637067, "eval_loss": Infinity, "eval_runtime": 190.0461, "eval_samples_per_second": 36.949, "eval_steps_per_second": 4.62, "eval_wer": 0.6025865762923143, "step": 4800 }, { "epoch": 3.861003861003861, "grad_norm": 6.711777687072754, "learning_rate": 5.547272727272727e-05, "loss": 0.8682, "step": 5000 }, { "epoch": 3.861003861003861, "eval_loss": Infinity, "eval_runtime": 189.1358, "eval_samples_per_second": 37.127, "eval_steps_per_second": 4.642, "eval_wer": 0.5671380156596938, "step": 5000 }, { "epoch": 4.015444015444015, "eval_loss": Infinity, "eval_runtime": 190.8009, "eval_samples_per_second": 36.803, "eval_steps_per_second": 4.602, "eval_wer": 0.5377923207770117, "step": 5200 }, { "epoch": 4.1698841698841695, "eval_loss": Infinity, "eval_runtime": 189.3706, "eval_samples_per_second": 37.081, "eval_steps_per_second": 4.636, "eval_wer": 0.5373638216924416, "step": 5400 }, { "epoch": 4.2471042471042475, "grad_norm": 6.047791957855225, "learning_rate": 2.8309090909090903e-05, "loss": 0.855, "step": 5500 }, { "epoch": 4.324324324324325, "eval_loss": Infinity, "eval_runtime": 189.8451, "eval_samples_per_second": 36.988, "eval_steps_per_second": 4.625, "eval_wer": 0.5328451040733383, "step": 5600 }, { "epoch": 4.478764478764479, "eval_loss": Infinity, "eval_runtime": 190.1917, "eval_samples_per_second": 36.921, "eval_steps_per_second": 4.616, "eval_wer": 0.5225351564021659, "step": 5800 }, { "epoch": 4.633204633204633, "grad_norm": 9.171218872070312, "learning_rate": 1.0363636363636363e-06, "loss": 0.9644, "step": 6000 }, { "epoch": 4.633204633204633, "eval_loss": Infinity, "eval_runtime": 190.3777, "eval_samples_per_second": 36.885, "eval_steps_per_second": 4.612, "eval_wer": 0.5237557295521535, "step": 6000 }, { "epoch": 4.633204633204633, "step": 6000, "total_flos": 2.5308493485736165e+19, "train_loss": 0.9557839482625325, "train_runtime": 12299.0724, "train_samples_per_second": 15.611, "train_steps_per_second": 0.488 } ], "logging_steps": 500, "max_steps": 6000, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.5308493485736165e+19, "train_batch_size": 32, "trial_name": null, "trial_params": null }