{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.088803088803089, "eval_steps": 200, "global_step": 4000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.15444015444015444, "eval_loss": Infinity, "eval_runtime": 182.0736, "eval_samples_per_second": 38.567, "eval_steps_per_second": 4.822, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.3088803088803089, "eval_loss": Infinity, "eval_runtime": 177.405, "eval_samples_per_second": 39.582, "eval_steps_per_second": 4.949, "eval_wer": 0.8236661342890161, "step": 400 }, { "epoch": 0.3861003861003861, "grad_norm": 3.3234875202178955, "learning_rate": 0.00029699999999999996, "loss": 3.6679, "step": 500 }, { "epoch": 0.46332046332046334, "eval_loss": Infinity, "eval_runtime": 177.3768, "eval_samples_per_second": 39.588, "eval_steps_per_second": 4.95, "eval_wer": 0.7189305701634789, "step": 600 }, { "epoch": 0.6177606177606177, "eval_loss": Infinity, "eval_runtime": 177.3999, "eval_samples_per_second": 39.583, "eval_steps_per_second": 4.949, "eval_wer": 0.5541142404529106, "step": 800 }, { "epoch": 0.7722007722007722, "grad_norm": 3.4127838611602783, "learning_rate": 0.0002577428571428571, "loss": 0.8341, "step": 1000 }, { "epoch": 0.7722007722007722, "eval_loss": Infinity, "eval_runtime": 178.223, "eval_samples_per_second": 39.4, "eval_steps_per_second": 4.926, "eval_wer": 0.512381026579928, "step": 1000 }, { "epoch": 0.9266409266409267, "eval_loss": Infinity, "eval_runtime": 177.4709, "eval_samples_per_second": 39.567, "eval_steps_per_second": 4.947, "eval_wer": 0.489462817965798, "step": 1200 }, { "epoch": 1.0810810810810811, "eval_loss": Infinity, "eval_runtime": 178.0824, "eval_samples_per_second": 39.431, "eval_steps_per_second": 4.93, "eval_wer": 0.45271577525872253, "step": 1400 }, { "epoch": 1.1583011583011582, "grad_norm": 0.5421157479286194, "learning_rate": 0.00021488571428571426, "loss": 0.6524, "step": 1500 }, { "epoch": 1.2355212355212355, "eval_loss": Infinity, "eval_runtime": 176.6316, "eval_samples_per_second": 39.755, "eval_steps_per_second": 4.971, "eval_wer": 0.4340436030280602, "step": 1600 }, { "epoch": 1.3899613899613898, "eval_loss": Infinity, "eval_runtime": 177.018, "eval_samples_per_second": 39.668, "eval_steps_per_second": 4.96, "eval_wer": 0.40710009998311975, "step": 1800 }, { "epoch": 1.5444015444015444, "grad_norm": 0.6360776424407959, "learning_rate": 0.0001721142857142857, "loss": 0.5566, "step": 2000 }, { "epoch": 1.5444015444015444, "eval_loss": Infinity, "eval_runtime": 176.5522, "eval_samples_per_second": 39.773, "eval_steps_per_second": 4.973, "eval_wer": 0.3941931881630374, "step": 2000 }, { "epoch": 1.698841698841699, "eval_loss": Infinity, "eval_runtime": 177.2873, "eval_samples_per_second": 39.608, "eval_steps_per_second": 4.952, "eval_wer": 0.39070027138275354, "step": 2200 }, { "epoch": 1.8532818532818531, "eval_loss": Infinity, "eval_runtime": 177.1067, "eval_samples_per_second": 39.648, "eval_steps_per_second": 4.957, "eval_wer": 0.37781932920416034, "step": 2400 }, { "epoch": 1.9305019305019306, "grad_norm": 3.162639617919922, "learning_rate": 0.0001295142857142857, "loss": 0.5324, "step": 2500 }, { "epoch": 2.0077220077220077, "eval_loss": Infinity, "eval_runtime": 177.9344, "eval_samples_per_second": 39.464, "eval_steps_per_second": 4.934, "eval_wer": 0.3624453014426136, "step": 2600 }, { "epoch": 2.1621621621621623, "eval_loss": Infinity, "eval_runtime": 178.1586, "eval_samples_per_second": 39.414, "eval_steps_per_second": 4.928, "eval_wer": 0.35340786620440706, "step": 2800 }, { "epoch": 2.3166023166023164, "grad_norm": 0.8761777877807617, "learning_rate": 8.674285714285714e-05, "loss": 0.4444, "step": 3000 }, { "epoch": 2.3166023166023164, "eval_loss": Infinity, "eval_runtime": 177.4921, "eval_samples_per_second": 39.562, "eval_steps_per_second": 4.947, "eval_wer": 0.34959032890550945, "step": 3000 }, { "epoch": 2.471042471042471, "eval_loss": Infinity, "eval_runtime": 177.8631, "eval_samples_per_second": 39.48, "eval_steps_per_second": 4.936, "eval_wer": 0.3415916793268669, "step": 3200 }, { "epoch": 2.6254826254826256, "eval_loss": Infinity, "eval_runtime": 177.4214, "eval_samples_per_second": 39.578, "eval_steps_per_second": 4.949, "eval_wer": 0.3406697570540039, "step": 3400 }, { "epoch": 2.7027027027027026, "grad_norm": 3.2265968322753906, "learning_rate": 4.405714285714285e-05, "loss": 0.4254, "step": 3500 }, { "epoch": 2.7799227799227797, "eval_loss": Infinity, "eval_runtime": 178.3037, "eval_samples_per_second": 39.382, "eval_steps_per_second": 4.924, "eval_wer": 0.3346448002285328, "step": 3600 }, { "epoch": 2.9343629343629343, "eval_loss": Infinity, "eval_runtime": 177.742, "eval_samples_per_second": 39.507, "eval_steps_per_second": 4.94, "eval_wer": 0.33778712684871387, "step": 3800 }, { "epoch": 3.088803088803089, "grad_norm": 3.3560454845428467, "learning_rate": 1.2857142857142856e-06, "loss": 0.4273, "step": 4000 }, { "epoch": 3.088803088803089, "eval_loss": Infinity, "eval_runtime": 177.2694, "eval_samples_per_second": 39.612, "eval_steps_per_second": 4.953, "eval_wer": 0.33521613234129305, "step": 4000 }, { "epoch": 3.088803088803089, "step": 4000, "total_flos": 1.6887033849357677e+19, "train_loss": 0.9425676002502441, "train_runtime": 7915.8947, "train_samples_per_second": 16.17, "train_steps_per_second": 0.505 } ], "logging_steps": 500, "max_steps": 4000, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.6887033849357677e+19, "train_batch_size": 32, "trial_name": null, "trial_params": null }