{ "best_metric": 0.33828356862068176, "best_model_checkpoint": "/scratch/skscla001/results/mms-zeroshot-300m-genbed-m-model/checkpoint-3400", "epoch": 11.019283746556473, "eval_steps": 200, "global_step": 4000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.5509641873278237, "eval_loss": 2.310696840286255, "eval_runtime": 57.4366, "eval_samples_per_second": 16.871, "eval_steps_per_second": 2.124, "eval_wer": 1.0, "step": 200 }, { "epoch": 1.1019283746556474, "eval_loss": 0.4859784245491028, "eval_runtime": 55.9045, "eval_samples_per_second": 17.333, "eval_steps_per_second": 2.182, "eval_wer": 0.638015449896638, "step": 400 }, { "epoch": 1.3774104683195592, "grad_norm": 0.8601316213607788, "learning_rate": 0.0002889620018535681, "loss": 2.8087, "step": 500 }, { "epoch": 1.6528925619834711, "eval_loss": 0.438145250082016, "eval_runtime": 56.5281, "eval_samples_per_second": 17.142, "eval_steps_per_second": 2.158, "eval_wer": 0.5988466978565988, "step": 600 }, { "epoch": 2.203856749311295, "eval_loss": 0.42812615633010864, "eval_runtime": 56.0727, "eval_samples_per_second": 17.281, "eval_steps_per_second": 2.176, "eval_wer": 0.5854640409095855, "step": 800 }, { "epoch": 2.7548209366391183, "grad_norm": 0.6990365982055664, "learning_rate": 0.0002750602409638554, "loss": 0.6248, "step": 1000 }, { "epoch": 2.7548209366391183, "eval_loss": 0.4056081473827362, "eval_runtime": 56.3304, "eval_samples_per_second": 17.202, "eval_steps_per_second": 2.166, "eval_wer": 0.5655532586225656, "step": 1000 }, { "epoch": 3.3057851239669422, "eval_loss": 0.401683509349823, "eval_runtime": 56.6419, "eval_samples_per_second": 17.107, "eval_steps_per_second": 2.154, "eval_wer": 0.5513001849635513, "step": 1200 }, { "epoch": 3.8567493112947657, "eval_loss": 0.39044997096061707, "eval_runtime": 56.4862, "eval_samples_per_second": 17.155, "eval_steps_per_second": 2.16, "eval_wer": 0.5617451855075617, "step": 1400 }, { "epoch": 4.132231404958677, "grad_norm": 0.828954815864563, "learning_rate": 0.0002611584800741427, "loss": 0.578, "step": 1500 }, { "epoch": 4.40771349862259, "eval_loss": 0.37762871384620667, "eval_runtime": 56.693, "eval_samples_per_second": 17.092, "eval_steps_per_second": 2.152, "eval_wer": 0.5306277880535306, "step": 1600 }, { "epoch": 4.958677685950414, "eval_loss": 0.37221014499664307, "eval_runtime": 56.5211, "eval_samples_per_second": 17.144, "eval_steps_per_second": 2.158, "eval_wer": 0.5177891415515178, "step": 1800 }, { "epoch": 5.509641873278237, "grad_norm": 0.5989723801612854, "learning_rate": 0.00024725671918443, "loss": 0.5343, "step": 2000 }, { "epoch": 5.509641873278237, "eval_loss": 0.3658897578716278, "eval_runtime": 56.2923, "eval_samples_per_second": 17.214, "eval_steps_per_second": 2.167, "eval_wer": 0.5151778914155152, "step": 2000 }, { "epoch": 6.0606060606060606, "eval_loss": 0.36135414242744446, "eval_runtime": 56.2126, "eval_samples_per_second": 17.238, "eval_steps_per_second": 2.17, "eval_wer": 0.514960287237515, "step": 2200 }, { "epoch": 6.6115702479338845, "eval_loss": 0.3572969138622284, "eval_runtime": 56.7561, "eval_samples_per_second": 17.073, "eval_steps_per_second": 2.15, "eval_wer": 0.5015776302905016, "step": 2400 }, { "epoch": 6.887052341597796, "grad_norm": 0.6925222873687744, "learning_rate": 0.0002333549582947173, "loss": 0.5153, "step": 2500 }, { "epoch": 7.162534435261708, "eval_loss": 0.3624655604362488, "eval_runtime": 56.0335, "eval_samples_per_second": 17.293, "eval_steps_per_second": 2.177, "eval_wer": 0.5025568490915026, "step": 2600 }, { "epoch": 7.7134986225895315, "eval_loss": 0.3545249402523041, "eval_runtime": 57.0392, "eval_samples_per_second": 16.988, "eval_steps_per_second": 2.139, "eval_wer": 0.48667174409748665, "step": 2800 }, { "epoch": 8.264462809917354, "grad_norm": 1.5090163946151733, "learning_rate": 0.00021945319740500463, "loss": 0.4935, "step": 3000 }, { "epoch": 8.264462809917354, "eval_loss": 0.350554883480072, "eval_runtime": 56.6484, "eval_samples_per_second": 17.106, "eval_steps_per_second": 2.154, "eval_wer": 0.48144924382548143, "step": 3000 }, { "epoch": 8.81542699724518, "eval_loss": 0.3481844663619995, "eval_runtime": 56.3422, "eval_samples_per_second": 17.198, "eval_steps_per_second": 2.165, "eval_wer": 0.49222065063649223, "step": 3200 }, { "epoch": 9.366391184573002, "eval_loss": 0.33828356862068176, "eval_runtime": 56.3849, "eval_samples_per_second": 17.185, "eval_steps_per_second": 2.164, "eval_wer": 0.4761179414644761, "step": 3400 }, { "epoch": 9.641873278236915, "grad_norm": 0.5624499917030334, "learning_rate": 0.0002055514365152919, "loss": 0.4731, "step": 3500 }, { "epoch": 9.917355371900827, "eval_loss": 0.34193041920661926, "eval_runtime": 56.2658, "eval_samples_per_second": 17.222, "eval_steps_per_second": 2.168, "eval_wer": 0.4644761179414645, "step": 3600 }, { "epoch": 10.46831955922865, "eval_loss": 0.3391243517398834, "eval_runtime": 56.3644, "eval_samples_per_second": 17.192, "eval_steps_per_second": 2.164, "eval_wer": 0.4686105973234686, "step": 3800 }, { "epoch": 11.019283746556473, "grad_norm": 0.5695391893386841, "learning_rate": 0.00019164967562557923, "loss": 0.4619, "step": 4000 }, { "epoch": 11.019283746556473, "eval_loss": 0.34022417664527893, "eval_runtime": 56.9717, "eval_samples_per_second": 17.008, "eval_steps_per_second": 2.141, "eval_wer": 0.4576215863344576, "step": 4000 }, { "epoch": 11.019283746556473, "step": 4000, "total_flos": 6.332346090903939e+18, "train_loss": 0.8111911582946777, "train_runtime": 3985.8224, "train_samples_per_second": 21.827, "train_steps_per_second": 2.732 } ], "logging_steps": 500, "max_steps": 10890, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 200, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.332346090903939e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }