{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9619084263178146, "eval_steps": 500, "global_step": 15000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03206361421059382, "grad_norm": 4.760500431060791, "learning_rate": 9.859999999999998e-05, "loss": 5.7778, "step": 500 }, { "epoch": 0.03206361421059382, "eval_loss": 2.885216236114502, "eval_runtime": 184.1165, "eval_samples_per_second": 38.139, "eval_steps_per_second": 0.597, "eval_wer": 1.0, "step": 500 }, { "epoch": 0.06412722842118763, "grad_norm": 5.567607402801514, "learning_rate": 0.0001986, "loss": 1.4914, "step": 1000 }, { "epoch": 0.06412722842118763, "eval_loss": 1.2011666297912598, "eval_runtime": 186.6136, "eval_samples_per_second": 37.629, "eval_steps_per_second": 0.589, "eval_wer": 0.7805710944870381, "step": 1000 }, { "epoch": 0.09619084263178146, "grad_norm": 5.768416881561279, "learning_rate": 0.00029859999999999994, "loss": 0.8803, "step": 1500 }, { "epoch": 0.09619084263178146, "eval_loss": 1.1211999654769897, "eval_runtime": 186.8607, "eval_samples_per_second": 37.579, "eval_steps_per_second": 0.589, "eval_wer": 0.7589657050983936, "step": 1500 }, { "epoch": 0.12825445684237527, "grad_norm": 9.690132141113281, "learning_rate": 0.00028904444444444443, "loss": 0.7723, "step": 2000 }, { "epoch": 0.12825445684237527, "eval_loss": 0.9680694937705994, "eval_runtime": 188.1934, "eval_samples_per_second": 37.313, "eval_steps_per_second": 0.585, "eval_wer": 0.6770220173002565, "step": 2000 }, { "epoch": 0.16031807105296908, "grad_norm": 5.588994979858398, "learning_rate": 0.0002779333333333333, "loss": 0.6988, "step": 2500 }, { "epoch": 0.16031807105296908, "eval_loss": 0.9452723860740662, "eval_runtime": 187.5323, "eval_samples_per_second": 37.444, "eval_steps_per_second": 0.587, "eval_wer": 0.6598812102207045, "step": 2500 }, { "epoch": 0.19238168526356292, "grad_norm": 5.276751518249512, "learning_rate": 
0.0002668222222222222, "loss": 0.6392, "step": 3000 }, { "epoch": 0.19238168526356292, "eval_loss": 0.8690649271011353, "eval_runtime": 187.6799, "eval_samples_per_second": 37.415, "eval_steps_per_second": 0.586, "eval_wer": 0.6200055807278864, "step": 3000 }, { "epoch": 0.22444529947415673, "grad_norm": 6.366265296936035, "learning_rate": 0.0002557111111111111, "loss": 0.6114, "step": 3500 }, { "epoch": 0.22444529947415673, "eval_loss": 0.8661066293716431, "eval_runtime": 188.4956, "eval_samples_per_second": 37.253, "eval_steps_per_second": 0.584, "eval_wer": 0.619155184097583, "step": 3500 }, { "epoch": 0.25650891368475054, "grad_norm": 11.82204818725586, "learning_rate": 0.0002446222222222222, "loss": 0.5807, "step": 4000 }, { "epoch": 0.25650891368475054, "eval_loss": 0.7884626984596252, "eval_runtime": 188.2678, "eval_samples_per_second": 37.298, "eval_steps_per_second": 0.584, "eval_wer": 0.5793592792888558, "step": 4000 }, { "epoch": 0.2885725278953444, "grad_norm": 14.343709945678711, "learning_rate": 0.00023355555555555553, "loss": 0.5534, "step": 4500 }, { "epoch": 0.2885725278953444, "eval_loss": 0.7738627791404724, "eval_runtime": 188.8344, "eval_samples_per_second": 37.186, "eval_steps_per_second": 0.583, "eval_wer": 0.549037324439602, "step": 4500 }, { "epoch": 0.32063614210593816, "grad_norm": 8.92720890045166, "learning_rate": 0.00022244444444444444, "loss": 0.5358, "step": 5000 }, { "epoch": 0.32063614210593816, "eval_loss": 0.7416187524795532, "eval_runtime": 189.1576, "eval_samples_per_second": 37.122, "eval_steps_per_second": 0.582, "eval_wer": 0.5414767668983111, "step": 5000 }, { "epoch": 0.352699756316532, "grad_norm": 7.230262279510498, "learning_rate": 0.0002113333333333333, "loss": 0.5189, "step": 5500 }, { "epoch": 0.352699756316532, "eval_loss": 0.7361556887626648, "eval_runtime": 188.5731, "eval_samples_per_second": 37.238, "eval_steps_per_second": 0.583, "eval_wer": 0.5303285985729281, "step": 5500 }, { "epoch": 0.38476337052712584, 
"grad_norm": 7.765280246734619, "learning_rate": 0.0002002444444444444, "loss": 0.4991, "step": 6000 }, { "epoch": 0.38476337052712584, "eval_loss": 0.7187824845314026, "eval_runtime": 188.8496, "eval_samples_per_second": 37.183, "eval_steps_per_second": 0.582, "eval_wer": 0.5065839301611768, "step": 6000 }, { "epoch": 0.4168269847377196, "grad_norm": 5.903895854949951, "learning_rate": 0.00018913333333333331, "loss": 0.48, "step": 6500 }, { "epoch": 0.4168269847377196, "eval_loss": 0.6984608173370361, "eval_runtime": 189.9958, "eval_samples_per_second": 36.959, "eval_steps_per_second": 0.579, "eval_wer": 0.5177719608286052, "step": 6500 }, { "epoch": 0.44889059894831346, "grad_norm": 3.641240358352661, "learning_rate": 0.00017804444444444444, "loss": 0.463, "step": 7000 }, { "epoch": 0.44889059894831346, "eval_loss": 0.6681538820266724, "eval_runtime": 189.2879, "eval_samples_per_second": 37.097, "eval_steps_per_second": 0.581, "eval_wer": 0.49330977026003536, "step": 7000 }, { "epoch": 0.4809542131589073, "grad_norm": 6.733245849609375, "learning_rate": 0.00016693333333333332, "loss": 0.4477, "step": 7500 }, { "epoch": 0.4809542131589073, "eval_loss": 0.6624513268470764, "eval_runtime": 189.8301, "eval_samples_per_second": 36.991, "eval_steps_per_second": 0.579, "eval_wer": 0.48671919637518435, "step": 7500 }, { "epoch": 0.5130178273695011, "grad_norm": 7.44530725479126, "learning_rate": 0.00015584444444444442, "loss": 0.4431, "step": 8000 }, { "epoch": 0.5130178273695011, "eval_loss": 0.6373856663703918, "eval_runtime": 189.4419, "eval_samples_per_second": 37.067, "eval_steps_per_second": 0.581, "eval_wer": 0.47356462350017936, "step": 8000 }, { "epoch": 0.5450814415800949, "grad_norm": 3.664278745651245, "learning_rate": 0.00014473333333333332, "loss": 0.4392, "step": 8500 }, { "epoch": 0.5450814415800949, "eval_loss": 0.6391619443893433, "eval_runtime": 189.4208, "eval_samples_per_second": 37.071, "eval_steps_per_second": 0.581, "eval_wer": 0.4772452464157111, 
"step": 8500 }, { "epoch": 0.5771450557906888, "grad_norm": 11.637319564819336, "learning_rate": 0.00013362222222222222, "loss": 0.4197, "step": 9000 }, { "epoch": 0.5771450557906888, "eval_loss": 0.6158761978149414, "eval_runtime": 188.8242, "eval_samples_per_second": 37.188, "eval_steps_per_second": 0.583, "eval_wer": 0.45473631060736924, "step": 9000 }, { "epoch": 0.6092086700012825, "grad_norm": 7.102973461151123, "learning_rate": 0.0001225111111111111, "loss": 0.4147, "step": 9500 }, { "epoch": 0.6092086700012825, "eval_loss": 0.5994922518730164, "eval_runtime": 188.4237, "eval_samples_per_second": 37.267, "eval_steps_per_second": 0.584, "eval_wer": 0.45217183326911065, "step": 9500 }, { "epoch": 0.6412722842118763, "grad_norm": 6.166309833526611, "learning_rate": 0.0001114, "loss": 0.3912, "step": 10000 }, { "epoch": 0.6412722842118763, "eval_loss": 0.5847700834274292, "eval_runtime": 188.8879, "eval_samples_per_second": 37.175, "eval_steps_per_second": 0.582, "eval_wer": 0.4285866142255411, "step": 10000 }, { "epoch": 0.6733358984224702, "grad_norm": 8.538312911987305, "learning_rate": 0.00010028888888888889, "loss": 0.3742, "step": 10500 }, { "epoch": 0.6733358984224702, "eval_loss": 0.585001528263092, "eval_runtime": 189.3338, "eval_samples_per_second": 37.088, "eval_steps_per_second": 0.581, "eval_wer": 0.4259025498611462, "step": 10500 }, { "epoch": 0.705399512633064, "grad_norm": 16.837343215942383, "learning_rate": 8.917777777777777e-05, "loss": 0.402, "step": 11000 }, { "epoch": 0.705399512633064, "eval_loss": 0.6351918578147888, "eval_runtime": 188.7345, "eval_samples_per_second": 37.206, "eval_steps_per_second": 0.583, "eval_wer": 0.44894298356342766, "step": 11000 }, { "epoch": 0.7374631268436578, "grad_norm": 4.569055557250977, "learning_rate": 7.806666666666666e-05, "loss": 0.5746, "step": 11500 }, { "epoch": 0.7374631268436578, "eval_loss": 0.7711716294288635, "eval_runtime": 188.4281, "eval_samples_per_second": 37.266, "eval_steps_per_second": 
0.584, "eval_wer": 0.5170810135664837, "step": 11500 }, { "epoch": 0.7695267410542517, "grad_norm": null, "learning_rate": 7.275555555555556e-05, "loss": 0.5783, "step": 12000 }, { "epoch": 0.7695267410542517, "eval_loss": null, "eval_runtime": 184.9484, "eval_samples_per_second": 37.967, "eval_steps_per_second": 0.595, "eval_wer": 1.0, "step": 12000 }, { "epoch": 0.8015903552648455, "grad_norm": null, "learning_rate": 7.275555555555556e-05, "loss": 0.0, "step": 12500 }, { "epoch": 0.8015903552648455, "eval_loss": null, "eval_runtime": 184.8998, "eval_samples_per_second": 37.977, "eval_steps_per_second": 0.595, "eval_wer": 1.0, "step": 12500 }, { "epoch": 0.8336539694754392, "grad_norm": null, "learning_rate": 7.275555555555556e-05, "loss": 0.0, "step": 13000 }, { "epoch": 0.8336539694754392, "eval_loss": null, "eval_runtime": 184.8775, "eval_samples_per_second": 37.982, "eval_steps_per_second": 0.595, "eval_wer": 1.0, "step": 13000 }, { "epoch": 0.8657175836860331, "grad_norm": null, "learning_rate": 7.275555555555556e-05, "loss": 0.0, "step": 13500 }, { "epoch": 0.8657175836860331, "eval_loss": null, "eval_runtime": 185.2742, "eval_samples_per_second": 37.901, "eval_steps_per_second": 0.594, "eval_wer": 1.0, "step": 13500 }, { "epoch": 0.8977811978966269, "grad_norm": null, "learning_rate": 7.275555555555556e-05, "loss": 0.0, "step": 14000 }, { "epoch": 0.8977811978966269, "eval_loss": null, "eval_runtime": 185.0536, "eval_samples_per_second": 37.946, "eval_steps_per_second": 0.594, "eval_wer": 1.0, "step": 14000 }, { "epoch": 0.9298448121072207, "grad_norm": null, "learning_rate": 7.275555555555556e-05, "loss": 0.0, "step": 14500 }, { "epoch": 0.9298448121072207, "eval_loss": null, "eval_runtime": 184.7301, "eval_samples_per_second": 38.012, "eval_steps_per_second": 0.595, "eval_wer": 1.0, "step": 14500 }, { "epoch": 0.9619084263178146, "grad_norm": null, "learning_rate": 7.275555555555556e-05, "loss": 0.0, "step": 15000 }, { "epoch": 0.9619084263178146, "eval_loss": null, 
"eval_runtime": 185.3079, "eval_samples_per_second": 37.894, "eval_steps_per_second": 0.594, "eval_wer": 1.0, "step": 15000 }, { "epoch": 0.9619084263178146, "step": 15000, "total_flos": 1.7109669148845115e+19, "train_loss": 0.6328924499511719, "train_runtime": 11517.021, "train_samples_per_second": 10.419, "train_steps_per_second": 1.302 } ], "logging_steps": 500, "max_steps": 15000, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.7109669148845115e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }