|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9619084263178146, |
|
"eval_steps": 500, |
|
"global_step": 15000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03206361421059382, |
|
"grad_norm": 4.760500431060791, |
|
"learning_rate": 9.859999999999998e-05, |
|
"loss": 5.7778, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.03206361421059382, |
|
"eval_loss": 2.885216236114502, |
|
"eval_runtime": 184.1165, |
|
"eval_samples_per_second": 38.139, |
|
"eval_steps_per_second": 0.597, |
|
"eval_wer": 1.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.06412722842118763, |
|
"grad_norm": 5.567607402801514, |
|
"learning_rate": 0.0001986, |
|
"loss": 1.4914, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.06412722842118763, |
|
"eval_loss": 1.2011666297912598, |
|
"eval_runtime": 186.6136, |
|
"eval_samples_per_second": 37.629, |
|
"eval_steps_per_second": 0.589, |
|
"eval_wer": 0.7805710944870381, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.09619084263178146, |
|
"grad_norm": 5.768416881561279, |
|
"learning_rate": 0.00029859999999999994, |
|
"loss": 0.8803, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09619084263178146, |
|
"eval_loss": 1.1211999654769897, |
|
"eval_runtime": 186.8607, |
|
"eval_samples_per_second": 37.579, |
|
"eval_steps_per_second": 0.589, |
|
"eval_wer": 0.7589657050983936, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.12825445684237527, |
|
"grad_norm": 9.690132141113281, |
|
"learning_rate": 0.00028904444444444443, |
|
"loss": 0.7723, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12825445684237527, |
|
"eval_loss": 0.9680694937705994, |
|
"eval_runtime": 188.1934, |
|
"eval_samples_per_second": 37.313, |
|
"eval_steps_per_second": 0.585, |
|
"eval_wer": 0.6770220173002565, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.16031807105296908, |
|
"grad_norm": 5.588994979858398, |
|
"learning_rate": 0.0002779333333333333, |
|
"loss": 0.6988, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.16031807105296908, |
|
"eval_loss": 0.9452723860740662, |
|
"eval_runtime": 187.5323, |
|
"eval_samples_per_second": 37.444, |
|
"eval_steps_per_second": 0.587, |
|
"eval_wer": 0.6598812102207045, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.19238168526356292, |
|
"grad_norm": 5.276751518249512, |
|
"learning_rate": 0.0002668222222222222, |
|
"loss": 0.6392, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.19238168526356292, |
|
"eval_loss": 0.8690649271011353, |
|
"eval_runtime": 187.6799, |
|
"eval_samples_per_second": 37.415, |
|
"eval_steps_per_second": 0.586, |
|
"eval_wer": 0.6200055807278864, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.22444529947415673, |
|
"grad_norm": 6.366265296936035, |
|
"learning_rate": 0.0002557111111111111, |
|
"loss": 0.6114, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.22444529947415673, |
|
"eval_loss": 0.8661066293716431, |
|
"eval_runtime": 188.4956, |
|
"eval_samples_per_second": 37.253, |
|
"eval_steps_per_second": 0.584, |
|
"eval_wer": 0.619155184097583, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.25650891368475054, |
|
"grad_norm": 11.82204818725586, |
|
"learning_rate": 0.0002446222222222222, |
|
"loss": 0.5807, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.25650891368475054, |
|
"eval_loss": 0.7884626984596252, |
|
"eval_runtime": 188.2678, |
|
"eval_samples_per_second": 37.298, |
|
"eval_steps_per_second": 0.584, |
|
"eval_wer": 0.5793592792888558, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.2885725278953444, |
|
"grad_norm": 14.343709945678711, |
|
"learning_rate": 0.00023355555555555553, |
|
"loss": 0.5534, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.2885725278953444, |
|
"eval_loss": 0.7738627791404724, |
|
"eval_runtime": 188.8344, |
|
"eval_samples_per_second": 37.186, |
|
"eval_steps_per_second": 0.583, |
|
"eval_wer": 0.549037324439602, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.32063614210593816, |
|
"grad_norm": 8.92720890045166, |
|
"learning_rate": 0.00022244444444444444, |
|
"loss": 0.5358, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.32063614210593816, |
|
"eval_loss": 0.7416187524795532, |
|
"eval_runtime": 189.1576, |
|
"eval_samples_per_second": 37.122, |
|
"eval_steps_per_second": 0.582, |
|
"eval_wer": 0.5414767668983111, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.352699756316532, |
|
"grad_norm": 7.230262279510498, |
|
"learning_rate": 0.0002113333333333333, |
|
"loss": 0.5189, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.352699756316532, |
|
"eval_loss": 0.7361556887626648, |
|
"eval_runtime": 188.5731, |
|
"eval_samples_per_second": 37.238, |
|
"eval_steps_per_second": 0.583, |
|
"eval_wer": 0.5303285985729281, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.38476337052712584, |
|
"grad_norm": 7.765280246734619, |
|
"learning_rate": 0.0002002444444444444, |
|
"loss": 0.4991, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.38476337052712584, |
|
"eval_loss": 0.7187824845314026, |
|
"eval_runtime": 188.8496, |
|
"eval_samples_per_second": 37.183, |
|
"eval_steps_per_second": 0.582, |
|
"eval_wer": 0.5065839301611768, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.4168269847377196, |
|
"grad_norm": 5.903895854949951, |
|
"learning_rate": 0.00018913333333333331, |
|
"loss": 0.48, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.4168269847377196, |
|
"eval_loss": 0.6984608173370361, |
|
"eval_runtime": 189.9958, |
|
"eval_samples_per_second": 36.959, |
|
"eval_steps_per_second": 0.579, |
|
"eval_wer": 0.5177719608286052, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.44889059894831346, |
|
"grad_norm": 3.641240358352661, |
|
"learning_rate": 0.00017804444444444444, |
|
"loss": 0.463, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.44889059894831346, |
|
"eval_loss": 0.6681538820266724, |
|
"eval_runtime": 189.2879, |
|
"eval_samples_per_second": 37.097, |
|
"eval_steps_per_second": 0.581, |
|
"eval_wer": 0.49330977026003536, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.4809542131589073, |
|
"grad_norm": 6.733245849609375, |
|
"learning_rate": 0.00016693333333333332, |
|
"loss": 0.4477, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.4809542131589073, |
|
"eval_loss": 0.6624513268470764, |
|
"eval_runtime": 189.8301, |
|
"eval_samples_per_second": 36.991, |
|
"eval_steps_per_second": 0.579, |
|
"eval_wer": 0.48671919637518435, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.5130178273695011, |
|
"grad_norm": 7.44530725479126, |
|
"learning_rate": 0.00015584444444444442, |
|
"loss": 0.4431, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.5130178273695011, |
|
"eval_loss": 0.6373856663703918, |
|
"eval_runtime": 189.4419, |
|
"eval_samples_per_second": 37.067, |
|
"eval_steps_per_second": 0.581, |
|
"eval_wer": 0.47356462350017936, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.5450814415800949, |
|
"grad_norm": 3.664278745651245, |
|
"learning_rate": 0.00014473333333333332, |
|
"loss": 0.4392, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.5450814415800949, |
|
"eval_loss": 0.6391619443893433, |
|
"eval_runtime": 189.4208, |
|
"eval_samples_per_second": 37.071, |
|
"eval_steps_per_second": 0.581, |
|
"eval_wer": 0.4772452464157111, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.5771450557906888, |
|
"grad_norm": 11.637319564819336, |
|
"learning_rate": 0.00013362222222222222, |
|
"loss": 0.4197, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.5771450557906888, |
|
"eval_loss": 0.6158761978149414, |
|
"eval_runtime": 188.8242, |
|
"eval_samples_per_second": 37.188, |
|
"eval_steps_per_second": 0.583, |
|
"eval_wer": 0.45473631060736924, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.6092086700012825, |
|
"grad_norm": 7.102973461151123, |
|
"learning_rate": 0.0001225111111111111, |
|
"loss": 0.4147, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.6092086700012825, |
|
"eval_loss": 0.5994922518730164, |
|
"eval_runtime": 188.4237, |
|
"eval_samples_per_second": 37.267, |
|
"eval_steps_per_second": 0.584, |
|
"eval_wer": 0.45217183326911065, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.6412722842118763, |
|
"grad_norm": 6.166309833526611, |
|
"learning_rate": 0.0001114, |
|
"loss": 0.3912, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.6412722842118763, |
|
"eval_loss": 0.5847700834274292, |
|
"eval_runtime": 188.8879, |
|
"eval_samples_per_second": 37.175, |
|
"eval_steps_per_second": 0.582, |
|
"eval_wer": 0.4285866142255411, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.6733358984224702, |
|
"grad_norm": 8.538312911987305, |
|
"learning_rate": 0.00010028888888888889, |
|
"loss": 0.3742, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.6733358984224702, |
|
"eval_loss": 0.585001528263092, |
|
"eval_runtime": 189.3338, |
|
"eval_samples_per_second": 37.088, |
|
"eval_steps_per_second": 0.581, |
|
"eval_wer": 0.4259025498611462, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.705399512633064, |
|
"grad_norm": 16.837343215942383, |
|
"learning_rate": 8.917777777777777e-05, |
|
"loss": 0.402, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.705399512633064, |
|
"eval_loss": 0.6351918578147888, |
|
"eval_runtime": 188.7345, |
|
"eval_samples_per_second": 37.206, |
|
"eval_steps_per_second": 0.583, |
|
"eval_wer": 0.44894298356342766, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.7374631268436578, |
|
"grad_norm": 4.569055557250977, |
|
"learning_rate": 7.806666666666666e-05, |
|
"loss": 0.5746, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.7374631268436578, |
|
"eval_loss": 0.7711716294288635, |
|
"eval_runtime": 188.4281, |
|
"eval_samples_per_second": 37.266, |
|
"eval_steps_per_second": 0.584, |
|
"eval_wer": 0.5170810135664837, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.7695267410542517, |
|
"grad_norm": null,
|
"learning_rate": 7.275555555555556e-05, |
|
"loss": 0.5783, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.7695267410542517, |
|
"eval_loss": null,
|
"eval_runtime": 184.9484, |
|
"eval_samples_per_second": 37.967, |
|
"eval_steps_per_second": 0.595, |
|
"eval_wer": 1.0, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.8015903552648455, |
|
"grad_norm": null,
|
"learning_rate": 7.275555555555556e-05, |
|
"loss": 0.0, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.8015903552648455, |
|
"eval_loss": null,
|
"eval_runtime": 184.8998, |
|
"eval_samples_per_second": 37.977, |
|
"eval_steps_per_second": 0.595, |
|
"eval_wer": 1.0, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.8336539694754392, |
|
"grad_norm": null,
|
"learning_rate": 7.275555555555556e-05, |
|
"loss": 0.0, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.8336539694754392, |
|
"eval_loss": null,
|
"eval_runtime": 184.8775, |
|
"eval_samples_per_second": 37.982, |
|
"eval_steps_per_second": 0.595, |
|
"eval_wer": 1.0, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.8657175836860331, |
|
"grad_norm": null,
|
"learning_rate": 7.275555555555556e-05, |
|
"loss": 0.0, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.8657175836860331, |
|
"eval_loss": null,
|
"eval_runtime": 185.2742, |
|
"eval_samples_per_second": 37.901, |
|
"eval_steps_per_second": 0.594, |
|
"eval_wer": 1.0, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.8977811978966269, |
|
"grad_norm": null,
|
"learning_rate": 7.275555555555556e-05, |
|
"loss": 0.0, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.8977811978966269, |
|
"eval_loss": null,
|
"eval_runtime": 185.0536, |
|
"eval_samples_per_second": 37.946, |
|
"eval_steps_per_second": 0.594, |
|
"eval_wer": 1.0, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.9298448121072207, |
|
"grad_norm": null,
|
"learning_rate": 7.275555555555556e-05, |
|
"loss": 0.0, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.9298448121072207, |
|
"eval_loss": null,
|
"eval_runtime": 184.7301, |
|
"eval_samples_per_second": 38.012, |
|
"eval_steps_per_second": 0.595, |
|
"eval_wer": 1.0, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.9619084263178146, |
|
"grad_norm": null,
|
"learning_rate": 7.275555555555556e-05, |
|
"loss": 0.0, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.9619084263178146, |
|
"eval_loss": null,
|
"eval_runtime": 185.3079, |
|
"eval_samples_per_second": 37.894, |
|
"eval_steps_per_second": 0.594, |
|
"eval_wer": 1.0, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.9619084263178146, |
|
"step": 15000, |
|
"total_flos": 1.7109669148845115e+19, |
|
"train_loss": 0.6328924499511719, |
|
"train_runtime": 11517.021, |
|
"train_samples_per_second": 10.419, |
|
"train_steps_per_second": 1.302 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 15000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.7109669148845115e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|