|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.633204633204633,
  "eval_steps": 200,
  "global_step": 6000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.15444015444015444,
      "eval_loss": Infinity,
      "eval_runtime": 188.4417,
      "eval_samples_per_second": 37.264,
      "eval_steps_per_second": 4.659,
      "eval_wer": 1.0,
      "step": 200
    },
    {
      "epoch": 0.3088803088803089,
      "eval_loss": Infinity,
      "eval_runtime": 187.0753,
      "eval_samples_per_second": 37.536,
      "eval_steps_per_second": 4.693,
      "eval_wer": 0.8660615740199706,
      "step": 400
    },
    {
      "epoch": 0.3861003861003861,
      "grad_norm": 2.7789804935455322,
      "learning_rate": 0.00029699999999999996,
      "loss": 3.7305,
      "step": 500
    },
    {
      "epoch": 0.46332046332046334,
      "eval_loss": Infinity,
      "eval_runtime": 186.9186,
      "eval_samples_per_second": 37.567,
      "eval_steps_per_second": 4.697,
      "eval_wer": 0.7040110111279914,
      "step": 600
    },
    {
      "epoch": 0.6177606177606177,
      "eval_loss": Infinity,
      "eval_runtime": 187.3522,
      "eval_samples_per_second": 37.48,
      "eval_steps_per_second": 4.686,
      "eval_wer": 0.5505953540311376,
      "step": 800
    },
    {
      "epoch": 0.7722007722007722,
      "grad_norm": 11.684309959411621,
      "learning_rate": 0.0002731090909090909,
      "loss": 0.8464,
      "step": 1000
    },
    {
      "epoch": 0.7722007722007722,
      "eval_loss": Infinity,
      "eval_runtime": 187.5603,
      "eval_samples_per_second": 37.439,
      "eval_steps_per_second": 4.681,
      "eval_wer": 0.5168088504538195,
      "step": 1000
    },
    {
      "epoch": 0.9266409266409267,
      "eval_loss": Infinity,
      "eval_runtime": 187.4249,
      "eval_samples_per_second": 37.466,
      "eval_steps_per_second": 4.685,
      "eval_wer": 0.4824769844052303,
      "step": 1200
    },
    {
      "epoch": 1.0810810810810811,
      "eval_loss": Infinity,
      "eval_runtime": 188.7764,
      "eval_samples_per_second": 37.197,
      "eval_steps_per_second": 4.651,
      "eval_wer": 0.46011712308311586,
      "step": 1400
    },
    {
      "epoch": 1.1583011583011582,
      "grad_norm": 0.6005635261535645,
      "learning_rate": 0.0002458363636363636,
      "loss": 0.6629,
      "step": 1500
    },
    {
      "epoch": 1.2355212355212355,
      "eval_loss": Infinity,
      "eval_runtime": 188.0077,
      "eval_samples_per_second": 37.35,
      "eval_steps_per_second": 4.67,
      "eval_wer": 0.4445483230104008,
      "step": 1600
    },
    {
      "epoch": 1.3899613899613898,
      "eval_loss": Infinity,
      "eval_runtime": 188.204,
      "eval_samples_per_second": 37.311,
      "eval_steps_per_second": 4.665,
      "eval_wer": 0.41425473621336656,
      "step": 1800
    },
    {
      "epoch": 1.5444015444015444,
      "grad_norm": 0.4440418481826782,
      "learning_rate": 0.0002186181818181818,
      "loss": 0.5655,
      "step": 2000
    },
    {
      "epoch": 1.5444015444015444,
      "eval_loss": Infinity,
      "eval_runtime": 188.2865,
      "eval_samples_per_second": 37.294,
      "eval_steps_per_second": 4.663,
      "eval_wer": 0.41700751821121107,
      "step": 2000
    },
    {
      "epoch": 1.698841698841699,
      "eval_loss": Infinity,
      "eval_runtime": 189.1618,
      "eval_samples_per_second": 37.122,
      "eval_steps_per_second": 4.642,
      "eval_wer": 0.4047108929661226,
      "step": 2200
    },
    {
      "epoch": 1.8532818532818531,
      "eval_loss": Infinity,
      "eval_runtime": 188.2544,
      "eval_samples_per_second": 37.301,
      "eval_steps_per_second": 4.664,
      "eval_wer": 0.3966213496422682,
      "step": 2400
    },
    {
      "epoch": 1.9305019305019306,
      "grad_norm": 0.9740249514579773,
      "learning_rate": 0.00019150909090909088,
      "loss": 0.5524,
      "step": 2500
    },
    {
      "epoch": 2.0077220077220077,
      "eval_loss": Infinity,
      "eval_runtime": 189.1265,
      "eval_samples_per_second": 37.129,
      "eval_steps_per_second": 4.642,
      "eval_wer": 0.37794917741160583,
      "step": 2600
    },
    {
      "epoch": 2.1621621621621623,
      "eval_loss": Infinity,
      "eval_runtime": 188.7855,
      "eval_samples_per_second": 37.196,
      "eval_steps_per_second": 4.651,
      "eval_wer": 0.37366418656590444,
      "step": 2800
    },
    {
      "epoch": 2.3166023166023164,
      "grad_norm": 0.5873022675514221,
      "learning_rate": 0.00016429090909090907,
      "loss": 0.4773,
      "step": 3000
    },
    {
      "epoch": 2.3166023166023164,
      "eval_loss": Infinity,
      "eval_runtime": 189.7378,
      "eval_samples_per_second": 37.009,
      "eval_steps_per_second": 4.627,
      "eval_wer": 0.3698336644462623,
      "step": 3000
    },
    {
      "epoch": 2.471042471042471,
      "eval_loss": Infinity,
      "eval_runtime": 189.8812,
      "eval_samples_per_second": 36.981,
      "eval_steps_per_second": 4.624,
      "eval_wer": 0.37235271967070493,
      "step": 3200
    },
    {
      "epoch": 2.6254826254826256,
      "eval_loss": Infinity,
      "eval_runtime": 189.4769,
      "eval_samples_per_second": 37.06,
      "eval_steps_per_second": 4.634,
      "eval_wer": 0.3583940373703141,
      "step": 3400
    },
    {
      "epoch": 2.7027027027027026,
      "grad_norm": 1.4725390672683716,
      "learning_rate": 0.00013712727272727272,
      "loss": 0.4694,
      "step": 3500
    },
    {
      "epoch": 2.7799227799227797,
      "eval_loss": Infinity,
      "eval_runtime": 189.6126,
      "eval_samples_per_second": 37.033,
      "eval_steps_per_second": 4.63,
      "eval_wer": 0.3820783504083726,
      "step": 3600
    },
    {
      "epoch": 2.9343629343629343,
      "eval_loss": Infinity,
      "eval_runtime": 190.0131,
      "eval_samples_per_second": 36.955,
      "eval_steps_per_second": 4.621,
      "eval_wer": 0.4729850804409645,
      "step": 3800
    },
    {
      "epoch": 3.088803088803089,
      "grad_norm": 14.039852142333984,
      "learning_rate": 0.0001099090909090909,
      "loss": 0.6537,
      "step": 4000
    },
    {
      "epoch": 3.088803088803089,
      "eval_loss": Infinity,
      "eval_runtime": 189.7145,
      "eval_samples_per_second": 37.014,
      "eval_steps_per_second": 4.628,
      "eval_wer": 0.4753613026372171,
      "step": 4000
    },
    {
      "epoch": 3.2432432432432434,
      "eval_loss": Infinity,
      "eval_runtime": 189.6267,
      "eval_samples_per_second": 37.031,
      "eval_steps_per_second": 4.63,
      "eval_wer": 0.5899263760663784,
      "step": 4200
    },
    {
      "epoch": 3.3976833976833976,
      "eval_loss": Infinity,
      "eval_runtime": 189.2072,
      "eval_samples_per_second": 37.113,
      "eval_steps_per_second": 4.64,
      "eval_wer": 0.5957565605806812,
      "step": 4400
    },
    {
      "epoch": 3.474903474903475,
      "grad_norm": 15.850507736206055,
      "learning_rate": 8.269090909090907e-05,
      "loss": 0.8238,
      "step": 4500
    },
    {
      "epoch": 3.552123552123552,
      "eval_loss": Infinity,
      "eval_runtime": 188.9868,
      "eval_samples_per_second": 37.156,
      "eval_steps_per_second": 4.646,
      "eval_wer": 0.633646267513277,
      "step": 4600
    },
    {
      "epoch": 3.7065637065637067,
      "eval_loss": Infinity,
      "eval_runtime": 190.0461,
      "eval_samples_per_second": 36.949,
      "eval_steps_per_second": 4.62,
      "eval_wer": 0.6025865762923143,
      "step": 4800
    },
    {
      "epoch": 3.861003861003861,
      "grad_norm": 6.711777687072754,
      "learning_rate": 5.547272727272727e-05,
      "loss": 0.8682,
      "step": 5000
    },
    {
      "epoch": 3.861003861003861,
      "eval_loss": Infinity,
      "eval_runtime": 189.1358,
      "eval_samples_per_second": 37.127,
      "eval_steps_per_second": 4.642,
      "eval_wer": 0.5671380156596938,
      "step": 5000
    },
    {
      "epoch": 4.015444015444015,
      "eval_loss": Infinity,
      "eval_runtime": 190.8009,
      "eval_samples_per_second": 36.803,
      "eval_steps_per_second": 4.602,
      "eval_wer": 0.5377923207770117,
      "step": 5200
    },
    {
      "epoch": 4.1698841698841695,
      "eval_loss": Infinity,
      "eval_runtime": 189.3706,
      "eval_samples_per_second": 37.081,
      "eval_steps_per_second": 4.636,
      "eval_wer": 0.5373638216924416,
      "step": 5400
    },
    {
      "epoch": 4.2471042471042475,
      "grad_norm": 6.047791957855225,
      "learning_rate": 2.8309090909090903e-05,
      "loss": 0.855,
      "step": 5500
    },
    {
      "epoch": 4.324324324324325,
      "eval_loss": Infinity,
      "eval_runtime": 189.8451,
      "eval_samples_per_second": 36.988,
      "eval_steps_per_second": 4.625,
      "eval_wer": 0.5328451040733383,
      "step": 5600
    },
    {
      "epoch": 4.478764478764479,
      "eval_loss": Infinity,
      "eval_runtime": 190.1917,
      "eval_samples_per_second": 36.921,
      "eval_steps_per_second": 4.616,
      "eval_wer": 0.5225351564021659,
      "step": 5800
    },
    {
      "epoch": 4.633204633204633,
      "grad_norm": 9.171218872070312,
      "learning_rate": 1.0363636363636363e-06,
      "loss": 0.9644,
      "step": 6000
    },
    {
      "epoch": 4.633204633204633,
      "eval_loss": Infinity,
      "eval_runtime": 190.3777,
      "eval_samples_per_second": 36.885,
      "eval_steps_per_second": 4.612,
      "eval_wer": 0.5237557295521535,
      "step": 6000
    },
    {
      "epoch": 4.633204633204633,
      "step": 6000,
      "total_flos": 2.5308493485736165e+19,
      "train_loss": 0.9557839482625325,
      "train_runtime": 12299.0724,
      "train_samples_per_second": 15.611,
      "train_steps_per_second": 0.488
    }
  ],
  "logging_steps": 500,
  "max_steps": 6000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.5308493485736165e+19,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}