|
{ |
|
"best_metric": 0.31370314955711365, |
|
"best_model_checkpoint": "/scratch/skscla001/results/xls-r-1b-bem-genbed-f-model/checkpoint-2000", |
|
"epoch": 6.575342465753424, |
|
"eval_steps": 100, |
|
"global_step": 2400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.273972602739726, |
|
"eval_loss": 3.0377819538116455, |
|
"eval_runtime": 63.0932, |
|
"eval_samples_per_second": 15.374, |
|
"eval_steps_per_second": 1.934, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.547945205479452, |
|
"eval_loss": 0.8302333950996399, |
|
"eval_runtime": 63.0983, |
|
"eval_samples_per_second": 15.373, |
|
"eval_steps_per_second": 1.933, |
|
"eval_wer": 0.9818455258352132, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.821917808219178, |
|
"eval_loss": 0.6783275008201599, |
|
"eval_runtime": 63.062, |
|
"eval_samples_per_second": 15.382, |
|
"eval_steps_per_second": 1.935, |
|
"eval_wer": 0.9103018584165861, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.095890410958904, |
|
"eval_loss": 0.5512491464614868, |
|
"eval_runtime": 62.9709, |
|
"eval_samples_per_second": 15.404, |
|
"eval_steps_per_second": 1.937, |
|
"eval_wer": 0.8720592974540767, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.36986301369863, |
|
"grad_norm": 1.8127524852752686, |
|
"learning_rate": 0.0002982, |
|
"loss": 1.8782, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.36986301369863, |
|
"eval_loss": 0.5296033024787903, |
|
"eval_runtime": 62.9931, |
|
"eval_samples_per_second": 15.399, |
|
"eval_steps_per_second": 1.937, |
|
"eval_wer": 0.8568052422386937, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.643835616438356, |
|
"eval_loss": 0.44133469462394714, |
|
"eval_runtime": 63.0857, |
|
"eval_samples_per_second": 15.376, |
|
"eval_steps_per_second": 1.934, |
|
"eval_wer": 0.7332688795789022, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.9178082191780823, |
|
"eval_loss": 0.4747186303138733, |
|
"eval_runtime": 62.7711, |
|
"eval_samples_per_second": 15.453, |
|
"eval_steps_per_second": 1.944, |
|
"eval_wer": 0.7614136856805243, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.191780821917808, |
|
"eval_loss": 0.38835805654525757, |
|
"eval_runtime": 62.8716, |
|
"eval_samples_per_second": 15.428, |
|
"eval_steps_per_second": 1.94, |
|
"eval_wer": 0.6666666666666666, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.4657534246575343, |
|
"eval_loss": 0.35773810744285583, |
|
"eval_runtime": 63.1283, |
|
"eval_samples_per_second": 15.366, |
|
"eval_steps_per_second": 1.933, |
|
"eval_wer": 0.6355140186915887, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.73972602739726, |
|
"grad_norm": 0.8247061371803284, |
|
"learning_rate": 0.0002857320574162679, |
|
"loss": 0.5114, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.73972602739726, |
|
"eval_loss": 0.35849785804748535, |
|
"eval_runtime": 63.6369, |
|
"eval_samples_per_second": 15.243, |
|
"eval_steps_per_second": 1.917, |
|
"eval_wer": 0.632076485121925, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.0136986301369864, |
|
"eval_loss": 0.3641321659088135, |
|
"eval_runtime": 64.0158, |
|
"eval_samples_per_second": 15.153, |
|
"eval_steps_per_second": 1.906, |
|
"eval_wer": 0.6606509829197551, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.287671232876712, |
|
"eval_loss": 0.3812738358974457, |
|
"eval_runtime": 63.4185, |
|
"eval_samples_per_second": 15.295, |
|
"eval_steps_per_second": 1.924, |
|
"eval_wer": 0.7282200021484585, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.5616438356164384, |
|
"eval_loss": 0.38292980194091797, |
|
"eval_runtime": 63.3868, |
|
"eval_samples_per_second": 15.303, |
|
"eval_steps_per_second": 1.925, |
|
"eval_wer": 0.7085616070469438, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.8356164383561646, |
|
"eval_loss": 0.36824819445610046, |
|
"eval_runtime": 64.0552, |
|
"eval_samples_per_second": 15.143, |
|
"eval_steps_per_second": 1.905, |
|
"eval_wer": 0.6413148565903964, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 4.109589041095891, |
|
"grad_norm": 0.8130350112915039, |
|
"learning_rate": 0.000271377990430622, |
|
"loss": 0.3931, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.109589041095891, |
|
"eval_loss": 0.3526601195335388, |
|
"eval_runtime": 63.2057, |
|
"eval_samples_per_second": 15.347, |
|
"eval_steps_per_second": 1.93, |
|
"eval_wer": 0.6220861531850898, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.383561643835616, |
|
"eval_loss": 0.34812936186790466, |
|
"eval_runtime": 63.2206, |
|
"eval_samples_per_second": 15.343, |
|
"eval_steps_per_second": 1.93, |
|
"eval_wer": 0.6297131807927812, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.657534246575342, |
|
"eval_loss": 0.3540855944156647, |
|
"eval_runtime": 63.557, |
|
"eval_samples_per_second": 15.262, |
|
"eval_steps_per_second": 1.92, |
|
"eval_wer": 0.6192931571597379, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 4.931506849315069, |
|
"eval_loss": 0.3354911208152771, |
|
"eval_runtime": 63.9409, |
|
"eval_samples_per_second": 15.17, |
|
"eval_steps_per_second": 1.908, |
|
"eval_wer": 0.6242346116661296, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 5.205479452054795, |
|
"eval_loss": 0.3339410722255707, |
|
"eval_runtime": 64.696, |
|
"eval_samples_per_second": 14.993, |
|
"eval_steps_per_second": 1.886, |
|
"eval_wer": 0.5800837898807606, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 5.47945205479452, |
|
"grad_norm": 0.5992590188980103, |
|
"learning_rate": 0.0002570239234449761, |
|
"loss": 0.3293, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.47945205479452, |
|
"eval_loss": 0.31370314955711365, |
|
"eval_runtime": 64.3752, |
|
"eval_samples_per_second": 15.068, |
|
"eval_steps_per_second": 1.895, |
|
"eval_wer": 0.5529057900956064, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.7534246575342465, |
|
"eval_loss": 0.3132256865501404, |
|
"eval_runtime": 63.6862, |
|
"eval_samples_per_second": 15.231, |
|
"eval_steps_per_second": 1.916, |
|
"eval_wer": 0.5822322483618004, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 6.027397260273973, |
|
"eval_loss": 0.31450605392456055, |
|
"eval_runtime": 64.6472, |
|
"eval_samples_per_second": 15.005, |
|
"eval_steps_per_second": 1.887, |
|
"eval_wer": 0.5676227306907294, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 6.301369863013699, |
|
"eval_loss": 0.3282639682292938, |
|
"eval_runtime": 63.5382, |
|
"eval_samples_per_second": 15.266, |
|
"eval_steps_per_second": 1.92, |
|
"eval_wer": 0.5960898055645074, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 6.575342465753424, |
|
"eval_loss": 0.32465773820877075, |
|
"eval_runtime": 64.1901, |
|
"eval_samples_per_second": 15.111, |
|
"eval_steps_per_second": 1.901, |
|
"eval_wer": 0.5987753786658073, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 6.575342465753424, |
|
"step": 2400, |
|
"total_flos": 1.1217868566953175e+19, |
|
"train_loss": 0.6952082284291585, |
|
"train_runtime": 4252.7781, |
|
"train_samples_per_second": 20.577, |
|
"train_steps_per_second": 2.575 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 10950, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 400, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 3 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.1217868566953175e+19, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|