{ "best_metric": 0.31370314955711365, "best_model_checkpoint": "/scratch/skscla001/results/xls-r-1b-bem-genbed-f-model/checkpoint-2000", "epoch": 6.575342465753424, "eval_steps": 100, "global_step": 2400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.273972602739726, "eval_loss": 3.0377819538116455, "eval_runtime": 63.0932, "eval_samples_per_second": 15.374, "eval_steps_per_second": 1.934, "eval_wer": 1.0, "step": 100 }, { "epoch": 0.547945205479452, "eval_loss": 0.8302333950996399, "eval_runtime": 63.0983, "eval_samples_per_second": 15.373, "eval_steps_per_second": 1.933, "eval_wer": 0.9818455258352132, "step": 200 }, { "epoch": 0.821917808219178, "eval_loss": 0.6783275008201599, "eval_runtime": 63.062, "eval_samples_per_second": 15.382, "eval_steps_per_second": 1.935, "eval_wer": 0.9103018584165861, "step": 300 }, { "epoch": 1.095890410958904, "eval_loss": 0.5512491464614868, "eval_runtime": 62.9709, "eval_samples_per_second": 15.404, "eval_steps_per_second": 1.937, "eval_wer": 0.8720592974540767, "step": 400 }, { "epoch": 1.36986301369863, "grad_norm": 1.8127524852752686, "learning_rate": 0.0002982, "loss": 1.8782, "step": 500 }, { "epoch": 1.36986301369863, "eval_loss": 0.5296033024787903, "eval_runtime": 62.9931, "eval_samples_per_second": 15.399, "eval_steps_per_second": 1.937, "eval_wer": 0.8568052422386937, "step": 500 }, { "epoch": 1.643835616438356, "eval_loss": 0.44133469462394714, "eval_runtime": 63.0857, "eval_samples_per_second": 15.376, "eval_steps_per_second": 1.934, "eval_wer": 0.7332688795789022, "step": 600 }, { "epoch": 1.9178082191780823, "eval_loss": 0.4747186303138733, "eval_runtime": 62.7711, "eval_samples_per_second": 15.453, "eval_steps_per_second": 1.944, "eval_wer": 0.7614136856805243, "step": 700 }, { "epoch": 2.191780821917808, "eval_loss": 0.38835805654525757, "eval_runtime": 62.8716, "eval_samples_per_second": 15.428, "eval_steps_per_second": 1.94, "eval_wer": 0.6666666666666666, "step": 800 }, { "epoch": 2.4657534246575343, "eval_loss": 0.35773810744285583, "eval_runtime": 63.1283, "eval_samples_per_second": 15.366, "eval_steps_per_second": 1.933, "eval_wer": 0.6355140186915887, "step": 900 }, { "epoch": 2.73972602739726, "grad_norm": 0.8247061371803284, "learning_rate": 0.0002857320574162679, "loss": 0.5114, "step": 1000 }, { "epoch": 2.73972602739726, "eval_loss": 0.35849785804748535, "eval_runtime": 63.6369, "eval_samples_per_second": 15.243, "eval_steps_per_second": 1.917, "eval_wer": 0.632076485121925, "step": 1000 }, { "epoch": 3.0136986301369864, "eval_loss": 0.3641321659088135, "eval_runtime": 64.0158, "eval_samples_per_second": 15.153, "eval_steps_per_second": 1.906, "eval_wer": 0.6606509829197551, "step": 1100 }, { "epoch": 3.287671232876712, "eval_loss": 0.3812738358974457, "eval_runtime": 63.4185, "eval_samples_per_second": 15.295, "eval_steps_per_second": 1.924, "eval_wer": 0.7282200021484585, "step": 1200 }, { "epoch": 3.5616438356164384, "eval_loss": 0.38292980194091797, "eval_runtime": 63.3868, "eval_samples_per_second": 15.303, "eval_steps_per_second": 1.925, "eval_wer": 0.7085616070469438, "step": 1300 }, { "epoch": 3.8356164383561646, "eval_loss": 0.36824819445610046, "eval_runtime": 64.0552, "eval_samples_per_second": 15.143, "eval_steps_per_second": 1.905, "eval_wer": 0.6413148565903964, "step": 1400 }, { "epoch": 4.109589041095891, "grad_norm": 0.8130350112915039, "learning_rate": 0.000271377990430622, "loss": 0.3931, "step": 1500 }, { "epoch": 4.109589041095891, "eval_loss": 0.3526601195335388, "eval_runtime": 63.2057, "eval_samples_per_second": 15.347, "eval_steps_per_second": 1.93, "eval_wer": 0.6220861531850898, "step": 1500 }, { "epoch": 4.383561643835616, "eval_loss": 0.34812936186790466, "eval_runtime": 63.2206, "eval_samples_per_second": 15.343, "eval_steps_per_second": 1.93, "eval_wer": 0.6297131807927812, "step": 1600 }, { "epoch": 4.657534246575342, "eval_loss": 0.3540855944156647, "eval_runtime": 63.557, "eval_samples_per_second": 15.262, "eval_steps_per_second": 1.92, "eval_wer": 0.6192931571597379, "step": 1700 }, { "epoch": 4.931506849315069, "eval_loss": 0.3354911208152771, "eval_runtime": 63.9409, "eval_samples_per_second": 15.17, "eval_steps_per_second": 1.908, "eval_wer": 0.6242346116661296, "step": 1800 }, { "epoch": 5.205479452054795, "eval_loss": 0.3339410722255707, "eval_runtime": 64.696, "eval_samples_per_second": 14.993, "eval_steps_per_second": 1.886, "eval_wer": 0.5800837898807606, "step": 1900 }, { "epoch": 5.47945205479452, "grad_norm": 0.5992590188980103, "learning_rate": 0.0002570239234449761, "loss": 0.3293, "step": 2000 }, { "epoch": 5.47945205479452, "eval_loss": 0.31370314955711365, "eval_runtime": 64.3752, "eval_samples_per_second": 15.068, "eval_steps_per_second": 1.895, "eval_wer": 0.5529057900956064, "step": 2000 }, { "epoch": 5.7534246575342465, "eval_loss": 0.3132256865501404, "eval_runtime": 63.6862, "eval_samples_per_second": 15.231, "eval_steps_per_second": 1.916, "eval_wer": 0.5822322483618004, "step": 2100 }, { "epoch": 6.027397260273973, "eval_loss": 0.31450605392456055, "eval_runtime": 64.6472, "eval_samples_per_second": 15.005, "eval_steps_per_second": 1.887, "eval_wer": 0.5676227306907294, "step": 2200 }, { "epoch": 6.301369863013699, "eval_loss": 0.3282639682292938, "eval_runtime": 63.5382, "eval_samples_per_second": 15.266, "eval_steps_per_second": 1.92, "eval_wer": 0.5960898055645074, "step": 2300 }, { "epoch": 6.575342465753424, "eval_loss": 0.32465773820877075, "eval_runtime": 64.1901, "eval_samples_per_second": 15.111, "eval_steps_per_second": 1.901, "eval_wer": 0.5987753786658073, "step": 2400 }, { "epoch": 6.575342465753424, "step": 2400, "total_flos": 1.1217868566953175e+19, "train_loss": 0.6952082284291585, "train_runtime": 4252.7781, "train_samples_per_second": 20.577, "train_steps_per_second": 2.575 } ], "logging_steps": 500, "max_steps": 10950, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 400, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1217868566953175e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }