{ "best_metric": 0.702553391456604, "best_model_checkpoint": "/scratch/skscla001/results/xls-r-1b-bem-natbed-combined-model/checkpoint-1800", "epoch": 5.256570713391739, "eval_steps": 100, "global_step": 2100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2503128911138924, "grad_norm": 2.982633352279663, "learning_rate": 0.00029099999999999997, "loss": 5.0296, "step": 100 }, { "epoch": 0.2503128911138924, "eval_loss": 2.807065010070801, "eval_runtime": 99.4276, "eval_samples_per_second": 13.658, "eval_steps_per_second": 1.71, "eval_wer": 1.0, "step": 100 }, { "epoch": 0.5006257822277848, "grad_norm": 2.2083914279937744, "learning_rate": 0.00029754844144903116, "loss": 1.5792, "step": 200 }, { "epoch": 0.5006257822277848, "eval_loss": 1.1184355020523071, "eval_runtime": 98.2757, "eval_samples_per_second": 13.818, "eval_steps_per_second": 1.73, "eval_wer": 0.9645076872751063, "step": 200 }, { "epoch": 0.7509386733416771, "grad_norm": 3.159029483795166, "learning_rate": 0.0002950210614995788, "loss": 1.1422, "step": 300 }, { "epoch": 0.7509386733416771, "eval_loss": 1.0389240980148315, "eval_runtime": 98.789, "eval_samples_per_second": 13.746, "eval_steps_per_second": 1.721, "eval_wer": 0.960582270199542, "step": 300 }, { "epoch": 1.0012515644555695, "grad_norm": 6.720183372497559, "learning_rate": 0.00029249368155012635, "loss": 0.9883, "step": 400 }, { "epoch": 1.0012515644555695, "eval_loss": 1.0493754148483276, "eval_runtime": 97.5764, "eval_samples_per_second": 13.917, "eval_steps_per_second": 1.742, "eval_wer": 0.9989368662087014, "step": 400 }, { "epoch": 1.2515644555694618, "grad_norm": 3.1551387310028076, "learning_rate": 0.00028996630160067397, "loss": 0.8999, "step": 500 }, { "epoch": 1.2515644555694618, "eval_loss": 0.86915123462677, "eval_runtime": 98.5884, "eval_samples_per_second": 13.774, "eval_steps_per_second": 1.724, "eval_wer": 0.8682531894013739, "step": 500 }, { "epoch": 1.5018773466833542, "grad_norm": 2.8559296131134033, "learning_rate": 0.00028743892165122154, "loss": 0.9135, "step": 600 }, { "epoch": 1.5018773466833542, "eval_loss": 0.8564479351043701, "eval_runtime": 98.2671, "eval_samples_per_second": 13.819, "eval_steps_per_second": 1.73, "eval_wer": 0.8429833169774289, "step": 600 }, { "epoch": 1.7521902377972465, "grad_norm": 2.4900286197662354, "learning_rate": 0.00028491154170176916, "loss": 0.8898, "step": 700 }, { "epoch": 1.7521902377972465, "eval_loss": 0.8450906872749329, "eval_runtime": 98.3583, "eval_samples_per_second": 13.807, "eval_steps_per_second": 1.728, "eval_wer": 0.8522244030094864, "step": 700 }, { "epoch": 2.002503128911139, "grad_norm": 2.2157928943634033, "learning_rate": 0.0002823841617523167, "loss": 0.9089, "step": 800 }, { "epoch": 2.002503128911139, "eval_loss": 0.8857161402702332, "eval_runtime": 98.023, "eval_samples_per_second": 13.854, "eval_steps_per_second": 1.734, "eval_wer": 0.8484625449787373, "step": 800 }, { "epoch": 2.252816020025031, "grad_norm": 10.16332721710205, "learning_rate": 0.00027985678180286435, "loss": 0.8292, "step": 900 }, { "epoch": 2.252816020025031, "eval_loss": 0.8661824464797974, "eval_runtime": 98.5047, "eval_samples_per_second": 13.786, "eval_steps_per_second": 1.726, "eval_wer": 0.8580307491004252, "step": 900 }, { "epoch": 2.5031289111389237, "grad_norm": 1.2531850337982178, "learning_rate": 0.00027732940185341197, "loss": 0.7921, "step": 1000 }, { "epoch": 2.5031289111389237, "eval_loss": 0.796419084072113, "eval_runtime": 98.0484, "eval_samples_per_second": 13.85, "eval_steps_per_second": 1.734, "eval_wer": 0.7968596663395485, "step": 1000 }, { "epoch": 2.7534418022528158, "grad_norm": 6.90547513961792, "learning_rate": 0.00027480202190395954, "loss": 0.7983, "step": 1100 }, { "epoch": 2.7534418022528158, "eval_loss": 0.7895970940589905, "eval_runtime": 98.3739, "eval_samples_per_second": 13.804, "eval_steps_per_second": 1.728, "eval_wer": 0.7951422963689893, "step": 1100 }, { "epoch": 3.0037546933667083, "grad_norm": 1.1230757236480713, "learning_rate": 0.00027227464195450716, "loss": 0.7946, "step": 1200 }, { "epoch": 3.0037546933667083, "eval_loss": 0.7666952013969421, "eval_runtime": 97.1256, "eval_samples_per_second": 13.982, "eval_steps_per_second": 1.75, "eval_wer": 0.7946516192345436, "step": 1200 }, { "epoch": 3.254067584480601, "grad_norm": 2.657978057861328, "learning_rate": 0.00026974726200505473, "loss": 0.7488, "step": 1300 }, { "epoch": 3.254067584480601, "eval_loss": 0.8179810047149658, "eval_runtime": 98.614, "eval_samples_per_second": 13.771, "eval_steps_per_second": 1.724, "eval_wer": 0.8495256787700359, "step": 1300 }, { "epoch": 3.504380475594493, "grad_norm": 1.1082779169082642, "learning_rate": 0.00026721988205560235, "loss": 0.7428, "step": 1400 }, { "epoch": 3.504380475594493, "eval_loss": 0.7548468708992004, "eval_runtime": 98.2436, "eval_samples_per_second": 13.823, "eval_steps_per_second": 1.73, "eval_wer": 0.7688092901537456, "step": 1400 }, { "epoch": 3.7546933667083855, "grad_norm": 0.6124172210693359, "learning_rate": 0.0002646925021061499, "loss": 0.7256, "step": 1500 }, { "epoch": 3.7546933667083855, "eval_loss": 0.7258334755897522, "eval_runtime": 98.5216, "eval_samples_per_second": 13.784, "eval_steps_per_second": 1.726, "eval_wer": 0.7595682041216879, "step": 1500 }, { "epoch": 4.005006257822278, "grad_norm": 1.6707249879837036, "learning_rate": 0.00026216512215669754, "loss": 0.741, "step": 1600 }, { "epoch": 4.005006257822278, "eval_loss": 0.7665364146232605, "eval_runtime": 98.0591, "eval_samples_per_second": 13.849, "eval_steps_per_second": 1.734, "eval_wer": 0.7717533529604187, "step": 1600 }, { "epoch": 4.25531914893617, "grad_norm": 1.7184687852859497, "learning_rate": 0.00025963774220724516, "loss": 0.6775, "step": 1700 }, { "epoch": 4.25531914893617, "eval_loss": 0.7922295928001404, "eval_runtime": 97.9024, "eval_samples_per_second": 13.871, "eval_steps_per_second": 1.736, "eval_wer": 0.77747791952895, "step": 1700 }, { "epoch": 4.505632040050062, "grad_norm": 0.8240795135498047, "learning_rate": 0.00025711036225779273, "loss": 0.6795, "step": 1800 }, { "epoch": 4.505632040050062, "eval_loss": 0.702553391456604, "eval_runtime": 96.6836, "eval_samples_per_second": 14.046, "eval_steps_per_second": 1.758, "eval_wer": 0.7512266928361139, "step": 1800 }, { "epoch": 4.755944931163955, "grad_norm": 0.8530161380767822, "learning_rate": 0.00025458298230834035, "loss": 0.683, "step": 1900 }, { "epoch": 4.755944931163955, "eval_loss": 0.7051005363464355, "eval_runtime": 98.456, "eval_samples_per_second": 13.793, "eval_steps_per_second": 1.727, "eval_wer": 0.72252208047105, "step": 1900 }, { "epoch": 5.006257822277847, "grad_norm": 1.205073595046997, "learning_rate": 0.0002520556023588879, "loss": 0.6838, "step": 2000 }, { "epoch": 5.006257822277847, "eval_loss": 0.7196346521377563, "eval_runtime": 98.5975, "eval_samples_per_second": 13.773, "eval_steps_per_second": 1.724, "eval_wer": 0.7503271180896304, "step": 2000 }, { "epoch": 5.256570713391739, "grad_norm": 0.9071811437606812, "learning_rate": 0.00024952822240943554, "loss": 0.6005, "step": 2100 }, { "epoch": 5.256570713391739, "eval_loss": 0.7032491564750671, "eval_runtime": 98.4687, "eval_samples_per_second": 13.791, "eval_steps_per_second": 1.726, "eval_wer": 0.7423945044160942, "step": 2100 }, { "epoch": 5.256570713391739, "step": 2100, "total_flos": 2.148232424796796e+19, "train_loss": 1.0403752663021997, "train_runtime": 6352.2325, "train_samples_per_second": 30.164, "train_steps_per_second": 1.884 } ], "logging_steps": 100, "max_steps": 11970, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 200, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 2 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.148232424796796e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }