{ "best_metric": 0.5623809695243835, "best_model_checkpoint": "/scratch/skscla001/results/xls-r-1b-bem-genbed-m-model/checkpoint-1200", "epoch": 4.689655172413794, "eval_steps": 100, "global_step": 1700, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.27586206896551724, "eval_loss": 2.9022111892700195, "eval_runtime": 66.7362, "eval_samples_per_second": 14.52, "eval_steps_per_second": 1.828, "eval_wer": 1.0, "step": 100 }, { "epoch": 0.5517241379310345, "eval_loss": 1.4914664030075073, "eval_runtime": 66.6507, "eval_samples_per_second": 14.538, "eval_steps_per_second": 1.83, "eval_wer": 1.002176041780002, "step": 200 }, { "epoch": 0.8275862068965517, "eval_loss": 0.9387084245681763, "eval_runtime": 66.0539, "eval_samples_per_second": 14.67, "eval_steps_per_second": 1.847, "eval_wer": 0.9415732782069416, "step": 300 }, { "epoch": 1.103448275862069, "eval_loss": 0.7253537774085999, "eval_runtime": 66.8678, "eval_samples_per_second": 14.491, "eval_steps_per_second": 1.824, "eval_wer": 0.8660646284408661, "step": 400 }, { "epoch": 1.3793103448275863, "grad_norm": 1.6774070262908936, "learning_rate": 0.00029759999999999997, "loss": 2.1327, "step": 500 }, { "epoch": 1.3793103448275863, "eval_loss": 0.8557319045066833, "eval_runtime": 66.8253, "eval_samples_per_second": 14.501, "eval_steps_per_second": 1.826, "eval_wer": 0.942008486562942, "step": 500 }, { "epoch": 1.6551724137931034, "eval_loss": 0.723638117313385, "eval_runtime": 66.6409, "eval_samples_per_second": 14.541, "eval_steps_per_second": 1.831, "eval_wer": 0.8661734305298662, "step": 600 }, { "epoch": 1.9310344827586206, "eval_loss": 0.6982457637786865, "eval_runtime": 66.5193, "eval_samples_per_second": 14.567, "eval_steps_per_second": 1.834, "eval_wer": 0.8572516592318572, "step": 700 }, { "epoch": 2.206896551724138, "eval_loss": 0.6066463589668274, "eval_runtime": 66.4475, "eval_samples_per_second": 14.583, "eval_steps_per_second": 1.836, "eval_wer": 0.8003481666848004, "step": 800 }, { "epoch": 2.4827586206896552, "eval_loss": 0.6351816654205322, "eval_runtime": 66.6502, "eval_samples_per_second": 14.539, "eval_steps_per_second": 1.83, "eval_wer": 0.7989337395277989, "step": 900 }, { "epoch": 2.7586206896551726, "grad_norm": 1.1028050184249878, "learning_rate": 0.0002856370656370656, "loss": 0.733, "step": 1000 }, { "epoch": 2.7586206896551726, "eval_loss": 0.5854902863502502, "eval_runtime": 66.0232, "eval_samples_per_second": 14.677, "eval_steps_per_second": 1.848, "eval_wer": 0.7902295724077902, "step": 1000 }, { "epoch": 3.0344827586206895, "eval_loss": 0.5586574673652649, "eval_runtime": 65.9579, "eval_samples_per_second": 14.691, "eval_steps_per_second": 1.85, "eval_wer": 0.738983788488739, "step": 1100 }, { "epoch": 3.310344827586207, "eval_loss": 0.5623809695243835, "eval_runtime": 66.0815, "eval_samples_per_second": 14.664, "eval_steps_per_second": 1.846, "eval_wer": 0.7513872266347514, "step": 1200 }, { "epoch": 3.586206896551724, "eval_loss": 0.5213696360588074, "eval_runtime": 66.262, "eval_samples_per_second": 14.624, "eval_steps_per_second": 1.841, "eval_wer": 0.7192906103797193, "step": 1300 }, { "epoch": 3.862068965517241, "eval_loss": 0.5320978164672852, "eval_runtime": 66.1025, "eval_samples_per_second": 14.659, "eval_steps_per_second": 1.846, "eval_wer": 0.7208138396257208, "step": 1400 }, { "epoch": 4.137931034482759, "grad_norm": 1.7808165550231934, "learning_rate": 0.0002711583011583011, "loss": 0.5894, "step": 1500 }, { "epoch": 4.137931034482759, "eval_loss": 0.6476588249206543, "eval_runtime": 66.1849, "eval_samples_per_second": 14.641, "eval_steps_per_second": 1.843, "eval_wer": 0.7798933739527799, "step": 1500 }, { "epoch": 4.413793103448276, "eval_loss": 0.607578694820404, "eval_runtime": 66.3108, "eval_samples_per_second": 14.613, "eval_steps_per_second": 1.84, "eval_wer": 0.780981394842781, "step": 1600 }, { "epoch": 4.689655172413794, "eval_loss": 0.5820547938346863, "eval_runtime": 66.5363, "eval_samples_per_second": 14.563, "eval_steps_per_second": 1.834, "eval_wer": 0.7666195190947667, "step": 1700 }, { "epoch": 4.689655172413794, "step": 1700, "total_flos": 8.072996922592684e+18, "train_loss": 1.0753759990018956, "train_runtime": 3063.4125, "train_samples_per_second": 28.4, "train_steps_per_second": 3.545 } ], "logging_steps": 500, "max_steps": 10860, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 400, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 2 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 8.072996922592684e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }