{ "best_metric": 0.3019354045391083, "best_model_checkpoint": "./xls-r-1b-bem-sv-female/checkpoint-2000", "epoch": 4.995196926032661, "eval_steps": 500, "global_step": 2600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.38424591738712777, "grad_norm": 4.829944610595703, "learning_rate": 1.97e-05, "loss": 4.4978, "step": 200 }, { "epoch": 0.7684918347742555, "grad_norm": 6.936720848083496, "learning_rate": 3.97e-05, "loss": 0.6184, "step": 400 }, { "epoch": 0.9606147934678194, "eval_loss": 0.5192012786865234, "eval_runtime": 63.7121, "eval_samples_per_second": 7.832, "eval_steps_per_second": 1.962, "eval_wer": 0.9458917835671342, "step": 500 }, { "epoch": 1.1527377521613833, "grad_norm": 1.6504590511322021, "learning_rate": 4.769047619047619e-05, "loss": 0.2678, "step": 600 }, { "epoch": 1.536983669548511, "grad_norm": 0.48046204447746277, "learning_rate": 4.292857142857143e-05, "loss": 0.2085, "step": 800 }, { "epoch": 1.9212295869356388, "grad_norm": 1.0426075458526611, "learning_rate": 3.816666666666667e-05, "loss": 0.1841, "step": 1000 }, { "epoch": 1.9212295869356388, "eval_loss": 0.33747053146362305, "eval_runtime": 61.1182, "eval_samples_per_second": 8.165, "eval_steps_per_second": 2.045, "eval_wer": 0.8396793587174348, "step": 1000 }, { "epoch": 2.3054755043227666, "grad_norm": 0.9610881209373474, "learning_rate": 3.3404761904761904e-05, "loss": 0.141, "step": 1200 }, { "epoch": 2.689721421709894, "grad_norm": 0.4008996784687042, "learning_rate": 2.8642857142857144e-05, "loss": 0.1244, "step": 1400 }, { "epoch": 2.881844380403458, "eval_loss": 0.31584659218788147, "eval_runtime": 61.6779, "eval_samples_per_second": 8.09, "eval_steps_per_second": 2.027, "eval_wer": 0.8396793587174348, "step": 1500 }, { "epoch": 3.073967339097022, "grad_norm": 0.6827730536460876, "learning_rate": 2.3880952380952383e-05, "loss": 0.1118, "step": 1600 }, { "epoch": 3.4582132564841497, "grad_norm": 0.3247419595718384, "learning_rate": 1.911904761904762e-05, "loss": 0.079, "step": 1800 }, { "epoch": 3.8424591738712777, "grad_norm": 0.5847994089126587, "learning_rate": 1.4357142857142858e-05, "loss": 0.0757, "step": 2000 }, { "epoch": 3.8424591738712777, "eval_loss": 0.3019354045391083, "eval_runtime": 61.5577, "eval_samples_per_second": 8.106, "eval_steps_per_second": 2.031, "eval_wer": 0.8036072144288577, "step": 2000 }, { "epoch": 4.226705091258405, "grad_norm": 0.3855469822883606, "learning_rate": 9.595238095238096e-06, "loss": 0.0553, "step": 2200 }, { "epoch": 4.610951008645533, "grad_norm": 0.22909563779830933, "learning_rate": 4.833333333333333e-06, "loss": 0.0419, "step": 2400 }, { "epoch": 4.803073967339097, "eval_loss": 0.33066290616989136, "eval_runtime": 61.6863, "eval_samples_per_second": 8.089, "eval_steps_per_second": 2.026, "eval_wer": 0.7995991983967936, "step": 2500 }, { "epoch": 4.995196926032661, "grad_norm": 0.05295922979712486, "learning_rate": 7.142857142857144e-08, "loss": 0.0363, "step": 2600 }, { "epoch": 4.995196926032661, "step": 2600, "total_flos": 1.1893077312531345e+19, "train_loss": 0.49554201878034154, "train_runtime": 6582.5135, "train_samples_per_second": 3.162, "train_steps_per_second": 0.395 } ], "logging_steps": 200, "max_steps": 2600, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, 
"attributes": {} } }, "total_flos": 1.1893077312531345e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }