{ "best_metric": 0.21721933782100677, "best_model_checkpoint": "./xls-r-1b-bem-genbed-all/checkpoint-3000", "epoch": 4.9966953073364175, "eval_steps": 200, "global_step": 3780, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.26437541308658297, "grad_norm": 3.2960169315338135, "learning_rate": 1.97e-05, "loss": 4.6827, "step": 200 }, { "epoch": 0.26437541308658297, "eval_loss": 2.834711790084839, "eval_runtime": 123.0061, "eval_samples_per_second": 16.406, "eval_steps_per_second": 4.105, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.5287508261731659, "grad_norm": 5.020152568817139, "learning_rate": 3.97e-05, "loss": 1.0401, "step": 400 }, { "epoch": 0.5287508261731659, "eval_loss": 0.5636318922042847, "eval_runtime": 121.385, "eval_samples_per_second": 16.625, "eval_steps_per_second": 4.16, "eval_wer": 0.9410307234886026, "step": 400 }, { "epoch": 0.7931262392597488, "grad_norm": 4.118492126464844, "learning_rate": 4.852134146341464e-05, "loss": 0.4289, "step": 600 }, { "epoch": 0.7931262392597488, "eval_loss": 0.40182533860206604, "eval_runtime": 122.3487, "eval_samples_per_second": 16.494, "eval_steps_per_second": 4.128, "eval_wer": 0.9028741328047571, "step": 600 }, { "epoch": 1.0575016523463319, "grad_norm": 1.0666348934173584, "learning_rate": 4.5472560975609756e-05, "loss": 0.3449, "step": 800 }, { "epoch": 1.0575016523463319, "eval_loss": 0.3604024052619934, "eval_runtime": 123.3539, "eval_samples_per_second": 16.359, "eval_steps_per_second": 4.094, "eval_wer": 0.8771060455896927, "step": 800 }, { "epoch": 1.3218770654329148, "grad_norm": 3.203847885131836, "learning_rate": 4.242378048780488e-05, "loss": 0.2954, "step": 1000 }, { "epoch": 1.3218770654329148, "eval_loss": 0.33885934948921204, "eval_runtime": 124.083, "eval_samples_per_second": 16.263, "eval_steps_per_second": 4.07, "eval_wer": 0.8741328047571854, "step": 1000 }, { "epoch": 1.5862524785194978, "grad_norm": 2.469949722290039, "learning_rate": 3.9375e-05, "loss": 0.2719, "step": 1200 }, { "epoch": 1.5862524785194978, "eval_loss": 0.29620760679244995, "eval_runtime": 124.1198, "eval_samples_per_second": 16.258, "eval_steps_per_second": 4.069, "eval_wer": 0.8439048562933598, "step": 1200 }, { "epoch": 1.8506278916060808, "grad_norm": 1.500631332397461, "learning_rate": 3.632621951219513e-05, "loss": 0.2472, "step": 1400 }, { "epoch": 1.8506278916060808, "eval_loss": 0.2701094448566437, "eval_runtime": 123.0962, "eval_samples_per_second": 16.394, "eval_steps_per_second": 4.102, "eval_wer": 0.8052527254707631, "step": 1400 }, { "epoch": 2.1150033046926637, "grad_norm": 0.6989238262176514, "learning_rate": 3.327743902439025e-05, "loss": 0.2093, "step": 1600 }, { "epoch": 2.1150033046926637, "eval_loss": 0.2598518133163452, "eval_runtime": 123.5685, "eval_samples_per_second": 16.331, "eval_steps_per_second": 4.087, "eval_wer": 0.8285431119920713, "step": 1600 }, { "epoch": 2.3793787177792467, "grad_norm": 0.9642230868339539, "learning_rate": 3.022865853658537e-05, "loss": 0.1725, "step": 1800 }, { "epoch": 2.3793787177792467, "eval_loss": 0.2533758580684662, "eval_runtime": 124.2174, "eval_samples_per_second": 16.246, "eval_steps_per_second": 4.065, "eval_wer": 0.8374628344895937, "step": 1800 }, { "epoch": 2.6437541308658297, "grad_norm": 0.6002829670906067, "learning_rate": 2.717987804878049e-05, "loss": 0.1675, "step": 2000 }, { "epoch": 2.6437541308658297, "eval_loss": 0.240593820810318, "eval_runtime": 125.6029, "eval_samples_per_second": 16.067, "eval_steps_per_second": 4.021, "eval_wer": 0.7690782953419227, "step": 2000 }, { "epoch": 2.9081295439524126, "grad_norm": 1.0664541721343994, "learning_rate": 2.413109756097561e-05, "loss": 0.1632, "step": 2200 }, { "epoch": 2.9081295439524126, "eval_loss": 0.2308683693408966, "eval_runtime": 124.4, "eval_samples_per_second": 16.222, "eval_steps_per_second": 4.059, "eval_wer": 0.7616451932606542, "step": 2200 }, { "epoch": 3.1725049570389956, "grad_norm": 0.6247605681419373, "learning_rate": 2.108231707317073e-05, "loss": 0.1295, "step": 2400 }, { "epoch": 3.1725049570389956, "eval_loss": 0.23871323466300964, "eval_runtime": 124.4212, "eval_samples_per_second": 16.219, "eval_steps_per_second": 4.059, "eval_wer": 0.7556987115956393, "step": 2400 }, { "epoch": 3.4368803701255786, "grad_norm": 0.5814207792282104, "learning_rate": 1.8033536585365853e-05, "loss": 0.1082, "step": 2600 }, { "epoch": 3.4368803701255786, "eval_loss": 0.2275388240814209, "eval_runtime": 124.2026, "eval_samples_per_second": 16.248, "eval_steps_per_second": 4.066, "eval_wer": 0.7329038652130823, "step": 2600 }, { "epoch": 3.7012557832121615, "grad_norm": 0.5891350507736206, "learning_rate": 1.4984756097560976e-05, "loss": 0.1059, "step": 2800 }, { "epoch": 3.7012557832121615, "eval_loss": 0.223988875746727, "eval_runtime": 124.3149, "eval_samples_per_second": 16.233, "eval_steps_per_second": 4.062, "eval_wer": 0.7329038652130823, "step": 2800 }, { "epoch": 3.965631196298744, "grad_norm": 1.1988484859466553, "learning_rate": 1.1935975609756097e-05, "loss": 0.1049, "step": 3000 }, { "epoch": 3.965631196298744, "eval_loss": 0.21721933782100677, "eval_runtime": 124.3819, "eval_samples_per_second": 16.224, "eval_steps_per_second": 4.06, "eval_wer": 0.7294350842418236, "step": 3000 }, { "epoch": 4.2300066093853275, "grad_norm": 0.32344383001327515, "learning_rate": 8.88719512195122e-06, "loss": 0.0657, "step": 3200 }, { "epoch": 4.2300066093853275, "eval_loss": 0.23203983902931213, "eval_runtime": 125.8598, "eval_samples_per_second": 16.034, "eval_steps_per_second": 4.012, "eval_wer": 0.722001982160555, "step": 3200 }, { "epoch": 4.49438202247191, "grad_norm": 0.8019347190856934, "learning_rate": 5.838414634146342e-06, "loss": 0.059, "step": 3400 }, { "epoch": 4.49438202247191, "eval_loss": 0.23412850499153137, "eval_runtime": 124.439, "eval_samples_per_second": 16.217, "eval_steps_per_second": 4.058, "eval_wer": 0.7215064420218038, "step": 3400 }, { "epoch": 4.758757435558493, "grad_norm": 0.9007149934768677, "learning_rate": 2.7896341463414635e-06, "loss": 0.0582, "step": 3600 }, { "epoch": 4.758757435558493, "eval_loss": 0.2315901815891266, "eval_runtime": 124.1565, "eval_samples_per_second": 16.254, "eval_steps_per_second": 4.067, "eval_wer": 0.711595639246779, "step": 3600 }, { "epoch": 4.9966953073364175, "step": 3780, "total_flos": 1.7289369341985591e+19, "train_loss": 0.4604275799302197, "train_runtime": 6372.0716, "train_samples_per_second": 4.746, "train_steps_per_second": 0.593 } ], "logging_steps": 200, "max_steps": 3780, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.7289369341985591e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }