|
{ |
|
"best_metric": 0.21721933782100677, |
|
"best_model_checkpoint": "./xls-r-1b-bem-genbed-all/checkpoint-3000", |
|
"epoch": 4.9966953073364175, |
|
"eval_steps": 200, |
|
"global_step": 3780, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.26437541308658297, |
|
"grad_norm": 3.2960169315338135, |
|
"learning_rate": 1.97e-05, |
|
"loss": 4.6827, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.26437541308658297, |
|
"eval_loss": 2.834711790084839, |
|
"eval_runtime": 123.0061, |
|
"eval_samples_per_second": 16.406, |
|
"eval_steps_per_second": 4.105, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5287508261731659, |
|
"grad_norm": 5.020152568817139, |
|
"learning_rate": 3.97e-05, |
|
"loss": 1.0401, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.5287508261731659, |
|
"eval_loss": 0.5636318922042847, |
|
"eval_runtime": 121.385, |
|
"eval_samples_per_second": 16.625, |
|
"eval_steps_per_second": 4.16, |
|
"eval_wer": 0.9410307234886026, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7931262392597488, |
|
"grad_norm": 4.118492126464844, |
|
"learning_rate": 4.852134146341464e-05, |
|
"loss": 0.4289, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.7931262392597488, |
|
"eval_loss": 0.40182533860206604, |
|
"eval_runtime": 122.3487, |
|
"eval_samples_per_second": 16.494, |
|
"eval_steps_per_second": 4.128, |
|
"eval_wer": 0.9028741328047571, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.0575016523463319, |
|
"grad_norm": 1.0666348934173584, |
|
"learning_rate": 4.5472560975609756e-05, |
|
"loss": 0.3449, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.0575016523463319, |
|
"eval_loss": 0.3604024052619934, |
|
"eval_runtime": 123.3539, |
|
"eval_samples_per_second": 16.359, |
|
"eval_steps_per_second": 4.094, |
|
"eval_wer": 0.8771060455896927, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.3218770654329148, |
|
"grad_norm": 3.203847885131836, |
|
"learning_rate": 4.242378048780488e-05, |
|
"loss": 0.2954, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.3218770654329148, |
|
"eval_loss": 0.33885934948921204, |
|
"eval_runtime": 124.083, |
|
"eval_samples_per_second": 16.263, |
|
"eval_steps_per_second": 4.07, |
|
"eval_wer": 0.8741328047571854, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.5862524785194978, |
|
"grad_norm": 2.469949722290039, |
|
"learning_rate": 3.9375e-05, |
|
"loss": 0.2719, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.5862524785194978, |
|
"eval_loss": 0.29620760679244995, |
|
"eval_runtime": 124.1198, |
|
"eval_samples_per_second": 16.258, |
|
"eval_steps_per_second": 4.069, |
|
"eval_wer": 0.8439048562933598, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.8506278916060808, |
|
"grad_norm": 1.500631332397461, |
|
"learning_rate": 3.632621951219513e-05, |
|
"loss": 0.2472, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.8506278916060808, |
|
"eval_loss": 0.2701094448566437, |
|
"eval_runtime": 123.0962, |
|
"eval_samples_per_second": 16.394, |
|
"eval_steps_per_second": 4.102, |
|
"eval_wer": 0.8052527254707631, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.1150033046926637, |
|
"grad_norm": 0.6989238262176514, |
|
"learning_rate": 3.327743902439025e-05, |
|
"loss": 0.2093, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.1150033046926637, |
|
"eval_loss": 0.2598518133163452, |
|
"eval_runtime": 123.5685, |
|
"eval_samples_per_second": 16.331, |
|
"eval_steps_per_second": 4.087, |
|
"eval_wer": 0.8285431119920713, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.3793787177792467, |
|
"grad_norm": 0.9642230868339539, |
|
"learning_rate": 3.022865853658537e-05, |
|
"loss": 0.1725, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.3793787177792467, |
|
"eval_loss": 0.2533758580684662, |
|
"eval_runtime": 124.2174, |
|
"eval_samples_per_second": 16.246, |
|
"eval_steps_per_second": 4.065, |
|
"eval_wer": 0.8374628344895937, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.6437541308658297, |
|
"grad_norm": 0.6002829670906067, |
|
"learning_rate": 2.717987804878049e-05, |
|
"loss": 0.1675, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.6437541308658297, |
|
"eval_loss": 0.240593820810318, |
|
"eval_runtime": 125.6029, |
|
"eval_samples_per_second": 16.067, |
|
"eval_steps_per_second": 4.021, |
|
"eval_wer": 0.7690782953419227, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.9081295439524126, |
|
"grad_norm": 1.0664541721343994, |
|
"learning_rate": 2.413109756097561e-05, |
|
"loss": 0.1632, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.9081295439524126, |
|
"eval_loss": 0.2308683693408966, |
|
"eval_runtime": 124.4, |
|
"eval_samples_per_second": 16.222, |
|
"eval_steps_per_second": 4.059, |
|
"eval_wer": 0.7616451932606542, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.1725049570389956, |
|
"grad_norm": 0.6247605681419373, |
|
"learning_rate": 2.108231707317073e-05, |
|
"loss": 0.1295, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.1725049570389956, |
|
"eval_loss": 0.23871323466300964, |
|
"eval_runtime": 124.4212, |
|
"eval_samples_per_second": 16.219, |
|
"eval_steps_per_second": 4.059, |
|
"eval_wer": 0.7556987115956393, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.4368803701255786, |
|
"grad_norm": 0.5814207792282104, |
|
"learning_rate": 1.8033536585365853e-05, |
|
"loss": 0.1082, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 3.4368803701255786, |
|
"eval_loss": 0.2275388240814209, |
|
"eval_runtime": 124.2026, |
|
"eval_samples_per_second": 16.248, |
|
"eval_steps_per_second": 4.066, |
|
"eval_wer": 0.7329038652130823, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 3.7012557832121615, |
|
"grad_norm": 0.5891350507736206, |
|
"learning_rate": 1.4984756097560976e-05, |
|
"loss": 0.1059, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 3.7012557832121615, |
|
"eval_loss": 0.223988875746727, |
|
"eval_runtime": 124.3149, |
|
"eval_samples_per_second": 16.233, |
|
"eval_steps_per_second": 4.062, |
|
"eval_wer": 0.7329038652130823, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 3.965631196298744, |
|
"grad_norm": 1.1988484859466553, |
|
"learning_rate": 1.1935975609756097e-05, |
|
"loss": 0.1049, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.965631196298744, |
|
"eval_loss": 0.21721933782100677, |
|
"eval_runtime": 124.3819, |
|
"eval_samples_per_second": 16.224, |
|
"eval_steps_per_second": 4.06, |
|
"eval_wer": 0.7294350842418236, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.2300066093853275, |
|
"grad_norm": 0.32344383001327515, |
|
"learning_rate": 8.88719512195122e-06, |
|
"loss": 0.0657, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 4.2300066093853275, |
|
"eval_loss": 0.23203983902931213, |
|
"eval_runtime": 125.8598, |
|
"eval_samples_per_second": 16.034, |
|
"eval_steps_per_second": 4.012, |
|
"eval_wer": 0.722001982160555, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 4.49438202247191, |
|
"grad_norm": 0.8019347190856934, |
|
"learning_rate": 5.838414634146342e-06, |
|
"loss": 0.059, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 4.49438202247191, |
|
"eval_loss": 0.23412850499153137, |
|
"eval_runtime": 124.439, |
|
"eval_samples_per_second": 16.217, |
|
"eval_steps_per_second": 4.058, |
|
"eval_wer": 0.7215064420218038, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 4.758757435558493, |
|
"grad_norm": 0.9007149934768677, |
|
"learning_rate": 2.7896341463414635e-06, |
|
"loss": 0.0582, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 4.758757435558493, |
|
"eval_loss": 0.2315901815891266, |
|
"eval_runtime": 124.1565, |
|
"eval_samples_per_second": 16.254, |
|
"eval_steps_per_second": 4.067, |
|
"eval_wer": 0.711595639246779, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 4.9966953073364175, |
|
"step": 3780, |
|
"total_flos": 1.7289369341985591e+19, |
|
"train_loss": 0.4604275799302197, |
|
"train_runtime": 6372.0716, |
|
"train_samples_per_second": 4.746, |
|
"train_steps_per_second": 0.593 |
|
} |
|
], |
|
"logging_steps": 200, |
|
"max_steps": 3780, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.7289369341985591e+19, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|