wav2vec2-xlsr-53-ft-btb-cy / trainer_state.json
DewiBrynJones's picture
End of training
2d19eb1 verified
raw
history blame
7.94 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.536067892503536,
"eval_steps": 100,
"global_step": 2500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.14144271570014144,
"eval_loss": 4.031618595123291,
"eval_runtime": 161.4756,
"eval_samples_per_second": 35.027,
"eval_steps_per_second": 4.378,
"eval_wer": 1.0,
"step": 100
},
{
"epoch": 0.2828854314002829,
"eval_loss": 3.0548782348632812,
"eval_runtime": 158.3539,
"eval_samples_per_second": 35.717,
"eval_steps_per_second": 4.465,
"eval_wer": 1.0,
"step": 200
},
{
"epoch": 0.4243281471004243,
"eval_loss": 2.5533947944641113,
"eval_runtime": 158.9465,
"eval_samples_per_second": 35.584,
"eval_steps_per_second": 4.448,
"eval_wer": 0.9862296976311228,
"step": 300
},
{
"epoch": 0.5657708628005658,
"eval_loss": 1.4279608726501465,
"eval_runtime": 159.2587,
"eval_samples_per_second": 35.515,
"eval_steps_per_second": 4.439,
"eval_wer": 0.8846536560313283,
"step": 400
},
{
"epoch": 0.7072135785007072,
"grad_norm": 1.9099421501159668,
"learning_rate": 0.00029699999999999996,
"loss": 3.6818,
"step": 500
},
{
"epoch": 0.7072135785007072,
"eval_loss": 1.1378353834152222,
"eval_runtime": 160.4319,
"eval_samples_per_second": 35.255,
"eval_steps_per_second": 4.407,
"eval_wer": 0.7958368106824164,
"step": 500
},
{
"epoch": 0.8486562942008486,
"eval_loss": 0.9263126254081726,
"eval_runtime": 159.8222,
"eval_samples_per_second": 35.389,
"eval_steps_per_second": 4.424,
"eval_wer": 0.6777299865185851,
"step": 600
},
{
"epoch": 0.9900990099009901,
"eval_loss": 0.8501134514808655,
"eval_runtime": 160.338,
"eval_samples_per_second": 35.275,
"eval_steps_per_second": 4.409,
"eval_wer": 0.6388104256275278,
"step": 700
},
{
"epoch": 1.1315417256011315,
"eval_loss": 0.6984566450119019,
"eval_runtime": 159.8325,
"eval_samples_per_second": 35.387,
"eval_steps_per_second": 4.423,
"eval_wer": 0.5563972523592476,
"step": 800
},
{
"epoch": 1.272984441301273,
"eval_loss": 0.6664860248565674,
"eval_runtime": 162.9316,
"eval_samples_per_second": 34.714,
"eval_steps_per_second": 4.339,
"eval_wer": 0.5401393079540348,
"step": 900
},
{
"epoch": 1.4144271570014144,
"grad_norm": 0.8423302173614502,
"learning_rate": 0.00022574999999999996,
"loss": 0.895,
"step": 1000
},
{
"epoch": 1.4144271570014144,
"eval_loss": 0.6228350400924683,
"eval_runtime": 161.5875,
"eval_samples_per_second": 35.003,
"eval_steps_per_second": 4.375,
"eval_wer": 0.5000802465173011,
"step": 1000
},
{
"epoch": 1.5558698727015559,
"eval_loss": 0.5974757075309753,
"eval_runtime": 160.961,
"eval_samples_per_second": 35.139,
"eval_steps_per_second": 4.392,
"eval_wer": 0.48359761186364514,
"step": 1100
},
{
"epoch": 1.6973125884016973,
"eval_loss": 0.5826218128204346,
"eval_runtime": 162.2219,
"eval_samples_per_second": 34.866,
"eval_steps_per_second": 4.358,
"eval_wer": 0.4677248507414778,
"step": 1200
},
{
"epoch": 1.8387553041018387,
"eval_loss": 0.5473496317863464,
"eval_runtime": 164.7294,
"eval_samples_per_second": 34.335,
"eval_steps_per_second": 4.292,
"eval_wer": 0.4512903639982025,
"step": 1300
},
{
"epoch": 1.9801980198019802,
"eval_loss": 0.5284178256988525,
"eval_runtime": 168.0547,
"eval_samples_per_second": 33.656,
"eval_steps_per_second": 4.207,
"eval_wer": 0.4376966039673878,
"step": 1400
},
{
"epoch": 2.1216407355021216,
"grad_norm": 1.0099021196365356,
"learning_rate": 0.0001512,
"loss": 0.687,
"step": 1500
},
{
"epoch": 2.1216407355021216,
"eval_loss": 0.5137470960617065,
"eval_runtime": 160.3266,
"eval_samples_per_second": 35.278,
"eval_steps_per_second": 4.41,
"eval_wer": 0.42363741413622646,
"step": 1500
},
{
"epoch": 2.263083451202263,
"eval_loss": 0.5110819339752197,
"eval_runtime": 161.254,
"eval_samples_per_second": 35.075,
"eval_steps_per_second": 4.384,
"eval_wer": 0.4103004429607755,
"step": 1600
},
{
"epoch": 2.4045261669024045,
"eval_loss": 0.49603915214538574,
"eval_runtime": 160.5657,
"eval_samples_per_second": 35.225,
"eval_steps_per_second": 4.403,
"eval_wer": 0.4084226744559286,
"step": 1700
},
{
"epoch": 2.545968882602546,
"eval_loss": 0.4876905679702759,
"eval_runtime": 161.8676,
"eval_samples_per_second": 34.942,
"eval_steps_per_second": 4.368,
"eval_wer": 0.40148937536110935,
"step": 1800
},
{
"epoch": 2.6874115983026874,
"eval_loss": 0.47859108448028564,
"eval_runtime": 161.5548,
"eval_samples_per_second": 35.01,
"eval_steps_per_second": 4.376,
"eval_wer": 0.39929062078705785,
"step": 1900
},
{
"epoch": 2.828854314002829,
"grad_norm": 0.5755736231803894,
"learning_rate": 7.664999999999999e-05,
"loss": 0.5319,
"step": 2000
},
{
"epoch": 2.828854314002829,
"eval_loss": 0.47312092781066895,
"eval_runtime": 162.5051,
"eval_samples_per_second": 34.805,
"eval_steps_per_second": 4.351,
"eval_wer": 0.39299929383064774,
"step": 2000
},
{
"epoch": 2.9702970297029703,
"eval_loss": 0.4668172001838684,
"eval_runtime": 162.21,
"eval_samples_per_second": 34.868,
"eval_steps_per_second": 4.359,
"eval_wer": 0.3877190729922321,
"step": 2100
},
{
"epoch": 3.1117397454031117,
"eval_loss": 0.46727854013442993,
"eval_runtime": 161.4684,
"eval_samples_per_second": 35.029,
"eval_steps_per_second": 4.379,
"eval_wer": 0.3849585927970726,
"step": 2200
},
{
"epoch": 3.253182461103253,
"eval_loss": 0.46300554275512695,
"eval_runtime": 161.1936,
"eval_samples_per_second": 35.088,
"eval_steps_per_second": 4.386,
"eval_wer": 0.3804326892212878,
"step": 2300
},
{
"epoch": 3.3946251768033946,
"eval_loss": 0.4593857228755951,
"eval_runtime": 161.5089,
"eval_samples_per_second": 35.02,
"eval_steps_per_second": 4.377,
"eval_wer": 0.3768697438531168,
"step": 2400
},
{
"epoch": 3.536067892503536,
"grad_norm": 0.887208104133606,
"learning_rate": 1.9499999999999995e-06,
"loss": 0.4355,
"step": 2500
},
{
"epoch": 3.536067892503536,
"eval_loss": 0.4583967626094818,
"eval_runtime": 162.1219,
"eval_samples_per_second": 34.887,
"eval_steps_per_second": 4.361,
"eval_wer": 0.37831418116453747,
"step": 2500
},
{
"epoch": 3.536067892503536,
"step": 2500,
"total_flos": 9.55169606524761e+18,
"train_loss": 1.2462444946289062,
"train_runtime": 6962.0027,
"train_samples_per_second": 11.491,
"train_steps_per_second": 0.359
}
],
"logging_steps": 500,
"max_steps": 2500,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 400,
"total_flos": 9.55169606524761e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}