wav2vec2-xlsr-53-ft-btb-cy / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.536067892503536,
"eval_steps": 100,
"global_step": 2500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.14144271570014144,
"eval_loss": 3.7463672161102295,
"eval_runtime": 153.5435,
"eval_samples_per_second": 36.836,
"eval_steps_per_second": 4.605,
"eval_wer": 1.0,
"step": 100
},
{
"epoch": 0.2828854314002829,
"eval_loss": 2.9399216175079346,
"eval_runtime": 150.7703,
"eval_samples_per_second": 37.514,
"eval_steps_per_second": 4.689,
"eval_wer": 1.0,
"step": 200
},
{
"epoch": 0.4243281471004243,
"eval_loss": 2.5961458683013916,
"eval_runtime": 151.4236,
"eval_samples_per_second": 37.352,
"eval_steps_per_second": 4.669,
"eval_wer": 0.9991041576682503,
"step": 300
},
{
"epoch": 0.5657708628005658,
"eval_loss": 1.1618728637695312,
"eval_runtime": 152.5862,
"eval_samples_per_second": 37.068,
"eval_steps_per_second": 4.633,
"eval_wer": 0.7905328662155461,
"step": 400
},
{
"epoch": 0.7072135785007072,
"grad_norm": 1.7047498226165771,
"learning_rate": 0.00029699999999999996,
"loss": 3.5448,
"step": 500
},
{
"epoch": 0.7072135785007072,
"eval_loss": 0.946560800075531,
"eval_runtime": 153.1427,
"eval_samples_per_second": 36.933,
"eval_steps_per_second": 4.617,
"eval_wer": 0.6897506038937147,
"step": 500
},
{
"epoch": 0.8486562942008486,
"eval_loss": 0.7894724607467651,
"eval_runtime": 151.7162,
"eval_samples_per_second": 37.28,
"eval_steps_per_second": 4.66,
"eval_wer": 0.6110604533602086,
"step": 600
},
{
"epoch": 0.9900990099009901,
"eval_loss": 0.6820164918899536,
"eval_runtime": 152.0086,
"eval_samples_per_second": 37.208,
"eval_steps_per_second": 4.651,
"eval_wer": 0.5378893314776599,
"step": 700
},
{
"epoch": 1.1315417256011315,
"eval_loss": 0.6039016842842102,
"eval_runtime": 152.002,
"eval_samples_per_second": 37.21,
"eval_steps_per_second": 4.651,
"eval_wer": 0.47239685815296506,
"step": 800
},
{
"epoch": 1.272984441301273,
"eval_loss": 0.5631398558616638,
"eval_runtime": 153.0321,
"eval_samples_per_second": 36.96,
"eval_steps_per_second": 4.62,
"eval_wer": 0.46745372814384667,
"step": 900
},
{
"epoch": 1.4144271570014144,
"grad_norm": 0.8790757656097412,
"learning_rate": 0.00022574999999999996,
"loss": 0.7808,
"step": 1000
},
{
"epoch": 1.4144271570014144,
"eval_loss": 0.5279428958892822,
"eval_runtime": 152.9768,
"eval_samples_per_second": 36.973,
"eval_steps_per_second": 4.622,
"eval_wer": 0.4291084769080642,
"step": 1000
},
{
"epoch": 1.5558698727015559,
"eval_loss": 0.5024306178092957,
"eval_runtime": 152.8927,
"eval_samples_per_second": 36.993,
"eval_steps_per_second": 4.624,
"eval_wer": 0.39940170529986724,
"step": 1100
},
{
"epoch": 1.6973125884016973,
"eval_loss": 0.4894837439060211,
"eval_runtime": 153.3103,
"eval_samples_per_second": 36.892,
"eval_steps_per_second": 4.612,
"eval_wer": 0.3894514565436483,
"step": 1200
},
{
"epoch": 1.8387553041018387,
"eval_loss": 0.4595918357372284,
"eval_runtime": 153.0651,
"eval_samples_per_second": 36.952,
"eval_steps_per_second": 4.619,
"eval_wer": 0.3695829534002016,
"step": 1300
},
{
"epoch": 1.9801980198019802,
"eval_loss": 0.44729524850845337,
"eval_runtime": 154.1998,
"eval_samples_per_second": 36.68,
"eval_steps_per_second": 4.585,
"eval_wer": 0.3610884484330758,
"step": 1400
},
{
"epoch": 2.1216407355021216,
"grad_norm": 0.8205087184906006,
"learning_rate": 0.0001512,
"loss": 0.6005,
"step": 1500
},
{
"epoch": 2.1216407355021216,
"eval_loss": 0.43324384093284607,
"eval_runtime": 150.9169,
"eval_samples_per_second": 37.478,
"eval_steps_per_second": 4.685,
"eval_wer": 0.3474268528738942,
"step": 1500
},
{
"epoch": 2.263083451202263,
"eval_loss": 0.4268616735935211,
"eval_runtime": 152.3911,
"eval_samples_per_second": 37.115,
"eval_steps_per_second": 4.639,
"eval_wer": 0.3418118411159636,
"step": 1600
},
{
"epoch": 2.4045261669024045,
"eval_loss": 0.4155045449733734,
"eval_runtime": 153.4832,
"eval_samples_per_second": 36.851,
"eval_steps_per_second": 4.606,
"eval_wer": 0.33606885188206875,
"step": 1700
},
{
"epoch": 2.545968882602546,
"eval_loss": 0.4121190905570984,
"eval_runtime": 153.7529,
"eval_samples_per_second": 36.786,
"eval_steps_per_second": 4.598,
"eval_wer": 0.32143142806865993,
"step": 1800
},
{
"epoch": 2.6874115983026874,
"eval_loss": 0.4145391285419464,
"eval_runtime": 159.3319,
"eval_samples_per_second": 35.498,
"eval_steps_per_second": 4.437,
"eval_wer": 0.3366447505239078,
"step": 1900
},
{
"epoch": 2.828854314002829,
"grad_norm": 0.8615767359733582,
"learning_rate": 7.664999999999999e-05,
"loss": 0.4666,
"step": 2000
},
{
"epoch": 2.828854314002829,
"eval_loss": 0.39387884736061096,
"eval_runtime": 153.1343,
"eval_samples_per_second": 36.935,
"eval_steps_per_second": 4.617,
"eval_wer": 0.3114171905744589,
"step": 2000
},
{
"epoch": 2.9702970297029703,
"eval_loss": 0.38894009590148926,
"eval_runtime": 152.8789,
"eval_samples_per_second": 36.997,
"eval_steps_per_second": 4.625,
"eval_wer": 0.30807377901489336,
"step": 2100
},
{
"epoch": 3.1117397454031117,
"eval_loss": 0.3909347653388977,
"eval_runtime": 154.4919,
"eval_samples_per_second": 36.61,
"eval_steps_per_second": 4.576,
"eval_wer": 0.30644206619634945,
"step": 2200
},
{
"epoch": 3.253182461103253,
"eval_loss": 0.3874327838420868,
"eval_runtime": 153.3025,
"eval_samples_per_second": 36.894,
"eval_steps_per_second": 4.612,
"eval_wer": 0.3015469277407176,
"step": 2300
},
{
"epoch": 3.3946251768033946,
"eval_loss": 0.386868953704834,
"eval_runtime": 153.0443,
"eval_samples_per_second": 36.957,
"eval_steps_per_second": 4.62,
"eval_wer": 0.29833149365711636,
"step": 2400
},
{
"epoch": 3.536067892503536,
"grad_norm": 0.6678842902183533,
"learning_rate": 1.9499999999999995e-06,
"loss": 0.3805,
"step": 2500
},
{
"epoch": 3.536067892503536,
"eval_loss": 0.3846580684185028,
"eval_runtime": 154.6331,
"eval_samples_per_second": 36.577,
"eval_steps_per_second": 4.572,
"eval_wer": 0.29666778646958136,
"step": 2500
},
{
"epoch": 3.536067892503536,
"step": 2500,
"total_flos": 9.55129266706546e+18,
"train_loss": 1.1546670959472656,
"train_runtime": 6557.446,
"train_samples_per_second": 12.2,
"train_steps_per_second": 0.381
}
],
"logging_steps": 500,
"max_steps": 2500,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 400,
"total_flos": 9.55129266706546e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}
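
For reference, a minimal sketch of how the evaluation history above can be read back out of this file. It assumes a local copy saved as trainer_state.json and uses only fields that appear in log_history (step, eval_loss, eval_wer); it is an illustrative snippet, not part of the training run itself.

```python
import json

# Load the trainer state shown above (path assumed to be a local copy).
with open("trainer_state.json", "r", encoding="utf-8") as f:
    state = json.load(f)

# Keep only the evaluation entries; training-loss entries lack "eval_wer".
eval_rows = [
    (entry["step"], entry["eval_loss"], entry["eval_wer"])
    for entry in state["log_history"]
    if "eval_wer" in entry
]

# Print the WER trajectory and the lowest evaluated WER.
for step, loss, wer in eval_rows:
    print(f"step {step:5d}  eval_loss {loss:.4f}  eval_wer {wer:.4f}")

best_step, _, best_wer = min(eval_rows, key=lambda row: row[2])
print(f"best eval_wer {best_wer:.4f} at step {best_step}")
```

On the log above this would report the lowest eval_wer of roughly 0.2967 at step 2500, i.e. the final evaluation entry.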