whisper-medium-NST-uf-linlr / trainer_state.json
pere's picture
End of training
a6f56f6
raw
history blame contribute delete
No virus
15.1 kB
{
"best_metric": 6.499429874572406,
"best_model_checkpoint": "../whisper-medium-NST-uf-linlr/checkpoint-13000",
"epoch": 2.1946,
"global_step": 20000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 2.47e-06,
"loss": 1.7524,
"step": 250
},
{
"epoch": 0.03,
"learning_rate": 4.970000000000001e-06,
"loss": 0.3854,
"step": 500
},
{
"epoch": 0.04,
"learning_rate": 7.4700000000000005e-06,
"loss": 0.2369,
"step": 750
},
{
"epoch": 0.05,
"learning_rate": 9.970000000000001e-06,
"loss": 0.2046,
"step": 1000
},
{
"epoch": 0.05,
"eval_loss": 0.34258419275283813,
"eval_runtime": 49.7358,
"eval_samples_per_second": 2.011,
"eval_steps_per_second": 0.141,
"eval_wer": 15.279361459521096,
"step": 1000
},
{
"epoch": 0.06,
"learning_rate": 9.87e-06,
"loss": 0.1852,
"step": 1250
},
{
"epoch": 0.07,
"learning_rate": 9.738421052631579e-06,
"loss": 0.167,
"step": 1500
},
{
"epoch": 0.09,
"learning_rate": 9.606842105263159e-06,
"loss": 0.1547,
"step": 1750
},
{
"epoch": 0.1,
"learning_rate": 9.475263157894737e-06,
"loss": 0.148,
"step": 2000
},
{
"epoch": 0.1,
"eval_loss": 0.3284447491168976,
"eval_runtime": 52.426,
"eval_samples_per_second": 1.907,
"eval_steps_per_second": 0.134,
"eval_wer": 10.832383124287343,
"step": 2000
},
{
"epoch": 0.11,
"learning_rate": 9.343684210526317e-06,
"loss": 0.1401,
"step": 2250
},
{
"epoch": 0.12,
"learning_rate": 9.212105263157895e-06,
"loss": 0.1298,
"step": 2500
},
{
"epoch": 0.14,
"learning_rate": 9.080526315789475e-06,
"loss": 0.1252,
"step": 2750
},
{
"epoch": 0.15,
"learning_rate": 8.948947368421053e-06,
"loss": 0.121,
"step": 3000
},
{
"epoch": 0.15,
"eval_loss": 0.30924132466316223,
"eval_runtime": 57.6047,
"eval_samples_per_second": 1.736,
"eval_steps_per_second": 0.122,
"eval_wer": 12.884834663625996,
"step": 3000
},
{
"epoch": 0.16,
"learning_rate": 8.817368421052631e-06,
"loss": 0.1169,
"step": 3250
},
{
"epoch": 0.17,
"learning_rate": 8.685789473684211e-06,
"loss": 0.1143,
"step": 3500
},
{
"epoch": 0.19,
"learning_rate": 8.55421052631579e-06,
"loss": 0.1128,
"step": 3750
},
{
"epoch": 0.2,
"learning_rate": 8.42263157894737e-06,
"loss": 0.1089,
"step": 4000
},
{
"epoch": 0.2,
"eval_loss": 0.2808048129081726,
"eval_runtime": 46.0082,
"eval_samples_per_second": 2.174,
"eval_steps_per_second": 0.152,
"eval_wer": 10.490307867730902,
"step": 4000
},
{
"epoch": 0.21,
"learning_rate": 8.291052631578948e-06,
"loss": 0.1041,
"step": 4250
},
{
"epoch": 0.23,
"learning_rate": 8.159473684210528e-06,
"loss": 0.1073,
"step": 4500
},
{
"epoch": 0.24,
"learning_rate": 8.027894736842106e-06,
"loss": 0.0996,
"step": 4750
},
{
"epoch": 0.25,
"learning_rate": 7.896315789473686e-06,
"loss": 0.0976,
"step": 5000
},
{
"epoch": 0.25,
"eval_loss": 0.26165536046028137,
"eval_runtime": 47.1016,
"eval_samples_per_second": 2.123,
"eval_steps_per_second": 0.149,
"eval_wer": 9.92018244013683,
"step": 5000
},
{
"epoch": 0.26,
"learning_rate": 7.764736842105264e-06,
"loss": 0.099,
"step": 5250
},
{
"epoch": 0.28,
"learning_rate": 7.633157894736842e-06,
"loss": 0.0925,
"step": 5500
},
{
"epoch": 0.29,
"learning_rate": 7.501578947368422e-06,
"loss": 0.0918,
"step": 5750
},
{
"epoch": 0.3,
"learning_rate": 7.370000000000001e-06,
"loss": 0.0901,
"step": 6000
},
{
"epoch": 0.3,
"eval_loss": 0.26038578152656555,
"eval_runtime": 72.3813,
"eval_samples_per_second": 1.382,
"eval_steps_per_second": 0.097,
"eval_wer": 21.892816419612316,
"step": 6000
},
{
"epoch": 0.31,
"learning_rate": 7.238421052631579e-06,
"loss": 0.0881,
"step": 6250
},
{
"epoch": 0.33,
"learning_rate": 7.106842105263159e-06,
"loss": 0.0908,
"step": 6500
},
{
"epoch": 0.34,
"learning_rate": 6.975263157894737e-06,
"loss": 0.0887,
"step": 6750
},
{
"epoch": 0.35,
"learning_rate": 6.843684210526317e-06,
"loss": 0.0834,
"step": 7000
},
{
"epoch": 0.35,
"eval_loss": 0.2877318859100342,
"eval_runtime": 47.1887,
"eval_samples_per_second": 2.119,
"eval_steps_per_second": 0.148,
"eval_wer": 9.35005701254276,
"step": 7000
},
{
"epoch": 0.36,
"learning_rate": 6.712105263157895e-06,
"loss": 0.0828,
"step": 7250
},
{
"epoch": 0.38,
"learning_rate": 6.580526315789474e-06,
"loss": 0.0825,
"step": 7500
},
{
"epoch": 0.39,
"learning_rate": 6.448947368421053e-06,
"loss": 0.085,
"step": 7750
},
{
"epoch": 0.4,
"learning_rate": 6.317368421052632e-06,
"loss": 0.0825,
"step": 8000
},
{
"epoch": 0.4,
"eval_loss": 0.2793583571910858,
"eval_runtime": 46.2565,
"eval_samples_per_second": 2.162,
"eval_steps_per_second": 0.151,
"eval_wer": 9.35005701254276,
"step": 8000
},
{
"epoch": 1.01,
"learning_rate": 6.185789473684211e-06,
"loss": 0.0783,
"step": 8250
},
{
"epoch": 1.02,
"learning_rate": 6.05421052631579e-06,
"loss": 0.0708,
"step": 8500
},
{
"epoch": 1.03,
"learning_rate": 5.922631578947369e-06,
"loss": 0.0605,
"step": 8750
},
{
"epoch": 1.05,
"learning_rate": 5.791052631578948e-06,
"loss": 0.0553,
"step": 9000
},
{
"epoch": 1.05,
"eval_loss": 0.2844734191894531,
"eval_runtime": 54.3571,
"eval_samples_per_second": 1.84,
"eval_steps_per_second": 0.129,
"eval_wer": 9.578107183580387,
"step": 9000
},
{
"epoch": 1.06,
"learning_rate": 5.659473684210527e-06,
"loss": 0.0505,
"step": 9250
},
{
"epoch": 1.07,
"learning_rate": 5.527894736842105e-06,
"loss": 0.0475,
"step": 9500
},
{
"epoch": 1.08,
"learning_rate": 5.396842105263158e-06,
"loss": 0.0465,
"step": 9750
},
{
"epoch": 1.1,
"learning_rate": 5.265263157894738e-06,
"loss": 0.0472,
"step": 10000
},
{
"epoch": 1.1,
"eval_loss": 0.2814468741416931,
"eval_runtime": 74.4128,
"eval_samples_per_second": 1.344,
"eval_steps_per_second": 0.094,
"eval_wer": 24.173318129988598,
"step": 10000
},
{
"epoch": 1.11,
"learning_rate": 5.133684210526316e-06,
"loss": 0.0471,
"step": 10250
},
{
"epoch": 1.12,
"learning_rate": 5.002105263157895e-06,
"loss": 0.0431,
"step": 10500
},
{
"epoch": 1.13,
"learning_rate": 4.870526315789474e-06,
"loss": 0.0396,
"step": 10750
},
{
"epoch": 1.15,
"learning_rate": 4.738947368421053e-06,
"loss": 0.0409,
"step": 11000
},
{
"epoch": 1.15,
"eval_loss": 0.30840516090393066,
"eval_runtime": 47.429,
"eval_samples_per_second": 2.108,
"eval_steps_per_second": 0.148,
"eval_wer": 8.095781071835804,
"step": 11000
},
{
"epoch": 1.16,
"learning_rate": 4.607368421052632e-06,
"loss": 0.0401,
"step": 11250
},
{
"epoch": 1.17,
"learning_rate": 4.475789473684211e-06,
"loss": 0.041,
"step": 11500
},
{
"epoch": 1.18,
"learning_rate": 4.344210526315789e-06,
"loss": 0.0394,
"step": 11750
},
{
"epoch": 1.2,
"learning_rate": 4.212631578947368e-06,
"loss": 0.041,
"step": 12000
},
{
"epoch": 1.2,
"eval_loss": 0.2865241765975952,
"eval_runtime": 57.8167,
"eval_samples_per_second": 1.73,
"eval_steps_per_second": 0.121,
"eval_wer": 9.236031927023944,
"step": 12000
},
{
"epoch": 1.21,
"learning_rate": 4.0810526315789474e-06,
"loss": 0.0383,
"step": 12250
},
{
"epoch": 1.22,
"learning_rate": 3.9494736842105265e-06,
"loss": 0.0404,
"step": 12500
},
{
"epoch": 1.23,
"learning_rate": 3.817894736842106e-06,
"loss": 0.0374,
"step": 12750
},
{
"epoch": 1.25,
"learning_rate": 3.6863157894736847e-06,
"loss": 0.0353,
"step": 13000
},
{
"epoch": 1.25,
"eval_loss": 0.2827776074409485,
"eval_runtime": 49.2884,
"eval_samples_per_second": 2.029,
"eval_steps_per_second": 0.142,
"eval_wer": 6.499429874572406,
"step": 13000
},
{
"epoch": 1.26,
"learning_rate": 3.554736842105264e-06,
"loss": 0.0387,
"step": 13250
},
{
"epoch": 1.27,
"learning_rate": 3.423157894736842e-06,
"loss": 0.0353,
"step": 13500
},
{
"epoch": 1.28,
"learning_rate": 3.292105263157895e-06,
"loss": 0.0353,
"step": 13750
},
{
"epoch": 1.3,
"learning_rate": 3.160526315789474e-06,
"loss": 0.0348,
"step": 14000
},
{
"epoch": 1.3,
"eval_loss": 0.27084633708000183,
"eval_runtime": 56.2306,
"eval_samples_per_second": 1.778,
"eval_steps_per_second": 0.124,
"eval_wer": 7.525655644241732,
"step": 14000
},
{
"epoch": 1.31,
"learning_rate": 3.028947368421053e-06,
"loss": 0.0367,
"step": 14250
},
{
"epoch": 1.32,
"learning_rate": 2.897368421052632e-06,
"loss": 0.0369,
"step": 14500
},
{
"epoch": 1.33,
"learning_rate": 2.7657894736842104e-06,
"loss": 0.0354,
"step": 14750
},
{
"epoch": 1.35,
"learning_rate": 2.6342105263157895e-06,
"loss": 0.0349,
"step": 15000
},
{
"epoch": 1.35,
"eval_loss": 0.28421100974082947,
"eval_runtime": 69.8567,
"eval_samples_per_second": 1.432,
"eval_steps_per_second": 0.1,
"eval_wer": 23.033067274800455,
"step": 15000
},
{
"epoch": 1.36,
"learning_rate": 2.5026315789473686e-06,
"loss": 0.033,
"step": 15250
},
{
"epoch": 1.37,
"learning_rate": 2.3710526315789477e-06,
"loss": 0.0351,
"step": 15500
},
{
"epoch": 1.38,
"learning_rate": 2.24e-06,
"loss": 0.0355,
"step": 15750
},
{
"epoch": 1.4,
"learning_rate": 2.1084210526315792e-06,
"loss": 0.0361,
"step": 16000
},
{
"epoch": 1.4,
"eval_loss": 0.2769256830215454,
"eval_runtime": 54.7414,
"eval_samples_per_second": 1.827,
"eval_steps_per_second": 0.128,
"eval_wer": 10.148232611174459,
"step": 16000
},
{
"epoch": 2.01,
"learning_rate": 1.9773684210526317e-06,
"loss": 0.0338,
"step": 16250
},
{
"epoch": 2.02,
"learning_rate": 1.8457894736842108e-06,
"loss": 0.03,
"step": 16500
},
{
"epoch": 2.03,
"learning_rate": 1.7142105263157897e-06,
"loss": 0.0263,
"step": 16750
},
{
"epoch": 2.04,
"learning_rate": 1.5826315789473687e-06,
"loss": 0.0249,
"step": 17000
},
{
"epoch": 2.04,
"eval_loss": 0.2934916913509369,
"eval_runtime": 60.1234,
"eval_samples_per_second": 1.663,
"eval_steps_per_second": 0.116,
"eval_wer": 8.893956670467503,
"step": 17000
},
{
"epoch": 2.06,
"learning_rate": 1.4510526315789474e-06,
"loss": 0.0214,
"step": 17250
},
{
"epoch": 2.07,
"learning_rate": 1.3194736842105263e-06,
"loss": 0.0197,
"step": 17500
},
{
"epoch": 2.08,
"learning_rate": 1.1878947368421054e-06,
"loss": 0.0208,
"step": 17750
},
{
"epoch": 2.09,
"learning_rate": 1.0563157894736843e-06,
"loss": 0.0204,
"step": 18000
},
{
"epoch": 2.09,
"eval_loss": 0.28742682933807373,
"eval_runtime": 70.388,
"eval_samples_per_second": 1.421,
"eval_steps_per_second": 0.099,
"eval_wer": 12.428734321550742,
"step": 18000
},
{
"epoch": 2.11,
"learning_rate": 9.247368421052633e-07,
"loss": 0.0207,
"step": 18250
},
{
"epoch": 2.12,
"learning_rate": 7.931578947368422e-07,
"loss": 0.0188,
"step": 18500
},
{
"epoch": 2.13,
"learning_rate": 6.615789473684211e-07,
"loss": 0.0167,
"step": 18750
},
{
"epoch": 2.14,
"learning_rate": 5.3e-07,
"loss": 0.0175,
"step": 19000
},
{
"epoch": 2.14,
"eval_loss": 0.28821495175361633,
"eval_runtime": 69.297,
"eval_samples_per_second": 1.443,
"eval_steps_per_second": 0.101,
"eval_wer": 12.998859749144811,
"step": 19000
},
{
"epoch": 2.16,
"learning_rate": 3.98421052631579e-07,
"loss": 0.0184,
"step": 19250
},
{
"epoch": 2.17,
"learning_rate": 2.6684210526315793e-07,
"loss": 0.0186,
"step": 19500
},
{
"epoch": 2.18,
"learning_rate": 1.3526315789473686e-07,
"loss": 0.0179,
"step": 19750
},
{
"epoch": 2.19,
"learning_rate": 3.6842105263157898e-09,
"loss": 0.0197,
"step": 20000
},
{
"epoch": 2.19,
"eval_loss": 0.3006799817085266,
"eval_runtime": 45.4742,
"eval_samples_per_second": 2.199,
"eval_steps_per_second": 0.154,
"eval_wer": 9.122006841505131,
"step": 20000
},
{
"epoch": 2.19,
"step": 20000,
"total_flos": 2.077757165223936e+20,
"train_loss": 0.09227749185562134,
"train_runtime": 214472.7404,
"train_samples_per_second": 6.714,
"train_steps_per_second": 0.093
}
],
"max_steps": 20000,
"num_train_epochs": 9223372036854775807,
"total_flos": 2.077757165223936e+20,
"trial_name": null,
"trial_params": null
}