gary109's picture
End of training
cf0013b
{
"best_metric": 0.5760026574134827,
"best_model_checkpoint": "ai-light-dance_singing2_ft_wav2vec2-large-xlsr-53-v1/checkpoint-2128",
"epoch": 39.99888517279822,
"global_step": 4480,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.09,
"learning_rate": 4.000000000000001e-06,
"loss": 1.6731,
"step": 10
},
{
"epoch": 0.18,
"learning_rate": 8.000000000000001e-06,
"loss": 1.6923,
"step": 20
},
{
"epoch": 0.27,
"learning_rate": 1.2e-05,
"loss": 1.6707,
"step": 30
},
{
"epoch": 0.36,
"learning_rate": 1.6000000000000003e-05,
"loss": 1.6669,
"step": 40
},
{
"epoch": 0.45,
"learning_rate": 2e-05,
"loss": 1.6685,
"step": 50
},
{
"epoch": 0.54,
"learning_rate": 2.4e-05,
"loss": 1.669,
"step": 60
},
{
"epoch": 0.62,
"learning_rate": 2.8e-05,
"loss": 1.6255,
"step": 70
},
{
"epoch": 0.71,
"learning_rate": 3.2000000000000005e-05,
"loss": 1.6338,
"step": 80
},
{
"epoch": 0.8,
"learning_rate": 3.6e-05,
"loss": 1.6468,
"step": 90
},
{
"epoch": 0.89,
"learning_rate": 3.96e-05,
"loss": 1.653,
"step": 100
},
{
"epoch": 0.98,
"learning_rate": 3.9917808219178084e-05,
"loss": 1.656,
"step": 110
},
{
"epoch": 1.0,
"eval_loss": 1.762465476989746,
"eval_runtime": 227.2609,
"eval_samples_per_second": 8.774,
"eval_steps_per_second": 0.88,
"eval_wer": 0.9264513777075675,
"step": 112
},
{
"epoch": 1.07,
"learning_rate": 3.982648401826484e-05,
"loss": 1.6323,
"step": 120
},
{
"epoch": 1.16,
"learning_rate": 3.97351598173516e-05,
"loss": 1.5681,
"step": 130
},
{
"epoch": 1.25,
"learning_rate": 3.964383561643836e-05,
"loss": 1.5729,
"step": 140
},
{
"epoch": 1.34,
"learning_rate": 3.955251141552512e-05,
"loss": 1.4911,
"step": 150
},
{
"epoch": 1.43,
"learning_rate": 3.9461187214611876e-05,
"loss": 1.4653,
"step": 160
},
{
"epoch": 1.52,
"learning_rate": 3.936986301369863e-05,
"loss": 1.4765,
"step": 170
},
{
"epoch": 1.61,
"learning_rate": 3.927853881278539e-05,
"loss": 1.4356,
"step": 180
},
{
"epoch": 1.7,
"learning_rate": 3.9187214611872154e-05,
"loss": 1.4152,
"step": 190
},
{
"epoch": 1.78,
"learning_rate": 3.9095890410958904e-05,
"loss": 1.4193,
"step": 200
},
{
"epoch": 1.87,
"learning_rate": 3.900456621004567e-05,
"loss": 1.427,
"step": 210
},
{
"epoch": 1.96,
"learning_rate": 3.8913242009132425e-05,
"loss": 1.3693,
"step": 220
},
{
"epoch": 2.0,
"eval_loss": 1.51346755027771,
"eval_runtime": 229.1152,
"eval_samples_per_second": 8.703,
"eval_steps_per_second": 0.873,
"eval_wer": 0.9243248169638788,
"step": 224
},
{
"epoch": 2.05,
"learning_rate": 3.882191780821918e-05,
"loss": 1.4403,
"step": 230
},
{
"epoch": 2.14,
"learning_rate": 3.873059360730594e-05,
"loss": 1.3155,
"step": 240
},
{
"epoch": 2.23,
"learning_rate": 3.8639269406392696e-05,
"loss": 1.3129,
"step": 250
},
{
"epoch": 2.32,
"learning_rate": 3.854794520547945e-05,
"loss": 1.2924,
"step": 260
},
{
"epoch": 2.41,
"learning_rate": 3.845662100456622e-05,
"loss": 1.2861,
"step": 270
},
{
"epoch": 2.5,
"learning_rate": 3.836529680365297e-05,
"loss": 1.2856,
"step": 280
},
{
"epoch": 2.59,
"learning_rate": 3.827397260273973e-05,
"loss": 1.2469,
"step": 290
},
{
"epoch": 2.68,
"learning_rate": 3.818264840182649e-05,
"loss": 1.225,
"step": 300
},
{
"epoch": 2.77,
"learning_rate": 3.8091324200913245e-05,
"loss": 1.2471,
"step": 310
},
{
"epoch": 2.86,
"learning_rate": 3.8e-05,
"loss": 1.2351,
"step": 320
},
{
"epoch": 2.95,
"learning_rate": 3.790867579908676e-05,
"loss": 1.2172,
"step": 330
},
{
"epoch": 3.0,
"eval_loss": 1.2657241821289062,
"eval_runtime": 228.6552,
"eval_samples_per_second": 8.721,
"eval_steps_per_second": 0.875,
"eval_wer": 0.8533280675638728,
"step": 336
},
{
"epoch": 3.04,
"learning_rate": 3.7817351598173516e-05,
"loss": 1.2289,
"step": 340
},
{
"epoch": 3.12,
"learning_rate": 3.772602739726028e-05,
"loss": 1.1428,
"step": 350
},
{
"epoch": 3.21,
"learning_rate": 3.763470319634703e-05,
"loss": 1.1727,
"step": 360
},
{
"epoch": 3.3,
"learning_rate": 3.7543378995433794e-05,
"loss": 1.1311,
"step": 370
},
{
"epoch": 3.39,
"learning_rate": 3.745205479452055e-05,
"loss": 1.1217,
"step": 380
},
{
"epoch": 3.48,
"learning_rate": 3.736073059360731e-05,
"loss": 1.0888,
"step": 390
},
{
"epoch": 3.57,
"learning_rate": 3.7269406392694065e-05,
"loss": 1.1147,
"step": 400
},
{
"epoch": 3.66,
"learning_rate": 3.717808219178082e-05,
"loss": 1.0944,
"step": 410
},
{
"epoch": 3.75,
"learning_rate": 3.7086757990867586e-05,
"loss": 1.0851,
"step": 420
},
{
"epoch": 3.84,
"learning_rate": 3.699543378995434e-05,
"loss": 1.0709,
"step": 430
},
{
"epoch": 3.93,
"learning_rate": 3.69041095890411e-05,
"loss": 1.0456,
"step": 440
},
{
"epoch": 4.0,
"eval_loss": 1.0892952680587769,
"eval_runtime": 229.0174,
"eval_samples_per_second": 8.707,
"eval_steps_per_second": 0.873,
"eval_wer": 0.7690858826746059,
"step": 448
},
{
"epoch": 4.02,
"learning_rate": 3.681278538812786e-05,
"loss": 1.0713,
"step": 450
},
{
"epoch": 4.11,
"learning_rate": 3.6721461187214614e-05,
"loss": 1.0165,
"step": 460
},
{
"epoch": 4.2,
"learning_rate": 3.663013698630137e-05,
"loss": 1.0003,
"step": 470
},
{
"epoch": 4.29,
"learning_rate": 3.653881278538813e-05,
"loss": 1.0148,
"step": 480
},
{
"epoch": 4.37,
"learning_rate": 3.6447488584474885e-05,
"loss": 0.9741,
"step": 490
},
{
"epoch": 4.46,
"learning_rate": 3.635616438356165e-05,
"loss": 0.9607,
"step": 500
},
{
"epoch": 4.55,
"learning_rate": 3.6264840182648406e-05,
"loss": 0.9792,
"step": 510
},
{
"epoch": 4.64,
"learning_rate": 3.617351598173516e-05,
"loss": 0.9451,
"step": 520
},
{
"epoch": 4.73,
"learning_rate": 3.608219178082192e-05,
"loss": 0.916,
"step": 530
},
{
"epoch": 4.82,
"learning_rate": 3.599086757990868e-05,
"loss": 0.9305,
"step": 540
},
{
"epoch": 4.91,
"learning_rate": 3.589954337899544e-05,
"loss": 0.9255,
"step": 550
},
{
"epoch": 5.0,
"learning_rate": 3.580821917808219e-05,
"loss": 0.9385,
"step": 560
},
{
"epoch": 5.0,
"eval_loss": 1.0109813213348389,
"eval_runtime": 228.5891,
"eval_samples_per_second": 8.723,
"eval_steps_per_second": 0.875,
"eval_wer": 0.7097244584864963,
"step": 560
},
{
"epoch": 5.09,
"learning_rate": 3.5716894977168955e-05,
"loss": 0.9097,
"step": 570
},
{
"epoch": 5.18,
"learning_rate": 3.562557077625571e-05,
"loss": 0.8809,
"step": 580
},
{
"epoch": 5.27,
"learning_rate": 3.553424657534247e-05,
"loss": 0.886,
"step": 590
},
{
"epoch": 5.36,
"learning_rate": 3.5442922374429226e-05,
"loss": 0.8571,
"step": 600
},
{
"epoch": 5.45,
"learning_rate": 3.535159817351598e-05,
"loss": 0.8642,
"step": 610
},
{
"epoch": 5.54,
"learning_rate": 3.526027397260274e-05,
"loss": 0.875,
"step": 620
},
{
"epoch": 5.62,
"learning_rate": 3.5168949771689504e-05,
"loss": 0.8296,
"step": 630
},
{
"epoch": 5.71,
"learning_rate": 3.5077625570776254e-05,
"loss": 0.823,
"step": 640
},
{
"epoch": 5.8,
"learning_rate": 3.498630136986302e-05,
"loss": 0.8408,
"step": 650
},
{
"epoch": 5.89,
"learning_rate": 3.4894977168949775e-05,
"loss": 0.8288,
"step": 660
},
{
"epoch": 5.98,
"learning_rate": 3.480365296803653e-05,
"loss": 0.8165,
"step": 670
},
{
"epoch": 6.0,
"eval_loss": 0.9242711067199707,
"eval_runtime": 229.5196,
"eval_samples_per_second": 8.688,
"eval_steps_per_second": 0.871,
"eval_wer": 0.6682261445453717,
"step": 672
},
{
"epoch": 6.07,
"learning_rate": 3.471232876712329e-05,
"loss": 0.8096,
"step": 680
},
{
"epoch": 6.16,
"learning_rate": 3.4621004566210046e-05,
"loss": 0.7775,
"step": 690
},
{
"epoch": 6.25,
"learning_rate": 3.452968036529681e-05,
"loss": 0.7988,
"step": 700
},
{
"epoch": 6.34,
"learning_rate": 3.443835616438357e-05,
"loss": 0.7705,
"step": 710
},
{
"epoch": 6.43,
"learning_rate": 3.434703196347032e-05,
"loss": 0.7682,
"step": 720
},
{
"epoch": 6.52,
"learning_rate": 3.425570776255708e-05,
"loss": 0.7895,
"step": 730
},
{
"epoch": 6.61,
"learning_rate": 3.416438356164384e-05,
"loss": 0.7551,
"step": 740
},
{
"epoch": 6.7,
"learning_rate": 3.4073059360730595e-05,
"loss": 0.7705,
"step": 750
},
{
"epoch": 6.78,
"learning_rate": 3.398173515981735e-05,
"loss": 0.7731,
"step": 760
},
{
"epoch": 6.87,
"learning_rate": 3.389041095890411e-05,
"loss": 0.7423,
"step": 770
},
{
"epoch": 6.96,
"learning_rate": 3.379908675799087e-05,
"loss": 0.7491,
"step": 780
},
{
"epoch": 7.0,
"eval_loss": 0.8947959542274475,
"eval_runtime": 230.3886,
"eval_samples_per_second": 8.655,
"eval_steps_per_second": 0.868,
"eval_wer": 0.6582920679284261,
"step": 784
},
{
"epoch": 7.05,
"learning_rate": 3.370776255707763e-05,
"loss": 0.761,
"step": 790
},
{
"epoch": 7.14,
"learning_rate": 3.361643835616439e-05,
"loss": 0.7195,
"step": 800
},
{
"epoch": 7.23,
"learning_rate": 3.3525114155251144e-05,
"loss": 0.7192,
"step": 810
},
{
"epoch": 7.32,
"learning_rate": 3.34337899543379e-05,
"loss": 0.7107,
"step": 820
},
{
"epoch": 7.41,
"learning_rate": 3.3342465753424665e-05,
"loss": 0.697,
"step": 830
},
{
"epoch": 7.5,
"learning_rate": 3.3251141552511416e-05,
"loss": 0.7244,
"step": 840
},
{
"epoch": 7.59,
"learning_rate": 3.315981735159817e-05,
"loss": 0.6812,
"step": 850
},
{
"epoch": 7.68,
"learning_rate": 3.3068493150684936e-05,
"loss": 0.6818,
"step": 860
},
{
"epoch": 7.77,
"learning_rate": 3.2977168949771693e-05,
"loss": 0.7117,
"step": 870
},
{
"epoch": 7.86,
"learning_rate": 3.288584474885845e-05,
"loss": 0.701,
"step": 880
},
{
"epoch": 7.95,
"learning_rate": 3.279452054794521e-05,
"loss": 0.6772,
"step": 890
},
{
"epoch": 8.0,
"eval_loss": 0.7893749475479126,
"eval_runtime": 230.2029,
"eval_samples_per_second": 8.662,
"eval_steps_per_second": 0.869,
"eval_wer": 0.6006926512136586,
"step": 896
},
{
"epoch": 8.04,
"learning_rate": 3.2703196347031965e-05,
"loss": 0.7046,
"step": 900
},
{
"epoch": 8.12,
"learning_rate": 3.261187214611873e-05,
"loss": 0.6475,
"step": 910
},
{
"epoch": 8.21,
"learning_rate": 3.252054794520548e-05,
"loss": 0.6294,
"step": 920
},
{
"epoch": 8.3,
"learning_rate": 3.242922374429224e-05,
"loss": 0.6585,
"step": 930
},
{
"epoch": 8.39,
"learning_rate": 3.2337899543379e-05,
"loss": 0.6298,
"step": 940
},
{
"epoch": 8.48,
"learning_rate": 3.2246575342465757e-05,
"loss": 0.6553,
"step": 950
},
{
"epoch": 8.57,
"learning_rate": 3.2155251141552514e-05,
"loss": 0.6374,
"step": 960
},
{
"epoch": 8.66,
"learning_rate": 3.206392694063927e-05,
"loss": 0.6361,
"step": 970
},
{
"epoch": 8.75,
"learning_rate": 3.197260273972603e-05,
"loss": 0.6861,
"step": 980
},
{
"epoch": 8.84,
"learning_rate": 3.188127853881279e-05,
"loss": 0.6188,
"step": 990
},
{
"epoch": 8.93,
"learning_rate": 3.178995433789954e-05,
"loss": 0.6096,
"step": 1000
},
{
"epoch": 9.0,
"eval_loss": 0.7683572173118591,
"eval_runtime": 229.8867,
"eval_samples_per_second": 8.674,
"eval_steps_per_second": 0.87,
"eval_wer": 0.5663335054834888,
"step": 1008
},
{
"epoch": 9.02,
"learning_rate": 3.1698630136986306e-05,
"loss": 0.6474,
"step": 1010
},
{
"epoch": 9.11,
"learning_rate": 3.160730593607306e-05,
"loss": 0.5958,
"step": 1020
},
{
"epoch": 9.2,
"learning_rate": 3.151598173515982e-05,
"loss": 0.5893,
"step": 1030
},
{
"epoch": 9.29,
"learning_rate": 3.142465753424658e-05,
"loss": 0.6225,
"step": 1040
},
{
"epoch": 9.37,
"learning_rate": 3.1333333333333334e-05,
"loss": 0.5937,
"step": 1050
},
{
"epoch": 9.46,
"learning_rate": 3.12420091324201e-05,
"loss": 0.581,
"step": 1060
},
{
"epoch": 9.55,
"learning_rate": 3.1150684931506855e-05,
"loss": 0.6059,
"step": 1070
},
{
"epoch": 9.64,
"learning_rate": 3.105936073059361e-05,
"loss": 0.5728,
"step": 1080
},
{
"epoch": 9.73,
"learning_rate": 3.096803652968037e-05,
"loss": 0.5728,
"step": 1090
},
{
"epoch": 9.82,
"learning_rate": 3.0876712328767126e-05,
"loss": 0.6019,
"step": 1100
},
{
"epoch": 9.91,
"learning_rate": 3.078538812785388e-05,
"loss": 0.5575,
"step": 1110
},
{
"epoch": 10.0,
"learning_rate": 3.069406392694064e-05,
"loss": 0.5714,
"step": 1120
},
{
"epoch": 10.0,
"eval_loss": 0.6978363394737244,
"eval_runtime": 228.8208,
"eval_samples_per_second": 8.714,
"eval_steps_per_second": 0.874,
"eval_wer": 0.4826077710605462,
"step": 1120
},
{
"epoch": 10.09,
"learning_rate": 3.06027397260274e-05,
"loss": 0.5365,
"step": 1130
},
{
"epoch": 10.18,
"learning_rate": 3.0511415525114157e-05,
"loss": 0.5259,
"step": 1140
},
{
"epoch": 10.27,
"learning_rate": 3.0420091324200918e-05,
"loss": 0.5526,
"step": 1150
},
{
"epoch": 10.36,
"learning_rate": 3.032876712328767e-05,
"loss": 0.5558,
"step": 1160
},
{
"epoch": 10.45,
"learning_rate": 3.0237442922374432e-05,
"loss": 0.5243,
"step": 1170
},
{
"epoch": 10.54,
"learning_rate": 3.0146118721461192e-05,
"loss": 0.5452,
"step": 1180
},
{
"epoch": 10.62,
"learning_rate": 3.005479452054795e-05,
"loss": 0.5273,
"step": 1190
},
{
"epoch": 10.71,
"learning_rate": 2.9963470319634703e-05,
"loss": 0.5123,
"step": 1200
},
{
"epoch": 10.8,
"learning_rate": 2.9872146118721463e-05,
"loss": 0.5523,
"step": 1210
},
{
"epoch": 10.89,
"learning_rate": 2.9780821917808224e-05,
"loss": 0.5049,
"step": 1220
},
{
"epoch": 10.98,
"learning_rate": 2.968949771689498e-05,
"loss": 0.5213,
"step": 1230
},
{
"epoch": 11.0,
"eval_loss": 0.8432782292366028,
"eval_runtime": 229.9511,
"eval_samples_per_second": 8.671,
"eval_steps_per_second": 0.87,
"eval_wer": 0.4927241243126652,
"step": 1232
},
{
"epoch": 11.07,
"learning_rate": 2.9598173515981738e-05,
"loss": 0.5322,
"step": 1240
},
{
"epoch": 11.16,
"learning_rate": 2.9506849315068495e-05,
"loss": 0.4949,
"step": 1250
},
{
"epoch": 11.25,
"learning_rate": 2.9415525114155255e-05,
"loss": 0.5076,
"step": 1260
},
{
"epoch": 11.34,
"learning_rate": 2.9324200913242012e-05,
"loss": 0.5035,
"step": 1270
},
{
"epoch": 11.43,
"learning_rate": 2.923287671232877e-05,
"loss": 0.4936,
"step": 1280
},
{
"epoch": 11.52,
"learning_rate": 2.9141552511415526e-05,
"loss": 0.5135,
"step": 1290
},
{
"epoch": 11.61,
"learning_rate": 2.9050228310502287e-05,
"loss": 0.4765,
"step": 1300
},
{
"epoch": 11.7,
"learning_rate": 2.8958904109589044e-05,
"loss": 0.4743,
"step": 1310
},
{
"epoch": 11.78,
"learning_rate": 2.88675799086758e-05,
"loss": 0.5072,
"step": 1320
},
{
"epoch": 11.87,
"learning_rate": 2.8776255707762558e-05,
"loss": 0.4826,
"step": 1330
},
{
"epoch": 11.96,
"learning_rate": 2.868493150684932e-05,
"loss": 0.4624,
"step": 1340
},
{
"epoch": 12.0,
"eval_loss": 0.6695169806480408,
"eval_runtime": 229.3877,
"eval_samples_per_second": 8.693,
"eval_steps_per_second": 0.872,
"eval_wer": 0.4469119300057721,
"step": 1344
},
{
"epoch": 12.05,
"learning_rate": 2.859360730593608e-05,
"loss": 0.4908,
"step": 1350
},
{
"epoch": 12.14,
"learning_rate": 2.8502283105022832e-05,
"loss": 0.4444,
"step": 1360
},
{
"epoch": 12.23,
"learning_rate": 2.8410958904109593e-05,
"loss": 0.4469,
"step": 1370
},
{
"epoch": 12.32,
"learning_rate": 2.831963470319635e-05,
"loss": 0.4508,
"step": 1380
},
{
"epoch": 12.41,
"learning_rate": 2.822831050228311e-05,
"loss": 0.4419,
"step": 1390
},
{
"epoch": 12.5,
"learning_rate": 2.8136986301369864e-05,
"loss": 0.4662,
"step": 1400
},
{
"epoch": 12.59,
"learning_rate": 2.8045662100456624e-05,
"loss": 0.4345,
"step": 1410
},
{
"epoch": 12.68,
"learning_rate": 2.795433789954338e-05,
"loss": 0.4385,
"step": 1420
},
{
"epoch": 12.77,
"learning_rate": 2.7863013698630142e-05,
"loss": 0.4686,
"step": 1430
},
{
"epoch": 12.86,
"learning_rate": 2.7771689497716896e-05,
"loss": 0.4212,
"step": 1440
},
{
"epoch": 12.95,
"learning_rate": 2.7680365296803656e-05,
"loss": 0.4298,
"step": 1450
},
{
"epoch": 13.0,
"eval_loss": 0.6568945050239563,
"eval_runtime": 227.6777,
"eval_samples_per_second": 8.758,
"eval_steps_per_second": 0.878,
"eval_wer": 0.3867910198377738,
"step": 1456
},
{
"epoch": 13.04,
"learning_rate": 2.7589041095890413e-05,
"loss": 0.4542,
"step": 1460
},
{
"epoch": 13.12,
"learning_rate": 2.7497716894977173e-05,
"loss": 0.4113,
"step": 1470
},
{
"epoch": 13.21,
"learning_rate": 2.7406392694063927e-05,
"loss": 0.4044,
"step": 1480
},
{
"epoch": 13.3,
"learning_rate": 2.7315068493150688e-05,
"loss": 0.4223,
"step": 1490
},
{
"epoch": 13.39,
"learning_rate": 2.7223744292237445e-05,
"loss": 0.3966,
"step": 1500
},
{
"epoch": 13.48,
"learning_rate": 2.7132420091324205e-05,
"loss": 0.4006,
"step": 1510
},
{
"epoch": 13.57,
"learning_rate": 2.704109589041096e-05,
"loss": 0.4131,
"step": 1520
},
{
"epoch": 13.66,
"learning_rate": 2.694977168949772e-05,
"loss": 0.4038,
"step": 1530
},
{
"epoch": 13.75,
"learning_rate": 2.685844748858448e-05,
"loss": 0.4206,
"step": 1540
},
{
"epoch": 13.84,
"learning_rate": 2.6767123287671237e-05,
"loss": 0.4033,
"step": 1550
},
{
"epoch": 13.93,
"learning_rate": 2.6675799086757994e-05,
"loss": 0.3939,
"step": 1560
},
{
"epoch": 14.0,
"eval_loss": 0.6632619500160217,
"eval_runtime": 229.2783,
"eval_samples_per_second": 8.697,
"eval_steps_per_second": 0.872,
"eval_wer": 0.36935322173952667,
"step": 1568
},
{
"epoch": 14.02,
"learning_rate": 2.658447488584475e-05,
"loss": 0.4353,
"step": 1570
},
{
"epoch": 14.11,
"learning_rate": 2.649315068493151e-05,
"loss": 0.3605,
"step": 1580
},
{
"epoch": 14.2,
"learning_rate": 2.6401826484018268e-05,
"loss": 0.3648,
"step": 1590
},
{
"epoch": 14.29,
"learning_rate": 2.6310502283105025e-05,
"loss": 0.3958,
"step": 1600
},
{
"epoch": 14.37,
"learning_rate": 2.6219178082191782e-05,
"loss": 0.3665,
"step": 1610
},
{
"epoch": 14.46,
"learning_rate": 2.6127853881278543e-05,
"loss": 0.3608,
"step": 1620
},
{
"epoch": 14.55,
"learning_rate": 2.60365296803653e-05,
"loss": 0.3904,
"step": 1630
},
{
"epoch": 14.64,
"learning_rate": 2.5945205479452057e-05,
"loss": 0.3659,
"step": 1640
},
{
"epoch": 14.73,
"learning_rate": 2.5853881278538814e-05,
"loss": 0.3674,
"step": 1650
},
{
"epoch": 14.82,
"learning_rate": 2.5762557077625574e-05,
"loss": 0.4027,
"step": 1660
},
{
"epoch": 14.91,
"learning_rate": 2.5671232876712328e-05,
"loss": 0.3636,
"step": 1670
},
{
"epoch": 15.0,
"learning_rate": 2.5579908675799088e-05,
"loss": 0.3803,
"step": 1680
},
{
"epoch": 15.0,
"eval_loss": 0.6375618577003479,
"eval_runtime": 229.7813,
"eval_samples_per_second": 8.678,
"eval_steps_per_second": 0.87,
"eval_wer": 0.39198590394021326,
"step": 1680
},
{
"epoch": 15.09,
"learning_rate": 2.5488584474885845e-05,
"loss": 0.359,
"step": 1690
},
{
"epoch": 15.18,
"learning_rate": 2.5397260273972606e-05,
"loss": 0.3545,
"step": 1700
},
{
"epoch": 15.27,
"learning_rate": 2.530593607305936e-05,
"loss": 0.3631,
"step": 1710
},
{
"epoch": 15.36,
"learning_rate": 2.521461187214612e-05,
"loss": 0.3339,
"step": 1720
},
{
"epoch": 15.45,
"learning_rate": 2.512328767123288e-05,
"loss": 0.343,
"step": 1730
},
{
"epoch": 15.54,
"learning_rate": 2.5031963470319637e-05,
"loss": 0.3652,
"step": 1740
},
{
"epoch": 15.62,
"learning_rate": 2.494063926940639e-05,
"loss": 0.3397,
"step": 1750
},
{
"epoch": 15.71,
"learning_rate": 2.484931506849315e-05,
"loss": 0.3195,
"step": 1760
},
{
"epoch": 15.8,
"learning_rate": 2.4757990867579912e-05,
"loss": 0.3655,
"step": 1770
},
{
"epoch": 15.89,
"learning_rate": 2.466666666666667e-05,
"loss": 0.3402,
"step": 1780
},
{
"epoch": 15.98,
"learning_rate": 2.4575342465753426e-05,
"loss": 0.3415,
"step": 1790
},
{
"epoch": 16.0,
"eval_loss": 0.6463210582733154,
"eval_runtime": 229.9077,
"eval_samples_per_second": 8.673,
"eval_steps_per_second": 0.87,
"eval_wer": 0.3413737582404229,
"step": 1792
},
{
"epoch": 16.07,
"learning_rate": 2.4484018264840183e-05,
"loss": 0.3488,
"step": 1800
},
{
"epoch": 16.16,
"learning_rate": 2.4392694063926943e-05,
"loss": 0.3255,
"step": 1810
},
{
"epoch": 16.25,
"learning_rate": 2.43013698630137e-05,
"loss": 0.3384,
"step": 1820
},
{
"epoch": 16.34,
"learning_rate": 2.4210045662100457e-05,
"loss": 0.3056,
"step": 1830
},
{
"epoch": 16.43,
"learning_rate": 2.4118721461187214e-05,
"loss": 0.3073,
"step": 1840
},
{
"epoch": 16.52,
"learning_rate": 2.4027397260273975e-05,
"loss": 0.3429,
"step": 1850
},
{
"epoch": 16.61,
"learning_rate": 2.3936073059360735e-05,
"loss": 0.3106,
"step": 1860
},
{
"epoch": 16.7,
"learning_rate": 2.384474885844749e-05,
"loss": 0.3089,
"step": 1870
},
{
"epoch": 16.78,
"learning_rate": 2.3753424657534246e-05,
"loss": 0.3406,
"step": 1880
},
{
"epoch": 16.87,
"learning_rate": 2.3662100456621006e-05,
"loss": 0.3171,
"step": 1890
},
{
"epoch": 16.96,
"learning_rate": 2.3570776255707767e-05,
"loss": 0.3239,
"step": 1900
},
{
"epoch": 17.0,
"eval_loss": 0.5841062068939209,
"eval_runtime": 230.1162,
"eval_samples_per_second": 8.665,
"eval_steps_per_second": 0.869,
"eval_wer": 0.319713218093994,
"step": 1904
},
{
"epoch": 17.05,
"learning_rate": 2.347945205479452e-05,
"loss": 0.3472,
"step": 1910
},
{
"epoch": 17.14,
"learning_rate": 2.338812785388128e-05,
"loss": 0.2911,
"step": 1920
},
{
"epoch": 17.23,
"learning_rate": 2.3296803652968038e-05,
"loss": 0.3009,
"step": 1930
},
{
"epoch": 17.32,
"learning_rate": 2.32054794520548e-05,
"loss": 0.3148,
"step": 1940
},
{
"epoch": 17.41,
"learning_rate": 2.3114155251141552e-05,
"loss": 0.2912,
"step": 1950
},
{
"epoch": 17.5,
"learning_rate": 2.3022831050228312e-05,
"loss": 0.3179,
"step": 1960
},
{
"epoch": 17.59,
"learning_rate": 2.293150684931507e-05,
"loss": 0.2922,
"step": 1970
},
{
"epoch": 17.68,
"learning_rate": 2.284018264840183e-05,
"loss": 0.307,
"step": 1980
},
{
"epoch": 17.77,
"learning_rate": 2.2748858447488584e-05,
"loss": 0.3153,
"step": 1990
},
{
"epoch": 17.86,
"learning_rate": 2.2657534246575344e-05,
"loss": 0.3008,
"step": 2000
},
{
"epoch": 17.95,
"learning_rate": 2.25662100456621e-05,
"loss": 0.2946,
"step": 2010
},
{
"epoch": 18.0,
"eval_loss": 0.5947784781455994,
"eval_runtime": 229.4394,
"eval_samples_per_second": 8.691,
"eval_steps_per_second": 0.872,
"eval_wer": 0.3112373545584349,
"step": 2016
},
{
"epoch": 18.04,
"learning_rate": 2.247488584474886e-05,
"loss": 0.3084,
"step": 2020
},
{
"epoch": 18.12,
"learning_rate": 2.2383561643835615e-05,
"loss": 0.2772,
"step": 2030
},
{
"epoch": 18.21,
"learning_rate": 2.2292237442922376e-05,
"loss": 0.2733,
"step": 2040
},
{
"epoch": 18.3,
"learning_rate": 2.2200913242009136e-05,
"loss": 0.2988,
"step": 2050
},
{
"epoch": 18.39,
"learning_rate": 2.2109589041095893e-05,
"loss": 0.2659,
"step": 2060
},
{
"epoch": 18.48,
"learning_rate": 2.2018264840182647e-05,
"loss": 0.2705,
"step": 2070
},
{
"epoch": 18.57,
"learning_rate": 2.1926940639269407e-05,
"loss": 0.2902,
"step": 2080
},
{
"epoch": 18.66,
"learning_rate": 2.1835616438356168e-05,
"loss": 0.2777,
"step": 2090
},
{
"epoch": 18.75,
"learning_rate": 2.1744292237442925e-05,
"loss": 0.3032,
"step": 2100
},
{
"epoch": 18.84,
"learning_rate": 2.165296803652968e-05,
"loss": 0.2805,
"step": 2110
},
{
"epoch": 18.93,
"learning_rate": 2.156164383561644e-05,
"loss": 0.2751,
"step": 2120
},
{
"epoch": 19.0,
"eval_loss": 0.5760026574134827,
"eval_runtime": 229.0041,
"eval_samples_per_second": 8.707,
"eval_steps_per_second": 0.873,
"eval_wer": 0.2905489564662636,
"step": 2128
},
{
"epoch": 19.02,
"learning_rate": 2.14703196347032e-05,
"loss": 0.2978,
"step": 2130
},
{
"epoch": 19.11,
"learning_rate": 2.1378995433789956e-05,
"loss": 0.2586,
"step": 2140
},
{
"epoch": 19.2,
"learning_rate": 2.1287671232876713e-05,
"loss": 0.2573,
"step": 2150
},
{
"epoch": 19.29,
"learning_rate": 2.119634703196347e-05,
"loss": 0.279,
"step": 2160
},
{
"epoch": 19.37,
"learning_rate": 2.110502283105023e-05,
"loss": 0.2511,
"step": 2170
},
{
"epoch": 19.46,
"learning_rate": 2.101369863013699e-05,
"loss": 0.2543,
"step": 2180
},
{
"epoch": 19.55,
"learning_rate": 2.0922374429223745e-05,
"loss": 0.2808,
"step": 2190
},
{
"epoch": 19.64,
"learning_rate": 2.0831050228310502e-05,
"loss": 0.2555,
"step": 2200
},
{
"epoch": 19.73,
"learning_rate": 2.0739726027397262e-05,
"loss": 0.2528,
"step": 2210
},
{
"epoch": 19.82,
"learning_rate": 2.0648401826484023e-05,
"loss": 0.2741,
"step": 2220
},
{
"epoch": 19.91,
"learning_rate": 2.0557077625570776e-05,
"loss": 0.2473,
"step": 2230
},
{
"epoch": 20.0,
"learning_rate": 2.0465753424657537e-05,
"loss": 0.2834,
"step": 2240
},
{
"epoch": 20.0,
"eval_loss": 0.5883628726005554,
"eval_runtime": 229.7604,
"eval_samples_per_second": 8.679,
"eval_steps_per_second": 0.87,
"eval_wer": 0.29750584804204516,
"step": 2240
},
{
"epoch": 20.09,
"learning_rate": 2.0374429223744294e-05,
"loss": 0.2415,
"step": 2250
},
{
"epoch": 20.18,
"learning_rate": 2.0283105022831054e-05,
"loss": 0.2374,
"step": 2260
},
{
"epoch": 20.27,
"learning_rate": 2.0191780821917808e-05,
"loss": 0.2678,
"step": 2270
},
{
"epoch": 20.36,
"learning_rate": 2.0100456621004568e-05,
"loss": 0.237,
"step": 2280
},
{
"epoch": 20.45,
"learning_rate": 2.0009132420091325e-05,
"loss": 0.2392,
"step": 2290
},
{
"epoch": 20.54,
"learning_rate": 1.9917808219178082e-05,
"loss": 0.2623,
"step": 2300
},
{
"epoch": 20.62,
"learning_rate": 1.9826484018264843e-05,
"loss": 0.2415,
"step": 2310
},
{
"epoch": 20.71,
"learning_rate": 1.97351598173516e-05,
"loss": 0.2368,
"step": 2320
},
{
"epoch": 20.8,
"learning_rate": 1.9643835616438357e-05,
"loss": 0.2648,
"step": 2330
},
{
"epoch": 20.89,
"learning_rate": 1.9552511415525114e-05,
"loss": 0.2463,
"step": 2340
},
{
"epoch": 20.98,
"learning_rate": 1.9461187214611874e-05,
"loss": 0.2383,
"step": 2350
},
{
"epoch": 21.0,
"eval_loss": 0.5989164710044861,
"eval_runtime": 229.5452,
"eval_samples_per_second": 8.687,
"eval_steps_per_second": 0.871,
"eval_wer": 0.2775465564905672,
"step": 2352
},
{
"epoch": 21.07,
"learning_rate": 1.936986301369863e-05,
"loss": 0.2505,
"step": 2360
},
{
"epoch": 21.16,
"learning_rate": 1.9278538812785392e-05,
"loss": 0.2216,
"step": 2370
},
{
"epoch": 21.25,
"learning_rate": 1.9187214611872145e-05,
"loss": 0.2433,
"step": 2380
},
{
"epoch": 21.34,
"learning_rate": 1.9095890410958906e-05,
"loss": 0.2283,
"step": 2390
},
{
"epoch": 21.43,
"learning_rate": 1.9004566210045663e-05,
"loss": 0.2225,
"step": 2400
},
{
"epoch": 21.52,
"learning_rate": 1.8913242009132423e-05,
"loss": 0.2397,
"step": 2410
},
{
"epoch": 21.61,
"learning_rate": 1.882191780821918e-05,
"loss": 0.2153,
"step": 2420
},
{
"epoch": 21.7,
"learning_rate": 1.8730593607305937e-05,
"loss": 0.2309,
"step": 2430
},
{
"epoch": 21.78,
"learning_rate": 1.8639269406392694e-05,
"loss": 0.247,
"step": 2440
},
{
"epoch": 21.87,
"learning_rate": 1.8547945205479455e-05,
"loss": 0.2313,
"step": 2450
},
{
"epoch": 21.96,
"learning_rate": 1.8456621004566212e-05,
"loss": 0.2265,
"step": 2460
},
{
"epoch": 22.0,
"eval_loss": 0.6150508522987366,
"eval_runtime": 228.3778,
"eval_samples_per_second": 8.731,
"eval_steps_per_second": 0.876,
"eval_wer": 0.2853236929246286,
"step": 2464
},
{
"epoch": 22.05,
"learning_rate": 1.836529680365297e-05,
"loss": 0.242,
"step": 2470
},
{
"epoch": 22.14,
"learning_rate": 1.8273972602739726e-05,
"loss": 0.2084,
"step": 2480
},
{
"epoch": 22.23,
"learning_rate": 1.8182648401826486e-05,
"loss": 0.224,
"step": 2490
},
{
"epoch": 22.32,
"learning_rate": 1.8091324200913243e-05,
"loss": 0.23,
"step": 2500
},
{
"epoch": 22.41,
"learning_rate": 1.8e-05,
"loss": 0.212,
"step": 2510
},
{
"epoch": 22.5,
"learning_rate": 1.7908675799086757e-05,
"loss": 0.2389,
"step": 2520
},
{
"epoch": 22.59,
"learning_rate": 1.7817351598173518e-05,
"loss": 0.2076,
"step": 2530
},
{
"epoch": 22.68,
"learning_rate": 1.7726027397260275e-05,
"loss": 0.217,
"step": 2540
},
{
"epoch": 22.77,
"learning_rate": 1.7634703196347035e-05,
"loss": 0.2347,
"step": 2550
},
{
"epoch": 22.86,
"learning_rate": 1.7543378995433792e-05,
"loss": 0.2012,
"step": 2560
},
{
"epoch": 22.95,
"learning_rate": 1.745205479452055e-05,
"loss": 0.2158,
"step": 2570
},
{
"epoch": 23.0,
"eval_loss": 0.5842714905738831,
"eval_runtime": 223.4604,
"eval_samples_per_second": 8.923,
"eval_steps_per_second": 0.895,
"eval_wer": 0.267035270528906,
"step": 2576
},
{
"epoch": 23.04,
"learning_rate": 1.7360730593607307e-05,
"loss": 0.2268,
"step": 2580
},
{
"epoch": 23.12,
"learning_rate": 1.7269406392694067e-05,
"loss": 0.2014,
"step": 2590
},
{
"epoch": 23.21,
"learning_rate": 1.7178082191780824e-05,
"loss": 0.2006,
"step": 2600
},
{
"epoch": 23.3,
"learning_rate": 1.708675799086758e-05,
"loss": 0.2215,
"step": 2610
},
{
"epoch": 23.39,
"learning_rate": 1.6995433789954338e-05,
"loss": 0.208,
"step": 2620
},
{
"epoch": 23.48,
"learning_rate": 1.69041095890411e-05,
"loss": 0.1958,
"step": 2630
},
{
"epoch": 23.57,
"learning_rate": 1.6812785388127856e-05,
"loss": 0.218,
"step": 2640
},
{
"epoch": 23.66,
"learning_rate": 1.6721461187214613e-05,
"loss": 0.2024,
"step": 2650
},
{
"epoch": 23.75,
"learning_rate": 1.663013698630137e-05,
"loss": 0.2249,
"step": 2660
},
{
"epoch": 23.84,
"learning_rate": 1.653881278538813e-05,
"loss": 0.205,
"step": 2670
},
{
"epoch": 23.93,
"learning_rate": 1.6447488584474887e-05,
"loss": 0.2015,
"step": 2680
},
{
"epoch": 24.0,
"eval_loss": 0.6621034741401672,
"eval_runtime": 224.0191,
"eval_samples_per_second": 8.901,
"eval_steps_per_second": 0.893,
"eval_wer": 0.27380988546951424,
"step": 2688
},
{
"epoch": 24.02,
"learning_rate": 1.6356164383561644e-05,
"loss": 0.2197,
"step": 2690
},
{
"epoch": 24.11,
"learning_rate": 1.62648401826484e-05,
"loss": 0.1858,
"step": 2700
},
{
"epoch": 24.2,
"learning_rate": 1.617351598173516e-05,
"loss": 0.1936,
"step": 2710
},
{
"epoch": 24.29,
"learning_rate": 1.608219178082192e-05,
"loss": 0.2104,
"step": 2720
},
{
"epoch": 24.37,
"learning_rate": 1.599086757990868e-05,
"loss": 0.186,
"step": 2730
},
{
"epoch": 24.46,
"learning_rate": 1.5899543378995436e-05,
"loss": 0.1929,
"step": 2740
},
{
"epoch": 24.55,
"learning_rate": 1.5808219178082193e-05,
"loss": 0.2061,
"step": 2750
},
{
"epoch": 24.64,
"learning_rate": 1.571689497716895e-05,
"loss": 0.187,
"step": 2760
},
{
"epoch": 24.73,
"learning_rate": 1.562557077625571e-05,
"loss": 0.2,
"step": 2770
},
{
"epoch": 24.82,
"learning_rate": 1.5534246575342468e-05,
"loss": 0.2065,
"step": 2780
},
{
"epoch": 24.91,
"learning_rate": 1.5442922374429225e-05,
"loss": 0.198,
"step": 2790
},
{
"epoch": 25.0,
"learning_rate": 1.5351598173515982e-05,
"loss": 0.215,
"step": 2800
},
{
"epoch": 25.0,
"eval_loss": 0.6068045496940613,
"eval_runtime": 223.6951,
"eval_samples_per_second": 8.914,
"eval_steps_per_second": 0.894,
"eval_wer": 0.2651821247379773,
"step": 2800
},
{
"epoch": 25.09,
"learning_rate": 1.5260273972602742e-05,
"loss": 0.1834,
"step": 2810
},
{
"epoch": 25.18,
"learning_rate": 1.51689497716895e-05,
"loss": 0.1777,
"step": 2820
},
{
"epoch": 25.27,
"learning_rate": 1.5077625570776258e-05,
"loss": 0.1974,
"step": 2830
},
{
"epoch": 25.36,
"learning_rate": 1.4986301369863015e-05,
"loss": 0.1763,
"step": 2840
},
{
"epoch": 25.45,
"learning_rate": 1.4894977168949774e-05,
"loss": 0.1824,
"step": 2850
},
{
"epoch": 25.54,
"learning_rate": 1.480365296803653e-05,
"loss": 0.1997,
"step": 2860
},
{
"epoch": 25.62,
"learning_rate": 1.471232876712329e-05,
"loss": 0.1762,
"step": 2870
},
{
"epoch": 25.71,
"learning_rate": 1.4621004566210046e-05,
"loss": 0.1784,
"step": 2880
},
{
"epoch": 25.8,
"learning_rate": 1.4529680365296805e-05,
"loss": 0.2031,
"step": 2890
},
{
"epoch": 25.89,
"learning_rate": 1.4438356164383562e-05,
"loss": 0.1765,
"step": 2900
},
{
"epoch": 25.98,
"learning_rate": 1.4347031963470321e-05,
"loss": 0.1859,
"step": 2910
},
{
"epoch": 26.0,
"eval_loss": 0.6136422157287598,
"eval_runtime": 229.3437,
"eval_samples_per_second": 8.694,
"eval_steps_per_second": 0.872,
"eval_wer": 0.25701005559437373,
"step": 2912
},
{
"epoch": 26.07,
"learning_rate": 1.4255707762557078e-05,
"loss": 0.1896,
"step": 2920
},
{
"epoch": 26.16,
"learning_rate": 1.4164383561643837e-05,
"loss": 0.1752,
"step": 2930
},
{
"epoch": 26.25,
"learning_rate": 1.4073059360730594e-05,
"loss": 0.1969,
"step": 2940
},
{
"epoch": 26.34,
"learning_rate": 1.3981735159817354e-05,
"loss": 0.1688,
"step": 2950
},
{
"epoch": 26.43,
"learning_rate": 1.389041095890411e-05,
"loss": 0.1741,
"step": 2960
},
{
"epoch": 26.52,
"learning_rate": 1.379908675799087e-05,
"loss": 0.1968,
"step": 2970
},
{
"epoch": 26.61,
"learning_rate": 1.3707762557077627e-05,
"loss": 0.171,
"step": 2980
},
{
"epoch": 26.7,
"learning_rate": 1.3616438356164386e-05,
"loss": 0.1692,
"step": 2990
},
{
"epoch": 26.78,
"learning_rate": 1.3525114155251143e-05,
"loss": 0.1875,
"step": 3000
},
{
"epoch": 26.87,
"learning_rate": 1.3433789954337902e-05,
"loss": 0.1659,
"step": 3010
},
{
"epoch": 26.96,
"learning_rate": 1.3342465753424659e-05,
"loss": 0.1745,
"step": 3020
},
{
"epoch": 27.0,
"eval_loss": 0.6190728545188904,
"eval_runtime": 232.5977,
"eval_samples_per_second": 8.573,
"eval_steps_per_second": 0.86,
"eval_wer": 0.2624479752103776,
"step": 3024
},
{
"epoch": 27.05,
"learning_rate": 1.3251141552511417e-05,
"loss": 0.1997,
"step": 3030
},
{
"epoch": 27.14,
"learning_rate": 1.3159817351598174e-05,
"loss": 0.1618,
"step": 3040
},
{
"epoch": 27.23,
"learning_rate": 1.3068493150684933e-05,
"loss": 0.1668,
"step": 3050
},
{
"epoch": 27.32,
"learning_rate": 1.297716894977169e-05,
"loss": 0.1811,
"step": 3060
},
{
"epoch": 27.41,
"learning_rate": 1.2885844748858449e-05,
"loss": 0.1605,
"step": 3070
},
{
"epoch": 27.5,
"learning_rate": 1.2794520547945206e-05,
"loss": 0.192,
"step": 3080
},
{
"epoch": 27.59,
"learning_rate": 1.2703196347031965e-05,
"loss": 0.1587,
"step": 3090
},
{
"epoch": 27.68,
"learning_rate": 1.2611872146118722e-05,
"loss": 0.1624,
"step": 3100
},
{
"epoch": 27.77,
"learning_rate": 1.252054794520548e-05,
"loss": 0.1811,
"step": 3110
},
{
"epoch": 27.86,
"learning_rate": 1.2429223744292237e-05,
"loss": 0.1658,
"step": 3120
},
{
"epoch": 27.95,
"learning_rate": 1.2337899543378998e-05,
"loss": 0.1611,
"step": 3130
},
{
"epoch": 28.0,
"eval_loss": 0.6364021897315979,
"eval_runtime": 229.7698,
"eval_samples_per_second": 8.678,
"eval_steps_per_second": 0.87,
"eval_wer": 0.2577999210134581,
"step": 3136
},
{
"epoch": 28.04,
"learning_rate": 1.2246575342465753e-05,
"loss": 0.1764,
"step": 3140
},
{
"epoch": 28.12,
"learning_rate": 1.2155251141552514e-05,
"loss": 0.1501,
"step": 3150
},
{
"epoch": 28.21,
"learning_rate": 1.206392694063927e-05,
"loss": 0.1543,
"step": 3160
},
{
"epoch": 28.3,
"learning_rate": 1.197260273972603e-05,
"loss": 0.179,
"step": 3170
},
{
"epoch": 28.39,
"learning_rate": 1.1881278538812786e-05,
"loss": 0.1521,
"step": 3180
},
{
"epoch": 28.48,
"learning_rate": 1.1789954337899545e-05,
"loss": 0.1686,
"step": 3190
},
{
"epoch": 28.57,
"learning_rate": 1.1698630136986302e-05,
"loss": 0.1666,
"step": 3200
},
{
"epoch": 28.66,
"learning_rate": 1.1607305936073061e-05,
"loss": 0.1565,
"step": 3210
},
{
"epoch": 28.75,
"learning_rate": 1.1525114155251143e-05,
"loss": 0.1816,
"step": 3220
},
{
"epoch": 28.84,
"learning_rate": 1.1433789954337899e-05,
"loss": 0.1635,
"step": 3230
},
{
"epoch": 28.93,
"learning_rate": 1.1342465753424659e-05,
"loss": 0.1513,
"step": 3240
},
{
"epoch": 29.0,
"eval_loss": 0.6402228474617004,
"eval_runtime": 228.6093,
"eval_samples_per_second": 8.722,
"eval_steps_per_second": 0.875,
"eval_wer": 0.2535467995260807,
"step": 3248
},
{
"epoch": 29.02,
"learning_rate": 1.1251141552511416e-05,
"loss": 0.1754,
"step": 3250
},
{
"epoch": 29.11,
"learning_rate": 1.1159817351598175e-05,
"loss": 0.1455,
"step": 3260
},
{
"epoch": 29.2,
"learning_rate": 1.1068493150684932e-05,
"loss": 0.1475,
"step": 3270
},
{
"epoch": 29.29,
"learning_rate": 1.097716894977169e-05,
"loss": 0.1684,
"step": 3280
},
{
"epoch": 29.37,
"learning_rate": 1.0885844748858448e-05,
"loss": 0.154,
"step": 3290
},
{
"epoch": 29.46,
"learning_rate": 1.0794520547945206e-05,
"loss": 0.1479,
"step": 3300
},
{
"epoch": 29.55,
"learning_rate": 1.0703196347031963e-05,
"loss": 0.1732,
"step": 3310
},
{
"epoch": 29.64,
"learning_rate": 1.0611872146118722e-05,
"loss": 0.1542,
"step": 3320
},
{
"epoch": 29.73,
"learning_rate": 1.052054794520548e-05,
"loss": 0.1619,
"step": 3330
},
{
"epoch": 29.82,
"learning_rate": 1.0429223744292238e-05,
"loss": 0.1631,
"step": 3340
},
{
"epoch": 29.91,
"learning_rate": 1.0337899543378995e-05,
"loss": 0.1495,
"step": 3350
},
{
"epoch": 30.0,
"learning_rate": 1.0246575342465754e-05,
"loss": 0.172,
"step": 3360
},
{
"epoch": 30.0,
"eval_loss": 0.6329947113990784,
"eval_runtime": 230.6517,
"eval_samples_per_second": 8.645,
"eval_steps_per_second": 0.867,
"eval_wer": 0.24996202570100556,
"step": 3360
},
{
"epoch": 30.09,
"learning_rate": 1.015525114155251e-05,
"loss": 0.1479,
"step": 3370
},
{
"epoch": 30.18,
"learning_rate": 1.006392694063927e-05,
"loss": 0.1449,
"step": 3380
},
{
"epoch": 30.27,
"learning_rate": 9.972602739726028e-06,
"loss": 0.1605,
"step": 3390
},
{
"epoch": 30.36,
"learning_rate": 9.881278538812787e-06,
"loss": 0.1411,
"step": 3400
},
{
"epoch": 30.45,
"learning_rate": 9.789954337899544e-06,
"loss": 0.1386,
"step": 3410
},
{
"epoch": 30.54,
"learning_rate": 9.698630136986303e-06,
"loss": 0.1654,
"step": 3420
},
{
"epoch": 30.62,
"learning_rate": 9.60730593607306e-06,
"loss": 0.147,
"step": 3430
},
{
"epoch": 30.71,
"learning_rate": 9.515981735159819e-06,
"loss": 0.1467,
"step": 3440
},
{
"epoch": 30.8,
"learning_rate": 9.424657534246576e-06,
"loss": 0.1677,
"step": 3450
},
{
"epoch": 30.89,
"learning_rate": 9.333333333333334e-06,
"loss": 0.1423,
"step": 3460
},
{
"epoch": 30.98,
"learning_rate": 9.242009132420093e-06,
"loss": 0.1488,
"step": 3470
},
{
"epoch": 31.0,
"eval_loss": 0.6274679899215698,
"eval_runtime": 230.128,
"eval_samples_per_second": 8.665,
"eval_steps_per_second": 0.869,
"eval_wer": 0.25211896588388977,
"step": 3472
},
{
"epoch": 31.07,
"learning_rate": 9.15068493150685e-06,
"loss": 0.1546,
"step": 3480
},
{
"epoch": 31.16,
"learning_rate": 9.059360730593609e-06,
"loss": 0.1408,
"step": 3490
},
{
"epoch": 31.25,
"learning_rate": 8.968036529680366e-06,
"loss": 0.1613,
"step": 3500
},
{
"epoch": 31.34,
"learning_rate": 8.876712328767125e-06,
"loss": 0.1359,
"step": 3510
},
{
"epoch": 31.43,
"learning_rate": 8.785388127853882e-06,
"loss": 0.1391,
"step": 3520
},
{
"epoch": 31.52,
"learning_rate": 8.69406392694064e-06,
"loss": 0.1547,
"step": 3530
},
{
"epoch": 31.61,
"learning_rate": 8.602739726027397e-06,
"loss": 0.1424,
"step": 3540
},
{
"epoch": 31.7,
"learning_rate": 8.511415525114156e-06,
"loss": 0.1348,
"step": 3550
},
{
"epoch": 31.78,
"learning_rate": 8.420091324200915e-06,
"loss": 0.164,
"step": 3560
},
{
"epoch": 31.87,
"learning_rate": 8.328767123287672e-06,
"loss": 0.1371,
"step": 3570
},
{
"epoch": 31.96,
"learning_rate": 8.23744292237443e-06,
"loss": 0.1371,
"step": 3580
},
{
"epoch": 32.0,
"eval_loss": 0.6538846492767334,
"eval_runtime": 231.0287,
"eval_samples_per_second": 8.631,
"eval_steps_per_second": 0.866,
"eval_wer": 0.2539721116748185,
"step": 3584
},
{
"epoch": 32.05,
"learning_rate": 8.146118721461188e-06,
"loss": 0.1605,
"step": 3590
},
{
"epoch": 32.14,
"learning_rate": 8.054794520547946e-06,
"loss": 0.1316,
"step": 3600
},
{
"epoch": 32.23,
"learning_rate": 7.963470319634703e-06,
"loss": 0.1312,
"step": 3610
},
{
"epoch": 32.32,
"learning_rate": 7.872146118721462e-06,
"loss": 0.1507,
"step": 3620
},
{
"epoch": 32.41,
"learning_rate": 7.78082191780822e-06,
"loss": 0.1275,
"step": 3630
},
{
"epoch": 32.5,
"learning_rate": 7.689497716894978e-06,
"loss": 0.1507,
"step": 3640
},
{
"epoch": 32.59,
"learning_rate": 7.598173515981736e-06,
"loss": 0.1341,
"step": 3650
},
{
"epoch": 32.68,
"learning_rate": 7.506849315068494e-06,
"loss": 0.1369,
"step": 3660
},
{
"epoch": 32.77,
"learning_rate": 7.415525114155252e-06,
"loss": 0.1642,
"step": 3670
},
{
"epoch": 32.86,
"learning_rate": 7.32420091324201e-06,
"loss": 0.1349,
"step": 3680
},
{
"epoch": 32.95,
"learning_rate": 7.232876712328768e-06,
"loss": 0.1356,
"step": 3690
},
{
"epoch": 33.0,
"eval_loss": 0.654354989528656,
"eval_runtime": 229.8838,
"eval_samples_per_second": 8.674,
"eval_steps_per_second": 0.87,
"eval_wer": 0.24905064252513898,
"step": 3696
},
{
"epoch": 33.04,
"learning_rate": 7.141552511415526e-06,
"loss": 0.1524,
"step": 3700
},
{
"epoch": 33.12,
"learning_rate": 7.050228310502284e-06,
"loss": 0.1293,
"step": 3710
},
{
"epoch": 33.21,
"learning_rate": 6.958904109589042e-06,
"loss": 0.1315,
"step": 3720
},
{
"epoch": 33.3,
"learning_rate": 6.8675799086758e-06,
"loss": 0.1529,
"step": 3730
},
{
"epoch": 33.39,
"learning_rate": 6.776255707762558e-06,
"loss": 0.1299,
"step": 3740
},
{
"epoch": 33.48,
"learning_rate": 6.684931506849316e-06,
"loss": 0.1314,
"step": 3750
},
{
"epoch": 33.57,
"learning_rate": 6.593607305936074e-06,
"loss": 0.1362,
"step": 3760
},
{
"epoch": 33.66,
"learning_rate": 6.502283105022832e-06,
"loss": 0.133,
"step": 3770
},
{
"epoch": 33.75,
"learning_rate": 6.41095890410959e-06,
"loss": 0.1487,
"step": 3780
},
{
"epoch": 33.84,
"learning_rate": 6.319634703196348e-06,
"loss": 0.1283,
"step": 3790
},
{
"epoch": 33.93,
"learning_rate": 6.228310502283106e-06,
"loss": 0.1319,
"step": 3800
},
{
"epoch": 34.0,
"eval_loss": 0.6544836163520813,
"eval_runtime": 230.1134,
"eval_samples_per_second": 8.665,
"eval_steps_per_second": 0.869,
"eval_wer": 0.24908102196433454,
"step": 3808
},
{
"epoch": 34.02,
"learning_rate": 6.136986301369864e-06,
"loss": 0.1506,
"step": 3810
},
{
"epoch": 34.11,
"learning_rate": 6.045662100456622e-06,
"loss": 0.1261,
"step": 3820
},
{
"epoch": 34.2,
"learning_rate": 5.9543378995433795e-06,
"loss": 0.1238,
"step": 3830
},
{
"epoch": 34.29,
"learning_rate": 5.863013698630137e-06,
"loss": 0.1364,
"step": 3840
},
{
"epoch": 34.37,
"learning_rate": 5.771689497716896e-06,
"loss": 0.1301,
"step": 3850
},
{
"epoch": 34.46,
"learning_rate": 5.680365296803654e-06,
"loss": 0.1263,
"step": 3860
},
{
"epoch": 34.55,
"learning_rate": 5.589041095890412e-06,
"loss": 0.1435,
"step": 3870
},
{
"epoch": 34.64,
"learning_rate": 5.49771689497717e-06,
"loss": 0.1273,
"step": 3880
},
{
"epoch": 34.73,
"learning_rate": 5.406392694063928e-06,
"loss": 0.1332,
"step": 3890
},
{
"epoch": 34.82,
"learning_rate": 5.3150684931506856e-06,
"loss": 0.1392,
"step": 3900
},
{
"epoch": 34.91,
"learning_rate": 5.2237442922374435e-06,
"loss": 0.1275,
"step": 3910
},
{
"epoch": 35.0,
"learning_rate": 5.132420091324201e-06,
"loss": 0.1465,
"step": 3920
},
{
"epoch": 35.0,
"eval_loss": 0.6573231220245361,
"eval_runtime": 229.9173,
"eval_samples_per_second": 8.673,
"eval_steps_per_second": 0.87,
"eval_wer": 0.24950633411307227,
"step": 3920
},
{
"epoch": 35.09,
"learning_rate": 5.04109589041096e-06,
"loss": 0.126,
"step": 3930
},
{
"epoch": 35.18,
"learning_rate": 4.949771689497717e-06,
"loss": 0.1195,
"step": 3940
},
{
"epoch": 35.27,
"learning_rate": 4.858447488584475e-06,
"loss": 0.1362,
"step": 3950
},
{
"epoch": 35.36,
"learning_rate": 4.767123287671233e-06,
"loss": 0.1187,
"step": 3960
},
{
"epoch": 35.45,
"learning_rate": 4.675799086757991e-06,
"loss": 0.1269,
"step": 3970
},
{
"epoch": 35.54,
"learning_rate": 4.5844748858447495e-06,
"loss": 0.1364,
"step": 3980
},
{
"epoch": 35.62,
"learning_rate": 4.493150684931507e-06,
"loss": 0.1288,
"step": 3990
},
{
"epoch": 35.71,
"learning_rate": 4.401826484018265e-06,
"loss": 0.1255,
"step": 4000
},
{
"epoch": 35.8,
"learning_rate": 4.310502283105023e-06,
"loss": 0.1393,
"step": 4010
},
{
"epoch": 35.89,
"learning_rate": 4.219178082191781e-06,
"loss": 0.1183,
"step": 4020
},
{
"epoch": 35.98,
"learning_rate": 4.127853881278539e-06,
"loss": 0.13,
"step": 4030
},
{
"epoch": 36.0,
"eval_loss": 0.6593734622001648,
"eval_runtime": 230.854,
"eval_samples_per_second": 8.637,
"eval_steps_per_second": 0.866,
"eval_wer": 0.24935443691709452,
"step": 4032
},
{
"epoch": 36.07,
"learning_rate": 4.036529680365297e-06,
"loss": 0.1288,
"step": 4040
},
{
"epoch": 36.16,
"learning_rate": 3.945205479452055e-06,
"loss": 0.1167,
"step": 4050
},
{
"epoch": 36.25,
"learning_rate": 3.853881278538813e-06,
"loss": 0.142,
"step": 4060
},
{
"epoch": 36.34,
"learning_rate": 3.762557077625571e-06,
"loss": 0.1217,
"step": 4070
},
{
"epoch": 36.43,
"learning_rate": 3.671232876712329e-06,
"loss": 0.1249,
"step": 4080
},
{
"epoch": 36.52,
"learning_rate": 3.579908675799087e-06,
"loss": 0.1303,
"step": 4090
},
{
"epoch": 36.61,
"learning_rate": 3.488584474885845e-06,
"loss": 0.119,
"step": 4100
},
{
"epoch": 36.7,
"learning_rate": 3.397260273972603e-06,
"loss": 0.1217,
"step": 4110
},
{
"epoch": 36.78,
"learning_rate": 3.3059360730593608e-06,
"loss": 0.1417,
"step": 4120
},
{
"epoch": 36.87,
"learning_rate": 3.214611872146119e-06,
"loss": 0.1129,
"step": 4130
},
{
"epoch": 36.96,
"learning_rate": 3.123287671232877e-06,
"loss": 0.1244,
"step": 4140
},
{
"epoch": 37.0,
"eval_loss": 0.6650938987731934,
"eval_runtime": 228.9827,
"eval_samples_per_second": 8.708,
"eval_steps_per_second": 0.873,
"eval_wer": 0.24756205000455692,
"step": 4144
},
{
"epoch": 37.05,
"learning_rate": 3.031963470319635e-06,
"loss": 0.1373,
"step": 4150
},
{
"epoch": 37.14,
"learning_rate": 2.9406392694063927e-06,
"loss": 0.1155,
"step": 4160
},
{
"epoch": 37.23,
"learning_rate": 2.849315068493151e-06,
"loss": 0.1261,
"step": 4170
},
{
"epoch": 37.32,
"learning_rate": 2.757990867579909e-06,
"loss": 0.1288,
"step": 4180
},
{
"epoch": 37.41,
"learning_rate": 2.666666666666667e-06,
"loss": 0.1194,
"step": 4190
},
{
"epoch": 37.5,
"learning_rate": 2.5753424657534247e-06,
"loss": 0.1408,
"step": 4200
},
{
"epoch": 37.59,
"learning_rate": 2.484018264840183e-06,
"loss": 0.1131,
"step": 4210
},
{
"epoch": 37.68,
"learning_rate": 2.392694063926941e-06,
"loss": 0.1151,
"step": 4220
},
{
"epoch": 37.77,
"learning_rate": 2.301369863013699e-06,
"loss": 0.1381,
"step": 4230
},
{
"epoch": 37.86,
"learning_rate": 2.2100456621004567e-06,
"loss": 0.1177,
"step": 4240
},
{
"epoch": 37.95,
"learning_rate": 2.1187214611872146e-06,
"loss": 0.1228,
"step": 4250
},
{
"epoch": 38.0,
"eval_loss": 0.67536461353302,
"eval_runtime": 229.8367,
"eval_samples_per_second": 8.676,
"eval_steps_per_second": 0.87,
"eval_wer": 0.24965823130905004,
"step": 4256
},
{
"epoch": 38.04,
"learning_rate": 2.027397260273973e-06,
"loss": 0.1357,
"step": 4260
},
{
"epoch": 38.12,
"learning_rate": 1.9360730593607308e-06,
"loss": 0.1169,
"step": 4270
},
{
"epoch": 38.21,
"learning_rate": 1.8447488584474887e-06,
"loss": 0.1139,
"step": 4280
},
{
"epoch": 38.3,
"learning_rate": 1.7534246575342468e-06,
"loss": 0.1275,
"step": 4290
},
{
"epoch": 38.39,
"learning_rate": 1.6621004566210046e-06,
"loss": 0.11,
"step": 4300
},
{
"epoch": 38.48,
"learning_rate": 1.5707762557077627e-06,
"loss": 0.1182,
"step": 4310
},
{
"epoch": 38.57,
"learning_rate": 1.4794520547945206e-06,
"loss": 0.1286,
"step": 4320
},
{
"epoch": 38.66,
"learning_rate": 1.3881278538812787e-06,
"loss": 0.1161,
"step": 4330
},
{
"epoch": 38.75,
"learning_rate": 1.2968036529680366e-06,
"loss": 0.1348,
"step": 4340
},
{
"epoch": 38.84,
"learning_rate": 1.2054794520547947e-06,
"loss": 0.1163,
"step": 4350
},
{
"epoch": 38.93,
"learning_rate": 1.1141552511415526e-06,
"loss": 0.1181,
"step": 4360
},
{
"epoch": 39.0,
"eval_loss": 0.6683635115623474,
"eval_runtime": 230.5717,
"eval_samples_per_second": 8.648,
"eval_steps_per_second": 0.867,
"eval_wer": 0.2468025640246681,
"step": 4368
},
{
"epoch": 39.02,
"learning_rate": 1.0228310502283107e-06,
"loss": 0.1305,
"step": 4370
},
{
"epoch": 39.11,
"learning_rate": 9.315068493150686e-07,
"loss": 0.1119,
"step": 4380
},
{
"epoch": 39.2,
"learning_rate": 8.401826484018266e-07,
"loss": 0.1073,
"step": 4390
},
{
"epoch": 39.29,
"learning_rate": 7.488584474885845e-07,
"loss": 0.132,
"step": 4400
},
{
"epoch": 39.37,
"learning_rate": 6.575342465753425e-07,
"loss": 0.1151,
"step": 4410
},
{
"epoch": 39.46,
"learning_rate": 5.662100456621006e-07,
"loss": 0.112,
"step": 4420
},
{
"epoch": 39.55,
"learning_rate": 4.748858447488585e-07,
"loss": 0.1277,
"step": 4430
},
{
"epoch": 39.64,
"learning_rate": 3.835616438356165e-07,
"loss": 0.1098,
"step": 4440
},
{
"epoch": 39.73,
"learning_rate": 2.922374429223744e-07,
"loss": 0.1194,
"step": 4450
},
{
"epoch": 39.82,
"learning_rate": 2.0091324200913244e-07,
"loss": 0.1326,
"step": 4460
},
{
"epoch": 39.91,
"learning_rate": 1.0958904109589042e-07,
"loss": 0.1124,
"step": 4470
},
{
"epoch": 40.0,
"learning_rate": 1.82648401826484e-08,
"loss": 0.1338,
"step": 4480
},
{
"epoch": 40.0,
"eval_loss": 0.6712809205055237,
"eval_runtime": 230.7582,
"eval_samples_per_second": 8.641,
"eval_steps_per_second": 0.867,
"eval_wer": 0.2471367378558192,
"step": 4480
},
{
"epoch": 40.0,
"step": 4480,
"total_flos": 2.1744313988908705e+20,
"train_loss": 0.42816097148294957,
"train_runtime": 135691.4721,
"train_samples_per_second": 5.288,
"train_steps_per_second": 0.033
}
],
"max_steps": 4480,
"num_train_epochs": 40,
"total_flos": 2.1744313988908705e+20,
"trial_name": null,
"trial_params": null
}