xlsr_mid1_zh-ko / trainer_state.json
yesj1234's picture
Upload folder using huggingface_hub
9b1ce71
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 15.0,
"eval_steps": 330,
"global_step": 9870,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.23,
"learning_rate": 0.0003,
"loss": 33.5779,
"step": 150
},
{
"epoch": 0.46,
"learning_rate": 0.00029537037037037037,
"loss": 6.0153,
"step": 300
},
{
"epoch": 0.5,
"eval_cer": 0.9522337427844499,
"eval_loss": 5.343827724456787,
"eval_runtime": 251.7628,
"eval_samples_per_second": 10.573,
"eval_steps_per_second": 0.663,
"step": 330
},
{
"epoch": 0.68,
"learning_rate": 0.0002907407407407407,
"loss": 5.513,
"step": 450
},
{
"epoch": 0.91,
"learning_rate": 0.0002861111111111111,
"loss": 5.3776,
"step": 600
},
{
"epoch": 1.0,
"eval_cer": 0.9409077230323093,
"eval_loss": 5.153414726257324,
"eval_runtime": 250.9411,
"eval_samples_per_second": 10.608,
"eval_steps_per_second": 0.665,
"step": 660
},
{
"epoch": 1.14,
"learning_rate": 0.00028148148148148146,
"loss": 5.3202,
"step": 750
},
{
"epoch": 1.37,
"learning_rate": 0.0002768518518518518,
"loss": 5.2604,
"step": 900
},
{
"epoch": 1.5,
"eval_cer": 0.9108417913714333,
"eval_loss": 5.083229064941406,
"eval_runtime": 252.1577,
"eval_samples_per_second": 10.557,
"eval_steps_per_second": 0.662,
"step": 990
},
{
"epoch": 1.6,
"learning_rate": 0.0002722222222222222,
"loss": 5.2287,
"step": 1050
},
{
"epoch": 1.82,
"learning_rate": 0.00026759259259259255,
"loss": 5.2393,
"step": 1200
},
{
"epoch": 2.01,
"eval_cer": 0.9073126692747517,
"eval_loss": 5.065478324890137,
"eval_runtime": 251.9951,
"eval_samples_per_second": 10.564,
"eval_steps_per_second": 0.663,
"step": 1320
},
{
"epoch": 2.05,
"learning_rate": 0.00026296296296296294,
"loss": 5.2058,
"step": 1350
},
{
"epoch": 2.28,
"learning_rate": 0.00025833333333333334,
"loss": 5.1796,
"step": 1500
},
{
"epoch": 2.51,
"learning_rate": 0.0002537037037037037,
"loss": 5.1721,
"step": 1650
},
{
"epoch": 2.51,
"eval_cer": 0.9000355647963232,
"eval_loss": 5.046383380889893,
"eval_runtime": 251.3928,
"eval_samples_per_second": 10.589,
"eval_steps_per_second": 0.664,
"step": 1650
},
{
"epoch": 2.74,
"learning_rate": 0.0002490740740740741,
"loss": 5.1736,
"step": 1800
},
{
"epoch": 2.96,
"learning_rate": 0.00024444444444444443,
"loss": 5.1619,
"step": 1950
},
{
"epoch": 3.01,
"eval_cer": 0.9044674855689,
"eval_loss": 5.024378776550293,
"eval_runtime": 248.9825,
"eval_samples_per_second": 10.692,
"eval_steps_per_second": 0.671,
"step": 1980
},
{
"epoch": 3.19,
"learning_rate": 0.0002398148148148148,
"loss": 5.1484,
"step": 2100
},
{
"epoch": 3.42,
"learning_rate": 0.00023518518518518517,
"loss": 5.1308,
"step": 2250
},
{
"epoch": 3.51,
"eval_cer": 0.9020326648975461,
"eval_loss": 5.021634578704834,
"eval_runtime": 250.5536,
"eval_samples_per_second": 10.624,
"eval_steps_per_second": 0.667,
"step": 2310
},
{
"epoch": 3.65,
"learning_rate": 0.00023055555555555552,
"loss": 5.0855,
"step": 2400
},
{
"epoch": 3.88,
"learning_rate": 0.00022592592592592591,
"loss": 5.0971,
"step": 2550
},
{
"epoch": 4.01,
"eval_cer": 0.9040297649987689,
"eval_loss": 4.9340667724609375,
"eval_runtime": 248.8115,
"eval_samples_per_second": 10.699,
"eval_steps_per_second": 0.671,
"step": 2640
},
{
"epoch": 4.1,
"learning_rate": 0.00022129629629629626,
"loss": 5.0599,
"step": 2700
},
{
"epoch": 4.33,
"learning_rate": 0.00021666666666666666,
"loss": 5.0137,
"step": 2850
},
{
"epoch": 4.51,
"eval_cer": 0.9143709134681148,
"eval_loss": 4.879497051239014,
"eval_runtime": 248.574,
"eval_samples_per_second": 10.709,
"eval_steps_per_second": 0.672,
"step": 2970
},
{
"epoch": 4.56,
"learning_rate": 0.00021203703703703703,
"loss": 4.9809,
"step": 3000
},
{
"epoch": 4.79,
"learning_rate": 0.00020740740740740737,
"loss": 4.9939,
"step": 3150
},
{
"epoch": 5.02,
"learning_rate": 0.00020277777777777777,
"loss": 4.9341,
"step": 3300
},
{
"epoch": 5.02,
"eval_cer": 0.9039476923918693,
"eval_loss": 4.725036144256592,
"eval_runtime": 250.9019,
"eval_samples_per_second": 10.61,
"eval_steps_per_second": 0.666,
"step": 3300
},
{
"epoch": 5.24,
"learning_rate": 0.00019814814814814814,
"loss": 4.8114,
"step": 3450
},
{
"epoch": 5.47,
"learning_rate": 0.00019351851851851849,
"loss": 4.6832,
"step": 3600
},
{
"epoch": 5.52,
"eval_cer": 0.8367302273411211,
"eval_loss": 4.214047908782959,
"eval_runtime": 249.955,
"eval_samples_per_second": 10.65,
"eval_steps_per_second": 0.668,
"step": 3630
},
{
"epoch": 5.7,
"learning_rate": 0.00018888888888888888,
"loss": 4.4588,
"step": 3750
},
{
"epoch": 5.93,
"learning_rate": 0.00018425925925925923,
"loss": 4.1627,
"step": 3900
},
{
"epoch": 6.02,
"eval_cer": 0.7318140781878368,
"eval_loss": 3.4010486602783203,
"eval_runtime": 249.3206,
"eval_samples_per_second": 10.677,
"eval_steps_per_second": 0.67,
"step": 3960
},
{
"epoch": 6.16,
"learning_rate": 0.0001796296296296296,
"loss": 3.7597,
"step": 4050
},
{
"epoch": 6.38,
"learning_rate": 0.000175,
"loss": 3.5448,
"step": 4200
},
{
"epoch": 6.52,
"eval_cer": 0.6479905890077422,
"eval_loss": 2.882997989654541,
"eval_runtime": 248.069,
"eval_samples_per_second": 10.731,
"eval_steps_per_second": 0.673,
"step": 4290
},
{
"epoch": 6.61,
"learning_rate": 0.00017037037037037034,
"loss": 3.3922,
"step": 4350
},
{
"epoch": 6.84,
"learning_rate": 0.00016574074074074074,
"loss": 3.2576,
"step": 4500
},
{
"epoch": 7.02,
"eval_cer": 0.6265696386069542,
"eval_loss": 2.6253392696380615,
"eval_runtime": 244.3615,
"eval_samples_per_second": 10.894,
"eval_steps_per_second": 0.683,
"step": 4620
},
{
"epoch": 7.07,
"learning_rate": 0.0001611111111111111,
"loss": 3.0846,
"step": 4650
},
{
"epoch": 7.29,
"learning_rate": 0.00015648148148148146,
"loss": 2.9344,
"step": 4800
},
{
"epoch": 7.52,
"learning_rate": 0.00015185185185185185,
"loss": 2.8561,
"step": 4950
},
{
"epoch": 7.52,
"eval_cer": 0.5866002790468634,
"eval_loss": 2.430042266845703,
"eval_runtime": 245.691,
"eval_samples_per_second": 10.835,
"eval_steps_per_second": 0.68,
"step": 4950
},
{
"epoch": 7.75,
"learning_rate": 0.00014722222222222223,
"loss": 2.8167,
"step": 5100
},
{
"epoch": 7.98,
"learning_rate": 0.00014259259259259257,
"loss": 2.7894,
"step": 5250
},
{
"epoch": 8.02,
"eval_cer": 0.575028041474024,
"eval_loss": 2.2997841835021973,
"eval_runtime": 245.9682,
"eval_samples_per_second": 10.823,
"eval_steps_per_second": 0.679,
"step": 5280
},
{
"epoch": 8.21,
"learning_rate": 0.00013796296296296294,
"loss": 2.6472,
"step": 5400
},
{
"epoch": 8.43,
"learning_rate": 0.0001333333333333333,
"loss": 2.6018,
"step": 5550
},
{
"epoch": 8.53,
"eval_cer": 0.554892895247996,
"eval_loss": 2.187838554382324,
"eval_runtime": 244.7687,
"eval_samples_per_second": 10.876,
"eval_steps_per_second": 0.682,
"step": 5610
},
{
"epoch": 8.66,
"learning_rate": 0.0001287037037037037,
"loss": 2.5751,
"step": 5700
},
{
"epoch": 8.89,
"learning_rate": 0.00012407407407407406,
"loss": 2.546,
"step": 5850
},
{
"epoch": 9.03,
"eval_cer": 0.5350860394495663,
"eval_loss": 2.1450469493865967,
"eval_runtime": 244.8384,
"eval_samples_per_second": 10.872,
"eval_steps_per_second": 0.682,
"step": 5940
},
{
"epoch": 9.12,
"learning_rate": 0.00011944444444444443,
"loss": 2.4555,
"step": 6000
},
{
"epoch": 9.35,
"learning_rate": 0.0001148148148148148,
"loss": 2.3787,
"step": 6150
},
{
"epoch": 9.53,
"eval_cer": 0.5339917380242388,
"eval_loss": 2.102729558944702,
"eval_runtime": 244.3347,
"eval_samples_per_second": 10.895,
"eval_steps_per_second": 0.683,
"step": 6270
},
{
"epoch": 9.57,
"learning_rate": 0.00011018518518518518,
"loss": 2.3783,
"step": 6300
},
{
"epoch": 9.8,
"learning_rate": 0.00010555555555555555,
"loss": 2.3806,
"step": 6450
},
{
"epoch": 10.03,
"learning_rate": 0.00010092592592592591,
"loss": 2.335,
"step": 6600
},
{
"epoch": 10.03,
"eval_cer": 0.5165923453615299,
"eval_loss": 2.0303709506988525,
"eval_runtime": 244.6547,
"eval_samples_per_second": 10.881,
"eval_steps_per_second": 0.683,
"step": 6600
},
{
"epoch": 10.26,
"learning_rate": 9.629629629629628e-05,
"loss": 2.2373,
"step": 6750
},
{
"epoch": 10.49,
"learning_rate": 9.166666666666667e-05,
"loss": 2.2138,
"step": 6900
},
{
"epoch": 10.53,
"eval_cer": 0.5164555576833639,
"eval_loss": 2.0100014209747314,
"eval_runtime": 244.9943,
"eval_samples_per_second": 10.866,
"eval_steps_per_second": 0.682,
"step": 6930
},
{
"epoch": 10.71,
"learning_rate": 8.703703703703704e-05,
"loss": 2.2174,
"step": 7050
},
{
"epoch": 10.94,
"learning_rate": 8.24074074074074e-05,
"loss": 2.2381,
"step": 7200
},
{
"epoch": 11.03,
"eval_cer": 0.5031050802943671,
"eval_loss": 1.9650695323944092,
"eval_runtime": 244.4808,
"eval_samples_per_second": 10.888,
"eval_steps_per_second": 0.683,
"step": 7260
},
{
"epoch": 11.17,
"learning_rate": 7.777777777777777e-05,
"loss": 2.155,
"step": 7350
},
{
"epoch": 11.4,
"learning_rate": 7.314814814814814e-05,
"loss": 2.1108,
"step": 7500
},
{
"epoch": 11.53,
"eval_cer": 0.5034880857932318,
"eval_loss": 1.9666314125061035,
"eval_runtime": 244.872,
"eval_samples_per_second": 10.871,
"eval_steps_per_second": 0.682,
"step": 7590
},
{
"epoch": 11.63,
"learning_rate": 6.851851851851851e-05,
"loss": 2.1221,
"step": 7650
},
{
"epoch": 11.85,
"learning_rate": 6.388888888888888e-05,
"loss": 2.0916,
"step": 7800
},
{
"epoch": 12.04,
"eval_cer": 0.49982217601838425,
"eval_loss": 1.913594365119934,
"eval_runtime": 244.907,
"eval_samples_per_second": 10.869,
"eval_steps_per_second": 0.682,
"step": 7920
},
{
"epoch": 12.08,
"learning_rate": 5.925925925925925e-05,
"loss": 2.0517,
"step": 7950
},
{
"epoch": 12.31,
"learning_rate": 5.4629629629629624e-05,
"loss": 2.0279,
"step": 8100
},
{
"epoch": 12.54,
"learning_rate": 4.9999999999999996e-05,
"loss": 2.0229,
"step": 8250
},
{
"epoch": 12.54,
"eval_cer": 0.5027767898667688,
"eval_loss": 1.898772120475769,
"eval_runtime": 244.3507,
"eval_samples_per_second": 10.894,
"eval_steps_per_second": 0.683,
"step": 8250
},
{
"epoch": 12.77,
"learning_rate": 4.537037037037037e-05,
"loss": 1.9891,
"step": 8400
},
{
"epoch": 12.99,
"learning_rate": 4.074074074074074e-05,
"loss": 2.0056,
"step": 8550
},
{
"epoch": 13.04,
"eval_cer": 0.4995759581976856,
"eval_loss": 1.8768519163131714,
"eval_runtime": 244.2609,
"eval_samples_per_second": 10.898,
"eval_steps_per_second": 0.684,
"step": 8580
},
{
"epoch": 13.22,
"learning_rate": 3.61111111111111e-05,
"loss": 1.9451,
"step": 8700
},
{
"epoch": 13.45,
"learning_rate": 3.1481481481481474e-05,
"loss": 1.9245,
"step": 8850
},
{
"epoch": 13.54,
"eval_cer": 0.495472327852707,
"eval_loss": 1.8715523481369019,
"eval_runtime": 244.453,
"eval_samples_per_second": 10.89,
"eval_steps_per_second": 0.683,
"step": 8910
},
{
"epoch": 13.68,
"learning_rate": 2.685185185185185e-05,
"loss": 1.916,
"step": 9000
},
{
"epoch": 13.91,
"learning_rate": 2.222222222222222e-05,
"loss": 1.9378,
"step": 9150
},
{
"epoch": 14.04,
"eval_cer": 0.49459688671244495,
"eval_loss": 1.8560909032821655,
"eval_runtime": 244.76,
"eval_samples_per_second": 10.876,
"eval_steps_per_second": 0.682,
"step": 9240
},
{
"epoch": 14.13,
"learning_rate": 1.759259259259259e-05,
"loss": 1.9169,
"step": 9300
},
{
"epoch": 14.36,
"learning_rate": 1.296296296296296e-05,
"loss": 1.9003,
"step": 9450
},
{
"epoch": 14.54,
"eval_cer": 0.49363937296528326,
"eval_loss": 1.848546028137207,
"eval_runtime": 244.6555,
"eval_samples_per_second": 10.881,
"eval_steps_per_second": 0.683,
"step": 9570
},
{
"epoch": 14.59,
"learning_rate": 8.333333333333332e-06,
"loss": 1.8611,
"step": 9600
},
{
"epoch": 14.82,
"learning_rate": 3.7037037037037033e-06,
"loss": 1.8698,
"step": 9750
},
{
"epoch": 15.0,
"step": 9870,
"total_flos": 3.0352534937393955e+19,
"train_loss": 3.8899900426739014,
"train_runtime": 13723.3522,
"train_samples_per_second": 23.006,
"train_steps_per_second": 0.719
}
],
"logging_steps": 150,
"max_steps": 9870,
"num_train_epochs": 15,
"save_steps": 330,
"total_flos": 3.0352534937393955e+19,
"trial_name": null,
"trial_params": null
}