xlsr_mid_en-k_1 / trainer_state.json
yesj1234's picture
Upload folder using huggingface_hub
bf05986
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 9.989094874591057,
"eval_steps": 100,
"global_step": 4580,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 0.0003,
"loss": 9.4785,
"step": 10
},
{
"epoch": 0.04,
"learning_rate": 0.00029934354485776804,
"loss": 4.3277,
"step": 20
},
{
"epoch": 0.07,
"learning_rate": 0.00029868708971553606,
"loss": 3.2452,
"step": 30
},
{
"epoch": 0.09,
"learning_rate": 0.00029803063457330413,
"loss": 3.0373,
"step": 40
},
{
"epoch": 0.11,
"learning_rate": 0.0002973741794310722,
"loss": 2.9545,
"step": 50
},
{
"epoch": 0.13,
"learning_rate": 0.0002967177242888403,
"loss": 2.9837,
"step": 60
},
{
"epoch": 0.15,
"learning_rate": 0.0002960612691466083,
"loss": 2.9667,
"step": 70
},
{
"epoch": 0.17,
"learning_rate": 0.00029540481400437636,
"loss": 3.0758,
"step": 80
},
{
"epoch": 0.2,
"learning_rate": 0.00029474835886214443,
"loss": 2.9736,
"step": 90
},
{
"epoch": 0.22,
"learning_rate": 0.00029409190371991245,
"loss": 2.9534,
"step": 100
},
{
"epoch": 0.22,
"eval_cer": 1.0,
"eval_loss": 2.95331072807312,
"eval_runtime": 78.5573,
"eval_samples_per_second": 25.141,
"eval_steps_per_second": 1.578,
"eval_wer": 1.0,
"step": 100
},
{
"epoch": 0.24,
"learning_rate": 0.0002934354485776805,
"loss": 2.9745,
"step": 110
},
{
"epoch": 0.26,
"learning_rate": 0.00029277899343544854,
"loss": 2.9693,
"step": 120
},
{
"epoch": 0.28,
"learning_rate": 0.0002921225382932166,
"loss": 3.0029,
"step": 130
},
{
"epoch": 0.31,
"learning_rate": 0.0002914660831509847,
"loss": 2.9767,
"step": 140
},
{
"epoch": 0.33,
"learning_rate": 0.0002908096280087527,
"loss": 2.9339,
"step": 150
},
{
"epoch": 0.35,
"learning_rate": 0.00029015317286652077,
"loss": 2.9611,
"step": 160
},
{
"epoch": 0.37,
"learning_rate": 0.0002894967177242888,
"loss": 2.9468,
"step": 170
},
{
"epoch": 0.39,
"learning_rate": 0.00028884026258205685,
"loss": 2.9734,
"step": 180
},
{
"epoch": 0.41,
"learning_rate": 0.0002881838074398249,
"loss": 2.9493,
"step": 190
},
{
"epoch": 0.44,
"learning_rate": 0.00028752735229759294,
"loss": 2.933,
"step": 200
},
{
"epoch": 0.44,
"eval_cer": 1.0,
"eval_loss": 2.923107147216797,
"eval_runtime": 76.0631,
"eval_samples_per_second": 25.965,
"eval_steps_per_second": 1.63,
"eval_wer": 1.0,
"step": 200
},
{
"epoch": 0.46,
"learning_rate": 0.000286870897155361,
"loss": 2.9433,
"step": 210
},
{
"epoch": 0.48,
"learning_rate": 0.0002862144420131291,
"loss": 2.9201,
"step": 220
},
{
"epoch": 0.5,
"learning_rate": 0.0002855579868708971,
"loss": 2.9529,
"step": 230
},
{
"epoch": 0.52,
"learning_rate": 0.00028490153172866517,
"loss": 2.9164,
"step": 240
},
{
"epoch": 0.55,
"learning_rate": 0.00028424507658643324,
"loss": 2.9435,
"step": 250
},
{
"epoch": 0.57,
"learning_rate": 0.0002835886214442013,
"loss": 2.9255,
"step": 260
},
{
"epoch": 0.59,
"learning_rate": 0.00028293216630196933,
"loss": 2.9487,
"step": 270
},
{
"epoch": 0.61,
"learning_rate": 0.0002822757111597374,
"loss": 2.9766,
"step": 280
},
{
"epoch": 0.63,
"learning_rate": 0.00028161925601750547,
"loss": 2.9314,
"step": 290
},
{
"epoch": 0.65,
"learning_rate": 0.0002809628008752735,
"loss": 2.904,
"step": 300
},
{
"epoch": 0.65,
"eval_cer": 1.0,
"eval_loss": 2.885084629058838,
"eval_runtime": 75.9215,
"eval_samples_per_second": 26.014,
"eval_steps_per_second": 1.633,
"eval_wer": 1.0,
"step": 300
},
{
"epoch": 0.68,
"learning_rate": 0.00028030634573304156,
"loss": 2.9118,
"step": 310
},
{
"epoch": 0.7,
"learning_rate": 0.00027964989059080963,
"loss": 2.8872,
"step": 320
},
{
"epoch": 0.72,
"learning_rate": 0.00027899343544857764,
"loss": 2.8687,
"step": 330
},
{
"epoch": 0.74,
"learning_rate": 0.0002783369803063457,
"loss": 2.8685,
"step": 340
},
{
"epoch": 0.76,
"learning_rate": 0.0002776805251641138,
"loss": 2.8346,
"step": 350
},
{
"epoch": 0.79,
"learning_rate": 0.0002770240700218818,
"loss": 2.8082,
"step": 360
},
{
"epoch": 0.81,
"learning_rate": 0.0002763676148796499,
"loss": 2.7622,
"step": 370
},
{
"epoch": 0.83,
"learning_rate": 0.00027571115973741794,
"loss": 2.6548,
"step": 380
},
{
"epoch": 0.85,
"learning_rate": 0.00027505470459518596,
"loss": 2.525,
"step": 390
},
{
"epoch": 0.87,
"learning_rate": 0.00027439824945295403,
"loss": 2.3607,
"step": 400
},
{
"epoch": 0.87,
"eval_cer": 0.6798586572438162,
"eval_loss": 2.154635190963745,
"eval_runtime": 76.1981,
"eval_samples_per_second": 25.919,
"eval_steps_per_second": 1.627,
"eval_wer": 0.9976168184526343,
"step": 400
},
{
"epoch": 0.89,
"learning_rate": 0.00027374179431072205,
"loss": 2.1658,
"step": 410
},
{
"epoch": 0.92,
"learning_rate": 0.0002730853391684901,
"loss": 1.9075,
"step": 420
},
{
"epoch": 0.94,
"learning_rate": 0.0002724288840262582,
"loss": 1.8073,
"step": 430
},
{
"epoch": 0.96,
"learning_rate": 0.0002717724288840262,
"loss": 1.655,
"step": 440
},
{
"epoch": 0.98,
"learning_rate": 0.0002711159737417943,
"loss": 1.5691,
"step": 450
},
{
"epoch": 1.0,
"learning_rate": 0.00027045951859956235,
"loss": 1.406,
"step": 460
},
{
"epoch": 1.03,
"learning_rate": 0.00026980306345733037,
"loss": 1.3484,
"step": 470
},
{
"epoch": 1.05,
"learning_rate": 0.00026914660831509844,
"loss": 1.3013,
"step": 480
},
{
"epoch": 1.07,
"learning_rate": 0.0002684901531728665,
"loss": 1.2271,
"step": 490
},
{
"epoch": 1.09,
"learning_rate": 0.0002678336980306345,
"loss": 1.1725,
"step": 500
},
{
"epoch": 1.09,
"eval_cer": 0.26647526501766783,
"eval_loss": 0.9899328947067261,
"eval_runtime": 76.6334,
"eval_samples_per_second": 25.772,
"eval_steps_per_second": 1.618,
"eval_wer": 0.6191165205549408,
"step": 500
},
{
"epoch": 1.11,
"learning_rate": 0.0002671772428884026,
"loss": 1.1775,
"step": 510
},
{
"epoch": 1.13,
"learning_rate": 0.00026652078774617067,
"loss": 1.1966,
"step": 520
},
{
"epoch": 1.16,
"learning_rate": 0.00026586433260393874,
"loss": 1.032,
"step": 530
},
{
"epoch": 1.18,
"learning_rate": 0.00026520787746170675,
"loss": 1.059,
"step": 540
},
{
"epoch": 1.2,
"learning_rate": 0.0002645514223194748,
"loss": 1.089,
"step": 550
},
{
"epoch": 1.22,
"learning_rate": 0.0002638949671772429,
"loss": 1.0714,
"step": 560
},
{
"epoch": 1.24,
"learning_rate": 0.0002632385120350109,
"loss": 1.0671,
"step": 570
},
{
"epoch": 1.26,
"learning_rate": 0.000262582056892779,
"loss": 1.0015,
"step": 580
},
{
"epoch": 1.29,
"learning_rate": 0.00026192560175054705,
"loss": 1.0036,
"step": 590
},
{
"epoch": 1.31,
"learning_rate": 0.00026126914660831507,
"loss": 0.9865,
"step": 600
},
{
"epoch": 1.31,
"eval_cer": 0.2125530035335689,
"eval_loss": 0.806027352809906,
"eval_runtime": 76.3842,
"eval_samples_per_second": 25.856,
"eval_steps_per_second": 1.623,
"eval_wer": 0.5063835220018725,
"step": 600
},
{
"epoch": 1.33,
"learning_rate": 0.00026061269146608314,
"loss": 0.9341,
"step": 610
},
{
"epoch": 1.35,
"learning_rate": 0.0002599562363238512,
"loss": 0.9844,
"step": 620
},
{
"epoch": 1.37,
"learning_rate": 0.00025929978118161923,
"loss": 0.9056,
"step": 630
},
{
"epoch": 1.4,
"learning_rate": 0.0002586433260393873,
"loss": 0.946,
"step": 640
},
{
"epoch": 1.42,
"learning_rate": 0.0002579868708971553,
"loss": 0.9063,
"step": 650
},
{
"epoch": 1.44,
"learning_rate": 0.0002573304157549234,
"loss": 0.8988,
"step": 660
},
{
"epoch": 1.46,
"learning_rate": 0.00025667396061269146,
"loss": 0.8914,
"step": 670
},
{
"epoch": 1.48,
"learning_rate": 0.0002560175054704595,
"loss": 0.8457,
"step": 680
},
{
"epoch": 1.5,
"learning_rate": 0.00025536105032822754,
"loss": 0.9111,
"step": 690
},
{
"epoch": 1.53,
"learning_rate": 0.00025470459518599556,
"loss": 0.8959,
"step": 700
},
{
"epoch": 1.53,
"eval_cer": 0.19803886925795053,
"eval_loss": 0.713085949420929,
"eval_runtime": 77.034,
"eval_samples_per_second": 25.638,
"eval_steps_per_second": 1.61,
"eval_wer": 0.4606775044684654,
"step": 700
},
{
"epoch": 1.55,
"learning_rate": 0.00025404814004376363,
"loss": 0.8875,
"step": 710
},
{
"epoch": 1.57,
"learning_rate": 0.0002533916849015317,
"loss": 0.8102,
"step": 720
},
{
"epoch": 1.59,
"learning_rate": 0.0002527352297592998,
"loss": 0.8105,
"step": 730
},
{
"epoch": 1.61,
"learning_rate": 0.0002520787746170678,
"loss": 0.8125,
"step": 740
},
{
"epoch": 1.64,
"learning_rate": 0.00025142231947483586,
"loss": 0.7744,
"step": 750
},
{
"epoch": 1.66,
"learning_rate": 0.00025076586433260393,
"loss": 0.9137,
"step": 760
},
{
"epoch": 1.68,
"learning_rate": 0.00025010940919037195,
"loss": 0.833,
"step": 770
},
{
"epoch": 1.7,
"learning_rate": 0.00024945295404814,
"loss": 0.7844,
"step": 780
},
{
"epoch": 1.72,
"learning_rate": 0.0002487964989059081,
"loss": 0.8361,
"step": 790
},
{
"epoch": 1.74,
"learning_rate": 0.0002481400437636761,
"loss": 0.7743,
"step": 800
},
{
"epoch": 1.74,
"eval_cer": 0.17991166077738516,
"eval_loss": 0.666309654712677,
"eval_runtime": 77.8448,
"eval_samples_per_second": 25.371,
"eval_steps_per_second": 1.593,
"eval_wer": 0.4369733594348455,
"step": 800
},
{
"epoch": 1.77,
"learning_rate": 0.0002474835886214442,
"loss": 0.809,
"step": 810
},
{
"epoch": 1.79,
"learning_rate": 0.00024682713347921225,
"loss": 0.7796,
"step": 820
},
{
"epoch": 1.81,
"learning_rate": 0.0002461706783369803,
"loss": 0.7802,
"step": 830
},
{
"epoch": 1.83,
"learning_rate": 0.00024551422319474834,
"loss": 0.7669,
"step": 840
},
{
"epoch": 1.85,
"learning_rate": 0.0002448577680525164,
"loss": 0.8044,
"step": 850
},
{
"epoch": 1.88,
"learning_rate": 0.0002442013129102845,
"loss": 0.8241,
"step": 860
},
{
"epoch": 1.9,
"learning_rate": 0.0002435448577680525,
"loss": 0.7402,
"step": 870
},
{
"epoch": 1.92,
"learning_rate": 0.00024288840262582054,
"loss": 0.7299,
"step": 880
},
{
"epoch": 1.94,
"learning_rate": 0.0002422319474835886,
"loss": 0.7455,
"step": 890
},
{
"epoch": 1.96,
"learning_rate": 0.00024157549234135668,
"loss": 0.7805,
"step": 900
},
{
"epoch": 1.96,
"eval_cer": 0.16825088339222616,
"eval_loss": 0.6159283518791199,
"eval_runtime": 76.6296,
"eval_samples_per_second": 25.773,
"eval_steps_per_second": 1.618,
"eval_wer": 0.3996510341305643,
"step": 900
},
{
"epoch": 1.98,
"learning_rate": 0.0002409190371991247,
"loss": 0.8158,
"step": 910
},
{
"epoch": 2.01,
"learning_rate": 0.00024026258205689277,
"loss": 0.7092,
"step": 920
},
{
"epoch": 2.03,
"learning_rate": 0.0002396061269146608,
"loss": 0.6462,
"step": 930
},
{
"epoch": 2.05,
"learning_rate": 0.00023894967177242885,
"loss": 0.6201,
"step": 940
},
{
"epoch": 2.07,
"learning_rate": 0.00023829321663019693,
"loss": 0.6306,
"step": 950
},
{
"epoch": 2.09,
"learning_rate": 0.00023763676148796497,
"loss": 0.6428,
"step": 960
},
{
"epoch": 2.12,
"learning_rate": 0.000236980306345733,
"loss": 0.6456,
"step": 970
},
{
"epoch": 2.14,
"learning_rate": 0.00023632385120350106,
"loss": 0.6512,
"step": 980
},
{
"epoch": 2.16,
"learning_rate": 0.00023566739606126913,
"loss": 0.6547,
"step": 990
},
{
"epoch": 2.18,
"learning_rate": 0.0002350109409190372,
"loss": 0.6562,
"step": 1000
},
{
"epoch": 2.18,
"eval_cer": 0.1536660777385159,
"eval_loss": 0.618610680103302,
"eval_runtime": 76.2421,
"eval_samples_per_second": 25.904,
"eval_steps_per_second": 1.626,
"eval_wer": 0.3705421738020257,
"step": 1000
},
{
"epoch": 2.2,
"learning_rate": 0.00023435448577680521,
"loss": 0.678,
"step": 1010
},
{
"epoch": 2.22,
"learning_rate": 0.00023369803063457329,
"loss": 0.604,
"step": 1020
},
{
"epoch": 2.25,
"learning_rate": 0.00023304157549234136,
"loss": 0.6403,
"step": 1030
},
{
"epoch": 2.27,
"learning_rate": 0.00023238512035010937,
"loss": 0.5697,
"step": 1040
},
{
"epoch": 2.29,
"learning_rate": 0.00023172866520787744,
"loss": 0.6814,
"step": 1050
},
{
"epoch": 2.31,
"learning_rate": 0.00023107221006564551,
"loss": 0.6109,
"step": 1060
},
{
"epoch": 2.33,
"learning_rate": 0.00023041575492341353,
"loss": 0.6573,
"step": 1070
},
{
"epoch": 2.36,
"learning_rate": 0.0002297592997811816,
"loss": 0.682,
"step": 1080
},
{
"epoch": 2.38,
"learning_rate": 0.00022910284463894965,
"loss": 0.6031,
"step": 1090
},
{
"epoch": 2.4,
"learning_rate": 0.00022844638949671772,
"loss": 0.6223,
"step": 1100
},
{
"epoch": 2.4,
"eval_cer": 0.14960247349823322,
"eval_loss": 0.5698063969612122,
"eval_runtime": 76.2271,
"eval_samples_per_second": 25.909,
"eval_steps_per_second": 1.627,
"eval_wer": 0.3551791641841859,
"step": 1100
},
{
"epoch": 2.42,
"learning_rate": 0.00022778993435448576,
"loss": 0.5628,
"step": 1110
},
{
"epoch": 2.44,
"learning_rate": 0.0002271334792122538,
"loss": 0.6137,
"step": 1120
},
{
"epoch": 2.46,
"learning_rate": 0.00022647702407002188,
"loss": 0.601,
"step": 1130
},
{
"epoch": 2.49,
"learning_rate": 0.0002258205689277899,
"loss": 0.6086,
"step": 1140
},
{
"epoch": 2.51,
"learning_rate": 0.00022516411378555796,
"loss": 0.6377,
"step": 1150
},
{
"epoch": 2.53,
"learning_rate": 0.00022450765864332603,
"loss": 0.6022,
"step": 1160
},
{
"epoch": 2.55,
"learning_rate": 0.00022385120350109405,
"loss": 0.6024,
"step": 1170
},
{
"epoch": 2.57,
"learning_rate": 0.00022319474835886212,
"loss": 0.6133,
"step": 1180
},
{
"epoch": 2.6,
"learning_rate": 0.0002225382932166302,
"loss": 0.5927,
"step": 1190
},
{
"epoch": 2.62,
"learning_rate": 0.00022188183807439824,
"loss": 0.5627,
"step": 1200
},
{
"epoch": 2.62,
"eval_cer": 0.14463780918727914,
"eval_loss": 0.5555065870285034,
"eval_runtime": 77.0134,
"eval_samples_per_second": 25.645,
"eval_steps_per_second": 1.61,
"eval_wer": 0.33722018895225125,
"step": 1200
},
{
"epoch": 2.64,
"learning_rate": 0.00022122538293216628,
"loss": 0.6592,
"step": 1210
},
{
"epoch": 2.66,
"learning_rate": 0.00022056892778993432,
"loss": 0.6241,
"step": 1220
},
{
"epoch": 2.68,
"learning_rate": 0.0002199124726477024,
"loss": 0.6223,
"step": 1230
},
{
"epoch": 2.7,
"learning_rate": 0.00021925601750547044,
"loss": 0.6151,
"step": 1240
},
{
"epoch": 2.73,
"learning_rate": 0.00021859956236323848,
"loss": 0.6498,
"step": 1250
},
{
"epoch": 2.75,
"learning_rate": 0.00021794310722100655,
"loss": 0.6824,
"step": 1260
},
{
"epoch": 2.77,
"learning_rate": 0.00021728665207877462,
"loss": 0.6187,
"step": 1270
},
{
"epoch": 2.79,
"learning_rate": 0.00021663019693654264,
"loss": 0.6401,
"step": 1280
},
{
"epoch": 2.81,
"learning_rate": 0.0002159737417943107,
"loss": 0.5839,
"step": 1290
},
{
"epoch": 2.84,
"learning_rate": 0.00021531728665207878,
"loss": 0.5476,
"step": 1300
},
{
"epoch": 2.84,
"eval_cer": 0.1416166077738516,
"eval_loss": 0.5435045957565308,
"eval_runtime": 76.2684,
"eval_samples_per_second": 25.895,
"eval_steps_per_second": 1.626,
"eval_wer": 0.3306664396969955,
"step": 1300
},
{
"epoch": 2.86,
"learning_rate": 0.0002146608315098468,
"loss": 0.561,
"step": 1310
},
{
"epoch": 2.88,
"learning_rate": 0.00021400437636761487,
"loss": 0.6238,
"step": 1320
},
{
"epoch": 2.9,
"learning_rate": 0.0002133479212253829,
"loss": 0.6401,
"step": 1330
},
{
"epoch": 2.92,
"learning_rate": 0.00021269146608315096,
"loss": 0.5713,
"step": 1340
},
{
"epoch": 2.94,
"learning_rate": 0.00021203501094091903,
"loss": 0.6343,
"step": 1350
},
{
"epoch": 2.97,
"learning_rate": 0.00021137855579868707,
"loss": 0.5926,
"step": 1360
},
{
"epoch": 2.99,
"learning_rate": 0.00021072210065645514,
"loss": 0.6025,
"step": 1370
},
{
"epoch": 3.01,
"learning_rate": 0.00021006564551422316,
"loss": 0.5069,
"step": 1380
},
{
"epoch": 3.03,
"learning_rate": 0.00020940919037199123,
"loss": 0.4978,
"step": 1390
},
{
"epoch": 3.05,
"learning_rate": 0.0002087527352297593,
"loss": 0.5002,
"step": 1400
},
{
"epoch": 3.05,
"eval_cer": 0.14363957597173144,
"eval_loss": 0.530446469783783,
"eval_runtime": 76.7632,
"eval_samples_per_second": 25.728,
"eval_steps_per_second": 1.615,
"eval_wer": 0.3393054728061963,
"step": 1400
},
{
"epoch": 3.08,
"learning_rate": 0.00020809628008752732,
"loss": 0.4814,
"step": 1410
},
{
"epoch": 3.1,
"learning_rate": 0.0002074398249452954,
"loss": 0.482,
"step": 1420
},
{
"epoch": 3.12,
"learning_rate": 0.00020678336980306346,
"loss": 0.5199,
"step": 1430
},
{
"epoch": 3.14,
"learning_rate": 0.00020612691466083147,
"loss": 0.4718,
"step": 1440
},
{
"epoch": 3.16,
"learning_rate": 0.00020547045951859955,
"loss": 0.5226,
"step": 1450
},
{
"epoch": 3.18,
"learning_rate": 0.0002048140043763676,
"loss": 0.5668,
"step": 1460
},
{
"epoch": 3.21,
"learning_rate": 0.00020415754923413566,
"loss": 0.5,
"step": 1470
},
{
"epoch": 3.23,
"learning_rate": 0.0002035010940919037,
"loss": 0.5437,
"step": 1480
},
{
"epoch": 3.25,
"learning_rate": 0.00020284463894967175,
"loss": 0.504,
"step": 1490
},
{
"epoch": 3.27,
"learning_rate": 0.00020218818380743982,
"loss": 0.5174,
"step": 1500
},
{
"epoch": 3.27,
"eval_cer": 0.14854240282685513,
"eval_loss": 0.5377296805381775,
"eval_runtime": 76.3889,
"eval_samples_per_second": 25.855,
"eval_steps_per_second": 1.623,
"eval_wer": 0.33568814367180183,
"step": 1500
},
{
"epoch": 3.29,
"learning_rate": 0.00020153172866520784,
"loss": 0.4864,
"step": 1510
},
{
"epoch": 3.32,
"learning_rate": 0.0002008752735229759,
"loss": 0.5128,
"step": 1520
},
{
"epoch": 3.34,
"learning_rate": 0.00020021881838074398,
"loss": 0.5356,
"step": 1530
},
{
"epoch": 3.36,
"learning_rate": 0.000199562363238512,
"loss": 0.512,
"step": 1540
},
{
"epoch": 3.38,
"learning_rate": 0.00019890590809628006,
"loss": 0.4907,
"step": 1550
},
{
"epoch": 3.4,
"learning_rate": 0.00019824945295404814,
"loss": 0.4675,
"step": 1560
},
{
"epoch": 3.42,
"learning_rate": 0.00019759299781181618,
"loss": 0.5075,
"step": 1570
},
{
"epoch": 3.45,
"learning_rate": 0.00019693654266958422,
"loss": 0.4871,
"step": 1580
},
{
"epoch": 3.47,
"learning_rate": 0.0001962800875273523,
"loss": 0.5308,
"step": 1590
},
{
"epoch": 3.49,
"learning_rate": 0.00019562363238512034,
"loss": 0.4745,
"step": 1600
},
{
"epoch": 3.49,
"eval_cer": 0.13403710247349823,
"eval_loss": 0.528931736946106,
"eval_runtime": 76.9163,
"eval_samples_per_second": 25.677,
"eval_steps_per_second": 1.612,
"eval_wer": 0.31321814622521066,
"step": 1600
},
{
"epoch": 3.51,
"learning_rate": 0.00019496717724288838,
"loss": 0.5471,
"step": 1610
},
{
"epoch": 3.53,
"learning_rate": 0.00019431072210065642,
"loss": 0.4837,
"step": 1620
},
{
"epoch": 3.56,
"learning_rate": 0.0001936542669584245,
"loss": 0.5438,
"step": 1630
},
{
"epoch": 3.58,
"learning_rate": 0.00019299781181619254,
"loss": 0.4934,
"step": 1640
},
{
"epoch": 3.6,
"learning_rate": 0.00019234135667396058,
"loss": 0.5325,
"step": 1650
},
{
"epoch": 3.62,
"learning_rate": 0.00019168490153172865,
"loss": 0.4638,
"step": 1660
},
{
"epoch": 3.64,
"learning_rate": 0.00019102844638949672,
"loss": 0.5239,
"step": 1670
},
{
"epoch": 3.66,
"learning_rate": 0.00019037199124726474,
"loss": 0.5334,
"step": 1680
},
{
"epoch": 3.69,
"learning_rate": 0.0001897155361050328,
"loss": 0.5197,
"step": 1690
},
{
"epoch": 3.71,
"learning_rate": 0.00018905908096280086,
"loss": 0.5239,
"step": 1700
},
{
"epoch": 3.71,
"eval_cer": 0.13953180212014135,
"eval_loss": 0.5112127661705017,
"eval_runtime": 76.4879,
"eval_samples_per_second": 25.821,
"eval_steps_per_second": 1.621,
"eval_wer": 0.32394246318835646,
"step": 1700
},
{
"epoch": 3.73,
"learning_rate": 0.0001884026258205689,
"loss": 0.4774,
"step": 1710
},
{
"epoch": 3.75,
"learning_rate": 0.00018774617067833697,
"loss": 0.5223,
"step": 1720
},
{
"epoch": 3.77,
"learning_rate": 0.00018708971553610501,
"loss": 0.5327,
"step": 1730
},
{
"epoch": 3.79,
"learning_rate": 0.00018643326039387309,
"loss": 0.4677,
"step": 1740
},
{
"epoch": 3.82,
"learning_rate": 0.0001857768052516411,
"loss": 0.4709,
"step": 1750
},
{
"epoch": 3.84,
"learning_rate": 0.00018512035010940917,
"loss": 0.5068,
"step": 1760
},
{
"epoch": 3.86,
"learning_rate": 0.00018446389496717724,
"loss": 0.4975,
"step": 1770
},
{
"epoch": 3.88,
"learning_rate": 0.00018380743982494526,
"loss": 0.5317,
"step": 1780
},
{
"epoch": 3.9,
"learning_rate": 0.00018315098468271333,
"loss": 0.4874,
"step": 1790
},
{
"epoch": 3.93,
"learning_rate": 0.0001824945295404814,
"loss": 0.5115,
"step": 1800
},
{
"epoch": 3.93,
"eval_cer": 0.13419611307420495,
"eval_loss": 0.5079012513160706,
"eval_runtime": 76.8322,
"eval_samples_per_second": 25.705,
"eval_steps_per_second": 1.614,
"eval_wer": 0.3093880330240872,
"step": 1800
},
{
"epoch": 3.95,
"learning_rate": 0.00018183807439824942,
"loss": 0.4567,
"step": 1810
},
{
"epoch": 3.97,
"learning_rate": 0.0001811816192560175,
"loss": 0.4669,
"step": 1820
},
{
"epoch": 3.99,
"learning_rate": 0.00018052516411378556,
"loss": 0.5232,
"step": 1830
},
{
"epoch": 4.01,
"learning_rate": 0.0001798687089715536,
"loss": 0.4515,
"step": 1840
},
{
"epoch": 4.03,
"learning_rate": 0.00017921225382932165,
"loss": 0.4392,
"step": 1850
},
{
"epoch": 4.06,
"learning_rate": 0.0001785557986870897,
"loss": 0.4071,
"step": 1860
},
{
"epoch": 4.08,
"learning_rate": 0.00017789934354485776,
"loss": 0.4555,
"step": 1870
},
{
"epoch": 4.1,
"learning_rate": 0.0001772428884026258,
"loss": 0.39,
"step": 1880
},
{
"epoch": 4.12,
"learning_rate": 0.00017658643326039385,
"loss": 0.4553,
"step": 1890
},
{
"epoch": 4.14,
"learning_rate": 0.00017592997811816192,
"loss": 0.4471,
"step": 1900
},
{
"epoch": 4.14,
"eval_cer": 0.13007950530035337,
"eval_loss": 0.5131427645683289,
"eval_runtime": 76.8195,
"eval_samples_per_second": 25.71,
"eval_steps_per_second": 1.614,
"eval_wer": 0.2964933185803047,
"step": 1900
},
{
"epoch": 4.17,
"learning_rate": 0.00017527352297592994,
"loss": 0.4744,
"step": 1910
},
{
"epoch": 4.19,
"learning_rate": 0.000174617067833698,
"loss": 0.4303,
"step": 1920
},
{
"epoch": 4.21,
"learning_rate": 0.00017396061269146608,
"loss": 0.4447,
"step": 1930
},
{
"epoch": 4.23,
"learning_rate": 0.00017330415754923412,
"loss": 0.4543,
"step": 1940
},
{
"epoch": 4.25,
"learning_rate": 0.00017264770240700217,
"loss": 0.4768,
"step": 1950
},
{
"epoch": 4.27,
"learning_rate": 0.00017199124726477024,
"loss": 0.475,
"step": 1960
},
{
"epoch": 4.3,
"learning_rate": 0.00017133479212253828,
"loss": 0.4539,
"step": 1970
},
{
"epoch": 4.32,
"learning_rate": 0.00017067833698030632,
"loss": 0.42,
"step": 1980
},
{
"epoch": 4.34,
"learning_rate": 0.00017002188183807437,
"loss": 0.4361,
"step": 1990
},
{
"epoch": 4.36,
"learning_rate": 0.00016936542669584244,
"loss": 0.4455,
"step": 2000
},
{
"epoch": 4.36,
"eval_cer": 0.12781802120141342,
"eval_loss": 0.5015448331832886,
"eval_runtime": 76.7477,
"eval_samples_per_second": 25.734,
"eval_steps_per_second": 1.616,
"eval_wer": 0.2930887735126394,
"step": 2000
},
{
"epoch": 4.38,
"learning_rate": 0.00016870897155361048,
"loss": 0.4328,
"step": 2010
},
{
"epoch": 4.41,
"learning_rate": 0.00016805251641137853,
"loss": 0.4189,
"step": 2020
},
{
"epoch": 4.43,
"learning_rate": 0.0001673960612691466,
"loss": 0.4207,
"step": 2030
},
{
"epoch": 4.45,
"learning_rate": 0.00016673960612691467,
"loss": 0.4453,
"step": 2040
},
{
"epoch": 4.47,
"learning_rate": 0.00016608315098468268,
"loss": 0.4352,
"step": 2050
},
{
"epoch": 4.49,
"learning_rate": 0.00016542669584245076,
"loss": 0.4666,
"step": 2060
},
{
"epoch": 4.51,
"learning_rate": 0.00016477024070021883,
"loss": 0.4031,
"step": 2070
},
{
"epoch": 4.54,
"learning_rate": 0.00016411378555798684,
"loss": 0.3793,
"step": 2080
},
{
"epoch": 4.56,
"learning_rate": 0.00016345733041575491,
"loss": 0.4319,
"step": 2090
},
{
"epoch": 4.58,
"learning_rate": 0.00016280087527352296,
"loss": 0.4199,
"step": 2100
},
{
"epoch": 4.58,
"eval_cer": 0.12986749116607774,
"eval_loss": 0.49543923139572144,
"eval_runtime": 76.5642,
"eval_samples_per_second": 25.795,
"eval_steps_per_second": 1.62,
"eval_wer": 0.2962379777002298,
"step": 2100
},
{
"epoch": 4.6,
"learning_rate": 0.00016214442013129103,
"loss": 0.4658,
"step": 2110
},
{
"epoch": 4.62,
"learning_rate": 0.00016148796498905907,
"loss": 0.446,
"step": 2120
},
{
"epoch": 4.65,
"learning_rate": 0.00016083150984682712,
"loss": 0.3957,
"step": 2130
},
{
"epoch": 4.67,
"learning_rate": 0.0001601750547045952,
"loss": 0.4544,
"step": 2140
},
{
"epoch": 4.69,
"learning_rate": 0.0001595185995623632,
"loss": 0.4609,
"step": 2150
},
{
"epoch": 4.71,
"learning_rate": 0.00015886214442013127,
"loss": 0.4175,
"step": 2160
},
{
"epoch": 4.73,
"learning_rate": 0.00015820568927789935,
"loss": 0.4258,
"step": 2170
},
{
"epoch": 4.75,
"learning_rate": 0.00015754923413566736,
"loss": 0.4329,
"step": 2180
},
{
"epoch": 4.78,
"learning_rate": 0.00015689277899343543,
"loss": 0.4624,
"step": 2190
},
{
"epoch": 4.8,
"learning_rate": 0.0001562363238512035,
"loss": 0.4699,
"step": 2200
},
{
"epoch": 4.8,
"eval_cer": 0.12684628975265017,
"eval_loss": 0.48266056180000305,
"eval_runtime": 76.2619,
"eval_samples_per_second": 25.898,
"eval_steps_per_second": 1.626,
"eval_wer": 0.2890458762447868,
"step": 2200
},
{
"epoch": 4.82,
"learning_rate": 0.00015557986870897155,
"loss": 0.454,
"step": 2210
},
{
"epoch": 4.84,
"learning_rate": 0.0001549234135667396,
"loss": 0.433,
"step": 2220
},
{
"epoch": 4.86,
"learning_rate": 0.00015426695842450763,
"loss": 0.4222,
"step": 2230
},
{
"epoch": 4.89,
"learning_rate": 0.0001536105032822757,
"loss": 0.3791,
"step": 2240
},
{
"epoch": 4.91,
"learning_rate": 0.00015295404814004375,
"loss": 0.4325,
"step": 2250
},
{
"epoch": 4.93,
"learning_rate": 0.0001522975929978118,
"loss": 0.4396,
"step": 2260
},
{
"epoch": 4.95,
"learning_rate": 0.00015164113785557986,
"loss": 0.3989,
"step": 2270
},
{
"epoch": 4.97,
"learning_rate": 0.00015098468271334788,
"loss": 0.4068,
"step": 2280
},
{
"epoch": 4.99,
"learning_rate": 0.00015032822757111595,
"loss": 0.4422,
"step": 2290
},
{
"epoch": 5.02,
"learning_rate": 0.00014967177242888402,
"loss": 0.3521,
"step": 2300
},
{
"epoch": 5.02,
"eval_cer": 0.12168727915194347,
"eval_loss": 0.48572927713394165,
"eval_runtime": 76.3316,
"eval_samples_per_second": 25.874,
"eval_steps_per_second": 1.624,
"eval_wer": 0.27823644565494937,
"step": 2300
},
{
"epoch": 5.04,
"learning_rate": 0.00014901531728665207,
"loss": 0.4338,
"step": 2310
},
{
"epoch": 5.06,
"learning_rate": 0.00014835886214442014,
"loss": 0.3637,
"step": 2320
},
{
"epoch": 5.08,
"learning_rate": 0.00014770240700218818,
"loss": 0.4171,
"step": 2330
},
{
"epoch": 5.1,
"learning_rate": 0.00014704595185995622,
"loss": 0.3593,
"step": 2340
},
{
"epoch": 5.13,
"learning_rate": 0.00014638949671772427,
"loss": 0.4326,
"step": 2350
},
{
"epoch": 5.15,
"learning_rate": 0.00014573304157549234,
"loss": 0.3774,
"step": 2360
},
{
"epoch": 5.17,
"learning_rate": 0.00014507658643326038,
"loss": 0.3695,
"step": 2370
},
{
"epoch": 5.19,
"learning_rate": 0.00014442013129102843,
"loss": 0.3842,
"step": 2380
},
{
"epoch": 5.21,
"learning_rate": 0.00014376367614879647,
"loss": 0.3443,
"step": 2390
},
{
"epoch": 5.23,
"learning_rate": 0.00014310722100656454,
"loss": 0.3976,
"step": 2400
},
{
"epoch": 5.23,
"eval_cer": 0.12307420494699646,
"eval_loss": 0.493564635515213,
"eval_runtime": 76.5075,
"eval_samples_per_second": 25.814,
"eval_steps_per_second": 1.621,
"eval_wer": 0.2801940590688569,
"step": 2400
},
{
"epoch": 5.26,
"learning_rate": 0.00014245076586433258,
"loss": 0.3759,
"step": 2410
},
{
"epoch": 5.28,
"learning_rate": 0.00014179431072210066,
"loss": 0.3789,
"step": 2420
},
{
"epoch": 5.3,
"learning_rate": 0.0001411378555798687,
"loss": 0.366,
"step": 2430
},
{
"epoch": 5.32,
"learning_rate": 0.00014048140043763674,
"loss": 0.3961,
"step": 2440
},
{
"epoch": 5.34,
"learning_rate": 0.00013982494529540481,
"loss": 0.3863,
"step": 2450
},
{
"epoch": 5.37,
"learning_rate": 0.00013916849015317286,
"loss": 0.3987,
"step": 2460
},
{
"epoch": 5.39,
"learning_rate": 0.0001385120350109409,
"loss": 0.3788,
"step": 2470
},
{
"epoch": 5.41,
"learning_rate": 0.00013785557986870897,
"loss": 0.3735,
"step": 2480
},
{
"epoch": 5.43,
"learning_rate": 0.00013719912472647702,
"loss": 0.377,
"step": 2490
},
{
"epoch": 5.45,
"learning_rate": 0.00013654266958424506,
"loss": 0.365,
"step": 2500
},
{
"epoch": 5.45,
"eval_cer": 0.12206713780918728,
"eval_loss": 0.49057817459106445,
"eval_runtime": 76.5154,
"eval_samples_per_second": 25.812,
"eval_steps_per_second": 1.621,
"eval_wer": 0.2774278662013788,
"step": 2500
},
{
"epoch": 5.47,
"learning_rate": 0.0001358862144420131,
"loss": 0.4061,
"step": 2510
},
{
"epoch": 5.5,
"learning_rate": 0.00013522975929978117,
"loss": 0.3679,
"step": 2520
},
{
"epoch": 5.52,
"learning_rate": 0.00013457330415754922,
"loss": 0.399,
"step": 2530
},
{
"epoch": 5.54,
"learning_rate": 0.00013391684901531726,
"loss": 0.3358,
"step": 2540
},
{
"epoch": 5.56,
"learning_rate": 0.00013326039387308533,
"loss": 0.3544,
"step": 2550
},
{
"epoch": 5.58,
"learning_rate": 0.00013260393873085338,
"loss": 0.3766,
"step": 2560
},
{
"epoch": 5.61,
"learning_rate": 0.00013194748358862145,
"loss": 0.3696,
"step": 2570
},
{
"epoch": 5.63,
"learning_rate": 0.0001312910284463895,
"loss": 0.3834,
"step": 2580
},
{
"epoch": 5.65,
"learning_rate": 0.00013063457330415753,
"loss": 0.3654,
"step": 2590
},
{
"epoch": 5.67,
"learning_rate": 0.0001299781181619256,
"loss": 0.3857,
"step": 2600
},
{
"epoch": 5.67,
"eval_cer": 0.12015017667844523,
"eval_loss": 0.48429372906684875,
"eval_runtime": 76.9088,
"eval_samples_per_second": 25.68,
"eval_steps_per_second": 1.612,
"eval_wer": 0.2757255936675462,
"step": 2600
},
{
"epoch": 5.69,
"learning_rate": 0.00012932166301969365,
"loss": 0.4039,
"step": 2610
},
{
"epoch": 5.71,
"learning_rate": 0.0001286652078774617,
"loss": 0.3752,
"step": 2620
},
{
"epoch": 5.74,
"learning_rate": 0.00012800875273522974,
"loss": 0.4239,
"step": 2630
},
{
"epoch": 5.76,
"learning_rate": 0.00012735229759299778,
"loss": 0.3803,
"step": 2640
},
{
"epoch": 5.78,
"learning_rate": 0.00012669584245076585,
"loss": 0.3371,
"step": 2650
},
{
"epoch": 5.8,
"learning_rate": 0.0001260393873085339,
"loss": 0.3713,
"step": 2660
},
{
"epoch": 5.82,
"learning_rate": 0.00012538293216630197,
"loss": 0.4023,
"step": 2670
},
{
"epoch": 5.85,
"learning_rate": 0.00012472647702407,
"loss": 0.3747,
"step": 2680
},
{
"epoch": 5.87,
"learning_rate": 0.00012407002188183805,
"loss": 0.3474,
"step": 2690
},
{
"epoch": 5.89,
"learning_rate": 0.00012341356673960612,
"loss": 0.3578,
"step": 2700
},
{
"epoch": 5.89,
"eval_cer": 0.11956713780918728,
"eval_loss": 0.4857370853424072,
"eval_runtime": 77.8242,
"eval_samples_per_second": 25.378,
"eval_steps_per_second": 1.593,
"eval_wer": 0.27083156013277726,
"step": 2700
},
{
"epoch": 5.91,
"learning_rate": 0.00012275711159737417,
"loss": 0.3594,
"step": 2710
},
{
"epoch": 5.93,
"learning_rate": 0.00012210065645514224,
"loss": 0.4503,
"step": 2720
},
{
"epoch": 5.95,
"learning_rate": 0.00012144420131291027,
"loss": 0.3817,
"step": 2730
},
{
"epoch": 5.98,
"learning_rate": 0.00012078774617067834,
"loss": 0.3685,
"step": 2740
},
{
"epoch": 6.0,
"learning_rate": 0.00012013129102844638,
"loss": 0.3637,
"step": 2750
},
{
"epoch": 6.02,
"learning_rate": 0.00011947483588621443,
"loss": 0.3677,
"step": 2760
},
{
"epoch": 6.04,
"learning_rate": 0.00011881838074398248,
"loss": 0.3523,
"step": 2770
},
{
"epoch": 6.06,
"learning_rate": 0.00011816192560175053,
"loss": 0.3554,
"step": 2780
},
{
"epoch": 6.09,
"learning_rate": 0.0001175054704595186,
"loss": 0.3441,
"step": 2790
},
{
"epoch": 6.11,
"learning_rate": 0.00011684901531728664,
"loss": 0.3298,
"step": 2800
},
{
"epoch": 6.11,
"eval_cer": 0.11965547703180213,
"eval_loss": 0.4866645634174347,
"eval_runtime": 76.5936,
"eval_samples_per_second": 25.785,
"eval_steps_per_second": 1.619,
"eval_wer": 0.2688739467188697,
"step": 2800
},
{
"epoch": 6.13,
"learning_rate": 0.00011619256017505469,
"loss": 0.3301,
"step": 2810
},
{
"epoch": 6.15,
"learning_rate": 0.00011553610503282276,
"loss": 0.3277,
"step": 2820
},
{
"epoch": 6.17,
"learning_rate": 0.0001148796498905908,
"loss": 0.3392,
"step": 2830
},
{
"epoch": 6.19,
"learning_rate": 0.00011422319474835886,
"loss": 0.342,
"step": 2840
},
{
"epoch": 6.22,
"learning_rate": 0.0001135667396061269,
"loss": 0.3534,
"step": 2850
},
{
"epoch": 6.24,
"learning_rate": 0.00011291028446389495,
"loss": 0.3009,
"step": 2860
},
{
"epoch": 6.26,
"learning_rate": 0.00011225382932166302,
"loss": 0.3287,
"step": 2870
},
{
"epoch": 6.28,
"learning_rate": 0.00011159737417943106,
"loss": 0.3427,
"step": 2880
},
{
"epoch": 6.3,
"learning_rate": 0.00011094091903719912,
"loss": 0.3959,
"step": 2890
},
{
"epoch": 6.32,
"learning_rate": 0.00011028446389496716,
"loss": 0.3099,
"step": 2900
},
{
"epoch": 6.32,
"eval_cer": 0.12365724381625441,
"eval_loss": 0.49239638447761536,
"eval_runtime": 77.6773,
"eval_samples_per_second": 25.426,
"eval_steps_per_second": 1.596,
"eval_wer": 0.27695974125457484,
"step": 2900
},
{
"epoch": 6.35,
"learning_rate": 0.00010962800875273522,
"loss": 0.364,
"step": 2910
},
{
"epoch": 6.37,
"learning_rate": 0.00010897155361050328,
"loss": 0.3946,
"step": 2920
},
{
"epoch": 6.39,
"learning_rate": 0.00010831509846827132,
"loss": 0.3134,
"step": 2930
},
{
"epoch": 6.41,
"learning_rate": 0.00010765864332603939,
"loss": 0.3433,
"step": 2940
},
{
"epoch": 6.43,
"learning_rate": 0.00010700218818380743,
"loss": 0.3756,
"step": 2950
},
{
"epoch": 6.46,
"learning_rate": 0.00010634573304157548,
"loss": 0.3307,
"step": 2960
},
{
"epoch": 6.48,
"learning_rate": 0.00010568927789934354,
"loss": 0.3647,
"step": 2970
},
{
"epoch": 6.5,
"learning_rate": 0.00010503282275711158,
"loss": 0.3617,
"step": 2980
},
{
"epoch": 6.52,
"learning_rate": 0.00010437636761487965,
"loss": 0.3632,
"step": 2990
},
{
"epoch": 6.54,
"learning_rate": 0.0001037199124726477,
"loss": 0.3606,
"step": 3000
},
{
"epoch": 6.54,
"eval_cer": 0.11892226148409894,
"eval_loss": 0.48514822125434875,
"eval_runtime": 76.2961,
"eval_samples_per_second": 25.886,
"eval_steps_per_second": 1.625,
"eval_wer": 0.26840582177206573,
"step": 3000
},
{
"epoch": 6.56,
"learning_rate": 0.00010306345733041574,
"loss": 0.326,
"step": 3010
},
{
"epoch": 6.59,
"learning_rate": 0.0001024070021881838,
"loss": 0.3282,
"step": 3020
},
{
"epoch": 6.61,
"learning_rate": 0.00010175054704595185,
"loss": 0.3225,
"step": 3030
},
{
"epoch": 6.63,
"learning_rate": 0.00010109409190371991,
"loss": 0.3726,
"step": 3040
},
{
"epoch": 6.65,
"learning_rate": 0.00010043763676148795,
"loss": 0.3231,
"step": 3050
},
{
"epoch": 6.67,
"learning_rate": 9.9781181619256e-05,
"loss": 0.3161,
"step": 3060
},
{
"epoch": 6.7,
"learning_rate": 9.912472647702407e-05,
"loss": 0.3354,
"step": 3070
},
{
"epoch": 6.72,
"learning_rate": 9.846827133479211e-05,
"loss": 0.3628,
"step": 3080
},
{
"epoch": 6.74,
"learning_rate": 9.781181619256017e-05,
"loss": 0.372,
"step": 3090
},
{
"epoch": 6.76,
"learning_rate": 9.715536105032821e-05,
"loss": 0.3807,
"step": 3100
},
{
"epoch": 6.76,
"eval_cer": 0.11962897526501767,
"eval_loss": 0.47002533078193665,
"eval_runtime": 76.3383,
"eval_samples_per_second": 25.872,
"eval_steps_per_second": 1.624,
"eval_wer": 0.2655970720912418,
"step": 3100
},
{
"epoch": 6.78,
"learning_rate": 9.649890590809627e-05,
"loss": 0.3163,
"step": 3110
},
{
"epoch": 6.8,
"learning_rate": 9.584245076586433e-05,
"loss": 0.3699,
"step": 3120
},
{
"epoch": 6.83,
"learning_rate": 9.518599562363237e-05,
"loss": 0.3421,
"step": 3130
},
{
"epoch": 6.85,
"learning_rate": 9.452954048140043e-05,
"loss": 0.2966,
"step": 3140
},
{
"epoch": 6.87,
"learning_rate": 9.387308533916849e-05,
"loss": 0.3197,
"step": 3150
},
{
"epoch": 6.89,
"learning_rate": 9.321663019693654e-05,
"loss": 0.325,
"step": 3160
},
{
"epoch": 6.91,
"learning_rate": 9.256017505470459e-05,
"loss": 0.3375,
"step": 3170
},
{
"epoch": 6.94,
"learning_rate": 9.190371991247263e-05,
"loss": 0.3241,
"step": 3180
},
{
"epoch": 6.96,
"learning_rate": 9.12472647702407e-05,
"loss": 0.2941,
"step": 3190
},
{
"epoch": 6.98,
"learning_rate": 9.059080962800874e-05,
"loss": 0.3286,
"step": 3200
},
{
"epoch": 6.98,
"eval_cer": 0.12047703180212015,
"eval_loss": 0.47703927755355835,
"eval_runtime": 76.5362,
"eval_samples_per_second": 25.805,
"eval_steps_per_second": 1.62,
"eval_wer": 0.2730445144267597,
"step": 3200
},
{
"epoch": 7.0,
"learning_rate": 8.99343544857768e-05,
"loss": 0.3453,
"step": 3210
},
{
"epoch": 7.02,
"learning_rate": 8.927789934354485e-05,
"loss": 0.3261,
"step": 3220
},
{
"epoch": 7.04,
"learning_rate": 8.86214442013129e-05,
"loss": 0.294,
"step": 3230
},
{
"epoch": 7.07,
"learning_rate": 8.796498905908096e-05,
"loss": 0.2559,
"step": 3240
},
{
"epoch": 7.09,
"learning_rate": 8.7308533916849e-05,
"loss": 0.3186,
"step": 3250
},
{
"epoch": 7.11,
"learning_rate": 8.665207877461706e-05,
"loss": 0.3612,
"step": 3260
},
{
"epoch": 7.13,
"learning_rate": 8.599562363238512e-05,
"loss": 0.3232,
"step": 3270
},
{
"epoch": 7.15,
"learning_rate": 8.533916849015316e-05,
"loss": 0.2976,
"step": 3280
},
{
"epoch": 7.18,
"learning_rate": 8.468271334792122e-05,
"loss": 0.3237,
"step": 3290
},
{
"epoch": 7.2,
"learning_rate": 8.402625820568926e-05,
"loss": 0.3318,
"step": 3300
},
{
"epoch": 7.2,
"eval_cer": 0.11656360424028268,
"eval_loss": 0.4844971299171448,
"eval_runtime": 76.5793,
"eval_samples_per_second": 25.79,
"eval_steps_per_second": 1.619,
"eval_wer": 0.25785173206230316,
"step": 3300
},
{
"epoch": 7.22,
"learning_rate": 8.336980306345733e-05,
"loss": 0.3216,
"step": 3310
},
{
"epoch": 7.24,
"learning_rate": 8.271334792122538e-05,
"loss": 0.2986,
"step": 3320
},
{
"epoch": 7.26,
"learning_rate": 8.205689277899342e-05,
"loss": 0.3111,
"step": 3330
},
{
"epoch": 7.28,
"learning_rate": 8.140043763676148e-05,
"loss": 0.3067,
"step": 3340
},
{
"epoch": 7.31,
"learning_rate": 8.074398249452954e-05,
"loss": 0.2697,
"step": 3350
},
{
"epoch": 7.33,
"learning_rate": 8.00875273522976e-05,
"loss": 0.2998,
"step": 3360
},
{
"epoch": 7.35,
"learning_rate": 7.943107221006564e-05,
"loss": 0.3187,
"step": 3370
},
{
"epoch": 7.37,
"learning_rate": 7.877461706783368e-05,
"loss": 0.3361,
"step": 3380
},
{
"epoch": 7.39,
"learning_rate": 7.811816192560175e-05,
"loss": 0.2848,
"step": 3390
},
{
"epoch": 7.42,
"learning_rate": 7.74617067833698e-05,
"loss": 0.2936,
"step": 3400
},
{
"epoch": 7.42,
"eval_cer": 0.11591872791519435,
"eval_loss": 0.49088254570961,
"eval_runtime": 76.7751,
"eval_samples_per_second": 25.724,
"eval_steps_per_second": 1.615,
"eval_wer": 0.25700059579538687,
"step": 3400
},
{
"epoch": 7.44,
"learning_rate": 7.680525164113785e-05,
"loss": 0.2901,
"step": 3410
},
{
"epoch": 7.46,
"learning_rate": 7.61487964989059e-05,
"loss": 0.2998,
"step": 3420
},
{
"epoch": 7.48,
"learning_rate": 7.549234135667394e-05,
"loss": 0.2743,
"step": 3430
},
{
"epoch": 7.5,
"learning_rate": 7.483588621444201e-05,
"loss": 0.3297,
"step": 3440
},
{
"epoch": 7.52,
"learning_rate": 7.417943107221007e-05,
"loss": 0.2912,
"step": 3450
},
{
"epoch": 7.55,
"learning_rate": 7.352297592997811e-05,
"loss": 0.2884,
"step": 3460
},
{
"epoch": 7.57,
"learning_rate": 7.286652078774617e-05,
"loss": 0.2948,
"step": 3470
},
{
"epoch": 7.59,
"learning_rate": 7.221006564551421e-05,
"loss": 0.2908,
"step": 3480
},
{
"epoch": 7.61,
"learning_rate": 7.155361050328227e-05,
"loss": 0.3031,
"step": 3490
},
{
"epoch": 7.63,
"learning_rate": 7.089715536105033e-05,
"loss": 0.3119,
"step": 3500
},
{
"epoch": 7.63,
"eval_cer": 0.11498233215547703,
"eval_loss": 0.4898751676082611,
"eval_runtime": 77.9528,
"eval_samples_per_second": 25.336,
"eval_steps_per_second": 1.591,
"eval_wer": 0.253936505234488,
"step": 3500
},
{
"epoch": 7.66,
"learning_rate": 7.024070021881837e-05,
"loss": 0.2985,
"step": 3510
},
{
"epoch": 7.68,
"learning_rate": 6.958424507658643e-05,
"loss": 0.2669,
"step": 3520
},
{
"epoch": 7.7,
"learning_rate": 6.892778993435449e-05,
"loss": 0.3468,
"step": 3530
},
{
"epoch": 7.72,
"learning_rate": 6.827133479212253e-05,
"loss": 0.2897,
"step": 3540
},
{
"epoch": 7.74,
"learning_rate": 6.761487964989059e-05,
"loss": 0.301,
"step": 3550
},
{
"epoch": 7.76,
"learning_rate": 6.695842450765863e-05,
"loss": 0.2838,
"step": 3560
},
{
"epoch": 7.79,
"learning_rate": 6.630196936542669e-05,
"loss": 0.332,
"step": 3570
},
{
"epoch": 7.81,
"learning_rate": 6.564551422319475e-05,
"loss": 0.3232,
"step": 3580
},
{
"epoch": 7.83,
"learning_rate": 6.49890590809628e-05,
"loss": 0.3118,
"step": 3590
},
{
"epoch": 7.85,
"learning_rate": 6.433260393873085e-05,
"loss": 0.3142,
"step": 3600
},
{
"epoch": 7.85,
"eval_cer": 0.11430212014134275,
"eval_loss": 0.4782133996486664,
"eval_runtime": 76.4776,
"eval_samples_per_second": 25.825,
"eval_steps_per_second": 1.621,
"eval_wer": 0.25495786875478765,
"step": 3600
},
{
"epoch": 7.87,
"learning_rate": 6.367614879649889e-05,
"loss": 0.3406,
"step": 3610
},
{
"epoch": 7.9,
"learning_rate": 6.301969365426695e-05,
"loss": 0.3147,
"step": 3620
},
{
"epoch": 7.92,
"learning_rate": 6.2363238512035e-05,
"loss": 0.3376,
"step": 3630
},
{
"epoch": 7.94,
"learning_rate": 6.170678336980306e-05,
"loss": 0.3083,
"step": 3640
},
{
"epoch": 7.96,
"learning_rate": 6.105032822757112e-05,
"loss": 0.2793,
"step": 3650
},
{
"epoch": 7.98,
"learning_rate": 6.039387308533917e-05,
"loss": 0.3022,
"step": 3660
},
{
"epoch": 8.0,
"learning_rate": 5.9737417943107214e-05,
"loss": 0.2657,
"step": 3670
},
{
"epoch": 8.03,
"learning_rate": 5.9080962800875264e-05,
"loss": 0.2794,
"step": 3680
},
{
"epoch": 8.05,
"learning_rate": 5.842450765864332e-05,
"loss": 0.2583,
"step": 3690
},
{
"epoch": 8.07,
"learning_rate": 5.776805251641138e-05,
"loss": 0.2935,
"step": 3700
},
{
"epoch": 8.07,
"eval_cer": 0.11532685512367491,
"eval_loss": 0.4885237514972687,
"eval_runtime": 77.8857,
"eval_samples_per_second": 25.358,
"eval_steps_per_second": 1.592,
"eval_wer": 0.25270235764745935,
"step": 3700
},
{
"epoch": 8.09,
"learning_rate": 5.711159737417943e-05,
"loss": 0.2726,
"step": 3710
},
{
"epoch": 8.11,
"learning_rate": 5.645514223194747e-05,
"loss": 0.2831,
"step": 3720
},
{
"epoch": 8.14,
"learning_rate": 5.579868708971553e-05,
"loss": 0.2656,
"step": 3730
},
{
"epoch": 8.16,
"learning_rate": 5.514223194748358e-05,
"loss": 0.2792,
"step": 3740
},
{
"epoch": 8.18,
"learning_rate": 5.448577680525164e-05,
"loss": 0.268,
"step": 3750
},
{
"epoch": 8.2,
"learning_rate": 5.3829321663019695e-05,
"loss": 0.2779,
"step": 3760
},
{
"epoch": 8.22,
"learning_rate": 5.317286652078774e-05,
"loss": 0.2782,
"step": 3770
},
{
"epoch": 8.24,
"learning_rate": 5.251641137855579e-05,
"loss": 0.3074,
"step": 3780
},
{
"epoch": 8.27,
"learning_rate": 5.185995623632385e-05,
"loss": 0.262,
"step": 3790
},
{
"epoch": 8.29,
"learning_rate": 5.12035010940919e-05,
"loss": 0.2805,
"step": 3800
},
{
"epoch": 8.29,
"eval_cer": 0.11428445229681979,
"eval_loss": 0.49059855937957764,
"eval_runtime": 76.8475,
"eval_samples_per_second": 25.7,
"eval_steps_per_second": 1.614,
"eval_wer": 0.25287258490084263,
"step": 3800
},
{
"epoch": 8.31,
"learning_rate": 5.0547045951859955e-05,
"loss": 0.2733,
"step": 3810
},
{
"epoch": 8.33,
"learning_rate": 4.9890590809628e-05,
"loss": 0.2947,
"step": 3820
},
{
"epoch": 8.35,
"learning_rate": 4.9234135667396056e-05,
"loss": 0.2963,
"step": 3830
},
{
"epoch": 8.38,
"learning_rate": 4.8577680525164106e-05,
"loss": 0.286,
"step": 3840
},
{
"epoch": 8.4,
"learning_rate": 4.7921225382932163e-05,
"loss": 0.2971,
"step": 3850
},
{
"epoch": 8.42,
"learning_rate": 4.7264770240700214e-05,
"loss": 0.2492,
"step": 3860
},
{
"epoch": 8.44,
"learning_rate": 4.660831509846827e-05,
"loss": 0.2564,
"step": 3870
},
{
"epoch": 8.46,
"learning_rate": 4.5951859956236315e-05,
"loss": 0.2787,
"step": 3880
},
{
"epoch": 8.48,
"learning_rate": 4.529540481400437e-05,
"loss": 0.2799,
"step": 3890
},
{
"epoch": 8.51,
"learning_rate": 4.463894967177242e-05,
"loss": 0.254,
"step": 3900
},
{
"epoch": 8.51,
"eval_cer": 0.1143904593639576,
"eval_loss": 0.48224031925201416,
"eval_runtime": 76.6309,
"eval_samples_per_second": 25.773,
"eval_steps_per_second": 1.618,
"eval_wer": 0.2537662779811048,
"step": 3900
},
{
"epoch": 8.53,
"learning_rate": 4.398249452954048e-05,
"loss": 0.2835,
"step": 3910
},
{
"epoch": 8.55,
"learning_rate": 4.332603938730853e-05,
"loss": 0.3104,
"step": 3920
},
{
"epoch": 8.57,
"learning_rate": 4.266958424507658e-05,
"loss": 0.2795,
"step": 3930
},
{
"epoch": 8.59,
"learning_rate": 4.201312910284463e-05,
"loss": 0.2655,
"step": 3940
},
{
"epoch": 8.62,
"learning_rate": 4.135667396061269e-05,
"loss": 0.2652,
"step": 3950
},
{
"epoch": 8.64,
"learning_rate": 4.070021881838074e-05,
"loss": 0.2679,
"step": 3960
},
{
"epoch": 8.66,
"learning_rate": 4.00437636761488e-05,
"loss": 0.2776,
"step": 3970
},
{
"epoch": 8.68,
"learning_rate": 3.938730853391684e-05,
"loss": 0.3042,
"step": 3980
},
{
"epoch": 8.7,
"learning_rate": 3.87308533916849e-05,
"loss": 0.2951,
"step": 3990
},
{
"epoch": 8.72,
"learning_rate": 3.807439824945295e-05,
"loss": 0.2855,
"step": 4000
},
{
"epoch": 8.72,
"eval_cer": 0.11234098939929328,
"eval_loss": 0.4852147400379181,
"eval_runtime": 77.8636,
"eval_samples_per_second": 25.365,
"eval_steps_per_second": 1.593,
"eval_wer": 0.24755298323261554,
"step": 4000
},
{
"epoch": 8.75,
"learning_rate": 3.7417943107221006e-05,
"loss": 0.279,
"step": 4010
},
{
"epoch": 8.77,
"learning_rate": 3.6761487964989056e-05,
"loss": 0.2916,
"step": 4020
},
{
"epoch": 8.79,
"learning_rate": 3.6105032822757107e-05,
"loss": 0.3098,
"step": 4030
},
{
"epoch": 8.81,
"learning_rate": 3.5448577680525164e-05,
"loss": 0.2907,
"step": 4040
},
{
"epoch": 8.83,
"learning_rate": 3.4792122538293214e-05,
"loss": 0.2496,
"step": 4050
},
{
"epoch": 8.85,
"learning_rate": 3.4135667396061265e-05,
"loss": 0.2618,
"step": 4060
},
{
"epoch": 8.88,
"learning_rate": 3.3479212253829315e-05,
"loss": 0.2516,
"step": 4070
},
{
"epoch": 8.9,
"learning_rate": 3.282275711159737e-05,
"loss": 0.2721,
"step": 4080
},
{
"epoch": 8.92,
"learning_rate": 3.216630196936542e-05,
"loss": 0.3041,
"step": 4090
},
{
"epoch": 8.94,
"learning_rate": 3.1509846827133474e-05,
"loss": 0.2661,
"step": 4100
},
{
"epoch": 8.94,
"eval_cer": 0.11321554770318021,
"eval_loss": 0.4847288429737091,
"eval_runtime": 76.8119,
"eval_samples_per_second": 25.712,
"eval_steps_per_second": 1.614,
"eval_wer": 0.24963826708656056,
"step": 4100
},
{
"epoch": 8.96,
"learning_rate": 3.085339168490153e-05,
"loss": 0.2588,
"step": 4110
},
{
"epoch": 8.99,
"learning_rate": 3.0196936542669585e-05,
"loss": 0.2807,
"step": 4120
},
{
"epoch": 9.01,
"learning_rate": 2.9540481400437632e-05,
"loss": 0.2572,
"step": 4130
},
{
"epoch": 9.03,
"learning_rate": 2.888402625820569e-05,
"loss": 0.2789,
"step": 4140
},
{
"epoch": 9.05,
"learning_rate": 2.8227571115973736e-05,
"loss": 0.234,
"step": 4150
},
{
"epoch": 9.07,
"learning_rate": 2.757111597374179e-05,
"loss": 0.2443,
"step": 4160
},
{
"epoch": 9.09,
"learning_rate": 2.6914660831509848e-05,
"loss": 0.2936,
"step": 4170
},
{
"epoch": 9.12,
"learning_rate": 2.6258205689277895e-05,
"loss": 0.2776,
"step": 4180
},
{
"epoch": 9.14,
"learning_rate": 2.560175054704595e-05,
"loss": 0.2588,
"step": 4190
},
{
"epoch": 9.16,
"learning_rate": 2.4945295404814e-05,
"loss": 0.2524,
"step": 4200
},
{
"epoch": 9.16,
"eval_cer": 0.11163427561837456,
"eval_loss": 0.4900279939174652,
"eval_runtime": 77.608,
"eval_samples_per_second": 25.448,
"eval_steps_per_second": 1.598,
"eval_wer": 0.24419099497829602,
"step": 4200
},
{
"epoch": 9.18,
"learning_rate": 2.4288840262582053e-05,
"loss": 0.275,
"step": 4210
},
{
"epoch": 9.2,
"learning_rate": 2.3632385120350107e-05,
"loss": 0.2859,
"step": 4220
},
{
"epoch": 9.23,
"learning_rate": 2.2975929978118158e-05,
"loss": 0.2422,
"step": 4230
},
{
"epoch": 9.25,
"learning_rate": 2.231947483588621e-05,
"loss": 0.2835,
"step": 4240
},
{
"epoch": 9.27,
"learning_rate": 2.1663019693654265e-05,
"loss": 0.2505,
"step": 4250
},
{
"epoch": 9.29,
"learning_rate": 2.1006564551422316e-05,
"loss": 0.2413,
"step": 4260
},
{
"epoch": 9.31,
"learning_rate": 2.035010940919037e-05,
"loss": 0.2737,
"step": 4270
},
{
"epoch": 9.33,
"learning_rate": 1.969365426695842e-05,
"loss": 0.2544,
"step": 4280
},
{
"epoch": 9.36,
"learning_rate": 1.9037199124726474e-05,
"loss": 0.258,
"step": 4290
},
{
"epoch": 9.38,
"learning_rate": 1.8380743982494528e-05,
"loss": 0.253,
"step": 4300
},
{
"epoch": 9.38,
"eval_cer": 0.11195229681978798,
"eval_loss": 0.4887804090976715,
"eval_runtime": 76.736,
"eval_samples_per_second": 25.738,
"eval_steps_per_second": 1.616,
"eval_wer": 0.24580815388543706,
"step": 4300
},
{
"epoch": 9.4,
"learning_rate": 1.7724288840262582e-05,
"loss": 0.2478,
"step": 4310
},
{
"epoch": 9.42,
"learning_rate": 1.7067833698030632e-05,
"loss": 0.243,
"step": 4320
},
{
"epoch": 9.44,
"learning_rate": 1.6411378555798686e-05,
"loss": 0.2583,
"step": 4330
},
{
"epoch": 9.47,
"learning_rate": 1.5754923413566737e-05,
"loss": 0.265,
"step": 4340
},
{
"epoch": 9.49,
"learning_rate": 1.5098468271334792e-05,
"loss": 0.247,
"step": 4350
},
{
"epoch": 9.51,
"learning_rate": 1.4442013129102845e-05,
"loss": 0.2825,
"step": 4360
},
{
"epoch": 9.53,
"learning_rate": 1.3785557986870895e-05,
"loss": 0.2637,
"step": 4370
},
{
"epoch": 9.55,
"learning_rate": 1.3129102844638947e-05,
"loss": 0.251,
"step": 4380
},
{
"epoch": 9.57,
"learning_rate": 1.2472647702407e-05,
"loss": 0.2831,
"step": 4390
},
{
"epoch": 9.6,
"learning_rate": 1.1816192560175053e-05,
"loss": 0.2591,
"step": 4400
},
{
"epoch": 9.6,
"eval_cer": 0.11252650176678446,
"eval_loss": 0.48128968477249146,
"eval_runtime": 78.3767,
"eval_samples_per_second": 25.199,
"eval_steps_per_second": 1.582,
"eval_wer": 0.24576559707209125,
"step": 4400
},
{
"epoch": 9.62,
"learning_rate": 1.1159737417943106e-05,
"loss": 0.2567,
"step": 4410
},
{
"epoch": 9.64,
"learning_rate": 1.0503282275711158e-05,
"loss": 0.2358,
"step": 4420
},
{
"epoch": 9.66,
"learning_rate": 9.84682713347921e-06,
"loss": 0.2817,
"step": 4430
},
{
"epoch": 9.68,
"learning_rate": 9.190371991247264e-06,
"loss": 0.2499,
"step": 4440
},
{
"epoch": 9.71,
"learning_rate": 8.533916849015316e-06,
"loss": 0.2441,
"step": 4450
},
{
"epoch": 9.73,
"learning_rate": 7.877461706783368e-06,
"loss": 0.246,
"step": 4460
},
{
"epoch": 9.75,
"learning_rate": 7.221006564551422e-06,
"loss": 0.243,
"step": 4470
},
{
"epoch": 9.77,
"learning_rate": 6.564551422319474e-06,
"loss": 0.2746,
"step": 4480
},
{
"epoch": 9.79,
"learning_rate": 5.908096280087527e-06,
"loss": 0.2386,
"step": 4490
},
{
"epoch": 9.81,
"learning_rate": 5.251641137855579e-06,
"loss": 0.2583,
"step": 4500
},
{
"epoch": 9.81,
"eval_cer": 0.11135159010600706,
"eval_loss": 0.48441562056541443,
"eval_runtime": 76.831,
"eval_samples_per_second": 25.706,
"eval_steps_per_second": 1.614,
"eval_wer": 0.24346752915141714,
"step": 4500
},
{
"epoch": 9.84,
"learning_rate": 4.595185995623632e-06,
"loss": 0.2616,
"step": 4510
},
{
"epoch": 9.86,
"learning_rate": 3.938730853391684e-06,
"loss": 0.2747,
"step": 4520
},
{
"epoch": 9.88,
"learning_rate": 3.282275711159737e-06,
"loss": 0.2566,
"step": 4530
},
{
"epoch": 9.9,
"learning_rate": 2.6258205689277895e-06,
"loss": 0.3137,
"step": 4540
},
{
"epoch": 9.92,
"learning_rate": 1.969365426695842e-06,
"loss": 0.2975,
"step": 4550
},
{
"epoch": 9.95,
"learning_rate": 1.3129102844638947e-06,
"loss": 0.2599,
"step": 4560
},
{
"epoch": 9.97,
"learning_rate": 6.564551422319474e-07,
"loss": 0.2785,
"step": 4570
},
{
"epoch": 9.99,
"learning_rate": 0.0,
"loss": 0.2543,
"step": 4580
},
{
"epoch": 9.99,
"step": 4580,
"total_flos": 1.5922287912029258e+19,
"train_loss": 0.6996097780211004,
"train_runtime": 9428.0567,
"train_samples_per_second": 15.551,
"train_steps_per_second": 0.486
}
],
"logging_steps": 10,
"max_steps": 4580,
"num_train_epochs": 10,
"save_steps": 100,
"total_flos": 1.5922287912029258e+19,
"trial_name": null,
"trial_params": null
}