wav2vec2-xlsr-53-ft-btb-ccv-cy / trainer_state.json
DewiBrynJones's picture
End of training
c91eef2 verified
raw
history blame
33.5 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.7858854964831624,
"eval_steps": 200,
"global_step": 20000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.007858854964831625,
"eval_loss": 3.1856138706207275,
"eval_runtime": 145.9906,
"eval_samples_per_second": 38.742,
"eval_steps_per_second": 4.843,
"eval_wer": 1.0,
"step": 200
},
{
"epoch": 0.01571770992966325,
"eval_loss": 2.649242877960205,
"eval_runtime": 145.4142,
"eval_samples_per_second": 38.896,
"eval_steps_per_second": 4.862,
"eval_wer": 1.0,
"step": 400
},
{
"epoch": 0.01964713741207906,
"grad_norm": 2.1910934448242188,
"learning_rate": 0.0002982,
"loss": 4.6997,
"step": 500
},
{
"epoch": 0.023576564894494872,
"eval_loss": 1.3868569135665894,
"eval_runtime": 144.4791,
"eval_samples_per_second": 39.148,
"eval_steps_per_second": 4.893,
"eval_wer": 0.8722215981126927,
"step": 600
},
{
"epoch": 0.0314354198593265,
"eval_loss": 1.230230689048767,
"eval_runtime": 145.2382,
"eval_samples_per_second": 38.943,
"eval_steps_per_second": 4.868,
"eval_wer": 0.830768243167338,
"step": 800
},
{
"epoch": 0.03929427482415812,
"grad_norm": 4.400168418884277,
"learning_rate": 0.0002923538461538461,
"loss": 1.0569,
"step": 1000
},
{
"epoch": 0.03929427482415812,
"eval_loss": 1.1379698514938354,
"eval_runtime": 146.1828,
"eval_samples_per_second": 38.691,
"eval_steps_per_second": 4.836,
"eval_wer": 0.795846640240086,
"step": 1000
},
{
"epoch": 0.047153129788989744,
"eval_loss": 1.066832184791565,
"eval_runtime": 145.7914,
"eval_samples_per_second": 38.795,
"eval_steps_per_second": 4.849,
"eval_wer": 0.7697998748214601,
"step": 1200
},
{
"epoch": 0.055011984753821366,
"eval_loss": 1.0207505226135254,
"eval_runtime": 146.2293,
"eval_samples_per_second": 38.679,
"eval_steps_per_second": 4.835,
"eval_wer": 0.7310426730432829,
"step": 1400
},
{
"epoch": 0.05894141223623718,
"grad_norm": 3.929137945175171,
"learning_rate": 0.00028466153846153845,
"loss": 0.8131,
"step": 1500
},
{
"epoch": 0.062870839718653,
"eval_loss": 0.970230758190155,
"eval_runtime": 145.9066,
"eval_samples_per_second": 38.765,
"eval_steps_per_second": 4.846,
"eval_wer": 0.7151385790630868,
"step": 1600
},
{
"epoch": 0.07072969468348461,
"eval_loss": 0.9408352375030518,
"eval_runtime": 146.5767,
"eval_samples_per_second": 38.587,
"eval_steps_per_second": 4.823,
"eval_wer": 0.6882091444528253,
"step": 1800
},
{
"epoch": 0.07858854964831624,
"grad_norm": 2.3885908126831055,
"learning_rate": 0.0002769692307692307,
"loss": 0.7194,
"step": 2000
},
{
"epoch": 0.07858854964831624,
"eval_loss": 0.9249575138092041,
"eval_runtime": 145.9151,
"eval_samples_per_second": 38.762,
"eval_steps_per_second": 4.845,
"eval_wer": 0.6804095585049189,
"step": 2000
},
{
"epoch": 0.08644740461314787,
"eval_loss": 0.9052397608757019,
"eval_runtime": 146.242,
"eval_samples_per_second": 38.676,
"eval_steps_per_second": 4.834,
"eval_wer": 0.6726099725570124,
"step": 2200
},
{
"epoch": 0.09430625957797949,
"eval_loss": 0.8985734581947327,
"eval_runtime": 146.806,
"eval_samples_per_second": 38.527,
"eval_steps_per_second": 4.816,
"eval_wer": 0.6573478198070967,
"step": 2400
},
{
"epoch": 0.0982356870603953,
"grad_norm": 1.7855921983718872,
"learning_rate": 0.00026927692307692305,
"loss": 0.6688,
"step": 2500
},
{
"epoch": 0.10216511454281112,
"eval_loss": 0.8814770579338074,
"eval_runtime": 147.0906,
"eval_samples_per_second": 38.452,
"eval_steps_per_second": 4.807,
"eval_wer": 0.6473495851454799,
"step": 2600
},
{
"epoch": 0.11002396950764273,
"eval_loss": 0.858833372592926,
"eval_runtime": 146.5037,
"eval_samples_per_second": 38.607,
"eval_steps_per_second": 4.826,
"eval_wer": 0.6444608496092182,
"step": 2800
},
{
"epoch": 0.11788282447247436,
"grad_norm": 2.42130184173584,
"learning_rate": 0.00026158461538461537,
"loss": 0.645,
"step": 3000
},
{
"epoch": 0.11788282447247436,
"eval_loss": 0.875824511051178,
"eval_runtime": 147.7065,
"eval_samples_per_second": 38.292,
"eval_steps_per_second": 4.787,
"eval_wer": 0.6487458073213397,
"step": 3000
},
{
"epoch": 0.125741679437306,
"eval_loss": 0.8724836707115173,
"eval_runtime": 146.8488,
"eval_samples_per_second": 38.516,
"eval_steps_per_second": 4.814,
"eval_wer": 0.6690953443212274,
"step": 3200
},
{
"epoch": 0.13360053440213762,
"eval_loss": 0.8295639157295227,
"eval_runtime": 147.6357,
"eval_samples_per_second": 38.311,
"eval_steps_per_second": 4.789,
"eval_wer": 0.6298085410280689,
"step": 3400
},
{
"epoch": 0.1375299618845534,
"grad_norm": 3.927525520324707,
"learning_rate": 0.0002538923076923077,
"loss": 0.6077,
"step": 3500
},
{
"epoch": 0.14145938936696922,
"eval_loss": 0.8355618715286255,
"eval_runtime": 147.0713,
"eval_samples_per_second": 38.458,
"eval_steps_per_second": 4.807,
"eval_wer": 0.6551652196241434,
"step": 3600
},
{
"epoch": 0.14931824433180085,
"eval_loss": 0.8262892961502075,
"eval_runtime": 147.1539,
"eval_samples_per_second": 38.436,
"eval_steps_per_second": 4.804,
"eval_wer": 0.6228595272102839,
"step": 3800
},
{
"epoch": 0.15717709929663248,
"grad_norm": 2.224973201751709,
"learning_rate": 0.00024619999999999997,
"loss": 0.5983,
"step": 4000
},
{
"epoch": 0.15717709929663248,
"eval_loss": 0.8710989356040955,
"eval_runtime": 147.2188,
"eval_samples_per_second": 38.419,
"eval_steps_per_second": 4.802,
"eval_wer": 0.6884659209449375,
"step": 4000
},
{
"epoch": 0.1650359542614641,
"eval_loss": 0.7836620807647705,
"eval_runtime": 148.3617,
"eval_samples_per_second": 38.123,
"eval_steps_per_second": 4.765,
"eval_wer": 0.5918216687262281,
"step": 4200
},
{
"epoch": 0.17289480922629574,
"eval_loss": 0.8097087144851685,
"eval_runtime": 147.4155,
"eval_samples_per_second": 38.368,
"eval_steps_per_second": 4.796,
"eval_wer": 0.659755099420648,
"step": 4400
},
{
"epoch": 0.17682423670871153,
"grad_norm": 1.8362923860549927,
"learning_rate": 0.0002385230769230769,
"loss": 0.5788,
"step": 4500
},
{
"epoch": 0.18075366419112734,
"eval_loss": 0.77768874168396,
"eval_runtime": 146.8579,
"eval_samples_per_second": 38.513,
"eval_steps_per_second": 4.814,
"eval_wer": 0.5869268668453403,
"step": 4600
},
{
"epoch": 0.18861251915595897,
"eval_loss": 0.7912825345993042,
"eval_runtime": 146.176,
"eval_samples_per_second": 38.693,
"eval_steps_per_second": 4.837,
"eval_wer": 0.5895588258894898,
"step": 4800
},
{
"epoch": 0.1964713741207906,
"grad_norm": 3.315845489501953,
"learning_rate": 0.0002308307692307692,
"loss": 0.5501,
"step": 5000
},
{
"epoch": 0.1964713741207906,
"eval_loss": 0.7924312353134155,
"eval_runtime": 146.7719,
"eval_samples_per_second": 38.536,
"eval_steps_per_second": 4.817,
"eval_wer": 0.5899760876891721,
"step": 5000
},
{
"epoch": 0.20433022908562223,
"eval_loss": 0.7602530717849731,
"eval_runtime": 146.9845,
"eval_samples_per_second": 38.48,
"eval_steps_per_second": 4.81,
"eval_wer": 0.5737189260323218,
"step": 5200
},
{
"epoch": 0.21218908405045384,
"eval_loss": 0.7750186920166016,
"eval_runtime": 146.5887,
"eval_samples_per_second": 38.584,
"eval_steps_per_second": 4.823,
"eval_wer": 0.5931697453098169,
"step": 5400
},
{
"epoch": 0.21611851153286965,
"grad_norm": 9.320504188537598,
"learning_rate": 0.00022313846153846153,
"loss": 0.5694,
"step": 5500
},
{
"epoch": 0.22004793901528547,
"eval_loss": 0.7516711950302124,
"eval_runtime": 146.9947,
"eval_samples_per_second": 38.478,
"eval_steps_per_second": 4.81,
"eval_wer": 0.5711190640496863,
"step": 5600
},
{
"epoch": 0.2279067939801171,
"eval_loss": 0.7651358842849731,
"eval_runtime": 146.6177,
"eval_samples_per_second": 38.577,
"eval_steps_per_second": 4.822,
"eval_wer": 0.5698191330583685,
"step": 5800
},
{
"epoch": 0.23576564894494872,
"grad_norm": 2.727358102798462,
"learning_rate": 0.00021544615384615383,
"loss": 0.5424,
"step": 6000
},
{
"epoch": 0.23576564894494872,
"eval_loss": 0.7547870874404907,
"eval_runtime": 146.6389,
"eval_samples_per_second": 38.571,
"eval_steps_per_second": 4.821,
"eval_wer": 0.5820481134952095,
"step": 6000
},
{
"epoch": 0.24362450390978035,
"eval_loss": 0.730515718460083,
"eval_runtime": 146.763,
"eval_samples_per_second": 38.538,
"eval_steps_per_second": 4.817,
"eval_wer": 0.5681019402673685,
"step": 6200
},
{
"epoch": 0.251483358874612,
"eval_loss": 0.7314247488975525,
"eval_runtime": 147.0063,
"eval_samples_per_second": 38.475,
"eval_steps_per_second": 4.809,
"eval_wer": 0.5589221806743593,
"step": 6400
},
{
"epoch": 0.2554127863570278,
"grad_norm": 3.2329583168029785,
"learning_rate": 0.00020775384615384613,
"loss": 0.521,
"step": 6500
},
{
"epoch": 0.2593422138394436,
"eval_loss": 0.7227704524993896,
"eval_runtime": 147.7519,
"eval_samples_per_second": 38.28,
"eval_steps_per_second": 4.785,
"eval_wer": 0.565437884161705,
"step": 6600
},
{
"epoch": 0.26720106880427524,
"eval_loss": 0.7350090146064758,
"eval_runtime": 147.7712,
"eval_samples_per_second": 38.275,
"eval_steps_per_second": 4.784,
"eval_wer": 0.5633194781017797,
"step": 6800
},
{
"epoch": 0.2750599237691068,
"grad_norm": 3.2193281650543213,
"learning_rate": 0.00020006153846153843,
"loss": 0.5119,
"step": 7000
},
{
"epoch": 0.2750599237691068,
"eval_loss": 0.7079117298126221,
"eval_runtime": 146.6317,
"eval_samples_per_second": 38.573,
"eval_steps_per_second": 4.822,
"eval_wer": 0.5346888992312754,
"step": 7000
},
{
"epoch": 0.28291877873393845,
"eval_loss": 0.7105109691619873,
"eval_runtime": 147.7789,
"eval_samples_per_second": 38.273,
"eval_steps_per_second": 4.784,
"eval_wer": 0.5601097719503779,
"step": 7200
},
{
"epoch": 0.2907776336987701,
"eval_loss": 0.6876121163368225,
"eval_runtime": 147.4709,
"eval_samples_per_second": 38.353,
"eval_steps_per_second": 4.794,
"eval_wer": 0.5378344112596491,
"step": 7400
},
{
"epoch": 0.2947070611811859,
"grad_norm": 2.7452991008758545,
"learning_rate": 0.00019236923076923075,
"loss": 0.5007,
"step": 7500
},
{
"epoch": 0.2986364886636017,
"eval_loss": 0.6834765076637268,
"eval_runtime": 147.74,
"eval_samples_per_second": 38.283,
"eval_steps_per_second": 4.785,
"eval_wer": 0.5303397473961259,
"step": 7600
},
{
"epoch": 0.30649534362843334,
"eval_loss": 0.7131712436676025,
"eval_runtime": 147.6824,
"eval_samples_per_second": 38.298,
"eval_steps_per_second": 4.787,
"eval_wer": 0.5350740639694436,
"step": 7800
},
{
"epoch": 0.31435419859326497,
"grad_norm": 2.4165494441986084,
"learning_rate": 0.00018467692307692308,
"loss": 0.4934,
"step": 8000
},
{
"epoch": 0.31435419859326497,
"eval_loss": 0.697209358215332,
"eval_runtime": 146.9527,
"eval_samples_per_second": 38.489,
"eval_steps_per_second": 4.811,
"eval_wer": 0.5241771115854343,
"step": 8000
},
{
"epoch": 0.3222130535580966,
"eval_loss": 0.680029571056366,
"eval_runtime": 147.261,
"eval_samples_per_second": 38.408,
"eval_steps_per_second": 4.801,
"eval_wer": 0.5226685496942755,
"step": 8200
},
{
"epoch": 0.3300719085229282,
"eval_loss": 0.6915732026100159,
"eval_runtime": 146.7546,
"eval_samples_per_second": 38.541,
"eval_steps_per_second": 4.818,
"eval_wer": 0.5364702861453033,
"step": 8400
},
{
"epoch": 0.33400133600534404,
"grad_norm": 2.036782741546631,
"learning_rate": 0.00017698461538461537,
"loss": 0.4762,
"step": 8500
},
{
"epoch": 0.33793076348775986,
"eval_loss": 0.6801823377609253,
"eval_runtime": 147.6195,
"eval_samples_per_second": 38.315,
"eval_steps_per_second": 4.789,
"eval_wer": 0.5255251881690232,
"step": 8600
},
{
"epoch": 0.3457896184525915,
"eval_loss": 0.6977699398994446,
"eval_runtime": 148.3269,
"eval_samples_per_second": 38.132,
"eval_steps_per_second": 4.766,
"eval_wer": 0.5336938903243408,
"step": 8800
},
{
"epoch": 0.35364847341742306,
"grad_norm": 3.6703684329986572,
"learning_rate": 0.0001693076923076923,
"loss": 0.4774,
"step": 9000
},
{
"epoch": 0.35364847341742306,
"eval_loss": 0.6566863059997559,
"eval_runtime": 147.4913,
"eval_samples_per_second": 38.348,
"eval_steps_per_second": 4.794,
"eval_wer": 0.5210957936800886,
"step": 9000
},
{
"epoch": 0.3615073283822547,
"eval_loss": 0.6478887796401978,
"eval_runtime": 146.7975,
"eval_samples_per_second": 38.529,
"eval_steps_per_second": 4.816,
"eval_wer": 0.5152380799537802,
"step": 9200
},
{
"epoch": 0.3693661833470863,
"eval_loss": 0.6551229953765869,
"eval_runtime": 147.2799,
"eval_samples_per_second": 38.403,
"eval_steps_per_second": 4.8,
"eval_wer": 0.5147405755003129,
"step": 9400
},
{
"epoch": 0.37329561082950213,
"grad_norm": 2.4989895820617676,
"learning_rate": 0.00016161538461538462,
"loss": 0.4632,
"step": 9500
},
{
"epoch": 0.37722503831191795,
"eval_loss": 0.6358110308647156,
"eval_runtime": 148.0942,
"eval_samples_per_second": 38.192,
"eval_steps_per_second": 4.774,
"eval_wer": 0.4954502415303879,
"step": 9600
},
{
"epoch": 0.3850838932767496,
"eval_loss": 0.6466320157051086,
"eval_runtime": 147.5131,
"eval_samples_per_second": 38.342,
"eval_steps_per_second": 4.793,
"eval_wer": 0.5109049766493877,
"step": 9800
},
{
"epoch": 0.3929427482415812,
"grad_norm": 1.681718349456787,
"learning_rate": 0.00015392307692307691,
"loss": 0.4483,
"step": 10000
},
{
"epoch": 0.3929427482415812,
"eval_loss": 0.6306164264678955,
"eval_runtime": 147.9882,
"eval_samples_per_second": 38.219,
"eval_steps_per_second": 4.777,
"eval_wer": 0.504421370223556,
"step": 10000
},
{
"epoch": 0.40080160320641284,
"eval_loss": 0.6359797716140747,
"eval_runtime": 147.568,
"eval_samples_per_second": 38.328,
"eval_steps_per_second": 4.791,
"eval_wer": 0.5003771404727897,
"step": 10200
},
{
"epoch": 0.40866045817124447,
"eval_loss": 0.6301611661911011,
"eval_runtime": 146.6632,
"eval_samples_per_second": 38.565,
"eval_steps_per_second": 4.821,
"eval_wer": 0.49135786618735056,
"step": 10400
},
{
"epoch": 0.4125898856536603,
"grad_norm": 2.8097307682037354,
"learning_rate": 0.0001462307692307692,
"loss": 0.4454,
"step": 10500
},
{
"epoch": 0.4165193131360761,
"eval_loss": 0.616253674030304,
"eval_runtime": 147.6301,
"eval_samples_per_second": 38.312,
"eval_steps_per_second": 4.789,
"eval_wer": 0.48505079359984593,
"step": 10600
},
{
"epoch": 0.42437816810090767,
"eval_loss": 0.6221349239349365,
"eval_runtime": 147.7897,
"eval_samples_per_second": 38.271,
"eval_steps_per_second": 4.784,
"eval_wer": 0.49105294410296735,
"step": 10800
},
{
"epoch": 0.4322370230657393,
"grad_norm": 4.2350687980651855,
"learning_rate": 0.0001385384615384615,
"loss": 0.4302,
"step": 11000
},
{
"epoch": 0.4322370230657393,
"eval_loss": 0.6395624279975891,
"eval_runtime": 147.7882,
"eval_samples_per_second": 38.271,
"eval_steps_per_second": 4.784,
"eval_wer": 0.5000561698576496,
"step": 11000
},
{
"epoch": 0.44009587803057093,
"eval_loss": 0.6212363839149475,
"eval_runtime": 148.3281,
"eval_samples_per_second": 38.132,
"eval_steps_per_second": 4.766,
"eval_wer": 0.4840557846929114,
"step": 11200
},
{
"epoch": 0.44795473299540256,
"eval_loss": 0.6267797946929932,
"eval_runtime": 147.5749,
"eval_samples_per_second": 38.326,
"eval_steps_per_second": 4.791,
"eval_wer": 0.49379724286241594,
"step": 11400
},
{
"epoch": 0.4518841604778184,
"grad_norm": 2.2473807334899902,
"learning_rate": 0.00013086153846153845,
"loss": 0.4261,
"step": 11500
},
{
"epoch": 0.4558135879602342,
"eval_loss": 0.6097697019577026,
"eval_runtime": 148.2001,
"eval_samples_per_second": 38.165,
"eval_steps_per_second": 4.771,
"eval_wer": 0.48201762128677117,
"step": 11600
},
{
"epoch": 0.4636724429250658,
"eval_loss": 0.6009463667869568,
"eval_runtime": 147.4302,
"eval_samples_per_second": 38.364,
"eval_steps_per_second": 4.795,
"eval_wer": 0.4689220201890517,
"step": 11800
},
{
"epoch": 0.47153129788989745,
"grad_norm": 2.0571179389953613,
"learning_rate": 0.00012316923076923078,
"loss": 0.4026,
"step": 12000
},
{
"epoch": 0.47153129788989745,
"eval_loss": 0.609122633934021,
"eval_runtime": 147.8463,
"eval_samples_per_second": 38.256,
"eval_steps_per_second": 4.782,
"eval_wer": 0.4809584182568086,
"step": 12000
},
{
"epoch": 0.4793901528547291,
"eval_loss": 0.6019255518913269,
"eval_runtime": 148.5912,
"eval_samples_per_second": 38.064,
"eval_steps_per_second": 4.758,
"eval_wer": 0.4805732535186404,
"step": 12200
},
{
"epoch": 0.4872490078195607,
"eval_loss": 0.5946715474128723,
"eval_runtime": 147.4021,
"eval_samples_per_second": 38.371,
"eval_steps_per_second": 4.796,
"eval_wer": 0.4671085362135097,
"step": 12400
},
{
"epoch": 0.4911784353019765,
"grad_norm": 2.5033822059631348,
"learning_rate": 0.00011547692307692306,
"loss": 0.4027,
"step": 12500
},
{
"epoch": 0.49510786278439234,
"eval_loss": 0.5993836522102356,
"eval_runtime": 147.7878,
"eval_samples_per_second": 38.271,
"eval_steps_per_second": 4.784,
"eval_wer": 0.47092808653367785,
"step": 12600
},
{
"epoch": 0.502966717749224,
"eval_loss": 0.5981957912445068,
"eval_runtime": 147.5137,
"eval_samples_per_second": 38.342,
"eval_steps_per_second": 4.793,
"eval_wer": 0.4760796649066778,
"step": 12800
},
{
"epoch": 0.5108255727140556,
"grad_norm": 3.0013859272003174,
"learning_rate": 0.00010778461538461537,
"loss": 0.3978,
"step": 13000
},
{
"epoch": 0.5108255727140556,
"eval_loss": 0.5889731645584106,
"eval_runtime": 148.3841,
"eval_samples_per_second": 38.117,
"eval_steps_per_second": 4.765,
"eval_wer": 0.4632087432395564,
"step": 13000
},
{
"epoch": 0.5186844276788872,
"eval_loss": 0.5871375799179077,
"eval_runtime": 147.6289,
"eval_samples_per_second": 38.312,
"eval_steps_per_second": 4.789,
"eval_wer": 0.4567090882829677,
"step": 13200
},
{
"epoch": 0.5265432826437189,
"eval_loss": 0.5873442888259888,
"eval_runtime": 148.2022,
"eval_samples_per_second": 38.164,
"eval_steps_per_second": 4.771,
"eval_wer": 0.4634815682624256,
"step": 13400
},
{
"epoch": 0.5304727101261346,
"grad_norm": 3.4298863410949707,
"learning_rate": 0.00010009230769230768,
"loss": 0.3875,
"step": 13500
},
{
"epoch": 0.5344021376085505,
"eval_loss": 0.5772218704223633,
"eval_runtime": 148.5655,
"eval_samples_per_second": 38.071,
"eval_steps_per_second": 4.759,
"eval_wer": 0.4538684983389771,
"step": 13600
},
{
"epoch": 0.542260992573382,
"eval_loss": 0.5603720545768738,
"eval_runtime": 147.4995,
"eval_samples_per_second": 38.346,
"eval_steps_per_second": 4.793,
"eval_wer": 0.4419123429250052,
"step": 13800
},
{
"epoch": 0.5501198475382136,
"grad_norm": 2.6594979763031006,
"learning_rate": 9.24e-05,
"loss": 0.404,
"step": 14000
},
{
"epoch": 0.5501198475382136,
"eval_loss": 0.5688683986663818,
"eval_runtime": 147.0269,
"eval_samples_per_second": 38.469,
"eval_steps_per_second": 4.809,
"eval_wer": 0.4454269711607902,
"step": 14000
},
{
"epoch": 0.5579787025030453,
"eval_loss": 0.5594531893730164,
"eval_runtime": 147.2136,
"eval_samples_per_second": 38.42,
"eval_steps_per_second": 4.803,
"eval_wer": 0.443292516570108,
"step": 14200
},
{
"epoch": 0.5658375574678769,
"eval_loss": 0.5574955940246582,
"eval_runtime": 147.8377,
"eval_samples_per_second": 38.258,
"eval_steps_per_second": 4.782,
"eval_wer": 0.4405803148721735,
"step": 14400
},
{
"epoch": 0.5697669849502928,
"grad_norm": 1.834619402885437,
"learning_rate": 8.472307692307691e-05,
"loss": 0.3878,
"step": 14500
},
{
"epoch": 0.5736964124327085,
"eval_loss": 0.5521669983863831,
"eval_runtime": 147.831,
"eval_samples_per_second": 38.26,
"eval_steps_per_second": 4.782,
"eval_wer": 0.43528429972236043,
"step": 14600
},
{
"epoch": 0.5815552673975402,
"eval_loss": 0.5521777272224426,
"eval_runtime": 148.1309,
"eval_samples_per_second": 38.182,
"eval_steps_per_second": 4.773,
"eval_wer": 0.4351719600070614,
"step": 14800
},
{
"epoch": 0.5894141223623718,
"grad_norm": 2.739065408706665,
"learning_rate": 7.703076923076922e-05,
"loss": 0.3622,
"step": 15000
},
{
"epoch": 0.5894141223623718,
"eval_loss": 0.5570353865623474,
"eval_runtime": 147.6686,
"eval_samples_per_second": 38.302,
"eval_steps_per_second": 4.788,
"eval_wer": 0.4401309560109772,
"step": 15000
},
{
"epoch": 0.5972729773272034,
"eval_loss": 0.5467315912246704,
"eval_runtime": 147.8113,
"eval_samples_per_second": 38.265,
"eval_steps_per_second": 4.783,
"eval_wer": 0.42803036382019227,
"step": 15200
},
{
"epoch": 0.605131832292035,
"eval_loss": 0.5510929822921753,
"eval_runtime": 148.0412,
"eval_samples_per_second": 38.206,
"eval_steps_per_second": 4.776,
"eval_wer": 0.43404856285407073,
"step": 15400
},
{
"epoch": 0.6090612597744509,
"grad_norm": 3.7551369667053223,
"learning_rate": 6.933846153846154e-05,
"loss": 0.3545,
"step": 15500
},
{
"epoch": 0.6129906872568667,
"eval_loss": 0.5437116026878357,
"eval_runtime": 147.4367,
"eval_samples_per_second": 38.362,
"eval_steps_per_second": 4.795,
"eval_wer": 0.4245478326459213,
"step": 15600
},
{
"epoch": 0.6208495422216983,
"eval_loss": 0.5488719940185547,
"eval_runtime": 147.4687,
"eval_samples_per_second": 38.354,
"eval_steps_per_second": 4.794,
"eval_wer": 0.4296352168958932,
"step": 15800
},
{
"epoch": 0.6287083971865299,
"grad_norm": 1.4097563028335571,
"learning_rate": 6.164615384615383e-05,
"loss": 0.3486,
"step": 16000
},
{
"epoch": 0.6287083971865299,
"eval_loss": 0.5420017242431641,
"eval_runtime": 147.3988,
"eval_samples_per_second": 38.372,
"eval_steps_per_second": 4.797,
"eval_wer": 0.4277896358588371,
"step": 16000
},
{
"epoch": 0.6365672521513616,
"eval_loss": 0.5352106094360352,
"eval_runtime": 148.3938,
"eval_samples_per_second": 38.115,
"eval_steps_per_second": 4.764,
"eval_wer": 0.4212739323714914,
"step": 16200
},
{
"epoch": 0.6444261071161932,
"eval_loss": 0.5376533269882202,
"eval_runtime": 147.9443,
"eval_samples_per_second": 38.231,
"eval_steps_per_second": 4.779,
"eval_wer": 0.42592800629102406,
"step": 16400
},
{
"epoch": 0.648355534598609,
"grad_norm": 2.8873980045318604,
"learning_rate": 5.395384615384615e-05,
"loss": 0.3374,
"step": 16500
},
{
"epoch": 0.6522849620810248,
"eval_loss": 0.5336429476737976,
"eval_runtime": 149.7227,
"eval_samples_per_second": 37.776,
"eval_steps_per_second": 4.722,
"eval_wer": 0.4305018375567717,
"step": 16600
},
{
"epoch": 0.6601438170458565,
"eval_loss": 0.5293694734573364,
"eval_runtime": 148.8049,
"eval_samples_per_second": 38.01,
"eval_steps_per_second": 4.751,
"eval_wer": 0.41875431304264094,
"step": 16800
},
{
"epoch": 0.6680026720106881,
"grad_norm": 3.3759422302246094,
"learning_rate": 4.6276923076923074e-05,
"loss": 0.3389,
"step": 17000
},
{
"epoch": 0.6680026720106881,
"eval_loss": 0.5252575278282166,
"eval_runtime": 149.4186,
"eval_samples_per_second": 37.853,
"eval_steps_per_second": 4.732,
"eval_wer": 0.4169408290670989,
"step": 17000
},
{
"epoch": 0.6758615269755197,
"eval_loss": 0.5194066166877747,
"eval_runtime": 147.8703,
"eval_samples_per_second": 38.25,
"eval_steps_per_second": 4.781,
"eval_wer": 0.41443725826900546,
"step": 17200
},
{
"epoch": 0.6837203819403513,
"eval_loss": 0.5231760740280151,
"eval_runtime": 147.3205,
"eval_samples_per_second": 38.392,
"eval_steps_per_second": 4.799,
"eval_wer": 0.417101314374669,
"step": 17400
},
{
"epoch": 0.6876498094227671,
"grad_norm": 3.4031200408935547,
"learning_rate": 3.858461538461538e-05,
"loss": 0.3258,
"step": 17500
},
{
"epoch": 0.691579236905183,
"eval_loss": 0.5179495811462402,
"eval_runtime": 148.0293,
"eval_samples_per_second": 38.209,
"eval_steps_per_second": 4.776,
"eval_wer": 0.41653961579817367,
"step": 17600
},
{
"epoch": 0.6994380918700145,
"eval_loss": 0.5132375359535217,
"eval_runtime": 149.1897,
"eval_samples_per_second": 37.911,
"eval_steps_per_second": 4.739,
"eval_wer": 0.4103769799874821,
"step": 17800
},
{
"epoch": 0.7072969468348461,
"grad_norm": 4.0969133377075195,
"learning_rate": 3.090769230769231e-05,
"loss": 0.327,
"step": 18000
},
{
"epoch": 0.7072969468348461,
"eval_loss": 0.5096033215522766,
"eval_runtime": 148.384,
"eval_samples_per_second": 38.117,
"eval_steps_per_second": 4.765,
"eval_wer": 0.40440692654587473,
"step": 18000
},
{
"epoch": 0.7151558017996777,
"eval_loss": 0.5040988922119141,
"eval_runtime": 148.3441,
"eval_samples_per_second": 38.128,
"eval_steps_per_second": 4.766,
"eval_wer": 0.40337982057742616,
"step": 18200
},
{
"epoch": 0.7230146567645094,
"eval_loss": 0.5013387203216553,
"eval_runtime": 147.8316,
"eval_samples_per_second": 38.26,
"eval_steps_per_second": 4.782,
"eval_wer": 0.3980677568968561,
"step": 18400
},
{
"epoch": 0.7269440842469252,
"grad_norm": 2.58701491355896,
"learning_rate": 2.3215384615384613e-05,
"loss": 0.316,
"step": 18500
},
{
"epoch": 0.730873511729341,
"eval_loss": 0.5074244737625122,
"eval_runtime": 148.936,
"eval_samples_per_second": 37.976,
"eval_steps_per_second": 4.747,
"eval_wer": 0.4064611384827719,
"step": 18600
},
{
"epoch": 0.7387323666941726,
"eval_loss": 0.5014389753341675,
"eval_runtime": 148.0107,
"eval_samples_per_second": 38.213,
"eval_steps_per_second": 4.777,
"eval_wer": 0.40545008104508035,
"step": 18800
},
{
"epoch": 0.7465912216590043,
"grad_norm": 1.975710391998291,
"learning_rate": 1.552307692307692e-05,
"loss": 0.3162,
"step": 19000
},
{
"epoch": 0.7465912216590043,
"eval_loss": 0.4958648383617401,
"eval_runtime": 148.7492,
"eval_samples_per_second": 38.024,
"eval_steps_per_second": 4.753,
"eval_wer": 0.3998170467493701,
"step": 19000
},
{
"epoch": 0.7544500766238359,
"eval_loss": 0.4930271506309509,
"eval_runtime": 147.4379,
"eval_samples_per_second": 38.362,
"eval_steps_per_second": 4.795,
"eval_wer": 0.39818009661215514,
"step": 19200
},
{
"epoch": 0.7623089315886675,
"eval_loss": 0.4924609363079071,
"eval_runtime": 149.2355,
"eval_samples_per_second": 37.9,
"eval_steps_per_second": 4.737,
"eval_wer": 0.39821219367366917,
"step": 19400
},
{
"epoch": 0.7662383590710834,
"grad_norm": 2.332016944885254,
"learning_rate": 7.83076923076923e-06,
"loss": 0.3145,
"step": 19500
},
{
"epoch": 0.7701677865534992,
"eval_loss": 0.4922255575656891,
"eval_runtime": 148.1982,
"eval_samples_per_second": 38.165,
"eval_steps_per_second": 4.771,
"eval_wer": 0.3970246023976505,
"step": 19600
},
{
"epoch": 0.7780266415183308,
"eval_loss": 0.49084585905075073,
"eval_runtime": 148.8021,
"eval_samples_per_second": 38.01,
"eval_steps_per_second": 4.751,
"eval_wer": 0.39692831121310845,
"step": 19800
},
{
"epoch": 0.7858854964831624,
"grad_norm": 5.262033462524414,
"learning_rate": 1.5384615384615385e-07,
"loss": 0.3095,
"step": 20000
},
{
"epoch": 0.7858854964831624,
"eval_loss": 0.4908364713191986,
"eval_runtime": 148.1385,
"eval_samples_per_second": 38.18,
"eval_steps_per_second": 4.773,
"eval_wer": 0.39638266116737014,
"step": 20000
},
{
"epoch": 0.7858854964831624,
"step": 20000,
"total_flos": 2.4863424513490096e+19,
"train_loss": 0.5819183097839356,
"train_runtime": 23325.1985,
"train_samples_per_second": 6.86,
"train_steps_per_second": 0.857
}
],
"logging_steps": 500,
"max_steps": 20000,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 400,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.4863424513490096e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}