wav2vec2-xlsr-53-ft-btb-ccv-cy / trainer_state.json
DewiBrynJones's picture
End of training
b30b16e verified
raw
history blame
82.4 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9647137412079059,
"eval_steps": 200,
"global_step": 50000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.007858854964831625,
"eval_loss": 3.175461769104004,
"eval_runtime": 144.4533,
"eval_samples_per_second": 39.155,
"eval_steps_per_second": 4.894,
"eval_wer": 1.0,
"step": 200
},
{
"epoch": 0.01571770992966325,
"eval_loss": 2.8796634674072266,
"eval_runtime": 143.0854,
"eval_samples_per_second": 39.529,
"eval_steps_per_second": 4.941,
"eval_wer": 1.0,
"step": 400
},
{
"epoch": 0.01964713741207906,
"grad_norm": 1.9831087589263916,
"learning_rate": 0.00029759999999999997,
"loss": 4.8076,
"step": 500
},
{
"epoch": 0.023576564894494872,
"eval_loss": 1.4753953218460083,
"eval_runtime": 143.2096,
"eval_samples_per_second": 39.495,
"eval_steps_per_second": 4.937,
"eval_wer": 0.903997689011571,
"step": 600
},
{
"epoch": 0.0314354198593265,
"eval_loss": 1.25261652469635,
"eval_runtime": 144.6031,
"eval_samples_per_second": 39.114,
"eval_steps_per_second": 4.889,
"eval_wer": 0.8548410393028518,
"step": 800
},
{
"epoch": 0.03929427482415812,
"grad_norm": 2.427387237548828,
"learning_rate": 0.0002969939393939394,
"loss": 1.1153,
"step": 1000
},
{
"epoch": 0.03929427482415812,
"eval_loss": 1.1311910152435303,
"eval_runtime": 144.1542,
"eval_samples_per_second": 39.236,
"eval_steps_per_second": 4.904,
"eval_wer": 0.788769238176245,
"step": 1000
},
{
"epoch": 0.047153129788989744,
"eval_loss": 1.0895923376083374,
"eval_runtime": 144.7009,
"eval_samples_per_second": 39.088,
"eval_steps_per_second": 4.886,
"eval_wer": 0.7734749883648152,
"step": 1200
},
{
"epoch": 0.055011984753821366,
"eval_loss": 1.0287705659866333,
"eval_runtime": 143.6225,
"eval_samples_per_second": 39.381,
"eval_steps_per_second": 4.923,
"eval_wer": 0.7571054869926658,
"step": 1400
},
{
"epoch": 0.05894141223623718,
"grad_norm": 2.3919336795806885,
"learning_rate": 0.0002939636363636363,
"loss": 0.8282,
"step": 1500
},
{
"epoch": 0.062870839718653,
"eval_loss": 0.9747628569602966,
"eval_runtime": 144.5139,
"eval_samples_per_second": 39.138,
"eval_steps_per_second": 4.892,
"eval_wer": 0.7254096387475727,
"step": 1600
},
{
"epoch": 0.07072969468348461,
"eval_loss": 0.9748485088348389,
"eval_runtime": 144.3418,
"eval_samples_per_second": 39.185,
"eval_steps_per_second": 4.898,
"eval_wer": 0.7194556338367223,
"step": 1800
},
{
"epoch": 0.07858854964831624,
"grad_norm": 2.169008255004883,
"learning_rate": 0.0002909333333333333,
"loss": 0.7335,
"step": 2000
},
{
"epoch": 0.07858854964831624,
"eval_loss": 0.9882574081420898,
"eval_runtime": 145.192,
"eval_samples_per_second": 38.955,
"eval_steps_per_second": 4.869,
"eval_wer": 0.7143682495867504,
"step": 2000
},
{
"epoch": 0.08644740461314787,
"eval_loss": 0.9364911317825317,
"eval_runtime": 145.4626,
"eval_samples_per_second": 38.883,
"eval_steps_per_second": 4.86,
"eval_wer": 0.7061834989006757,
"step": 2200
},
{
"epoch": 0.09430625957797949,
"eval_loss": 0.9164892435073853,
"eval_runtime": 145.9321,
"eval_samples_per_second": 38.758,
"eval_steps_per_second": 4.845,
"eval_wer": 0.6801688305435637,
"step": 2400
},
{
"epoch": 0.0982356870603953,
"grad_norm": 5.276973247528076,
"learning_rate": 0.00028790303030303027,
"loss": 0.6931,
"step": 2500
},
{
"epoch": 0.10216511454281112,
"eval_loss": 0.9169939756393433,
"eval_runtime": 145.3478,
"eval_samples_per_second": 38.914,
"eval_steps_per_second": 4.864,
"eval_wer": 0.6773603376610872,
"step": 2600
},
{
"epoch": 0.11002396950764273,
"eval_loss": 0.9080427289009094,
"eval_runtime": 144.7759,
"eval_samples_per_second": 39.067,
"eval_steps_per_second": 4.883,
"eval_wer": 0.6692237325672835,
"step": 2800
},
{
"epoch": 0.11788282447247436,
"grad_norm": 2.9965720176696777,
"learning_rate": 0.00028487272727272726,
"loss": 0.67,
"step": 3000
},
{
"epoch": 0.11788282447247436,
"eval_loss": 0.8609287738800049,
"eval_runtime": 145.381,
"eval_samples_per_second": 38.905,
"eval_steps_per_second": 4.863,
"eval_wer": 0.6621784275649564,
"step": 3000
},
{
"epoch": 0.125741679437306,
"eval_loss": 0.8863000273704529,
"eval_runtime": 144.6247,
"eval_samples_per_second": 39.108,
"eval_steps_per_second": 4.889,
"eval_wer": 0.6659177352313396,
"step": 3200
},
{
"epoch": 0.13360053440213762,
"eval_loss": 0.8669990301132202,
"eval_runtime": 145.3885,
"eval_samples_per_second": 38.903,
"eval_steps_per_second": 4.863,
"eval_wer": 0.6610710789427228,
"step": 3400
},
{
"epoch": 0.1375299618845534,
"grad_norm": 3.541180372238159,
"learning_rate": 0.0002818424242424242,
"loss": 0.6282,
"step": 3500
},
{
"epoch": 0.14145938936696922,
"eval_loss": 0.8718289136886597,
"eval_runtime": 147.388,
"eval_samples_per_second": 38.375,
"eval_steps_per_second": 4.797,
"eval_wer": 0.6819983630498628,
"step": 3600
},
{
"epoch": 0.14931824433180085,
"eval_loss": 0.861672580242157,
"eval_runtime": 145.355,
"eval_samples_per_second": 38.912,
"eval_steps_per_second": 4.864,
"eval_wer": 0.6481520116833304,
"step": 3800
},
{
"epoch": 0.15717709929663248,
"grad_norm": 1.9885746240615845,
"learning_rate": 0.0002788121212121212,
"loss": 0.6311,
"step": 4000
},
{
"epoch": 0.15717709929663248,
"eval_loss": 0.8504879474639893,
"eval_runtime": 145.0997,
"eval_samples_per_second": 38.98,
"eval_steps_per_second": 4.873,
"eval_wer": 0.6597230023591341,
"step": 4000
},
{
"epoch": 0.1650359542614641,
"eval_loss": 0.8290337324142456,
"eval_runtime": 144.8192,
"eval_samples_per_second": 39.056,
"eval_steps_per_second": 4.882,
"eval_wer": 0.6292307939208166,
"step": 4200
},
{
"epoch": 0.17289480922629574,
"eval_loss": 0.8300275206565857,
"eval_runtime": 144.9963,
"eval_samples_per_second": 39.008,
"eval_steps_per_second": 4.876,
"eval_wer": 0.6567540241690873,
"step": 4400
},
{
"epoch": 0.17682423670871153,
"grad_norm": 3.603195905685425,
"learning_rate": 0.0002757878787878788,
"loss": 0.615,
"step": 4500
},
{
"epoch": 0.18075366419112734,
"eval_loss": 0.8007863163948059,
"eval_runtime": 144.795,
"eval_samples_per_second": 39.062,
"eval_steps_per_second": 4.883,
"eval_wer": 0.610855226204041,
"step": 4600
},
{
"epoch": 0.18861251915595897,
"eval_loss": 0.8038597702980042,
"eval_runtime": 144.8128,
"eval_samples_per_second": 39.057,
"eval_steps_per_second": 4.882,
"eval_wer": 0.6045160565550224,
"step": 4800
},
{
"epoch": 0.1964713741207906,
"grad_norm": 3.389535665512085,
"learning_rate": 0.0002727575757575757,
"loss": 0.5785,
"step": 5000
},
{
"epoch": 0.1964713741207906,
"eval_loss": 0.7907959818840027,
"eval_runtime": 144.5449,
"eval_samples_per_second": 39.13,
"eval_steps_per_second": 4.891,
"eval_wer": 0.6071801126606859,
"step": 5000
},
{
"epoch": 0.20433022908562223,
"eval_loss": 0.7867733836174011,
"eval_runtime": 144.7418,
"eval_samples_per_second": 39.076,
"eval_steps_per_second": 4.885,
"eval_wer": 0.6037457270786859,
"step": 5200
},
{
"epoch": 0.21218908405045384,
"eval_loss": 0.7709878087043762,
"eval_runtime": 146.1138,
"eval_samples_per_second": 38.71,
"eval_steps_per_second": 4.839,
"eval_wer": 0.5988348766670412,
"step": 5400
},
{
"epoch": 0.21611851153286965,
"grad_norm": 2.476861000061035,
"learning_rate": 0.00026972727272727266,
"loss": 0.5928,
"step": 5500
},
{
"epoch": 0.22004793901528547,
"eval_loss": 0.766153872013092,
"eval_runtime": 144.8164,
"eval_samples_per_second": 39.056,
"eval_steps_per_second": 4.882,
"eval_wer": 0.5747139349392563,
"step": 5600
},
{
"epoch": 0.2279067939801171,
"eval_loss": 0.767308235168457,
"eval_runtime": 145.7638,
"eval_samples_per_second": 38.803,
"eval_steps_per_second": 4.85,
"eval_wer": 0.5945820160164337,
"step": 5800
},
{
"epoch": 0.23576564894494872,
"grad_norm": 2.2588391304016113,
"learning_rate": 0.00026669696969696966,
"loss": 0.5799,
"step": 6000
},
{
"epoch": 0.23576564894494872,
"eval_loss": 0.7804461121559143,
"eval_runtime": 145.0414,
"eval_samples_per_second": 38.996,
"eval_steps_per_second": 4.874,
"eval_wer": 0.5990114105053682,
"step": 6000
},
{
"epoch": 0.24362450390978035,
"eval_loss": 0.7586621642112732,
"eval_runtime": 145.864,
"eval_samples_per_second": 38.776,
"eval_steps_per_second": 4.847,
"eval_wer": 0.5780520293367142,
"step": 6200
},
{
"epoch": 0.251483358874612,
"eval_loss": 0.749543309211731,
"eval_runtime": 145.8617,
"eval_samples_per_second": 38.776,
"eval_steps_per_second": 4.847,
"eval_wer": 0.5728683539022003,
"step": 6400
},
{
"epoch": 0.2554127863570278,
"grad_norm": 1.971763253211975,
"learning_rate": 0.00026366666666666666,
"loss": 0.5534,
"step": 6500
},
{
"epoch": 0.2593422138394436,
"eval_loss": 0.7536802291870117,
"eval_runtime": 147.4299,
"eval_samples_per_second": 38.364,
"eval_steps_per_second": 4.795,
"eval_wer": 0.5768804865914525,
"step": 6600
},
{
"epoch": 0.26720106880427524,
"eval_loss": 0.7661583423614502,
"eval_runtime": 149.947,
"eval_samples_per_second": 37.72,
"eval_steps_per_second": 4.715,
"eval_wer": 0.581245686957359,
"step": 6800
},
{
"epoch": 0.2750599237691068,
"grad_norm": 2.5072972774505615,
"learning_rate": 0.0002606363636363636,
"loss": 0.5592,
"step": 7000
},
{
"epoch": 0.2750599237691068,
"eval_loss": 0.7571460604667664,
"eval_runtime": 145.6344,
"eval_samples_per_second": 38.837,
"eval_steps_per_second": 4.855,
"eval_wer": 0.5607998587729294,
"step": 7000
},
{
"epoch": 0.28291877873393845,
"eval_loss": 0.7475385665893555,
"eval_runtime": 145.4869,
"eval_samples_per_second": 38.876,
"eval_steps_per_second": 4.86,
"eval_wer": 0.5635120604708639,
"step": 7200
},
{
"epoch": 0.2907776336987701,
"eval_loss": 0.7267230749130249,
"eval_runtime": 145.614,
"eval_samples_per_second": 38.842,
"eval_steps_per_second": 4.855,
"eval_wer": 0.5591950056972285,
"step": 7400
},
{
"epoch": 0.2947070611811859,
"grad_norm": 3.4168338775634766,
"learning_rate": 0.0002576060606060606,
"loss": 0.5512,
"step": 7500
},
{
"epoch": 0.2986364886636017,
"eval_loss": 0.7362108826637268,
"eval_runtime": 145.7986,
"eval_samples_per_second": 38.793,
"eval_steps_per_second": 4.849,
"eval_wer": 0.5588098409590602,
"step": 7600
},
{
"epoch": 0.30649534362843334,
"eval_loss": 0.7624097466468811,
"eval_runtime": 145.4148,
"eval_samples_per_second": 38.896,
"eval_steps_per_second": 4.862,
"eval_wer": 0.581117298711303,
"step": 7800
},
{
"epoch": 0.31435419859326497,
"grad_norm": 2.9330873489379883,
"learning_rate": 0.00025457575757575755,
"loss": 0.54,
"step": 8000
},
{
"epoch": 0.31435419859326497,
"eval_loss": 0.7657227516174316,
"eval_runtime": 146.1604,
"eval_samples_per_second": 38.697,
"eval_steps_per_second": 4.837,
"eval_wer": 0.5622442265410602,
"step": 8000
},
{
"epoch": 0.3222130535580966,
"eval_loss": 0.7300673127174377,
"eval_runtime": 146.8709,
"eval_samples_per_second": 38.51,
"eval_steps_per_second": 4.814,
"eval_wer": 0.5453611721846865,
"step": 8200
},
{
"epoch": 0.3300719085229282,
"eval_loss": 0.7118472456932068,
"eval_runtime": 146.4543,
"eval_samples_per_second": 38.62,
"eval_steps_per_second": 4.827,
"eval_wer": 0.5381553818747894,
"step": 8400
},
{
"epoch": 0.33400133600534404,
"grad_norm": 2.0070419311523438,
"learning_rate": 0.00025154545454545454,
"loss": 0.531,
"step": 8500
},
{
"epoch": 0.33793076348775986,
"eval_loss": 0.7252832055091858,
"eval_runtime": 145.6223,
"eval_samples_per_second": 38.84,
"eval_steps_per_second": 4.855,
"eval_wer": 0.548153616536406,
"step": 8600
},
{
"epoch": 0.3457896184525915,
"eval_loss": 0.7304599285125732,
"eval_runtime": 145.89,
"eval_samples_per_second": 38.769,
"eval_steps_per_second": 4.846,
"eval_wer": 0.5582962879748359,
"step": 8800
},
{
"epoch": 0.35364847341742306,
"grad_norm": 2.5275588035583496,
"learning_rate": 0.00024852121212121206,
"loss": 0.5406,
"step": 9000
},
{
"epoch": 0.35364847341742306,
"eval_loss": 0.7097567915916443,
"eval_runtime": 145.7013,
"eval_samples_per_second": 38.819,
"eval_steps_per_second": 4.852,
"eval_wer": 0.5520213124488453,
"step": 9000
},
{
"epoch": 0.3615073283822547,
"eval_loss": 0.698684573173523,
"eval_runtime": 146.3052,
"eval_samples_per_second": 38.659,
"eval_steps_per_second": 4.832,
"eval_wer": 0.5372245670908828,
"step": 9200
},
{
"epoch": 0.3693661833470863,
"eval_loss": 0.7044981718063354,
"eval_runtime": 145.8062,
"eval_samples_per_second": 38.791,
"eval_steps_per_second": 4.849,
"eval_wer": 0.5472548988140136,
"step": 9400
},
{
"epoch": 0.37329561082950213,
"grad_norm": 6.208221435546875,
"learning_rate": 0.00024549090909090906,
"loss": 0.5252,
"step": 9500
},
{
"epoch": 0.37722503831191795,
"eval_loss": 0.7025354504585266,
"eval_runtime": 146.2272,
"eval_samples_per_second": 38.68,
"eval_steps_per_second": 4.835,
"eval_wer": 0.5332766285246585,
"step": 9600
},
{
"epoch": 0.3850838932767496,
"eval_loss": 0.7077142000198364,
"eval_runtime": 145.5575,
"eval_samples_per_second": 38.857,
"eval_steps_per_second": 4.857,
"eval_wer": 0.5461796472532939,
"step": 9800
},
{
"epoch": 0.3929427482415812,
"grad_norm": 4.407375812530518,
"learning_rate": 0.00024246060606060606,
"loss": 0.5156,
"step": 10000
},
{
"epoch": 0.3929427482415812,
"eval_loss": 0.7006597518920898,
"eval_runtime": 146.3123,
"eval_samples_per_second": 38.657,
"eval_steps_per_second": 4.832,
"eval_wer": 0.5382516730593314,
"step": 10000
},
{
"epoch": 0.40080160320641284,
"eval_loss": 0.6947250962257385,
"eval_runtime": 145.4545,
"eval_samples_per_second": 38.885,
"eval_steps_per_second": 4.861,
"eval_wer": 0.5425847763637239,
"step": 10200
},
{
"epoch": 0.40866045817124447,
"eval_loss": 0.7127708196640015,
"eval_runtime": 145.5874,
"eval_samples_per_second": 38.85,
"eval_steps_per_second": 4.856,
"eval_wer": 0.5361332669994062,
"step": 10400
},
{
"epoch": 0.4125898856536603,
"grad_norm": 2.721827983856201,
"learning_rate": 0.000239430303030303,
"loss": 0.5181,
"step": 10500
},
{
"epoch": 0.4165193131360761,
"eval_loss": 0.6945223212242126,
"eval_runtime": 146.0143,
"eval_samples_per_second": 38.736,
"eval_steps_per_second": 4.842,
"eval_wer": 0.5276114971674343,
"step": 10600
},
{
"epoch": 0.42437816810090767,
"eval_loss": 0.6985763311386108,
"eval_runtime": 146.5657,
"eval_samples_per_second": 38.59,
"eval_steps_per_second": 4.824,
"eval_wer": 0.5310619312801913,
"step": 10800
},
{
"epoch": 0.4322370230657393,
"grad_norm": 5.38914680480957,
"learning_rate": 0.0002364,
"loss": 0.5096,
"step": 11000
},
{
"epoch": 0.4322370230657393,
"eval_loss": 0.6909800171852112,
"eval_runtime": 146.0039,
"eval_samples_per_second": 38.739,
"eval_steps_per_second": 4.842,
"eval_wer": 0.5293126414276773,
"step": 11000
},
{
"epoch": 0.44009587803057093,
"eval_loss": 0.6855354905128479,
"eval_runtime": 146.6844,
"eval_samples_per_second": 38.559,
"eval_steps_per_second": 4.82,
"eval_wer": 0.5280608560286306,
"step": 11200
},
{
"epoch": 0.44795473299540256,
"eval_loss": 0.6889775395393372,
"eval_runtime": 146.3731,
"eval_samples_per_second": 38.641,
"eval_steps_per_second": 4.83,
"eval_wer": 0.5262313235223315,
"step": 11400
},
{
"epoch": 0.4518841604778184,
"grad_norm": 3.3484437465667725,
"learning_rate": 0.00023336969696969694,
"loss": 0.5099,
"step": 11500
},
{
"epoch": 0.4558135879602342,
"eval_loss": 0.677577018737793,
"eval_runtime": 146.1848,
"eval_samples_per_second": 38.691,
"eval_steps_per_second": 4.836,
"eval_wer": 0.5298101458811446,
"step": 11600
},
{
"epoch": 0.4636724429250658,
"eval_loss": 0.6817450523376465,
"eval_runtime": 146.2301,
"eval_samples_per_second": 38.679,
"eval_steps_per_second": 4.835,
"eval_wer": 0.5141949254545747,
"step": 11800
},
{
"epoch": 0.47153129788989745,
"grad_norm": 4.75791597366333,
"learning_rate": 0.00023033939393939391,
"loss": 0.481,
"step": 12000
},
{
"epoch": 0.47153129788989745,
"eval_loss": 0.6749030351638794,
"eval_runtime": 144.9955,
"eval_samples_per_second": 39.008,
"eval_steps_per_second": 4.876,
"eval_wer": 0.5318483092872848,
"step": 12000
},
{
"epoch": 0.4793901528547291,
"eval_loss": 0.6648340225219727,
"eval_runtime": 146.7705,
"eval_samples_per_second": 38.536,
"eval_steps_per_second": 4.817,
"eval_wer": 0.513167819486126,
"step": 12200
},
{
"epoch": 0.4872490078195607,
"eval_loss": 0.6659471392631531,
"eval_runtime": 145.9108,
"eval_samples_per_second": 38.763,
"eval_steps_per_second": 4.845,
"eval_wer": 0.5151096917077241,
"step": 12400
},
{
"epoch": 0.4911784353019765,
"grad_norm": 3.3849971294403076,
"learning_rate": 0.00022730909090909089,
"loss": 0.4899,
"step": 12500
},
{
"epoch": 0.49510786278439234,
"eval_loss": 0.6744287014007568,
"eval_runtime": 146.3152,
"eval_samples_per_second": 38.656,
"eval_steps_per_second": 4.832,
"eval_wer": 0.5207266774726774,
"step": 12600
},
{
"epoch": 0.502966717749224,
"eval_loss": 0.6732743978500366,
"eval_runtime": 146.1337,
"eval_samples_per_second": 38.704,
"eval_steps_per_second": 4.838,
"eval_wer": 0.5228771805941166,
"step": 12800
},
{
"epoch": 0.5108255727140556,
"grad_norm": 3.489818811416626,
"learning_rate": 0.00022427878787878786,
"loss": 0.492,
"step": 13000
},
{
"epoch": 0.5108255727140556,
"eval_loss": 0.6456639170646667,
"eval_runtime": 146.9518,
"eval_samples_per_second": 38.489,
"eval_steps_per_second": 4.811,
"eval_wer": 0.5041645937314438,
"step": 13000
},
{
"epoch": 0.5186844276788872,
"eval_loss": 0.6671249866485596,
"eval_runtime": 145.8641,
"eval_samples_per_second": 38.776,
"eval_steps_per_second": 4.847,
"eval_wer": 0.5259103529071913,
"step": 13200
},
{
"epoch": 0.5265432826437189,
"eval_loss": 0.6544414162635803,
"eval_runtime": 146.5937,
"eval_samples_per_second": 38.583,
"eval_steps_per_second": 4.823,
"eval_wer": 0.5179181845902008,
"step": 13400
},
{
"epoch": 0.5304727101261346,
"grad_norm": 1.4167377948760986,
"learning_rate": 0.00022125454545454546,
"loss": 0.4782,
"step": 13500
},
{
"epoch": 0.5344021376085505,
"eval_loss": 0.6560591459274292,
"eval_runtime": 146.1188,
"eval_samples_per_second": 38.708,
"eval_steps_per_second": 4.839,
"eval_wer": 0.5054484761920046,
"step": 13600
},
{
"epoch": 0.542260992573382,
"eval_loss": 0.6381711363792419,
"eval_runtime": 145.8554,
"eval_samples_per_second": 38.778,
"eval_steps_per_second": 4.847,
"eval_wer": 0.49918954919677105,
"step": 13800
},
{
"epoch": 0.5501198475382136,
"grad_norm": 3.582862615585327,
"learning_rate": 0.0002182242424242424,
"loss": 0.507,
"step": 14000
},
{
"epoch": 0.5501198475382136,
"eval_loss": 0.6555091738700867,
"eval_runtime": 148.0584,
"eval_samples_per_second": 38.201,
"eval_steps_per_second": 4.775,
"eval_wer": 0.504437418754313,
"step": 14000
},
{
"epoch": 0.5579787025030453,
"eval_loss": 0.6399552822113037,
"eval_runtime": 146.0824,
"eval_samples_per_second": 38.718,
"eval_steps_per_second": 4.84,
"eval_wer": 0.49548233859190194,
"step": 14200
},
{
"epoch": 0.5658375574678769,
"eval_loss": 0.6467686891555786,
"eval_runtime": 146.3336,
"eval_samples_per_second": 38.651,
"eval_steps_per_second": 4.831,
"eval_wer": 0.5014202949719954,
"step": 14400
},
{
"epoch": 0.5697669849502928,
"grad_norm": 2.1453781127929688,
"learning_rate": 0.0002151939393939394,
"loss": 0.4899,
"step": 14500
},
{
"epoch": 0.5736964124327085,
"eval_loss": 0.6370707750320435,
"eval_runtime": 146.4635,
"eval_samples_per_second": 38.617,
"eval_steps_per_second": 4.827,
"eval_wer": 0.49723162844441593,
"step": 14600
},
{
"epoch": 0.5815552673975402,
"eval_loss": 0.6356329917907715,
"eval_runtime": 145.6834,
"eval_samples_per_second": 38.824,
"eval_steps_per_second": 4.853,
"eval_wer": 0.5025597406557429,
"step": 14800
},
{
"epoch": 0.5894141223623718,
"grad_norm": 2.615446090698242,
"learning_rate": 0.00021216363636363634,
"loss": 0.4677,
"step": 15000
},
{
"epoch": 0.5894141223623718,
"eval_loss": 0.638607919216156,
"eval_runtime": 145.7689,
"eval_samples_per_second": 38.801,
"eval_steps_per_second": 4.85,
"eval_wer": 0.5021424788560608,
"step": 15000
},
{
"epoch": 0.5972729773272034,
"eval_loss": 0.6653130650520325,
"eval_runtime": 146.0092,
"eval_samples_per_second": 38.737,
"eval_steps_per_second": 4.842,
"eval_wer": 0.5190255332124344,
"step": 15200
},
{
"epoch": 0.605131832292035,
"eval_loss": 0.6442501544952393,
"eval_runtime": 146.2404,
"eval_samples_per_second": 38.676,
"eval_steps_per_second": 4.835,
"eval_wer": 0.4998154418962944,
"step": 15400
},
{
"epoch": 0.6090612597744509,
"grad_norm": 2.680966854095459,
"learning_rate": 0.0002091333333333333,
"loss": 0.461,
"step": 15500
},
{
"epoch": 0.6129906872568667,
"eval_loss": 0.6210175156593323,
"eval_runtime": 146.9594,
"eval_samples_per_second": 38.487,
"eval_steps_per_second": 4.811,
"eval_wer": 0.4896567219271076,
"step": 15600
},
{
"epoch": 0.6208495422216983,
"eval_loss": 0.6395752429962158,
"eval_runtime": 146.5911,
"eval_samples_per_second": 38.584,
"eval_steps_per_second": 4.823,
"eval_wer": 0.5011635184798832,
"step": 15800
},
{
"epoch": 0.6287083971865299,
"grad_norm": 2.2297749519348145,
"learning_rate": 0.00020610303030303028,
"loss": 0.4528,
"step": 16000
},
{
"epoch": 0.6287083971865299,
"eval_loss": 0.6226186752319336,
"eval_runtime": 147.1935,
"eval_samples_per_second": 38.426,
"eval_steps_per_second": 4.803,
"eval_wer": 0.49333183547046267,
"step": 16000
},
{
"epoch": 0.6365672521513616,
"eval_loss": 0.6253554224967957,
"eval_runtime": 147.0403,
"eval_samples_per_second": 38.466,
"eval_steps_per_second": 4.808,
"eval_wer": 0.49365280608560286,
"step": 16200
},
{
"epoch": 0.6444261071161932,
"eval_loss": 0.6289177536964417,
"eval_runtime": 146.8167,
"eval_samples_per_second": 38.524,
"eval_steps_per_second": 4.816,
"eval_wer": 0.5013240037874532,
"step": 16400
},
{
"epoch": 0.648355534598609,
"grad_norm": 1.9119956493377686,
"learning_rate": 0.00020307272727272725,
"loss": 0.451,
"step": 16500
},
{
"epoch": 0.6522849620810248,
"eval_loss": 0.6229738593101501,
"eval_runtime": 146.4262,
"eval_samples_per_second": 38.627,
"eval_steps_per_second": 4.828,
"eval_wer": 0.49723162844441593,
"step": 16600
},
{
"epoch": 0.6601438170458565,
"eval_loss": 0.6153121590614319,
"eval_runtime": 146.6615,
"eval_samples_per_second": 38.565,
"eval_steps_per_second": 4.821,
"eval_wer": 0.4957391150840141,
"step": 16800
},
{
"epoch": 0.6680026720106881,
"grad_norm": 3.115481376647949,
"learning_rate": 0.00020004848484848485,
"loss": 0.4444,
"step": 17000
},
{
"epoch": 0.6680026720106881,
"eval_loss": 0.6032531261444092,
"eval_runtime": 146.667,
"eval_samples_per_second": 38.564,
"eval_steps_per_second": 4.82,
"eval_wer": 0.47476368538460306,
"step": 17000
},
{
"epoch": 0.6758615269755197,
"eval_loss": 0.6153914332389832,
"eval_runtime": 146.5404,
"eval_samples_per_second": 38.597,
"eval_steps_per_second": 4.825,
"eval_wer": 0.4771388679366404,
"step": 17200
},
{
"epoch": 0.6837203819403513,
"eval_loss": 0.6169700622558594,
"eval_runtime": 146.4739,
"eval_samples_per_second": 38.614,
"eval_steps_per_second": 4.827,
"eval_wer": 0.48591741426072443,
"step": 17400
},
{
"epoch": 0.6876498094227671,
"grad_norm": 3.35622501373291,
"learning_rate": 0.0001970181818181818,
"loss": 0.4357,
"step": 17500
},
{
"epoch": 0.691579236905183,
"eval_loss": 0.6020850539207458,
"eval_runtime": 146.4462,
"eval_samples_per_second": 38.622,
"eval_steps_per_second": 4.828,
"eval_wer": 0.4814559227102759,
"step": 17600
},
{
"epoch": 0.6994380918700145,
"eval_loss": 0.6071408987045288,
"eval_runtime": 147.1123,
"eval_samples_per_second": 38.447,
"eval_steps_per_second": 4.806,
"eval_wer": 0.47303044406284606,
"step": 17800
},
{
"epoch": 0.7072969468348461,
"grad_norm": 2.2534916400909424,
"learning_rate": 0.0001939939393939394,
"loss": 0.4413,
"step": 18000
},
{
"epoch": 0.7072969468348461,
"eval_loss": 0.6042246222496033,
"eval_runtime": 146.518,
"eval_samples_per_second": 38.603,
"eval_steps_per_second": 4.825,
"eval_wer": 0.47656112082938806,
"step": 18000
},
{
"epoch": 0.7151558017996777,
"eval_loss": 0.6118656396865845,
"eval_runtime": 147.1712,
"eval_samples_per_second": 38.431,
"eval_steps_per_second": 4.804,
"eval_wer": 0.4837508626085282,
"step": 18200
},
{
"epoch": 0.7230146567645094,
"eval_loss": 0.6045942902565002,
"eval_runtime": 146.4829,
"eval_samples_per_second": 38.612,
"eval_steps_per_second": 4.827,
"eval_wer": 0.47569450016850956,
"step": 18400
},
{
"epoch": 0.7269440842469252,
"grad_norm": 3.591475248336792,
"learning_rate": 0.00019096363636363634,
"loss": 0.4375,
"step": 18500
},
{
"epoch": 0.730873511729341,
"eval_loss": 0.6081308722496033,
"eval_runtime": 147.4627,
"eval_samples_per_second": 38.355,
"eval_steps_per_second": 4.794,
"eval_wer": 0.4832854552165749,
"step": 18600
},
{
"epoch": 0.7387323666941726,
"eval_loss": 0.6007533073425293,
"eval_runtime": 146.3827,
"eval_samples_per_second": 38.638,
"eval_steps_per_second": 4.83,
"eval_wer": 0.4727897161014909,
"step": 18800
},
{
"epoch": 0.7465912216590043,
"grad_norm": 1.425370693206787,
"learning_rate": 0.0001879333333333333,
"loss": 0.4329,
"step": 19000
},
{
"epoch": 0.7465912216590043,
"eval_loss": 0.6008017063140869,
"eval_runtime": 147.3011,
"eval_samples_per_second": 38.398,
"eval_steps_per_second": 4.8,
"eval_wer": 0.46924299080419185,
"step": 19000
},
{
"epoch": 0.7544500766238359,
"eval_loss": 0.6007276177406311,
"eval_runtime": 146.7759,
"eval_samples_per_second": 38.535,
"eval_steps_per_second": 4.817,
"eval_wer": 0.4822262521866123,
"step": 19200
},
{
"epoch": 0.7623089315886675,
"eval_loss": 0.5838043093681335,
"eval_runtime": 146.9473,
"eval_samples_per_second": 38.49,
"eval_steps_per_second": 4.811,
"eval_wer": 0.4657925566914349,
"step": 19400
},
{
"epoch": 0.7662383590710834,
"grad_norm": 2.780203342437744,
"learning_rate": 0.00018490303030303028,
"loss": 0.4318,
"step": 19500
},
{
"epoch": 0.7701677865534992,
"eval_loss": 0.6007500290870667,
"eval_runtime": 146.6721,
"eval_samples_per_second": 38.562,
"eval_steps_per_second": 4.82,
"eval_wer": 0.46519876105342556,
"step": 19600
},
{
"epoch": 0.7780266415183308,
"eval_loss": 0.5918843746185303,
"eval_runtime": 147.2498,
"eval_samples_per_second": 38.411,
"eval_steps_per_second": 4.801,
"eval_wer": 0.4664826435139863,
"step": 19800
},
{
"epoch": 0.7858854964831624,
"grad_norm": 3.501138687133789,
"learning_rate": 0.00018187272727272725,
"loss": 0.4265,
"step": 20000
},
{
"epoch": 0.7858854964831624,
"eval_loss": 0.59038907289505,
"eval_runtime": 147.6976,
"eval_samples_per_second": 38.294,
"eval_steps_per_second": 4.787,
"eval_wer": 0.4721959204634816,
"step": 20000
},
{
"epoch": 0.793744351447994,
"eval_loss": 0.5922533273696899,
"eval_runtime": 146.8772,
"eval_samples_per_second": 38.508,
"eval_steps_per_second": 4.814,
"eval_wer": 0.4815201168333039,
"step": 20200
},
{
"epoch": 0.8016032064128257,
"eval_loss": 0.5979217886924744,
"eval_runtime": 146.9133,
"eval_samples_per_second": 38.499,
"eval_steps_per_second": 4.812,
"eval_wer": 0.4661295758373321,
"step": 20400
},
{
"epoch": 0.8055326338952414,
"grad_norm": 2.374830484390259,
"learning_rate": 0.00017884242424242425,
"loss": 0.4321,
"step": 20500
},
{
"epoch": 0.8094620613776573,
"eval_loss": 0.5837874412536621,
"eval_runtime": 146.6078,
"eval_samples_per_second": 38.579,
"eval_steps_per_second": 4.822,
"eval_wer": 0.45608319558344435,
"step": 20600
},
{
"epoch": 0.8173209163424889,
"eval_loss": 0.5824867486953735,
"eval_runtime": 147.7105,
"eval_samples_per_second": 38.291,
"eval_steps_per_second": 4.786,
"eval_wer": 0.4523920335093322,
"step": 20800
},
{
"epoch": 0.8251797713073206,
"grad_norm": 1.430405616760254,
"learning_rate": 0.0001758121212121212,
"loss": 0.4192,
"step": 21000
},
{
"epoch": 0.8251797713073206,
"eval_loss": 0.5838850140571594,
"eval_runtime": 146.699,
"eval_samples_per_second": 38.555,
"eval_steps_per_second": 4.819,
"eval_wer": 0.4551523807995378,
"step": 21000
},
{
"epoch": 0.8330386262721522,
"eval_loss": 0.5804269909858704,
"eval_runtime": 147.0076,
"eval_samples_per_second": 38.474,
"eval_steps_per_second": 4.809,
"eval_wer": 0.4593731443886312,
"step": 21200
},
{
"epoch": 0.8408974812369838,
"eval_loss": 0.5890819430351257,
"eval_runtime": 146.6585,
"eval_samples_per_second": 38.566,
"eval_steps_per_second": 4.821,
"eval_wer": 0.4722280175249956,
"step": 21400
},
{
"epoch": 0.8448269087193996,
"grad_norm": 2.7897725105285645,
"learning_rate": 0.00017278181818181817,
"loss": 0.4151,
"step": 21500
},
{
"epoch": 0.8487563362018153,
"eval_loss": 0.5830910205841064,
"eval_runtime": 147.6653,
"eval_samples_per_second": 38.303,
"eval_steps_per_second": 4.788,
"eval_wer": 0.4525204217553883,
"step": 21600
},
{
"epoch": 0.856615191166647,
"eval_loss": 0.5677404403686523,
"eval_runtime": 146.5378,
"eval_samples_per_second": 38.598,
"eval_steps_per_second": 4.825,
"eval_wer": 0.45430180866941633,
"step": 21800
},
{
"epoch": 0.8644740461314786,
"grad_norm": 2.938485622406006,
"learning_rate": 0.00016975757575757574,
"loss": 0.417,
"step": 22000
},
{
"epoch": 0.8644740461314786,
"eval_loss": 0.5605286359786987,
"eval_runtime": 147.3751,
"eval_samples_per_second": 38.378,
"eval_steps_per_second": 4.797,
"eval_wer": 0.446807144805893,
"step": 22000
},
{
"epoch": 0.8723329010963102,
"eval_loss": 0.570513129234314,
"eval_runtime": 146.7648,
"eval_samples_per_second": 38.538,
"eval_steps_per_second": 4.817,
"eval_wer": 0.44422333135401454,
"step": 22200
},
{
"epoch": 0.8801917560611419,
"eval_loss": 0.5685856938362122,
"eval_runtime": 147.3241,
"eval_samples_per_second": 38.392,
"eval_steps_per_second": 4.799,
"eval_wer": 0.4551363322687808,
"step": 22400
},
{
"epoch": 0.8841211835435577,
"grad_norm": 5.145638942718506,
"learning_rate": 0.0001667272727272727,
"loss": 0.4014,
"step": 22500
},
{
"epoch": 0.8880506110259735,
"eval_loss": 0.5751659870147705,
"eval_runtime": 146.2417,
"eval_samples_per_second": 38.676,
"eval_steps_per_second": 4.834,
"eval_wer": 0.4602397650495097,
"step": 22600
},
{
"epoch": 0.8959094659908051,
"eval_loss": 0.5623380541801453,
"eval_runtime": 146.6371,
"eval_samples_per_second": 38.571,
"eval_steps_per_second": 4.821,
"eval_wer": 0.4452985829147342,
"step": 22800
},
{
"epoch": 0.9037683209556368,
"grad_norm": 1.9630001783370972,
"learning_rate": 0.00016369696969696968,
"loss": 0.4024,
"step": 23000
},
{
"epoch": 0.9037683209556368,
"eval_loss": 0.5631678700447083,
"eval_runtime": 146.9977,
"eval_samples_per_second": 38.477,
"eval_steps_per_second": 4.81,
"eval_wer": 0.4423777503169585,
"step": 23000
},
{
"epoch": 0.9116271759204684,
"eval_loss": 0.568145751953125,
"eval_runtime": 146.7017,
"eval_samples_per_second": 38.554,
"eval_steps_per_second": 4.819,
"eval_wer": 0.4471120668902762,
"step": 23200
},
{
"epoch": 0.9194860308853,
"eval_loss": 0.5659225583076477,
"eval_runtime": 147.422,
"eval_samples_per_second": 38.366,
"eval_steps_per_second": 4.796,
"eval_wer": 0.4510760539872575,
"step": 23400
},
{
"epoch": 0.9234154583677158,
"grad_norm": 2.880105972290039,
"learning_rate": 0.00016066666666666665,
"loss": 0.3899,
"step": 23500
},
{
"epoch": 0.9273448858501316,
"eval_loss": 0.5653769969940186,
"eval_runtime": 147.0508,
"eval_samples_per_second": 38.463,
"eval_steps_per_second": 4.808,
"eval_wer": 0.4417197605559211,
"step": 23600
},
{
"epoch": 0.9352037408149633,
"eval_loss": 0.5691047310829163,
"eval_runtime": 147.3319,
"eval_samples_per_second": 38.39,
"eval_steps_per_second": 4.799,
"eval_wer": 0.45418946895411727,
"step": 23800
},
{
"epoch": 0.9430625957797949,
"grad_norm": 1.747075080871582,
"learning_rate": 0.00015763636363636365,
"loss": 0.3977,
"step": 24000
},
{
"epoch": 0.9430625957797949,
"eval_loss": 0.5613217949867249,
"eval_runtime": 146.5842,
"eval_samples_per_second": 38.585,
"eval_steps_per_second": 4.823,
"eval_wer": 0.4434209048161641,
"step": 24000
},
{
"epoch": 0.9509214507446265,
"eval_loss": 0.5688283443450928,
"eval_runtime": 147.1422,
"eval_samples_per_second": 38.439,
"eval_steps_per_second": 4.805,
"eval_wer": 0.44326041950859396,
"step": 24200
},
{
"epoch": 0.9587803057094582,
"eval_loss": 0.57487553358078,
"eval_runtime": 146.7792,
"eval_samples_per_second": 38.534,
"eval_steps_per_second": 4.817,
"eval_wer": 0.4454751167530613,
"step": 24400
},
{
"epoch": 0.9627097331918739,
"grad_norm": NaN,
"learning_rate": 0.0001546121212121212,
"loss": 0.3889,
"step": 24500
},
{
"epoch": 0.9666391606742898,
"eval_loss": 0.5499551892280579,
"eval_runtime": 147.156,
"eval_samples_per_second": 38.435,
"eval_steps_per_second": 4.804,
"eval_wer": 0.43180176854808944,
"step": 24600
},
{
"epoch": 0.9744980156391214,
"eval_loss": 0.5436142086982727,
"eval_runtime": 147.2848,
"eval_samples_per_second": 38.402,
"eval_steps_per_second": 4.8,
"eval_wer": 0.4371780263516875,
"step": 24800
},
{
"epoch": 0.982356870603953,
"grad_norm": 4.918150424957275,
"learning_rate": 0.0001515818181818182,
"loss": 0.39,
"step": 25000
},
{
"epoch": 0.982356870603953,
"eval_loss": 0.547515332698822,
"eval_runtime": 147.2374,
"eval_samples_per_second": 38.414,
"eval_steps_per_second": 4.802,
"eval_wer": 0.4388310250196594,
"step": 25000
},
{
"epoch": 0.9902157255687847,
"eval_loss": 0.5531713366508484,
"eval_runtime": 146.8558,
"eval_samples_per_second": 38.514,
"eval_steps_per_second": 4.814,
"eval_wer": 0.4423777503169585,
"step": 25200
},
{
"epoch": 0.9980745805336163,
"eval_loss": 0.5450366139411926,
"eval_runtime": 147.6783,
"eval_samples_per_second": 38.299,
"eval_steps_per_second": 4.787,
"eval_wer": 0.4280945579432203,
"step": 25400
},
{
"epoch": 1.002004008016032,
"grad_norm": 1.2219481468200684,
"learning_rate": 0.00014855151515151514,
"loss": 0.3853,
"step": 25500
},
{
"epoch": 1.005933435498448,
"eval_loss": 0.5462915897369385,
"eval_runtime": 145.543,
"eval_samples_per_second": 38.861,
"eval_steps_per_second": 4.858,
"eval_wer": 0.43079071111039785,
"step": 25600
},
{
"epoch": 1.0137922904632795,
"eval_loss": 0.5457944869995117,
"eval_runtime": 145.2381,
"eval_samples_per_second": 38.943,
"eval_steps_per_second": 4.868,
"eval_wer": 0.4277896358588371,
"step": 25800
},
{
"epoch": 1.0216511454281112,
"grad_norm": 4.69161319732666,
"learning_rate": 0.0001455212121212121,
"loss": 0.3413,
"step": 26000
},
{
"epoch": 1.0216511454281112,
"eval_loss": 0.5470069646835327,
"eval_runtime": 145.5418,
"eval_samples_per_second": 38.862,
"eval_steps_per_second": 4.858,
"eval_wer": 0.43441767906148193,
"step": 26000
},
{
"epoch": 1.0295100003929427,
"eval_loss": 0.5358372330665588,
"eval_runtime": 145.609,
"eval_samples_per_second": 38.844,
"eval_steps_per_second": 4.855,
"eval_wer": 0.42258991189356615,
"step": 26200
},
{
"epoch": 1.0373688553577745,
"eval_loss": 0.5403576493263245,
"eval_runtime": 146.3753,
"eval_samples_per_second": 38.64,
"eval_steps_per_second": 4.83,
"eval_wer": 0.42308741634703345,
"step": 26400
},
{
"epoch": 1.0412982828401902,
"grad_norm": 1.2460460662841797,
"learning_rate": 0.00014249090909090908,
"loss": 0.339,
"step": 26500
},
{
"epoch": 1.045227710322606,
"eval_loss": 0.5345466732978821,
"eval_runtime": 145.3146,
"eval_samples_per_second": 38.922,
"eval_steps_per_second": 4.865,
"eval_wer": 0.42433920174608014,
"step": 26600
},
{
"epoch": 1.0530865652874377,
"eval_loss": 0.5396625995635986,
"eval_runtime": 145.9713,
"eval_samples_per_second": 38.747,
"eval_steps_per_second": 4.843,
"eval_wer": 0.4199579528494166,
"step": 26800
},
{
"epoch": 1.0609454202522692,
"grad_norm": 1.021347165107727,
"learning_rate": 0.00013946060606060605,
"loss": 0.3235,
"step": 27000
},
{
"epoch": 1.0609454202522692,
"eval_loss": 0.5378654599189758,
"eval_runtime": 145.6291,
"eval_samples_per_second": 38.838,
"eval_steps_per_second": 4.855,
"eval_wer": 0.4183049541814447,
"step": 27000
},
{
"epoch": 1.0688042752171008,
"eval_loss": 0.5305435657501221,
"eval_runtime": 145.36,
"eval_samples_per_second": 38.91,
"eval_steps_per_second": 4.864,
"eval_wer": 0.42753285936672497,
"step": 27200
},
{
"epoch": 1.0766631301819325,
"eval_loss": 0.5440751910209656,
"eval_runtime": 145.3458,
"eval_samples_per_second": 38.914,
"eval_steps_per_second": 4.864,
"eval_wer": 0.4247564635457624,
"step": 27400
},
{
"epoch": 1.0805925576643483,
"grad_norm": 0.5985044836997986,
"learning_rate": 0.00013643636363636362,
"loss": 0.3252,
"step": 27500
},
{
"epoch": 1.0845219851467642,
"eval_loss": 0.5361995697021484,
"eval_runtime": 146.0428,
"eval_samples_per_second": 38.728,
"eval_steps_per_second": 4.841,
"eval_wer": 0.4177753526664634,
"step": 27600
},
{
"epoch": 1.0923808401115958,
"eval_loss": 0.5305026173591614,
"eval_runtime": 145.9537,
"eval_samples_per_second": 38.752,
"eval_steps_per_second": 4.844,
"eval_wer": 0.42015053521850076,
"step": 27800
},
{
"epoch": 1.1002396950764273,
"grad_norm": 1.615342378616333,
"learning_rate": 0.0001334060606060606,
"loss": 0.3301,
"step": 28000
},
{
"epoch": 1.1002396950764273,
"eval_loss": 0.5307178497314453,
"eval_runtime": 146.253,
"eval_samples_per_second": 38.673,
"eval_steps_per_second": 4.834,
"eval_wer": 0.41851358508128583,
"step": 28000
},
{
"epoch": 1.108098550041259,
"eval_loss": 0.5402148365974426,
"eval_runtime": 145.7202,
"eval_samples_per_second": 38.814,
"eval_steps_per_second": 4.852,
"eval_wer": 0.431127730256295,
"step": 28200
},
{
"epoch": 1.1159574050060905,
"eval_loss": 0.5308640003204346,
"eval_runtime": 145.81,
"eval_samples_per_second": 38.79,
"eval_steps_per_second": 4.849,
"eval_wer": 0.41788769238176243,
"step": 28400
},
{
"epoch": 1.1198868324885065,
"grad_norm": 1.1408910751342773,
"learning_rate": 0.00013037575757575756,
"loss": 0.3087,
"step": 28500
},
{
"epoch": 1.1238162599709223,
"eval_loss": 0.5298367738723755,
"eval_runtime": 145.4349,
"eval_samples_per_second": 38.89,
"eval_steps_per_second": 4.861,
"eval_wer": 0.42137022355603343,
"step": 28600
},
{
"epoch": 1.1316751149357538,
"eval_loss": 0.5330610275268555,
"eval_runtime": 145.6355,
"eval_samples_per_second": 38.837,
"eval_steps_per_second": 4.855,
"eval_wer": 0.4214665147405755,
"step": 28800
},
{
"epoch": 1.1395339699005855,
"grad_norm": 0.8552046418190002,
"learning_rate": 0.00012734545454545453,
"loss": 0.3222,
"step": 29000
},
{
"epoch": 1.1395339699005855,
"eval_loss": 0.5273275971412659,
"eval_runtime": 145.8763,
"eval_samples_per_second": 38.773,
"eval_steps_per_second": 4.847,
"eval_wer": 0.4145495979843045,
"step": 29000
},
{
"epoch": 1.147392824865417,
"eval_loss": 0.5282542705535889,
"eval_runtime": 145.6375,
"eval_samples_per_second": 38.836,
"eval_steps_per_second": 4.855,
"eval_wer": 0.4130731331546597,
"step": 29200
},
{
"epoch": 1.1552516798302488,
"eval_loss": 0.5256520509719849,
"eval_runtime": 145.9987,
"eval_samples_per_second": 38.74,
"eval_steps_per_second": 4.843,
"eval_wer": 0.41159666832501485,
"step": 29400
},
{
"epoch": 1.1591811073126645,
"grad_norm": 3.544210195541382,
"learning_rate": 0.0001243151515151515,
"loss": 0.3227,
"step": 29500
},
{
"epoch": 1.1631105347950803,
"eval_loss": 0.5168554186820984,
"eval_runtime": 145.3157,
"eval_samples_per_second": 38.922,
"eval_steps_per_second": 4.865,
"eval_wer": 0.408419059235127,
"step": 29600
},
{
"epoch": 1.170969389759912,
"eval_loss": 0.5184837579727173,
"eval_runtime": 145.4598,
"eval_samples_per_second": 38.884,
"eval_steps_per_second": 4.86,
"eval_wer": 0.41068190207186533,
"step": 29800
},
{
"epoch": 1.1788282447247436,
"grad_norm": 0.8857652544975281,
"learning_rate": 0.00012128484848484848,
"loss": 0.309,
"step": 30000
},
{
"epoch": 1.1788282447247436,
"eval_loss": 0.5076336860656738,
"eval_runtime": 145.8517,
"eval_samples_per_second": 38.779,
"eval_steps_per_second": 4.847,
"eval_wer": 0.40275392787790276,
"step": 30000
},
{
"epoch": 1.1866870996895753,
"eval_loss": 0.5178284049034119,
"eval_runtime": 146.4004,
"eval_samples_per_second": 38.634,
"eval_steps_per_second": 4.829,
"eval_wer": 0.40535378986053827,
"step": 30200
},
{
"epoch": 1.1945459546544068,
"eval_loss": 0.5225840210914612,
"eval_runtime": 149.501,
"eval_samples_per_second": 37.833,
"eval_steps_per_second": 4.729,
"eval_wer": 0.4122065124937812,
"step": 30400
},
{
"epoch": 1.1984753821368226,
"grad_norm": 1.1116445064544678,
"learning_rate": 0.00011826060606060606,
"loss": 0.3138,
"step": 30500
},
{
"epoch": 1.2024048096192386,
"eval_loss": 0.5226925015449524,
"eval_runtime": 145.5048,
"eval_samples_per_second": 38.872,
"eval_steps_per_second": 4.859,
"eval_wer": 0.4072635650206224,
"step": 30600
},
{
"epoch": 1.21026366458407,
"eval_loss": 0.5130230784416199,
"eval_runtime": 144.8014,
"eval_samples_per_second": 39.06,
"eval_steps_per_second": 4.883,
"eval_wer": 0.40498467365312707,
"step": 30800
},
{
"epoch": 1.2181225195489018,
"grad_norm": 1.0480467081069946,
"learning_rate": 0.00011523030303030302,
"loss": 0.3083,
"step": 31000
},
{
"epoch": 1.2181225195489018,
"eval_loss": 0.516806423664093,
"eval_runtime": 145.4982,
"eval_samples_per_second": 38.873,
"eval_steps_per_second": 4.859,
"eval_wer": 0.4113077947713887,
"step": 31000
},
{
"epoch": 1.2259813745137333,
"eval_loss": 0.505409836769104,
"eval_runtime": 145.5358,
"eval_samples_per_second": 38.863,
"eval_steps_per_second": 4.858,
"eval_wer": 0.4003947938566224,
"step": 31200
},
{
"epoch": 1.2338402294785649,
"eval_loss": 0.5144046545028687,
"eval_runtime": 145.0631,
"eval_samples_per_second": 38.99,
"eval_steps_per_second": 4.874,
"eval_wer": 0.406653720851856,
"step": 31400
},
{
"epoch": 1.2377696569609808,
"grad_norm": 1.0551427602767944,
"learning_rate": 0.00011219999999999999,
"loss": 0.2981,
"step": 31500
},
{
"epoch": 1.2416990844433966,
"eval_loss": 0.5082244277000427,
"eval_runtime": 145.8395,
"eval_samples_per_second": 38.782,
"eval_steps_per_second": 4.848,
"eval_wer": 0.39923929964211774,
"step": 31600
},
{
"epoch": 1.2495579394082281,
"eval_loss": 0.5134223103523254,
"eval_runtime": 145.7659,
"eval_samples_per_second": 38.802,
"eval_steps_per_second": 4.85,
"eval_wer": 0.396125884675258,
"step": 31800
},
{
"epoch": 1.2574167943730599,
"grad_norm": 2.2508976459503174,
"learning_rate": 0.00010916969696969696,
"loss": 0.2952,
"step": 32000
},
{
"epoch": 1.2574167943730599,
"eval_loss": 0.49696260690689087,
"eval_runtime": 145.5612,
"eval_samples_per_second": 38.857,
"eval_steps_per_second": 4.857,
"eval_wer": 0.3999454349954262,
"step": 32000
},
{
"epoch": 1.2652756493378914,
"eval_loss": 0.50291907787323,
"eval_runtime": 145.2238,
"eval_samples_per_second": 38.947,
"eval_steps_per_second": 4.868,
"eval_wer": 0.4005713276949495,
"step": 32200
},
{
"epoch": 1.2731345043027231,
"eval_loss": 0.4979938268661499,
"eval_runtime": 146.0479,
"eval_samples_per_second": 38.727,
"eval_steps_per_second": 4.841,
"eval_wer": 0.4001540658952673,
"step": 32400
},
{
"epoch": 1.2770639317851389,
"grad_norm": 0.7384321689605713,
"learning_rate": 0.00010614545454545453,
"loss": 0.2995,
"step": 32500
},
{
"epoch": 1.2809933592675546,
"eval_loss": 0.49917110800743103,
"eval_runtime": 145.9484,
"eval_samples_per_second": 38.753,
"eval_steps_per_second": 4.844,
"eval_wer": 0.40463160597647285,
"step": 32600
},
{
"epoch": 1.2888522142323864,
"eval_loss": 0.49689990282058716,
"eval_runtime": 146.3024,
"eval_samples_per_second": 38.66,
"eval_steps_per_second": 4.832,
"eval_wer": 0.3911829372020991,
"step": 32800
},
{
"epoch": 1.296711069197218,
"grad_norm": 0.6462344527244568,
"learning_rate": 0.0001031151515151515,
"loss": 0.3046,
"step": 33000
},
{
"epoch": 1.296711069197218,
"eval_loss": 0.49431467056274414,
"eval_runtime": 145.566,
"eval_samples_per_second": 38.855,
"eval_steps_per_second": 4.857,
"eval_wer": 0.3933334403235384,
"step": 33000
},
{
"epoch": 1.3045699241620496,
"eval_loss": 0.4882897138595581,
"eval_runtime": 146.7921,
"eval_samples_per_second": 38.531,
"eval_steps_per_second": 4.816,
"eval_wer": 0.3932050520774823,
"step": 33200
},
{
"epoch": 1.3124287791268812,
"eval_loss": 0.49653205275535583,
"eval_runtime": 146.2261,
"eval_samples_per_second": 38.68,
"eval_steps_per_second": 4.835,
"eval_wer": 0.3935099741618655,
"step": 33400
},
{
"epoch": 1.316358206609297,
"grad_norm": 4.335805416107178,
"learning_rate": 0.00010009090909090908,
"loss": 0.2972,
"step": 33500
},
{
"epoch": 1.320287634091713,
"eval_loss": 0.49103957414627075,
"eval_runtime": 146.0953,
"eval_samples_per_second": 38.714,
"eval_steps_per_second": 4.839,
"eval_wer": 0.3942000609844169,
"step": 33600
},
{
"epoch": 1.3281464890565444,
"eval_loss": 0.5007916688919067,
"eval_runtime": 145.7572,
"eval_samples_per_second": 38.804,
"eval_steps_per_second": 4.851,
"eval_wer": 0.4097029416956878,
"step": 33800
},
{
"epoch": 1.3360053440213762,
"grad_norm": 0.6741358637809753,
"learning_rate": 9.706060606060605e-05,
"loss": 0.3093,
"step": 34000
},
{
"epoch": 1.3360053440213762,
"eval_loss": 0.4958365857601166,
"eval_runtime": 146.2684,
"eval_samples_per_second": 38.669,
"eval_steps_per_second": 4.834,
"eval_wer": 0.39574071993708976,
"step": 34000
},
{
"epoch": 1.3438641989862077,
"eval_loss": 0.5045068264007568,
"eval_runtime": 146.1991,
"eval_samples_per_second": 38.687,
"eval_steps_per_second": 4.836,
"eval_wer": 0.40179101603248224,
"step": 34200
},
{
"epoch": 1.3517230539510394,
"eval_loss": 0.492519348859787,
"eval_runtime": 146.1528,
"eval_samples_per_second": 38.699,
"eval_steps_per_second": 4.837,
"eval_wer": 0.3969925053361365,
"step": 34400
},
{
"epoch": 1.3556524814334552,
"grad_norm": 0.9136665463447571,
"learning_rate": 9.403030303030303e-05,
"loss": 0.2947,
"step": 34500
},
{
"epoch": 1.359581908915871,
"eval_loss": 0.4828738868236542,
"eval_runtime": 145.0639,
"eval_samples_per_second": 38.99,
"eval_steps_per_second": 4.874,
"eval_wer": 0.3905409959718188,
"step": 34600
},
{
"epoch": 1.3674407638807025,
"eval_loss": 0.4869907796382904,
"eval_runtime": 145.4878,
"eval_samples_per_second": 38.876,
"eval_steps_per_second": 4.86,
"eval_wer": 0.39522716695286547,
"step": 34800
},
{
"epoch": 1.3752996188455342,
"grad_norm": 1.0685299634933472,
"learning_rate": 9.099999999999999e-05,
"loss": 0.2801,
"step": 35000
},
{
"epoch": 1.3752996188455342,
"eval_loss": 0.4897337555885315,
"eval_runtime": 145.9513,
"eval_samples_per_second": 38.753,
"eval_steps_per_second": 4.844,
"eval_wer": 0.3936704594694356,
"step": 35000
},
{
"epoch": 1.383158473810366,
"eval_loss": 0.5006551146507263,
"eval_runtime": 145.7634,
"eval_samples_per_second": 38.803,
"eval_steps_per_second": 4.85,
"eval_wer": 0.39972075556482806,
"step": 35200
},
{
"epoch": 1.3910173287751975,
"eval_loss": 0.48228171467781067,
"eval_runtime": 145.956,
"eval_samples_per_second": 38.751,
"eval_steps_per_second": 4.844,
"eval_wer": 0.38492401020686556,
"step": 35400
},
{
"epoch": 1.3949467562576132,
"grad_norm": 0.6772143244743347,
"learning_rate": 8.796969696969696e-05,
"loss": 0.2772,
"step": 35500
},
{
"epoch": 1.398876183740029,
"eval_loss": 0.4848904013633728,
"eval_runtime": 145.8656,
"eval_samples_per_second": 38.775,
"eval_steps_per_second": 4.847,
"eval_wer": 0.39121503426361315,
"step": 35600
},
{
"epoch": 1.4067350387048607,
"eval_loss": 0.4844968020915985,
"eval_runtime": 146.3634,
"eval_samples_per_second": 38.644,
"eval_steps_per_second": 4.83,
"eval_wer": 0.3881658134197814,
"step": 35800
},
{
"epoch": 1.4145938936696925,
"grad_norm": 1.0455658435821533,
"learning_rate": 8.493939393939393e-05,
"loss": 0.281,
"step": 36000
},
{
"epoch": 1.4145938936696925,
"eval_loss": 0.482947438955307,
"eval_runtime": 145.7025,
"eval_samples_per_second": 38.819,
"eval_steps_per_second": 4.852,
"eval_wer": 0.38418577779204316,
"step": 36000
},
{
"epoch": 1.422452748634524,
"eval_loss": 0.48147863149642944,
"eval_runtime": 146.3811,
"eval_samples_per_second": 38.639,
"eval_steps_per_second": 4.83,
"eval_wer": 0.3859190191138001,
"step": 36200
},
{
"epoch": 1.4303116035993555,
"eval_loss": 0.4771769642829895,
"eval_runtime": 145.8053,
"eval_samples_per_second": 38.791,
"eval_steps_per_second": 4.849,
"eval_wer": 0.38075139221004317,
"step": 36400
},
{
"epoch": 1.4342410310817715,
"grad_norm": 0.6518095135688782,
"learning_rate": 8.19090909090909e-05,
"loss": 0.2697,
"step": 36500
},
{
"epoch": 1.4381704585641872,
"eval_loss": 0.48701608180999756,
"eval_runtime": 145.4126,
"eval_samples_per_second": 38.896,
"eval_steps_per_second": 4.862,
"eval_wer": 0.3914236651634543,
"step": 36600
},
{
"epoch": 1.4460293135290188,
"eval_loss": 0.47700512409210205,
"eval_runtime": 145.4281,
"eval_samples_per_second": 38.892,
"eval_steps_per_second": 4.862,
"eval_wer": 0.38662515446710854,
"step": 36800
},
{
"epoch": 1.4538881684938505,
"grad_norm": 2.1603991985321045,
"learning_rate": 7.887878787878789e-05,
"loss": 0.2766,
"step": 37000
},
{
"epoch": 1.4538881684938505,
"eval_loss": 0.4786865711212158,
"eval_runtime": 145.7912,
"eval_samples_per_second": 38.795,
"eval_steps_per_second": 4.849,
"eval_wer": 0.38209946879363194,
"step": 37000
},
{
"epoch": 1.461747023458682,
"eval_loss": 0.4793393015861511,
"eval_runtime": 145.5675,
"eval_samples_per_second": 38.855,
"eval_steps_per_second": 4.857,
"eval_wer": 0.38099212017139833,
"step": 37200
},
{
"epoch": 1.4696058784235138,
"eval_loss": 0.4738729000091553,
"eval_runtime": 145.8624,
"eval_samples_per_second": 38.776,
"eval_steps_per_second": 4.847,
"eval_wer": 0.3803341304103609,
"step": 37400
},
{
"epoch": 1.4735353059059295,
"grad_norm": 1.9566117525100708,
"learning_rate": 7.585454545454545e-05,
"loss": 0.2905,
"step": 37500
},
{
"epoch": 1.4774647333883453,
"eval_loss": 0.47245293855667114,
"eval_runtime": 145.8323,
"eval_samples_per_second": 38.784,
"eval_steps_per_second": 4.848,
"eval_wer": 0.3811205084174544,
"step": 37600
},
{
"epoch": 1.485323588353177,
"eval_loss": 0.47267088294029236,
"eval_runtime": 145.9296,
"eval_samples_per_second": 38.758,
"eval_steps_per_second": 4.845,
"eval_wer": 0.37827991847346376,
"step": 37800
},
{
"epoch": 1.4931824433180085,
"grad_norm": 2.518251895904541,
"learning_rate": 7.282424242424242e-05,
"loss": 0.2799,
"step": 38000
},
{
"epoch": 1.4931824433180085,
"eval_loss": 0.47050511837005615,
"eval_runtime": 146.8142,
"eval_samples_per_second": 38.525,
"eval_steps_per_second": 4.816,
"eval_wer": 0.3776700743046974,
"step": 38000
},
{
"epoch": 1.50104129828284,
"eval_loss": 0.4659024178981781,
"eval_runtime": 145.787,
"eval_samples_per_second": 38.796,
"eval_steps_per_second": 4.85,
"eval_wer": 0.37508626085281893,
"step": 38200
},
{
"epoch": 1.5089001532476718,
"eval_loss": 0.46910360455513,
"eval_runtime": 146.8808,
"eval_samples_per_second": 38.507,
"eval_steps_per_second": 4.813,
"eval_wer": 0.37429988284572546,
"step": 38400
},
{
"epoch": 1.5128295807300876,
"grad_norm": 1.3675510883331299,
"learning_rate": 6.979393939393939e-05,
"loss": 0.267,
"step": 38500
},
{
"epoch": 1.5167590082125035,
"eval_loss": 0.4690033495426178,
"eval_runtime": 145.2501,
"eval_samples_per_second": 38.94,
"eval_steps_per_second": 4.867,
"eval_wer": 0.3663558601210059,
"step": 38600
},
{
"epoch": 1.524617863177335,
"eval_loss": 0.4632550776004791,
"eval_runtime": 146.3252,
"eval_samples_per_second": 38.654,
"eval_steps_per_second": 4.832,
"eval_wer": 0.36810514997351995,
"step": 38800
},
{
"epoch": 1.5324767181421666,
"grad_norm": 1.2868680953979492,
"learning_rate": 6.676969696969697e-05,
"loss": 0.2632,
"step": 39000
},
{
"epoch": 1.5324767181421666,
"eval_loss": 0.4650620222091675,
"eval_runtime": 146.2691,
"eval_samples_per_second": 38.668,
"eval_steps_per_second": 4.834,
"eval_wer": 0.37255059299321147,
"step": 39000
},
{
"epoch": 1.5403355731069983,
"eval_loss": 0.46896418929100037,
"eval_runtime": 145.9823,
"eval_samples_per_second": 38.744,
"eval_steps_per_second": 4.843,
"eval_wer": 0.3673990146202115,
"step": 39200
},
{
"epoch": 1.54819442807183,
"eval_loss": 0.4612589180469513,
"eval_runtime": 145.4614,
"eval_samples_per_second": 38.883,
"eval_steps_per_second": 4.86,
"eval_wer": 0.3714913899632489,
"step": 39400
},
{
"epoch": 1.5521238555542456,
"grad_norm": 2.942875623703003,
"learning_rate": 6.373939393939393e-05,
"loss": 0.2716,
"step": 39500
},
{
"epoch": 1.5560532830366616,
"eval_loss": 0.4654790461063385,
"eval_runtime": 146.1694,
"eval_samples_per_second": 38.695,
"eval_steps_per_second": 4.837,
"eval_wer": 0.36967790598770683,
"step": 39600
},
{
"epoch": 1.563912138001493,
"eval_loss": 0.4596673846244812,
"eval_runtime": 145.7967,
"eval_samples_per_second": 38.794,
"eval_steps_per_second": 4.849,
"eval_wer": 0.364799152637576,
"step": 39800
},
{
"epoch": 1.5717709929663248,
"grad_norm": 0.4809035658836365,
"learning_rate": 6.07090909090909e-05,
"loss": 0.2651,
"step": 40000
},
{
"epoch": 1.5717709929663248,
"eval_loss": 0.4549534320831299,
"eval_runtime": 146.3998,
"eval_samples_per_second": 38.634,
"eval_steps_per_second": 4.829,
"eval_wer": 0.36619537481343584,
"step": 40000
},
{
"epoch": 1.5796298479311566,
"eval_loss": 0.4538833498954773,
"eval_runtime": 146.0948,
"eval_samples_per_second": 38.715,
"eval_steps_per_second": 4.839,
"eval_wer": 0.3676397425815667,
"step": 40200
},
{
"epoch": 1.587488702895988,
"eval_loss": 0.4542824625968933,
"eval_runtime": 146.3082,
"eval_samples_per_second": 38.658,
"eval_steps_per_second": 4.832,
"eval_wer": 0.36746320874323957,
"step": 40400
},
{
"epoch": 1.5914181303784039,
"grad_norm": 1.2710328102111816,
"learning_rate": 5.767878787878788e-05,
"loss": 0.2659,
"step": 40500
},
{
"epoch": 1.5953475578608196,
"eval_loss": 0.45555397868156433,
"eval_runtime": 146.1729,
"eval_samples_per_second": 38.694,
"eval_steps_per_second": 4.837,
"eval_wer": 0.3622795333087256,
"step": 40600
},
{
"epoch": 1.6032064128256514,
"eval_loss": 0.463294118642807,
"eval_runtime": 146.3048,
"eval_samples_per_second": 38.659,
"eval_steps_per_second": 4.832,
"eval_wer": 0.36849031471168814,
"step": 40800
},
{
"epoch": 1.611065267790483,
"grad_norm": 1.9250500202178955,
"learning_rate": 5.4660606060606054e-05,
"loss": 0.2559,
"step": 41000
},
{
"epoch": 1.611065267790483,
"eval_loss": 0.4529285132884979,
"eval_runtime": 146.9183,
"eval_samples_per_second": 38.498,
"eval_steps_per_second": 4.812,
"eval_wer": 0.36083516554059475,
"step": 41000
},
{
"epoch": 1.6189241227553146,
"eval_loss": 0.45345816016197205,
"eval_runtime": 145.5972,
"eval_samples_per_second": 38.847,
"eval_steps_per_second": 4.856,
"eval_wer": 0.36385228932291247,
"step": 41200
},
{
"epoch": 1.6267829777201461,
"eval_loss": 0.4511209726333618,
"eval_runtime": 146.7532,
"eval_samples_per_second": 38.541,
"eval_steps_per_second": 4.818,
"eval_wer": 0.3637078525460994,
"step": 41400
},
{
"epoch": 1.630712405202562,
"grad_norm": 0.9593771696090698,
"learning_rate": 5.1630303030303025e-05,
"loss": 0.2629,
"step": 41500
},
{
"epoch": 1.6346418326849776,
"eval_loss": 0.45563140511512756,
"eval_runtime": 146.0124,
"eval_samples_per_second": 38.736,
"eval_steps_per_second": 4.842,
"eval_wer": 0.36049814639469757,
"step": 41600
},
{
"epoch": 1.6425006876498094,
"eval_loss": 0.457055002450943,
"eval_runtime": 147.3584,
"eval_samples_per_second": 38.383,
"eval_steps_per_second": 4.798,
"eval_wer": 0.36390043491518353,
"step": 41800
},
{
"epoch": 1.6503595426146411,
"grad_norm": 0.9599024653434753,
"learning_rate": 4.8599999999999995e-05,
"loss": 0.259,
"step": 42000
},
{
"epoch": 1.6503595426146411,
"eval_loss": 0.46201661229133606,
"eval_runtime": 146.8464,
"eval_samples_per_second": 38.516,
"eval_steps_per_second": 4.815,
"eval_wer": 0.36903596475742645,
"step": 42000
},
{
"epoch": 1.6582183975794726,
"eval_loss": 0.45499464869499207,
"eval_runtime": 146.9092,
"eval_samples_per_second": 38.5,
"eval_steps_per_second": 4.812,
"eval_wer": 0.36348317311550127,
"step": 42200
},
{
"epoch": 1.6660772525443042,
"eval_loss": 0.45219454169273376,
"eval_runtime": 146.8863,
"eval_samples_per_second": 38.506,
"eval_steps_per_second": 4.813,
"eval_wer": 0.3584278859270434,
"step": 42400
},
{
"epoch": 1.6700066800267201,
"grad_norm": 1.0676679611206055,
"learning_rate": 4.5569696969696966e-05,
"loss": 0.2594,
"step": 42500
},
{
"epoch": 1.673936107509136,
"eval_loss": 0.4494900703430176,
"eval_runtime": 147.3169,
"eval_samples_per_second": 38.393,
"eval_steps_per_second": 4.799,
"eval_wer": 0.3589253903805107,
"step": 42600
},
{
"epoch": 1.6817949624739676,
"eval_loss": 0.4453260898590088,
"eval_runtime": 146.8159,
"eval_samples_per_second": 38.524,
"eval_steps_per_second": 4.816,
"eval_wer": 0.3562131886825761,
"step": 42800
},
{
"epoch": 1.6896538174387992,
"grad_norm": 0.4820586144924164,
"learning_rate": 4.253939393939394e-05,
"loss": 0.2538,
"step": 43000
},
{
"epoch": 1.6896538174387992,
"eval_loss": 0.4438420832157135,
"eval_runtime": 147.9055,
"eval_samples_per_second": 38.241,
"eval_steps_per_second": 4.78,
"eval_wer": 0.3555391503907817,
"step": 43000
},
{
"epoch": 1.6975126724036307,
"eval_loss": 0.4494447708129883,
"eval_runtime": 146.855,
"eval_samples_per_second": 38.514,
"eval_steps_per_second": 4.814,
"eval_wer": 0.3566946446052864,
"step": 43200
},
{
"epoch": 1.7053715273684624,
"eval_loss": 0.4443654716014862,
"eval_runtime": 146.8467,
"eval_samples_per_second": 38.516,
"eval_steps_per_second": 4.815,
"eval_wer": 0.3537898605382677,
"step": 43400
},
{
"epoch": 1.7093009548508782,
"grad_norm": 0.7214144468307495,
"learning_rate": 3.950909090909091e-05,
"loss": 0.2512,
"step": 43500
},
{
"epoch": 1.7132303823332942,
"eval_loss": 0.4454784691333771,
"eval_runtime": 147.1352,
"eval_samples_per_second": 38.441,
"eval_steps_per_second": 4.805,
"eval_wer": 0.3529713854696602,
"step": 43600
},
{
"epoch": 1.7210892372981257,
"eval_loss": 0.4453714191913605,
"eval_runtime": 147.5374,
"eval_samples_per_second": 38.336,
"eval_steps_per_second": 4.792,
"eval_wer": 0.3522010559933238,
"step": 43800
},
{
"epoch": 1.7289480922629572,
"grad_norm": 1.9711872339248657,
"learning_rate": 3.647878787878787e-05,
"loss": 0.2358,
"step": 44000
},
{
"epoch": 1.7289480922629572,
"eval_loss": 0.44450756907463074,
"eval_runtime": 146.8893,
"eval_samples_per_second": 38.505,
"eval_steps_per_second": 4.813,
"eval_wer": 0.3519763765627257,
"step": 44000
},
{
"epoch": 1.736806947227789,
"eval_loss": 0.44162794947624207,
"eval_runtime": 147.6037,
"eval_samples_per_second": 38.319,
"eval_steps_per_second": 4.79,
"eval_wer": 0.34998635874885653,
"step": 44200
},
{
"epoch": 1.7446658021926207,
"eval_loss": 0.44202086329460144,
"eval_runtime": 148.0767,
"eval_samples_per_second": 38.196,
"eval_steps_per_second": 4.775,
"eval_wer": 0.34897530131116494,
"step": 44400
},
{
"epoch": 1.7485952296750362,
"grad_norm": 1.1429784297943115,
"learning_rate": 3.344848484848484e-05,
"loss": 0.2418,
"step": 44500
},
{
"epoch": 1.7525246571574522,
"eval_loss": 0.43861278891563416,
"eval_runtime": 147.9549,
"eval_samples_per_second": 38.228,
"eval_steps_per_second": 4.778,
"eval_wer": 0.34790004975044536,
"step": 44600
},
{
"epoch": 1.7603835121222837,
"eval_loss": 0.4354783296585083,
"eval_runtime": 149.8154,
"eval_samples_per_second": 37.753,
"eval_steps_per_second": 4.719,
"eval_wer": 0.3460705172441463,
"step": 44800
},
{
"epoch": 1.7682423670871152,
"grad_norm": 1.684985637664795,
"learning_rate": 3.0418181818181817e-05,
"loss": 0.2421,
"step": 45000
},
{
"epoch": 1.7682423670871152,
"eval_loss": 0.43855908513069153,
"eval_runtime": 148.5791,
"eval_samples_per_second": 38.067,
"eval_steps_per_second": 4.758,
"eval_wer": 0.34372743175362297,
"step": 45000
},
{
"epoch": 1.776101222051947,
"eval_loss": 0.4347515106201172,
"eval_runtime": 147.9309,
"eval_samples_per_second": 38.234,
"eval_steps_per_second": 4.779,
"eval_wer": 0.3458297892827912,
"step": 45200
},
{
"epoch": 1.7839600770167787,
"eval_loss": 0.43350183963775635,
"eval_runtime": 148.2161,
"eval_samples_per_second": 38.16,
"eval_steps_per_second": 4.77,
"eval_wer": 0.3435348493845388,
"step": 45400
},
{
"epoch": 1.7878895044991945,
"grad_norm": 2.4373562335968018,
"learning_rate": 2.7387878787878784e-05,
"loss": 0.2418,
"step": 45500
},
{
"epoch": 1.7918189319816102,
"eval_loss": 0.43087294697761536,
"eval_runtime": 146.7738,
"eval_samples_per_second": 38.535,
"eval_steps_per_second": 4.817,
"eval_wer": 0.3443693729839033,
"step": 45600
},
{
"epoch": 1.7996777869464418,
"eval_loss": 0.43208202719688416,
"eval_runtime": 147.2129,
"eval_samples_per_second": 38.421,
"eval_steps_per_second": 4.803,
"eval_wer": 0.34249169488533326,
"step": 45800
},
{
"epoch": 1.8075366419112735,
"grad_norm": 1.2847892045974731,
"learning_rate": 2.4357575757575755e-05,
"loss": 0.2424,
"step": 46000
},
{
"epoch": 1.8075366419112735,
"eval_loss": 0.42999544739723206,
"eval_runtime": 147.0735,
"eval_samples_per_second": 38.457,
"eval_steps_per_second": 4.807,
"eval_wer": 0.34075845356357626,
"step": 46000
},
{
"epoch": 1.8153954968761052,
"eval_loss": 0.4301421046257019,
"eval_runtime": 146.951,
"eval_samples_per_second": 38.489,
"eval_steps_per_second": 4.811,
"eval_wer": 0.34231516104700616,
"step": 46200
},
{
"epoch": 1.8232543518409368,
"eval_loss": 0.4339451491832733,
"eval_runtime": 146.5189,
"eval_samples_per_second": 38.603,
"eval_steps_per_second": 4.825,
"eval_wer": 0.3407424050328192,
"step": 46400
},
{
"epoch": 1.8271837793233527,
"grad_norm": 7.262228965759277,
"learning_rate": 2.133333333333333e-05,
"loss": 0.228,
"step": 46500
},
{
"epoch": 1.8311132068057683,
"eval_loss": 0.43165403604507446,
"eval_runtime": 146.7443,
"eval_samples_per_second": 38.543,
"eval_steps_per_second": 4.818,
"eval_wer": 0.3428929081542585,
"step": 46600
},
{
"epoch": 1.8389720617706,
"eval_loss": 0.43002423644065857,
"eval_runtime": 146.6705,
"eval_samples_per_second": 38.563,
"eval_steps_per_second": 4.82,
"eval_wer": 0.34332621848469774,
"step": 46800
},
{
"epoch": 1.8468309167354318,
"grad_norm": 0.922248125076294,
"learning_rate": 1.8303030303030302e-05,
"loss": 0.2532,
"step": 47000
},
{
"epoch": 1.8468309167354318,
"eval_loss": 0.42492908239364624,
"eval_runtime": 147.1617,
"eval_samples_per_second": 38.434,
"eval_steps_per_second": 4.804,
"eval_wer": 0.3439360626534641,
"step": 47000
},
{
"epoch": 1.8546897717002633,
"eval_loss": 0.42566677927970886,
"eval_runtime": 147.1363,
"eval_samples_per_second": 38.441,
"eval_steps_per_second": 4.805,
"eval_wer": 0.3430373449310716,
"step": 47200
},
{
"epoch": 1.8625486266650948,
"eval_loss": 0.42639264464378357,
"eval_runtime": 147.0021,
"eval_samples_per_second": 38.476,
"eval_steps_per_second": 4.809,
"eval_wer": 0.3408226476866043,
"step": 47400
},
{
"epoch": 1.8664780541475108,
"grad_norm": 0.7899935841560364,
"learning_rate": 1.5272727272727273e-05,
"loss": 0.2347,
"step": 47500
},
{
"epoch": 1.8704074816299265,
"eval_loss": 0.4254419207572937,
"eval_runtime": 146.4448,
"eval_samples_per_second": 38.622,
"eval_steps_per_second": 4.828,
"eval_wer": 0.3408868418096323,
"step": 47600
},
{
"epoch": 1.8782663365947583,
"eval_loss": 0.423650860786438,
"eval_runtime": 147.0702,
"eval_samples_per_second": 38.458,
"eval_steps_per_second": 4.807,
"eval_wer": 0.3391215034263613,
"step": 47800
},
{
"epoch": 1.8861251915595898,
"grad_norm": 1.1323833465576172,
"learning_rate": 1.2242424242424242e-05,
"loss": 0.2265,
"step": 48000
},
{
"epoch": 1.8861251915595898,
"eval_loss": 0.4246509373188019,
"eval_runtime": 147.0222,
"eval_samples_per_second": 38.47,
"eval_steps_per_second": 4.809,
"eval_wer": 0.33952271669528655,
"step": 48000
},
{
"epoch": 1.8939840465244213,
"eval_loss": 0.42534753680229187,
"eval_runtime": 146.8715,
"eval_samples_per_second": 38.51,
"eval_steps_per_second": 4.814,
"eval_wer": 0.3389128725265202,
"step": 48200
},
{
"epoch": 1.901842901489253,
"eval_loss": 0.4245891273021698,
"eval_runtime": 146.4129,
"eval_samples_per_second": 38.63,
"eval_steps_per_second": 4.829,
"eval_wer": 0.33902521224181925,
"step": 48400
},
{
"epoch": 1.9057723289716688,
"grad_norm": 2.10141658782959,
"learning_rate": 9.212121212121211e-06,
"loss": 0.2262,
"step": 48500
},
{
"epoch": 1.9097017564540848,
"eval_loss": 0.4226687252521515,
"eval_runtime": 147.045,
"eval_samples_per_second": 38.464,
"eval_steps_per_second": 4.808,
"eval_wer": 0.3378536694965576,
"step": 48600
},
{
"epoch": 1.9175606114189163,
"eval_loss": 0.4228062033653259,
"eval_runtime": 147.4189,
"eval_samples_per_second": 38.367,
"eval_steps_per_second": 4.796,
"eval_wer": 0.33892892105727723,
"step": 48800
},
{
"epoch": 1.9254194663837478,
"grad_norm": 0.8046126365661621,
"learning_rate": 6.181818181818182e-06,
"loss": 0.2358,
"step": 49000
},
{
"epoch": 1.9254194663837478,
"eval_loss": 0.4225420653820038,
"eval_runtime": 147.7497,
"eval_samples_per_second": 38.281,
"eval_steps_per_second": 4.785,
"eval_wer": 0.3391054548956043,
"step": 49000
},
{
"epoch": 1.9332783213485794,
"eval_loss": 0.4224160313606262,
"eval_runtime": 147.1221,
"eval_samples_per_second": 38.444,
"eval_steps_per_second": 4.806,
"eval_wer": 0.33902521224181925,
"step": 49200
},
{
"epoch": 1.941137176313411,
"eval_loss": 0.4214831590652466,
"eval_runtime": 147.8229,
"eval_samples_per_second": 38.262,
"eval_steps_per_second": 4.783,
"eval_wer": 0.3389931151803052,
"step": 49400
},
{
"epoch": 1.9450666037958269,
"grad_norm": 1.517034888267517,
"learning_rate": 3.1575757575757576e-06,
"loss": 0.231,
"step": 49500
},
{
"epoch": 1.9489960312782428,
"eval_loss": 0.4215412437915802,
"eval_runtime": 147.4583,
"eval_samples_per_second": 38.357,
"eval_steps_per_second": 4.795,
"eval_wer": 0.3399560270257258,
"step": 49600
},
{
"epoch": 1.9568548862430744,
"eval_loss": 0.4211778938770294,
"eval_runtime": 146.928,
"eval_samples_per_second": 38.495,
"eval_steps_per_second": 4.812,
"eval_wer": 0.33933013432620246,
"step": 49800
},
{
"epoch": 1.9647137412079059,
"grad_norm": 2.9327681064605713,
"learning_rate": 1.2727272727272726e-07,
"loss": 0.2331,
"step": 50000
},
{
"epoch": 1.9647137412079059,
"eval_loss": 0.4211583733558655,
"eval_runtime": 147.1945,
"eval_samples_per_second": 38.425,
"eval_steps_per_second": 4.803,
"eval_wer": 0.33939432844923045,
"step": 50000
},
{
"epoch": 1.9647137412079059,
"step": 50000,
"total_flos": 6.219831968409632e+19,
"train_loss": 0.4413083312988281,
"train_runtime": 56545.9703,
"train_samples_per_second": 7.074,
"train_steps_per_second": 0.884
}
],
"logging_steps": 500,
"max_steps": 50000,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 400,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6.219831968409632e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}