|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.8714672861014323, |
|
"eval_steps": 100, |
|
"global_step": 20000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.019357336430507164, |
|
"eval_loss": 3.5565404891967773, |
|
"eval_runtime": 151.5266, |
|
"eval_samples_per_second": 37.327, |
|
"eval_steps_per_second": 4.666, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03871467286101433, |
|
"eval_loss": 3.0301756858825684, |
|
"eval_runtime": 150.582, |
|
"eval_samples_per_second": 37.561, |
|
"eval_steps_per_second": 4.695, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05807200929152149, |
|
"eval_loss": 2.9460911750793457, |
|
"eval_runtime": 148.9065, |
|
"eval_samples_per_second": 37.984, |
|
"eval_steps_per_second": 4.748, |
|
"eval_wer": 1.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07742934572202866, |
|
"eval_loss": 1.8142520189285278, |
|
"eval_runtime": 149.8655, |
|
"eval_samples_per_second": 37.741, |
|
"eval_steps_per_second": 4.718, |
|
"eval_wer": 0.940732775914365, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.09678668215253582, |
|
"grad_norm": 3.132490396499634, |
|
"learning_rate": 0.00029759999999999997, |
|
"loss": 3.9521, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.09678668215253582, |
|
"eval_loss": 1.4195518493652344, |
|
"eval_runtime": 150.5171, |
|
"eval_samples_per_second": 37.577, |
|
"eval_steps_per_second": 4.697, |
|
"eval_wer": 0.8693007655149171, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11614401858304298, |
|
"eval_loss": 1.16689133644104, |
|
"eval_runtime": 150.5387, |
|
"eval_samples_per_second": 37.572, |
|
"eval_steps_per_second": 4.696, |
|
"eval_wer": 0.8055239042865626, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.13550135501355012, |
|
"eval_loss": 1.0756505727767944, |
|
"eval_runtime": 151.2385, |
|
"eval_samples_per_second": 37.398, |
|
"eval_steps_per_second": 4.675, |
|
"eval_wer": 0.7596251063215163, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.1548586914440573, |
|
"eval_loss": 0.9944618344306946, |
|
"eval_runtime": 151.1646, |
|
"eval_samples_per_second": 37.416, |
|
"eval_steps_per_second": 4.677, |
|
"eval_wer": 0.7223925149652549, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.17421602787456447, |
|
"eval_loss": 0.9381263256072998, |
|
"eval_runtime": 151.6289, |
|
"eval_samples_per_second": 37.302, |
|
"eval_steps_per_second": 4.663, |
|
"eval_wer": 0.6870857472998347, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.19357336430507163, |
|
"grad_norm": 7.335289001464844, |
|
"learning_rate": 0.0002844, |
|
"loss": 1.0266, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.19357336430507163, |
|
"eval_loss": 0.8977694511413574, |
|
"eval_runtime": 156.0202, |
|
"eval_samples_per_second": 36.252, |
|
"eval_steps_per_second": 4.531, |
|
"eval_wer": 0.661472292211648, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2129307007355788, |
|
"eval_loss": 0.8770694136619568, |
|
"eval_runtime": 151.6589, |
|
"eval_samples_per_second": 37.294, |
|
"eval_steps_per_second": 4.662, |
|
"eval_wer": 0.6450385967164706, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.23228803716608595, |
|
"eval_loss": 0.851553201675415, |
|
"eval_runtime": 151.5945, |
|
"eval_samples_per_second": 37.31, |
|
"eval_steps_per_second": 4.664, |
|
"eval_wer": 0.640432668389209, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.2516453735965931, |
|
"eval_loss": 0.8273979425430298, |
|
"eval_runtime": 151.4524, |
|
"eval_samples_per_second": 37.345, |
|
"eval_steps_per_second": 4.668, |
|
"eval_wer": 0.6138081558633307, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.27100271002710025, |
|
"eval_loss": 0.7992698550224304, |
|
"eval_runtime": 152.8076, |
|
"eval_samples_per_second": 37.014, |
|
"eval_steps_per_second": 4.627, |
|
"eval_wer": 0.596973247099228, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.29036004645760743, |
|
"grad_norm": 4.0737223625183105, |
|
"learning_rate": 0.00026861052631578947, |
|
"loss": 0.8454, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.29036004645760743, |
|
"eval_loss": 0.7768516540527344, |
|
"eval_runtime": 152.3743, |
|
"eval_samples_per_second": 37.119, |
|
"eval_steps_per_second": 4.64, |
|
"eval_wer": 0.5887563993516394, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.3097173828881146, |
|
"eval_loss": 0.7664207220077515, |
|
"eval_runtime": 154.3668, |
|
"eval_samples_per_second": 36.64, |
|
"eval_steps_per_second": 4.58, |
|
"eval_wer": 0.5997977885124617, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.32907471931862176, |
|
"eval_loss": 0.7400562763214111, |
|
"eval_runtime": 153.7228, |
|
"eval_samples_per_second": 36.793, |
|
"eval_steps_per_second": 4.599, |
|
"eval_wer": 0.5592110542279854, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.34843205574912894, |
|
"eval_loss": 0.746478796005249, |
|
"eval_runtime": 151.7535, |
|
"eval_samples_per_second": 37.271, |
|
"eval_steps_per_second": 4.659, |
|
"eval_wer": 0.5650206223620228, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.3677893921796361, |
|
"eval_loss": 0.7252949476242065, |
|
"eval_runtime": 151.7548, |
|
"eval_samples_per_second": 37.271, |
|
"eval_steps_per_second": 4.659, |
|
"eval_wer": 0.5791272808974338, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.38714672861014326, |
|
"grad_norm": 2.4802448749542236, |
|
"learning_rate": 0.0002528210526315789, |
|
"loss": 0.7537, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.38714672861014326, |
|
"eval_loss": 0.7039346098899841, |
|
"eval_runtime": 152.7969, |
|
"eval_samples_per_second": 37.016, |
|
"eval_steps_per_second": 4.627, |
|
"eval_wer": 0.5343518800853782, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.4065040650406504, |
|
"eval_loss": 0.6932350397109985, |
|
"eval_runtime": 152.4406, |
|
"eval_samples_per_second": 37.103, |
|
"eval_steps_per_second": 4.638, |
|
"eval_wer": 0.5168429330294811, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.4258614014711576, |
|
"eval_loss": 0.696869432926178, |
|
"eval_runtime": 153.0527, |
|
"eval_samples_per_second": 36.955, |
|
"eval_steps_per_second": 4.619, |
|
"eval_wer": 0.5364381890837894, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.4452187379016647, |
|
"eval_loss": 0.6781283617019653, |
|
"eval_runtime": 152.1378, |
|
"eval_samples_per_second": 37.177, |
|
"eval_steps_per_second": 4.647, |
|
"eval_wer": 0.5173725345444624, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.4645760743321719, |
|
"eval_loss": 0.6760829091072083, |
|
"eval_runtime": 151.9712, |
|
"eval_samples_per_second": 37.218, |
|
"eval_steps_per_second": 4.652, |
|
"eval_wer": 0.5050312143923223, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.48393341076267904, |
|
"grad_norm": 3.791292667388916, |
|
"learning_rate": 0.0002370315789473684, |
|
"loss": 0.681, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.48393341076267904, |
|
"eval_loss": 0.6720712780952454, |
|
"eval_runtime": 152.2414, |
|
"eval_samples_per_second": 37.152, |
|
"eval_steps_per_second": 4.644, |
|
"eval_wer": 0.528718845789668, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.5032907471931862, |
|
"eval_loss": 0.6598270535469055, |
|
"eval_runtime": 151.7192, |
|
"eval_samples_per_second": 37.279, |
|
"eval_steps_per_second": 4.66, |
|
"eval_wer": 0.5195069891351447, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.5226480836236934, |
|
"eval_loss": 0.6555168628692627, |
|
"eval_runtime": 152.5678, |
|
"eval_samples_per_second": 37.072, |
|
"eval_steps_per_second": 4.634, |
|
"eval_wer": 0.4975846961210701, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.5420054200542005, |
|
"eval_loss": 0.6535276770591736, |
|
"eval_runtime": 152.5246, |
|
"eval_samples_per_second": 37.083, |
|
"eval_steps_per_second": 4.635, |
|
"eval_wer": 0.49936608303509816, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.5613627564847077, |
|
"eval_loss": 0.6258506178855896, |
|
"eval_runtime": 151.843, |
|
"eval_samples_per_second": 37.249, |
|
"eval_steps_per_second": 4.656, |
|
"eval_wer": 0.48192133010222915, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.5807200929152149, |
|
"grad_norm": 9.4619779586792, |
|
"learning_rate": 0.00022124210526315786, |
|
"loss": 0.6737, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.5807200929152149, |
|
"eval_loss": 0.629943311214447, |
|
"eval_runtime": 151.8389, |
|
"eval_samples_per_second": 37.25, |
|
"eval_steps_per_second": 4.656, |
|
"eval_wer": 0.48022018584198617, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.6000774293457221, |
|
"eval_loss": 0.6378594636917114, |
|
"eval_runtime": 151.6255, |
|
"eval_samples_per_second": 37.302, |
|
"eval_steps_per_second": 4.663, |
|
"eval_wer": 0.4893197027812104, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.6194347657762292, |
|
"eval_loss": 0.6225672364234924, |
|
"eval_runtime": 153.0144, |
|
"eval_samples_per_second": 36.964, |
|
"eval_steps_per_second": 4.62, |
|
"eval_wer": 0.4806053505801544, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.6387921022067363, |
|
"eval_loss": 0.6088670492172241, |
|
"eval_runtime": 152.2222, |
|
"eval_samples_per_second": 37.156, |
|
"eval_steps_per_second": 4.645, |
|
"eval_wer": 0.4627112387860891, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.6581494386372435, |
|
"eval_loss": 0.6028585433959961, |
|
"eval_runtime": 153.0615, |
|
"eval_samples_per_second": 36.952, |
|
"eval_steps_per_second": 4.619, |
|
"eval_wer": 0.47354399704707034, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.6775067750677507, |
|
"grad_norm": 3.4705822467803955, |
|
"learning_rate": 0.00020545263157894736, |
|
"loss": 0.6419, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.6775067750677507, |
|
"eval_loss": 0.5871421694755554, |
|
"eval_runtime": 152.5739, |
|
"eval_samples_per_second": 37.071, |
|
"eval_steps_per_second": 4.634, |
|
"eval_wer": 0.4592126590810611, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.6968641114982579, |
|
"eval_loss": 0.6001027226448059, |
|
"eval_runtime": 152.1697, |
|
"eval_samples_per_second": 37.169, |
|
"eval_steps_per_second": 4.646, |
|
"eval_wer": 0.4610742886488742, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.716221447928765, |
|
"eval_loss": 0.5848923921585083, |
|
"eval_runtime": 152.6563, |
|
"eval_samples_per_second": 37.051, |
|
"eval_steps_per_second": 4.631, |
|
"eval_wer": 0.4472565036670893, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.7355787843592722, |
|
"eval_loss": 0.5923960208892822, |
|
"eval_runtime": 152.6559, |
|
"eval_samples_per_second": 37.051, |
|
"eval_steps_per_second": 4.631, |
|
"eval_wer": 0.46377044181605176, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.7549361207897793, |
|
"eval_loss": 0.5767965316772461, |
|
"eval_runtime": 152.1652, |
|
"eval_samples_per_second": 37.17, |
|
"eval_steps_per_second": 4.646, |
|
"eval_wer": 0.4584904751969957, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.7742934572202865, |
|
"grad_norm": 3.628082275390625, |
|
"learning_rate": 0.00018966315789473683, |
|
"loss": 0.6183, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.7742934572202865, |
|
"eval_loss": 0.5672534704208374, |
|
"eval_runtime": 152.4329, |
|
"eval_samples_per_second": 37.105, |
|
"eval_steps_per_second": 4.638, |
|
"eval_wer": 0.44531463144549116, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.7936507936507936, |
|
"eval_loss": 0.5575382113456726, |
|
"eval_runtime": 152.2388, |
|
"eval_samples_per_second": 37.152, |
|
"eval_steps_per_second": 4.644, |
|
"eval_wer": 0.4451862431994351, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.8130081300813008, |
|
"eval_loss": 0.5631808042526245, |
|
"eval_runtime": 152.7545, |
|
"eval_samples_per_second": 37.027, |
|
"eval_steps_per_second": 4.628, |
|
"eval_wer": 0.4474972316284444, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.832365466511808, |
|
"eval_loss": 0.5498641729354858, |
|
"eval_runtime": 153.7788, |
|
"eval_samples_per_second": 36.78, |
|
"eval_steps_per_second": 4.598, |
|
"eval_wer": 0.44008281041870617, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.8517228029423152, |
|
"eval_loss": 0.5662574172019958, |
|
"eval_runtime": 152.5034, |
|
"eval_samples_per_second": 37.088, |
|
"eval_steps_per_second": 4.636, |
|
"eval_wer": 0.43101539054099597, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.8710801393728222, |
|
"grad_norm": 2.376349925994873, |
|
"learning_rate": 0.0001738736842105263, |
|
"loss": 0.5877, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.8710801393728222, |
|
"eval_loss": 0.5584732294082642, |
|
"eval_runtime": 152.1714, |
|
"eval_samples_per_second": 37.169, |
|
"eval_steps_per_second": 4.646, |
|
"eval_wer": 0.4317215258943044, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.8904374758033294, |
|
"eval_loss": 0.5463821291923523, |
|
"eval_runtime": 152.4923, |
|
"eval_samples_per_second": 37.09, |
|
"eval_steps_per_second": 4.636, |
|
"eval_wer": 0.41997400138017366, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.9097948122338366, |
|
"eval_loss": 0.5381494164466858, |
|
"eval_runtime": 153.2139, |
|
"eval_samples_per_second": 36.916, |
|
"eval_steps_per_second": 4.614, |
|
"eval_wer": 0.4192197204345942, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.9291521486643438, |
|
"eval_loss": 0.5453722476959229, |
|
"eval_runtime": 151.9737, |
|
"eval_samples_per_second": 37.217, |
|
"eval_steps_per_second": 4.652, |
|
"eval_wer": 0.4201986808107718, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.948509485094851, |
|
"eval_loss": 0.5237515568733215, |
|
"eval_runtime": 151.8558, |
|
"eval_samples_per_second": 37.246, |
|
"eval_steps_per_second": 4.656, |
|
"eval_wer": 0.41241514339362234, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.9678668215253581, |
|
"grad_norm": 2.5489518642425537, |
|
"learning_rate": 0.0001581157894736842, |
|
"loss": 0.5621, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.9678668215253581, |
|
"eval_loss": 0.5303541421890259, |
|
"eval_runtime": 152.515, |
|
"eval_samples_per_second": 37.085, |
|
"eval_steps_per_second": 4.636, |
|
"eval_wer": 0.41353854054661293, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.9872241579558653, |
|
"eval_loss": 0.5163344740867615, |
|
"eval_runtime": 156.7945, |
|
"eval_samples_per_second": 36.073, |
|
"eval_steps_per_second": 4.509, |
|
"eval_wer": 0.4061080708061177, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.0065814943863725, |
|
"eval_loss": 0.51596599817276, |
|
"eval_runtime": 153.2891, |
|
"eval_samples_per_second": 36.898, |
|
"eval_steps_per_second": 4.612, |
|
"eval_wer": 0.39927139670363176, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.0259388308168795, |
|
"eval_loss": 0.5088583827018738, |
|
"eval_runtime": 152.7112, |
|
"eval_samples_per_second": 37.037, |
|
"eval_steps_per_second": 4.63, |
|
"eval_wer": 0.3898509091492674, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.0452961672473868, |
|
"eval_loss": 0.5110610723495483, |
|
"eval_runtime": 152.5555, |
|
"eval_samples_per_second": 37.075, |
|
"eval_steps_per_second": 4.634, |
|
"eval_wer": 0.3985652613503234, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.064653503677894, |
|
"grad_norm": 1.1362248659133911, |
|
"learning_rate": 0.0001423578947368421, |
|
"loss": 0.4882, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.064653503677894, |
|
"eval_loss": 0.5010027885437012, |
|
"eval_runtime": 152.1249, |
|
"eval_samples_per_second": 37.18, |
|
"eval_steps_per_second": 4.647, |
|
"eval_wer": 0.38574248527547306, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.084010840108401, |
|
"eval_loss": 0.49406561255455017, |
|
"eval_runtime": 151.5623, |
|
"eval_samples_per_second": 37.318, |
|
"eval_steps_per_second": 4.665, |
|
"eval_wer": 0.3858548249907721, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.1033681765389083, |
|
"eval_loss": 0.49403733015060425, |
|
"eval_runtime": 152.7631, |
|
"eval_samples_per_second": 37.025, |
|
"eval_steps_per_second": 4.628, |
|
"eval_wer": 0.3813451878480525, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.1227255129694154, |
|
"eval_loss": 0.4913772642612457, |
|
"eval_runtime": 152.1406, |
|
"eval_samples_per_second": 37.176, |
|
"eval_steps_per_second": 4.647, |
|
"eval_wer": 0.37815153022740766, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.1420828493999227, |
|
"eval_loss": 0.48747047781944275, |
|
"eval_runtime": 151.3195, |
|
"eval_samples_per_second": 37.378, |
|
"eval_steps_per_second": 4.672, |
|
"eval_wer": 0.3745406108070806, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.1614401858304297, |
|
"grad_norm": 1.0150744915008545, |
|
"learning_rate": 0.00012656842105263156, |
|
"loss": 0.4569, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.1614401858304297, |
|
"eval_loss": 0.4841971695423126, |
|
"eval_runtime": 151.8567, |
|
"eval_samples_per_second": 37.246, |
|
"eval_steps_per_second": 4.656, |
|
"eval_wer": 0.38071929514852915, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.1807975222609368, |
|
"eval_loss": 0.48611822724342346, |
|
"eval_runtime": 150.971, |
|
"eval_samples_per_second": 37.464, |
|
"eval_steps_per_second": 4.683, |
|
"eval_wer": 0.37370608720771614, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.2001548586914441, |
|
"eval_loss": 0.48144644498825073, |
|
"eval_runtime": 151.4548, |
|
"eval_samples_per_second": 37.344, |
|
"eval_steps_per_second": 4.668, |
|
"eval_wer": 0.3760973182905105, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.2195121951219512, |
|
"eval_loss": 0.47813892364501953, |
|
"eval_runtime": 151.1935, |
|
"eval_samples_per_second": 37.409, |
|
"eval_steps_per_second": 4.676, |
|
"eval_wer": 0.37409125194588433, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.2388695315524583, |
|
"eval_loss": 0.4771001935005188, |
|
"eval_runtime": 151.1732, |
|
"eval_samples_per_second": 37.414, |
|
"eval_steps_per_second": 4.677, |
|
"eval_wer": 0.36815329556579096, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.2582268679829656, |
|
"grad_norm": 1.3292571306228638, |
|
"learning_rate": 0.00011077894736842105, |
|
"loss": 0.4416, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.2582268679829656, |
|
"eval_loss": 0.47095027565956116, |
|
"eval_runtime": 151.5037, |
|
"eval_samples_per_second": 37.332, |
|
"eval_steps_per_second": 4.667, |
|
"eval_wer": 0.37338511659257595, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.2775842044134726, |
|
"eval_loss": 0.47211408615112305, |
|
"eval_runtime": 150.9455, |
|
"eval_samples_per_second": 37.47, |
|
"eval_steps_per_second": 4.684, |
|
"eval_wer": 0.3659706953828377, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.29694154084398, |
|
"eval_loss": 0.4679400622844696, |
|
"eval_runtime": 151.4191, |
|
"eval_samples_per_second": 37.353, |
|
"eval_steps_per_second": 4.669, |
|
"eval_wer": 0.3638843863844265, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.316298877274487, |
|
"eval_loss": 0.46228036284446716, |
|
"eval_runtime": 151.3839, |
|
"eval_samples_per_second": 37.362, |
|
"eval_steps_per_second": 4.67, |
|
"eval_wer": 0.366532393959333, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.3356562137049943, |
|
"eval_loss": 0.46108925342559814, |
|
"eval_runtime": 151.8163, |
|
"eval_samples_per_second": 37.256, |
|
"eval_steps_per_second": 4.657, |
|
"eval_wer": 0.3601771757795574, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.3550135501355014, |
|
"grad_norm": 0.8062695860862732, |
|
"learning_rate": 9.498947368421052e-05, |
|
"loss": 0.4324, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.3550135501355014, |
|
"eval_loss": 0.46888086199760437, |
|
"eval_runtime": 152.4379, |
|
"eval_samples_per_second": 37.104, |
|
"eval_steps_per_second": 4.638, |
|
"eval_wer": 0.3609314567251368, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.3743708865660085, |
|
"eval_loss": 0.4573034346103668, |
|
"eval_runtime": 151.3077, |
|
"eval_samples_per_second": 37.381, |
|
"eval_steps_per_second": 4.673, |
|
"eval_wer": 0.3602574184333424, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.3937282229965158, |
|
"eval_loss": 0.45749789476394653, |
|
"eval_runtime": 151.5824, |
|
"eval_samples_per_second": 37.313, |
|
"eval_steps_per_second": 4.664, |
|
"eval_wer": 0.3546083356068752, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.4130855594270229, |
|
"eval_loss": 0.4555954933166504, |
|
"eval_runtime": 151.6035, |
|
"eval_samples_per_second": 37.308, |
|
"eval_steps_per_second": 4.663, |
|
"eval_wer": 0.35836369180401534, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.43244289585753, |
|
"eval_loss": 0.4495578408241272, |
|
"eval_runtime": 152.5621, |
|
"eval_samples_per_second": 37.073, |
|
"eval_steps_per_second": 4.634, |
|
"eval_wer": 0.350724591163679, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.4518002322880372, |
|
"grad_norm": 0.7916799187660217, |
|
"learning_rate": 7.92e-05, |
|
"loss": 0.4255, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.4518002322880372, |
|
"eval_loss": 0.44609567523002625, |
|
"eval_runtime": 151.8498, |
|
"eval_samples_per_second": 37.247, |
|
"eval_steps_per_second": 4.656, |
|
"eval_wer": 0.34671245847442667, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.4711575687185443, |
|
"eval_loss": 0.44341230392456055, |
|
"eval_runtime": 152.528, |
|
"eval_samples_per_second": 37.082, |
|
"eval_steps_per_second": 4.635, |
|
"eval_wer": 0.3462470510824734, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.4905149051490514, |
|
"eval_loss": 0.44362780451774597, |
|
"eval_runtime": 152.5253, |
|
"eval_samples_per_second": 37.082, |
|
"eval_steps_per_second": 4.635, |
|
"eval_wer": 0.3516393574168285, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.5098722415795587, |
|
"eval_loss": 0.4406072199344635, |
|
"eval_runtime": 152.4039, |
|
"eval_samples_per_second": 37.112, |
|
"eval_steps_per_second": 4.639, |
|
"eval_wer": 0.34579769222127715, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.5292295780100658, |
|
"eval_loss": 0.43874725699424744, |
|
"eval_runtime": 152.6604, |
|
"eval_samples_per_second": 37.05, |
|
"eval_steps_per_second": 4.631, |
|
"eval_wer": 0.3439360626534641, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.5485869144405728, |
|
"grad_norm": 0.7491864562034607, |
|
"learning_rate": 6.344210526315788e-05, |
|
"loss": 0.4094, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.5485869144405728, |
|
"eval_loss": 0.43253499269485474, |
|
"eval_runtime": 153.8006, |
|
"eval_samples_per_second": 36.775, |
|
"eval_steps_per_second": 4.597, |
|
"eval_wer": 0.3409831329941744, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.5679442508710801, |
|
"eval_loss": 0.4359830617904663, |
|
"eval_runtime": 153.3674, |
|
"eval_samples_per_second": 36.879, |
|
"eval_steps_per_second": 4.61, |
|
"eval_wer": 0.3419299963088379, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.5873015873015874, |
|
"eval_loss": 0.4285949170589447, |
|
"eval_runtime": 153.3711, |
|
"eval_samples_per_second": 36.878, |
|
"eval_steps_per_second": 4.61, |
|
"eval_wer": 0.3377252812505015, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.6066589237320945, |
|
"eval_loss": 0.43007034063339233, |
|
"eval_runtime": 152.2201, |
|
"eval_samples_per_second": 37.157, |
|
"eval_steps_per_second": 4.645, |
|
"eval_wer": 0.3335526632536791, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.6260162601626016, |
|
"eval_loss": 0.42966797947883606, |
|
"eval_runtime": 152.0163, |
|
"eval_samples_per_second": 37.207, |
|
"eval_steps_per_second": 4.651, |
|
"eval_wer": 0.3322848293238754, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.645373596593109, |
|
"grad_norm": 1.047472596168518, |
|
"learning_rate": 4.765263157894736e-05, |
|
"loss": 0.4018, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.645373596593109, |
|
"eval_loss": 0.4270441234111786, |
|
"eval_runtime": 152.8058, |
|
"eval_samples_per_second": 37.014, |
|
"eval_steps_per_second": 4.627, |
|
"eval_wer": 0.3338575853380623, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.664730933023616, |
|
"eval_loss": 0.4267289638519287, |
|
"eval_runtime": 152.5032, |
|
"eval_samples_per_second": 37.088, |
|
"eval_steps_per_second": 4.636, |
|
"eval_wer": 0.3319959557702492, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.684088269454123, |
|
"eval_loss": 0.4224300980567932, |
|
"eval_runtime": 152.5862, |
|
"eval_samples_per_second": 37.068, |
|
"eval_steps_per_second": 4.633, |
|
"eval_wer": 0.33275023671582865, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.7034456058846303, |
|
"eval_loss": 0.4207303822040558, |
|
"eval_runtime": 154.5205, |
|
"eval_samples_per_second": 36.604, |
|
"eval_steps_per_second": 4.575, |
|
"eval_wer": 0.32984545264881, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.7228029423151374, |
|
"eval_loss": 0.4197385013103485, |
|
"eval_runtime": 152.0624, |
|
"eval_samples_per_second": 37.195, |
|
"eval_steps_per_second": 4.649, |
|
"eval_wer": 0.32978125852578194, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.7421602787456445, |
|
"grad_norm": 1.4507739543914795, |
|
"learning_rate": 3.189473684210526e-05, |
|
"loss": 0.3899, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.7421602787456445, |
|
"eval_loss": 0.4183507561683655, |
|
"eval_runtime": 157.4278, |
|
"eval_samples_per_second": 35.928, |
|
"eval_steps_per_second": 4.491, |
|
"eval_wer": 0.3258493684903147, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.7615176151761518, |
|
"eval_loss": 0.4164830148220062, |
|
"eval_runtime": 153.0475, |
|
"eval_samples_per_second": 36.956, |
|
"eval_steps_per_second": 4.619, |
|
"eval_wer": 0.3262024361669689, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 1.7808749516066589, |
|
"eval_loss": 0.41182050108909607, |
|
"eval_runtime": 152.4839, |
|
"eval_samples_per_second": 37.092, |
|
"eval_steps_per_second": 4.637, |
|
"eval_wer": 0.322864341769511, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.800232288037166, |
|
"eval_loss": 0.4134317636489868, |
|
"eval_runtime": 152.6353, |
|
"eval_samples_per_second": 37.056, |
|
"eval_steps_per_second": 4.632, |
|
"eval_wer": 0.3232334579769222, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.8195896244676733, |
|
"eval_loss": 0.4126824736595154, |
|
"eval_runtime": 152.5246, |
|
"eval_samples_per_second": 37.083, |
|
"eval_steps_per_second": 4.635, |
|
"eval_wer": 0.3209064210171559, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.8389469608981805, |
|
"grad_norm": 1.0012460947036743, |
|
"learning_rate": 1.6105263157894736e-05, |
|
"loss": 0.3665, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.8389469608981805, |
|
"eval_loss": 0.41083237528800964, |
|
"eval_runtime": 152.9993, |
|
"eval_samples_per_second": 36.967, |
|
"eval_steps_per_second": 4.621, |
|
"eval_wer": 0.32109900338624, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.8583042973286876, |
|
"eval_loss": 0.4090138077735901, |
|
"eval_runtime": 152.5291, |
|
"eval_samples_per_second": 37.081, |
|
"eval_steps_per_second": 4.635, |
|
"eval_wer": 0.3199114121102213, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.8776616337591947, |
|
"eval_loss": 0.407578706741333, |
|
"eval_runtime": 153.0711, |
|
"eval_samples_per_second": 36.95, |
|
"eval_steps_per_second": 4.619, |
|
"eval_wer": 0.32087432395564186, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 1.897018970189702, |
|
"eval_loss": 0.40649694204330444, |
|
"eval_runtime": 154.4136, |
|
"eval_samples_per_second": 36.629, |
|
"eval_steps_per_second": 4.579, |
|
"eval_wer": 0.31981512092567926, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.916376306620209, |
|
"eval_loss": 0.40620651841163635, |
|
"eval_runtime": 153.7508, |
|
"eval_samples_per_second": 36.787, |
|
"eval_steps_per_second": 4.598, |
|
"eval_wer": 0.31923737381842693, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 1.9357336430507162, |
|
"grad_norm": 0.7244949340820312, |
|
"learning_rate": 3.157894736842105e-07, |
|
"loss": 0.3698, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.9357336430507162, |
|
"eval_loss": 0.4060620963573456, |
|
"eval_runtime": 153.976, |
|
"eval_samples_per_second": 36.733, |
|
"eval_steps_per_second": 4.592, |
|
"eval_wer": 0.31928551941069794, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.9550909794812235, |
|
"eval_loss": 0.45229342579841614, |
|
"eval_runtime": 154.2948, |
|
"eval_samples_per_second": 36.657, |
|
"eval_steps_per_second": 4.582, |
|
"eval_wer": 0.3406140167867632, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 1.9744483159117305, |
|
"eval_loss": 0.4579542577266693, |
|
"eval_runtime": 151.5074, |
|
"eval_samples_per_second": 37.331, |
|
"eval_steps_per_second": 4.666, |
|
"eval_wer": 0.3517837941936416, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 1.9938056523422376, |
|
"eval_loss": 0.46043792366981506, |
|
"eval_runtime": 151.4438, |
|
"eval_samples_per_second": 37.347, |
|
"eval_steps_per_second": 4.668, |
|
"eval_wer": 0.35115790149411824, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 2.013162988772745, |
|
"eval_loss": 0.46549099683761597, |
|
"eval_runtime": 151.5994, |
|
"eval_samples_per_second": 37.309, |
|
"eval_steps_per_second": 4.664, |
|
"eval_wer": 0.3552181797756415, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 2.032520325203252, |
|
"grad_norm": 0.703632652759552, |
|
"learning_rate": 0.0001463076923076923, |
|
"loss": 0.3624, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.032520325203252, |
|
"eval_loss": 0.4670031666755676, |
|
"eval_runtime": 151.5063, |
|
"eval_samples_per_second": 37.332, |
|
"eval_steps_per_second": 4.666, |
|
"eval_wer": 0.35144677504774435, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.051877661633759, |
|
"eval_loss": 0.459250271320343, |
|
"eval_runtime": 153.0971, |
|
"eval_samples_per_second": 36.944, |
|
"eval_steps_per_second": 4.618, |
|
"eval_wer": 0.3628251833544639, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 2.0712349980642664, |
|
"eval_loss": 0.46061432361602783, |
|
"eval_runtime": 152.0732, |
|
"eval_samples_per_second": 37.193, |
|
"eval_steps_per_second": 4.649, |
|
"eval_wer": 0.3545922870761182, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 2.0905923344947737, |
|
"eval_loss": 0.46500489115715027, |
|
"eval_runtime": 151.985, |
|
"eval_samples_per_second": 37.214, |
|
"eval_steps_per_second": 4.652, |
|
"eval_wer": 0.35905377862656673, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 2.1099496709252805, |
|
"eval_loss": 0.46085453033447266, |
|
"eval_runtime": 152.4835, |
|
"eval_samples_per_second": 37.093, |
|
"eval_steps_per_second": 4.637, |
|
"eval_wer": 0.35483301503747333, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 2.129307007355788, |
|
"grad_norm": 0.5008242726325989, |
|
"learning_rate": 0.00013863076923076922, |
|
"loss": 0.3755, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.129307007355788, |
|
"eval_loss": 0.4708138406276703, |
|
"eval_runtime": 152.3457, |
|
"eval_samples_per_second": 37.126, |
|
"eval_steps_per_second": 4.641, |
|
"eval_wer": 0.35573173275986586, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.148664343786295, |
|
"eval_loss": 0.4649392366409302, |
|
"eval_runtime": 152.7087, |
|
"eval_samples_per_second": 37.038, |
|
"eval_steps_per_second": 4.63, |
|
"eval_wer": 0.3548009179759593, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 2.168021680216802, |
|
"eval_loss": 0.4624271094799042, |
|
"eval_runtime": 153.153, |
|
"eval_samples_per_second": 36.93, |
|
"eval_steps_per_second": 4.616, |
|
"eval_wer": 0.355956412190464, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 2.1873790166473093, |
|
"eval_loss": 0.45822229981422424, |
|
"eval_runtime": 156.1964, |
|
"eval_samples_per_second": 36.211, |
|
"eval_steps_per_second": 4.526, |
|
"eval_wer": 0.35229734717786587, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 2.2067363530778166, |
|
"eval_loss": 0.466250479221344, |
|
"eval_runtime": 152.6707, |
|
"eval_samples_per_second": 37.047, |
|
"eval_steps_per_second": 4.631, |
|
"eval_wer": 0.3586044197653705, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 2.226093689508324, |
|
"grad_norm": 0.9631055593490601, |
|
"learning_rate": 0.00013093846153846151, |
|
"loss": 0.3891, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.226093689508324, |
|
"eval_loss": 0.46153655648231506, |
|
"eval_runtime": 153.1909, |
|
"eval_samples_per_second": 36.921, |
|
"eval_steps_per_second": 4.615, |
|
"eval_wer": 0.3552181797756415, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.2454510259388307, |
|
"eval_loss": 0.4631531238555908, |
|
"eval_runtime": 152.9395, |
|
"eval_samples_per_second": 36.982, |
|
"eval_steps_per_second": 4.623, |
|
"eval_wer": 0.35886119625748264, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 2.264808362369338, |
|
"eval_loss": 0.4495234191417694, |
|
"eval_runtime": 153.0237, |
|
"eval_samples_per_second": 36.962, |
|
"eval_steps_per_second": 4.62, |
|
"eval_wer": 0.3425398404776043, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 2.2841656987998453, |
|
"eval_loss": 0.462666779756546, |
|
"eval_runtime": 152.4714, |
|
"eval_samples_per_second": 37.095, |
|
"eval_steps_per_second": 4.637, |
|
"eval_wer": 0.34942466017236123, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 2.303523035230352, |
|
"eval_loss": 0.4550352096557617, |
|
"eval_runtime": 152.8072, |
|
"eval_samples_per_second": 37.014, |
|
"eval_steps_per_second": 4.627, |
|
"eval_wer": 0.3451717995217538, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 2.3228803716608595, |
|
"grad_norm": 0.7961182594299316, |
|
"learning_rate": 0.00012324615384615384, |
|
"loss": 0.3946, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.3228803716608595, |
|
"eval_loss": 0.44988927245140076, |
|
"eval_runtime": 152.9644, |
|
"eval_samples_per_second": 36.976, |
|
"eval_steps_per_second": 4.622, |
|
"eval_wer": 0.3462310025517164, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.3422377080913668, |
|
"eval_loss": 0.4501667320728302, |
|
"eval_runtime": 153.061, |
|
"eval_samples_per_second": 36.953, |
|
"eval_steps_per_second": 4.619, |
|
"eval_wer": 0.341978141901109, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 2.3615950445218736, |
|
"eval_loss": 0.4580215513706207, |
|
"eval_runtime": 153.2108, |
|
"eval_samples_per_second": 36.916, |
|
"eval_steps_per_second": 4.615, |
|
"eval_wer": 0.3412399094862865, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 2.380952380952381, |
|
"eval_loss": 0.4506891667842865, |
|
"eval_runtime": 153.6611, |
|
"eval_samples_per_second": 36.808, |
|
"eval_steps_per_second": 4.601, |
|
"eval_wer": 0.34339041260772574, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 2.4003097173828882, |
|
"eval_loss": 0.44618555903434753, |
|
"eval_runtime": 153.273, |
|
"eval_samples_per_second": 36.901, |
|
"eval_steps_per_second": 4.613, |
|
"eval_wer": 0.34475453772207154, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 2.419667053813395, |
|
"grad_norm": 0.828158974647522, |
|
"learning_rate": 0.00011556923076923076, |
|
"loss": 0.3824, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.419667053813395, |
|
"eval_loss": 0.44126543402671814, |
|
"eval_runtime": 153.3979, |
|
"eval_samples_per_second": 36.871, |
|
"eval_steps_per_second": 4.609, |
|
"eval_wer": 0.34127200654780054, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.4390243902439024, |
|
"eval_loss": 0.44880929589271545, |
|
"eval_runtime": 153.7143, |
|
"eval_samples_per_second": 36.796, |
|
"eval_steps_per_second": 4.599, |
|
"eval_wer": 0.3443212273916323, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 2.4583817266744097, |
|
"eval_loss": 0.44148463010787964, |
|
"eval_runtime": 153.647, |
|
"eval_samples_per_second": 36.812, |
|
"eval_steps_per_second": 4.601, |
|
"eval_wer": 0.3431657331771276, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 2.4777390631049165, |
|
"eval_loss": 0.44202256202697754, |
|
"eval_runtime": 153.5743, |
|
"eval_samples_per_second": 36.829, |
|
"eval_steps_per_second": 4.604, |
|
"eval_wer": 0.34093498740190337, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 2.497096399535424, |
|
"eval_loss": 0.4379221200942993, |
|
"eval_runtime": 153.5736, |
|
"eval_samples_per_second": 36.829, |
|
"eval_steps_per_second": 4.604, |
|
"eval_wer": 0.3361204281748006, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 2.516453735965931, |
|
"grad_norm": 1.2163615226745605, |
|
"learning_rate": 0.00010787692307692307, |
|
"loss": 0.372, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.516453735965931, |
|
"eval_loss": 0.43855318427085876, |
|
"eval_runtime": 153.5476, |
|
"eval_samples_per_second": 36.835, |
|
"eval_steps_per_second": 4.604, |
|
"eval_wer": 0.3334884691306511, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.535811072396438, |
|
"eval_loss": 0.44449883699417114, |
|
"eval_runtime": 153.7016, |
|
"eval_samples_per_second": 36.799, |
|
"eval_steps_per_second": 4.6, |
|
"eval_wer": 0.3397794931873987, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 2.5551684088269453, |
|
"eval_loss": 0.4401286542415619, |
|
"eval_runtime": 154.2488, |
|
"eval_samples_per_second": 36.668, |
|
"eval_steps_per_second": 4.584, |
|
"eval_wer": 0.3392819887339314, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 2.5745257452574526, |
|
"eval_loss": 0.437770813703537, |
|
"eval_runtime": 153.8927, |
|
"eval_samples_per_second": 36.753, |
|
"eval_steps_per_second": 4.594, |
|
"eval_wer": 0.335077273675595, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 2.59388308168796, |
|
"eval_loss": 0.4315861463546753, |
|
"eval_runtime": 153.7886, |
|
"eval_samples_per_second": 36.778, |
|
"eval_steps_per_second": 4.597, |
|
"eval_wer": 0.33517356486013705, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 2.6132404181184667, |
|
"grad_norm": 1.084632158279419, |
|
"learning_rate": 0.0001002, |
|
"loss": 0.3521, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.6132404181184667, |
|
"eval_loss": 0.43864014744758606, |
|
"eval_runtime": 153.9711, |
|
"eval_samples_per_second": 36.734, |
|
"eval_steps_per_second": 4.592, |
|
"eval_wer": 0.33398597358411836, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.632597754548974, |
|
"eval_loss": 0.43551018834114075, |
|
"eval_runtime": 154.3017, |
|
"eval_samples_per_second": 36.655, |
|
"eval_steps_per_second": 4.582, |
|
"eval_wer": 0.33096884980180064, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 2.6519550909794813, |
|
"eval_loss": 0.4325660765171051, |
|
"eval_runtime": 154.4812, |
|
"eval_samples_per_second": 36.613, |
|
"eval_steps_per_second": 4.577, |
|
"eval_wer": 0.3343871868530436, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 2.6713124274099886, |
|
"eval_loss": 0.4263465404510498, |
|
"eval_runtime": 154.0733, |
|
"eval_samples_per_second": 36.71, |
|
"eval_steps_per_second": 4.589, |
|
"eval_wer": 0.32629872735151094, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 2.6906697638404955, |
|
"eval_loss": 0.42636117339134216, |
|
"eval_runtime": 154.1615, |
|
"eval_samples_per_second": 36.689, |
|
"eval_steps_per_second": 4.586, |
|
"eval_wer": 0.32353838006130536, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 2.710027100271003, |
|
"grad_norm": 1.1979655027389526, |
|
"learning_rate": 9.25076923076923e-05, |
|
"loss": 0.3592, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.710027100271003, |
|
"eval_loss": 0.4322036802768707, |
|
"eval_runtime": 154.5242, |
|
"eval_samples_per_second": 36.603, |
|
"eval_steps_per_second": 4.575, |
|
"eval_wer": 0.3299738408948661, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.72938443670151, |
|
"eval_loss": 0.4294193983078003, |
|
"eval_runtime": 154.4329, |
|
"eval_samples_per_second": 36.624, |
|
"eval_steps_per_second": 4.578, |
|
"eval_wer": 0.3261542905746979, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 2.748741773132017, |
|
"eval_loss": 0.43099814653396606, |
|
"eval_runtime": 154.4209, |
|
"eval_samples_per_second": 36.627, |
|
"eval_steps_per_second": 4.578, |
|
"eval_wer": 0.32329765209995026, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 2.7680991095625243, |
|
"eval_loss": 0.42700281739234924, |
|
"eval_runtime": 155.4008, |
|
"eval_samples_per_second": 36.396, |
|
"eval_steps_per_second": 4.55, |
|
"eval_wer": 0.3268122803357353, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 2.7874564459930316, |
|
"eval_loss": 0.4209098219871521, |
|
"eval_runtime": 156.5271, |
|
"eval_samples_per_second": 36.134, |
|
"eval_steps_per_second": 4.517, |
|
"eval_wer": 0.3254321066906325, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 2.8068137824235384, |
|
"grad_norm": 0.6974443793296814, |
|
"learning_rate": 8.48153846153846e-05, |
|
"loss": 0.3459, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.8068137824235384, |
|
"eval_loss": 0.42542555928230286, |
|
"eval_runtime": 157.9392, |
|
"eval_samples_per_second": 35.811, |
|
"eval_steps_per_second": 4.476, |
|
"eval_wer": 0.32729373625844554, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.8261711188540457, |
|
"eval_loss": 0.42783817648887634, |
|
"eval_runtime": 155.0217, |
|
"eval_samples_per_second": 36.485, |
|
"eval_steps_per_second": 4.561, |
|
"eval_wer": 0.3231532153231372, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 2.845528455284553, |
|
"eval_loss": 0.4212438464164734, |
|
"eval_runtime": 154.853, |
|
"eval_samples_per_second": 36.525, |
|
"eval_steps_per_second": 4.566, |
|
"eval_wer": 0.3215002166551652, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 2.86488579171506, |
|
"eval_loss": 0.4169256389141083, |
|
"eval_runtime": 154.5142, |
|
"eval_samples_per_second": 36.605, |
|
"eval_steps_per_second": 4.576, |
|
"eval_wer": 0.31928551941069794, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 2.884243128145567, |
|
"eval_loss": 0.42132049798965454, |
|
"eval_runtime": 154.8091, |
|
"eval_samples_per_second": 36.535, |
|
"eval_steps_per_second": 4.567, |
|
"eval_wer": 0.3195262473720531, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 2.9036004645760745, |
|
"grad_norm": 1.099702000617981, |
|
"learning_rate": 7.713846153846152e-05, |
|
"loss": 0.3483, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.9036004645760745, |
|
"eval_loss": 0.41696369647979736, |
|
"eval_runtime": 155.3223, |
|
"eval_samples_per_second": 36.415, |
|
"eval_steps_per_second": 4.552, |
|
"eval_wer": 0.31652517212049236, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.9229578010065813, |
|
"eval_loss": 0.41230952739715576, |
|
"eval_runtime": 154.9181, |
|
"eval_samples_per_second": 36.51, |
|
"eval_steps_per_second": 4.564, |
|
"eval_wer": 0.31418208662996905, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 2.9423151374370886, |
|
"eval_loss": 0.4116990566253662, |
|
"eval_runtime": 154.97, |
|
"eval_samples_per_second": 36.497, |
|
"eval_steps_per_second": 4.562, |
|
"eval_wer": 0.31337966009211854, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 2.961672473867596, |
|
"eval_loss": 0.410386323928833, |
|
"eval_runtime": 155.0232, |
|
"eval_samples_per_second": 36.485, |
|
"eval_steps_per_second": 4.561, |
|
"eval_wer": 0.31158222464733354, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 2.9810298102981028, |
|
"eval_loss": 0.41244322061538696, |
|
"eval_runtime": 154.4682, |
|
"eval_samples_per_second": 36.616, |
|
"eval_steps_per_second": 4.577, |
|
"eval_wer": 0.31419813516072603, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 3.00038714672861, |
|
"grad_norm": 0.725528359413147, |
|
"learning_rate": 6.946153846153845e-05, |
|
"loss": 0.3501, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 3.00038714672861, |
|
"eval_loss": 0.40684688091278076, |
|
"eval_runtime": 154.5863, |
|
"eval_samples_per_second": 36.588, |
|
"eval_steps_per_second": 4.573, |
|
"eval_wer": 0.31272167033108117, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 3.0197444831591174, |
|
"eval_loss": 0.4200752079486847, |
|
"eval_runtime": 154.5821, |
|
"eval_samples_per_second": 36.589, |
|
"eval_steps_per_second": 4.574, |
|
"eval_wer": 0.3087416347033429, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 3.0391018195896247, |
|
"eval_loss": 0.4186869263648987, |
|
"eval_runtime": 154.7417, |
|
"eval_samples_per_second": 36.551, |
|
"eval_steps_per_second": 4.569, |
|
"eval_wer": 0.3137808733610438, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 3.0584591560201315, |
|
"eval_loss": 0.41133585572242737, |
|
"eval_runtime": 155.21, |
|
"eval_samples_per_second": 36.441, |
|
"eval_steps_per_second": 4.555, |
|
"eval_wer": 0.31092423488629617, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 3.077816492450639, |
|
"eval_loss": 0.4191639721393585, |
|
"eval_runtime": 155.276, |
|
"eval_samples_per_second": 36.425, |
|
"eval_steps_per_second": 4.553, |
|
"eval_wer": 0.30851695527274475, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 3.097173828881146, |
|
"grad_norm": 0.5114701390266418, |
|
"learning_rate": 6.176923076923076e-05, |
|
"loss": 0.2754, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 3.097173828881146, |
|
"eval_loss": 0.4161028265953064, |
|
"eval_runtime": 154.597, |
|
"eval_samples_per_second": 36.585, |
|
"eval_steps_per_second": 4.573, |
|
"eval_wer": 0.30901445972621205, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 3.116531165311653, |
|
"eval_loss": 0.4183988571166992, |
|
"eval_runtime": 155.0124, |
|
"eval_samples_per_second": 36.487, |
|
"eval_steps_per_second": 4.561, |
|
"eval_wer": 0.307152830158399, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 3.1358885017421603, |
|
"eval_loss": 0.4186756908893585, |
|
"eval_runtime": 154.8535, |
|
"eval_samples_per_second": 36.525, |
|
"eval_steps_per_second": 4.566, |
|
"eval_wer": 0.3060936271284364, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 3.1552458381726676, |
|
"eval_loss": 0.4193824827671051, |
|
"eval_runtime": 154.3195, |
|
"eval_samples_per_second": 36.651, |
|
"eval_steps_per_second": 4.581, |
|
"eval_wer": 0.3059652388823803, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 3.1746031746031744, |
|
"eval_loss": 0.40788766741752625, |
|
"eval_runtime": 154.9673, |
|
"eval_samples_per_second": 36.498, |
|
"eval_steps_per_second": 4.562, |
|
"eval_wer": 0.3038949784147261, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 3.1939605110336817, |
|
"grad_norm": 0.5594165325164795, |
|
"learning_rate": 5.4076923076923074e-05, |
|
"loss": 0.2802, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 3.1939605110336817, |
|
"eval_loss": 0.41461309790611267, |
|
"eval_runtime": 154.8662, |
|
"eval_samples_per_second": 36.522, |
|
"eval_steps_per_second": 4.565, |
|
"eval_wer": 0.30424804609138034, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 3.213317847464189, |
|
"eval_loss": 0.4168522357940674, |
|
"eval_runtime": 155.0374, |
|
"eval_samples_per_second": 36.482, |
|
"eval_steps_per_second": 4.56, |
|
"eval_wer": 0.30116672818603457, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 3.2326751838946963, |
|
"eval_loss": 0.40926745533943176, |
|
"eval_runtime": 154.7423, |
|
"eval_samples_per_second": 36.551, |
|
"eval_steps_per_second": 4.569, |
|
"eval_wer": 0.3023864165235673, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 3.252032520325203, |
|
"eval_loss": 0.4115259051322937, |
|
"eval_runtime": 154.7933, |
|
"eval_samples_per_second": 36.539, |
|
"eval_steps_per_second": 4.567, |
|
"eval_wer": 0.3005408354865112, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 3.2713898567557105, |
|
"eval_loss": 0.40197211503982544, |
|
"eval_runtime": 155.5964, |
|
"eval_samples_per_second": 36.35, |
|
"eval_steps_per_second": 4.544, |
|
"eval_wer": 0.30410360931456726, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 3.290747193186218, |
|
"grad_norm": 1.4730154275894165, |
|
"learning_rate": 4.6384615384615385e-05, |
|
"loss": 0.2723, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 3.290747193186218, |
|
"eval_loss": 0.4058869779109955, |
|
"eval_runtime": 155.0898, |
|
"eval_samples_per_second": 36.469, |
|
"eval_steps_per_second": 4.559, |
|
"eval_wer": 0.30442457992970745, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 3.3101045296167246, |
|
"eval_loss": 0.40676185488700867, |
|
"eval_runtime": 155.0576, |
|
"eval_samples_per_second": 36.477, |
|
"eval_steps_per_second": 4.56, |
|
"eval_wer": 0.3013753590858757, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 3.329461866047232, |
|
"eval_loss": 0.40653425455093384, |
|
"eval_runtime": 155.8377, |
|
"eval_samples_per_second": 36.294, |
|
"eval_steps_per_second": 4.537, |
|
"eval_wer": 0.30878978029561394, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 3.3488192024777392, |
|
"eval_loss": 0.4082197844982147, |
|
"eval_runtime": 155.7924, |
|
"eval_samples_per_second": 36.305, |
|
"eval_steps_per_second": 4.538, |
|
"eval_wer": 0.3010543884707355, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 3.368176538908246, |
|
"eval_loss": 0.4083554446697235, |
|
"eval_runtime": 155.6775, |
|
"eval_samples_per_second": 36.332, |
|
"eval_steps_per_second": 4.541, |
|
"eval_wer": 0.3007494663863523, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 3.3875338753387534, |
|
"grad_norm": 0.5211097598075867, |
|
"learning_rate": 3.87076923076923e-05, |
|
"loss": 0.2557, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 3.3875338753387534, |
|
"eval_loss": 0.4009736180305481, |
|
"eval_runtime": 155.105, |
|
"eval_samples_per_second": 36.466, |
|
"eval_steps_per_second": 4.558, |
|
"eval_wer": 0.29924090449519347, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 3.4068912117692607, |
|
"eval_loss": 0.4061805009841919, |
|
"eval_runtime": 154.8792, |
|
"eval_samples_per_second": 36.519, |
|
"eval_steps_per_second": 4.565, |
|
"eval_wer": 0.2999951854407729, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 3.4262485481997675, |
|
"eval_loss": 0.40264037251472473, |
|
"eval_runtime": 155.9957, |
|
"eval_samples_per_second": 36.257, |
|
"eval_steps_per_second": 4.532, |
|
"eval_wer": 0.2980533132191748, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 3.445605884630275, |
|
"eval_loss": 0.40035372972488403, |
|
"eval_runtime": 155.0928, |
|
"eval_samples_per_second": 36.468, |
|
"eval_steps_per_second": 4.559, |
|
"eval_wer": 0.29893598241081026, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 3.464963221060782, |
|
"eval_loss": 0.40443336963653564, |
|
"eval_runtime": 154.9305, |
|
"eval_samples_per_second": 36.507, |
|
"eval_steps_per_second": 4.563, |
|
"eval_wer": 0.29906437065686636, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 3.484320557491289, |
|
"grad_norm": 0.7458967566490173, |
|
"learning_rate": 3.101538461538461e-05, |
|
"loss": 0.2578, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 3.484320557491289, |
|
"eval_loss": 0.4003549814224243, |
|
"eval_runtime": 155.7394, |
|
"eval_samples_per_second": 36.317, |
|
"eval_steps_per_second": 4.54, |
|
"eval_wer": 0.29660894545104394, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 3.5036778939217963, |
|
"eval_loss": 0.40592488646507263, |
|
"eval_runtime": 159.2644, |
|
"eval_samples_per_second": 35.513, |
|
"eval_steps_per_second": 4.439, |
|
"eval_wer": 0.29449053939111874, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 3.5230352303523036, |
|
"eval_loss": 0.4014962613582611, |
|
"eval_runtime": 155.6654, |
|
"eval_samples_per_second": 36.334, |
|
"eval_steps_per_second": 4.542, |
|
"eval_wer": 0.29632007189741777, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 3.5423925667828104, |
|
"eval_loss": 0.396659791469574, |
|
"eval_runtime": 156.1536, |
|
"eval_samples_per_second": 36.221, |
|
"eval_steps_per_second": 4.528, |
|
"eval_wer": 0.29585466450546455, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 3.5617499032133177, |
|
"eval_loss": 0.4001907706260681, |
|
"eval_runtime": 155.7578, |
|
"eval_samples_per_second": 36.313, |
|
"eval_steps_per_second": 4.539, |
|
"eval_wer": 0.29412142318370754, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 3.581107239643825, |
|
"grad_norm": 0.6122294664382935, |
|
"learning_rate": 2.3338461538461535e-05, |
|
"loss": 0.2508, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 3.581107239643825, |
|
"eval_loss": 0.39826107025146484, |
|
"eval_runtime": 155.467, |
|
"eval_samples_per_second": 36.381, |
|
"eval_steps_per_second": 4.548, |
|
"eval_wer": 0.2945547335141468, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 3.600464576074332, |
|
"eval_loss": 0.3958674967288971, |
|
"eval_runtime": 155.8242, |
|
"eval_samples_per_second": 36.297, |
|
"eval_steps_per_second": 4.537, |
|
"eval_wer": 0.29365601579175427, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 3.619821912504839, |
|
"eval_loss": 0.3970955014228821, |
|
"eval_runtime": 155.4329, |
|
"eval_samples_per_second": 36.389, |
|
"eval_steps_per_second": 4.549, |
|
"eval_wer": 0.2942016658374926, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 3.6391792489353465, |
|
"eval_loss": 0.3906669616699219, |
|
"eval_runtime": 155.4929, |
|
"eval_samples_per_second": 36.375, |
|
"eval_steps_per_second": 4.547, |
|
"eval_wer": 0.2923239877389225, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 3.658536585365854, |
|
"eval_loss": 0.39506247639656067, |
|
"eval_runtime": 155.5246, |
|
"eval_samples_per_second": 36.367, |
|
"eval_steps_per_second": 4.546, |
|
"eval_wer": 0.2903981640480814, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 3.6778939217963607, |
|
"grad_norm": 0.33715635538101196, |
|
"learning_rate": 1.5646153846153846e-05, |
|
"loss": 0.2659, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 3.6778939217963607, |
|
"eval_loss": 0.3892674744129181, |
|
"eval_runtime": 155.5533, |
|
"eval_samples_per_second": 36.361, |
|
"eval_steps_per_second": 4.545, |
|
"eval_wer": 0.29309431721525897, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 3.697251258226868, |
|
"eval_loss": 0.39077267050743103, |
|
"eval_runtime": 155.448, |
|
"eval_samples_per_second": 36.385, |
|
"eval_steps_per_second": 4.548, |
|
"eval_wer": 0.2900771934329412, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 3.7166085946573753, |
|
"eval_loss": 0.39407432079315186, |
|
"eval_runtime": 155.4696, |
|
"eval_samples_per_second": 36.38, |
|
"eval_steps_per_second": 4.548, |
|
"eval_wer": 0.2884241947649693, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 3.7359659310878826, |
|
"eval_loss": 0.3924821615219116, |
|
"eval_runtime": 155.4791, |
|
"eval_samples_per_second": 36.378, |
|
"eval_steps_per_second": 4.547, |
|
"eval_wer": 0.2890019418722216, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 3.7553232675183894, |
|
"eval_loss": 0.3916691243648529, |
|
"eval_runtime": 155.9516, |
|
"eval_samples_per_second": 36.268, |
|
"eval_steps_per_second": 4.533, |
|
"eval_wer": 0.2892908154258478, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 3.7746806039488967, |
|
"grad_norm": 0.4647356867790222, |
|
"learning_rate": 7.953846153846153e-06, |
|
"loss": 0.2488, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 3.7746806039488967, |
|
"eval_loss": 0.39043277502059937, |
|
"eval_runtime": 155.3552, |
|
"eval_samples_per_second": 36.407, |
|
"eval_steps_per_second": 4.551, |
|
"eval_wer": 0.2884562918264833, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 3.794037940379404, |
|
"eval_loss": 0.39014604687690735, |
|
"eval_runtime": 155.2137, |
|
"eval_samples_per_second": 36.44, |
|
"eval_steps_per_second": 4.555, |
|
"eval_wer": 0.2887933109723805, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 3.813395276809911, |
|
"eval_loss": 0.3883425295352936, |
|
"eval_runtime": 155.5369, |
|
"eval_samples_per_second": 36.364, |
|
"eval_steps_per_second": 4.546, |
|
"eval_wer": 0.28922662130281973, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 3.832752613240418, |
|
"eval_loss": 0.38913780450820923, |
|
"eval_runtime": 155.8958, |
|
"eval_samples_per_second": 36.281, |
|
"eval_steps_per_second": 4.535, |
|
"eval_wer": 0.28903403893373564, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 3.8521099496709255, |
|
"eval_loss": 0.3888201415538788, |
|
"eval_runtime": 155.6372, |
|
"eval_samples_per_second": 36.341, |
|
"eval_steps_per_second": 4.543, |
|
"eval_wer": 0.2888254080338945, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 3.8714672861014323, |
|
"grad_norm": 0.3741956055164337, |
|
"learning_rate": 2.615384615384615e-07, |
|
"loss": 0.2602, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 3.8714672861014323, |
|
"eval_loss": 0.38884833455085754, |
|
"eval_runtime": 155.0772, |
|
"eval_samples_per_second": 36.472, |
|
"eval_steps_per_second": 4.559, |
|
"eval_wer": 0.2884883888879973, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 3.8714672861014323, |
|
"step": 20000, |
|
"total_flos": 2.249387574100498e+19, |
|
"train_loss": 0.15996522521972656, |
|
"train_runtime": 19346.8732, |
|
"train_samples_per_second": 8.27, |
|
"train_steps_per_second": 1.034 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 20000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 400, |
|
"total_flos": 2.249387574100498e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|