|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9357336430507162, |
|
"eval_steps": 100, |
|
"global_step": 10000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.019357336430507164, |
|
"eval_loss": 3.547485828399658, |
|
"eval_runtime": 144.8623, |
|
"eval_samples_per_second": 39.044, |
|
"eval_steps_per_second": 4.88, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03871467286101433, |
|
"eval_loss": 3.0259251594543457, |
|
"eval_runtime": 142.9174, |
|
"eval_samples_per_second": 39.575, |
|
"eval_steps_per_second": 4.947, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05807200929152149, |
|
"eval_loss": 3.0886833667755127, |
|
"eval_runtime": 141.7177, |
|
"eval_samples_per_second": 39.91, |
|
"eval_steps_per_second": 4.989, |
|
"eval_wer": 1.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07742934572202866, |
|
"eval_loss": 2.3821566104888916, |
|
"eval_runtime": 143.4279, |
|
"eval_samples_per_second": 39.434, |
|
"eval_steps_per_second": 4.929, |
|
"eval_wer": 0.9971915071175234, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.09678668215253582, |
|
"grad_norm": 3.033390760421753, |
|
"learning_rate": 0.0002982, |
|
"loss": 4.0938, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.09678668215253582, |
|
"eval_loss": 1.4546788930892944, |
|
"eval_runtime": 142.7727, |
|
"eval_samples_per_second": 39.615, |
|
"eval_steps_per_second": 4.952, |
|
"eval_wer": 0.9020076711977019, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11614401858304298, |
|
"eval_loss": 1.2602813243865967, |
|
"eval_runtime": 143.0991, |
|
"eval_samples_per_second": 39.525, |
|
"eval_steps_per_second": 4.941, |
|
"eval_wer": 0.8509733433904126, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.13550135501355012, |
|
"eval_loss": 1.0939536094665527, |
|
"eval_runtime": 145.4158, |
|
"eval_samples_per_second": 38.895, |
|
"eval_steps_per_second": 4.862, |
|
"eval_wer": 0.7654667715170677, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.1548586914440573, |
|
"eval_loss": 1.0704576969146729, |
|
"eval_runtime": 148.1277, |
|
"eval_samples_per_second": 38.183, |
|
"eval_steps_per_second": 4.773, |
|
"eval_wer": 0.7601547078364975, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.17421602787456447, |
|
"eval_loss": 0.9356458187103271, |
|
"eval_runtime": 143.6296, |
|
"eval_samples_per_second": 39.379, |
|
"eval_steps_per_second": 4.922, |
|
"eval_wer": 0.6972926128612925, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.19357336430507163, |
|
"grad_norm": 3.2989861965179443, |
|
"learning_rate": 0.0002843684210526315, |
|
"loss": 1.0597, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.19357336430507163, |
|
"eval_loss": 0.9103516936302185, |
|
"eval_runtime": 146.7237, |
|
"eval_samples_per_second": 38.549, |
|
"eval_steps_per_second": 4.819, |
|
"eval_wer": 0.6765579111232367, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2129307007355788, |
|
"eval_loss": 0.8879104256629944, |
|
"eval_runtime": 153.6385, |
|
"eval_samples_per_second": 36.814, |
|
"eval_steps_per_second": 4.602, |
|
"eval_wer": 0.6569947521304424, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.23228803716608595, |
|
"eval_loss": 0.8594633936882019, |
|
"eval_runtime": 147.9212, |
|
"eval_samples_per_second": 38.237, |
|
"eval_steps_per_second": 4.78, |
|
"eval_wer": 0.6611834186580219, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.2516453735965931, |
|
"eval_loss": 0.8351845145225525, |
|
"eval_runtime": 148.1861, |
|
"eval_samples_per_second": 38.168, |
|
"eval_steps_per_second": 4.771, |
|
"eval_wer": 0.6075331803373402, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.27100271002710025, |
|
"eval_loss": 0.791232168674469, |
|
"eval_runtime": 148.9749, |
|
"eval_samples_per_second": 37.966, |
|
"eval_steps_per_second": 4.746, |
|
"eval_wer": 0.6033124167482466, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.29036004645760743, |
|
"grad_norm": 6.474522113800049, |
|
"learning_rate": 0.000268578947368421, |
|
"loss": 0.8484, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.29036004645760743, |
|
"eval_loss": 0.7862286567687988, |
|
"eval_runtime": 146.7521, |
|
"eval_samples_per_second": 38.541, |
|
"eval_steps_per_second": 4.818, |
|
"eval_wer": 0.6067468023302467, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.3097173828881146, |
|
"eval_loss": 0.7790109515190125, |
|
"eval_runtime": 147.7062, |
|
"eval_samples_per_second": 38.292, |
|
"eval_steps_per_second": 4.787, |
|
"eval_wer": 0.6009051371346953, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.32907471931862176, |
|
"eval_loss": 0.7678210735321045, |
|
"eval_runtime": 148.3951, |
|
"eval_samples_per_second": 38.114, |
|
"eval_steps_per_second": 4.764, |
|
"eval_wer": 0.5629182648328546, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.34843205574912894, |
|
"eval_loss": 0.7514644861221313, |
|
"eval_runtime": 149.2674, |
|
"eval_samples_per_second": 37.892, |
|
"eval_steps_per_second": 4.736, |
|
"eval_wer": 0.5798655133122562, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.3677893921796361, |
|
"eval_loss": 0.7423551678657532, |
|
"eval_runtime": 149.3427, |
|
"eval_samples_per_second": 37.873, |
|
"eval_steps_per_second": 4.734, |
|
"eval_wer": 0.5859158094076488, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.38714672861014326, |
|
"grad_norm": 2.573913335800171, |
|
"learning_rate": 0.0002527894736842105, |
|
"loss": 0.764, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.38714672861014326, |
|
"eval_loss": 0.7129915356636047, |
|
"eval_runtime": 148.3711, |
|
"eval_samples_per_second": 38.121, |
|
"eval_steps_per_second": 4.765, |
|
"eval_wer": 0.5520855065718734, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.4065040650406504, |
|
"eval_loss": 0.7114368677139282, |
|
"eval_runtime": 148.2007, |
|
"eval_samples_per_second": 38.164, |
|
"eval_steps_per_second": 4.771, |
|
"eval_wer": 0.5407712923881819, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.4258614014711576, |
|
"eval_loss": 0.7228682637214661, |
|
"eval_runtime": 149.1432, |
|
"eval_samples_per_second": 37.923, |
|
"eval_steps_per_second": 4.74, |
|
"eval_wer": 0.5577024923368266, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.4452187379016647, |
|
"eval_loss": 0.6773180961608887, |
|
"eval_runtime": 154.221, |
|
"eval_samples_per_second": 36.675, |
|
"eval_steps_per_second": 4.584, |
|
"eval_wer": 0.5160084094301167, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.4645760743321719, |
|
"eval_loss": 0.6784498691558838, |
|
"eval_runtime": 149.2744, |
|
"eval_samples_per_second": 37.89, |
|
"eval_steps_per_second": 4.736, |
|
"eval_wer": 0.5177897963441447, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.48393341076267904, |
|
"grad_norm": 3.0869553089141846, |
|
"learning_rate": 0.000237, |
|
"loss": 0.6868, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.48393341076267904, |
|
"eval_loss": 0.672030508518219, |
|
"eval_runtime": 149.4453, |
|
"eval_samples_per_second": 37.847, |
|
"eval_steps_per_second": 4.731, |
|
"eval_wer": 0.5261831779300605, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.5032907471931862, |
|
"eval_loss": 0.6804332137107849, |
|
"eval_runtime": 151.0327, |
|
"eval_samples_per_second": 37.449, |
|
"eval_steps_per_second": 4.681, |
|
"eval_wer": 0.5336617932628268, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.5226480836236934, |
|
"eval_loss": 0.6598911285400391, |
|
"eval_runtime": 149.0299, |
|
"eval_samples_per_second": 37.952, |
|
"eval_steps_per_second": 4.744, |
|
"eval_wer": 0.5023832068174159, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.5420054200542005, |
|
"eval_loss": 0.6287100911140442, |
|
"eval_runtime": 149.9845, |
|
"eval_samples_per_second": 37.711, |
|
"eval_steps_per_second": 4.714, |
|
"eval_wer": 0.4902023719728459, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.5613627564847077, |
|
"eval_loss": 0.6304338574409485, |
|
"eval_runtime": 150.016, |
|
"eval_samples_per_second": 37.703, |
|
"eval_steps_per_second": 4.713, |
|
"eval_wer": 0.49471200911556545, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.5807200929152149, |
|
"grad_norm": 5.678714275360107, |
|
"learning_rate": 0.00022121052631578946, |
|
"loss": 0.6761, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.5807200929152149, |
|
"eval_loss": 0.6258472204208374, |
|
"eval_runtime": 149.8088, |
|
"eval_samples_per_second": 37.755, |
|
"eval_steps_per_second": 4.719, |
|
"eval_wer": 0.48513103625363097, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.6000774293457221, |
|
"eval_loss": 0.6310975551605225, |
|
"eval_runtime": 148.9286, |
|
"eval_samples_per_second": 37.978, |
|
"eval_steps_per_second": 4.747, |
|
"eval_wer": 0.4989809182969299, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.6194347657762292, |
|
"eval_loss": 0.6171565651893616, |
|
"eval_runtime": 148.6924, |
|
"eval_samples_per_second": 38.038, |
|
"eval_steps_per_second": 4.755, |
|
"eval_wer": 0.4901060807883038, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.6387921022067363, |
|
"eval_loss": 0.6187321543693542, |
|
"eval_runtime": 149.7679, |
|
"eval_samples_per_second": 37.765, |
|
"eval_steps_per_second": 4.721, |
|
"eval_wer": 0.46661103176004237, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.6581494386372435, |
|
"eval_loss": 0.6044796109199524, |
|
"eval_runtime": 149.5983, |
|
"eval_samples_per_second": 37.808, |
|
"eval_steps_per_second": 4.726, |
|
"eval_wer": 0.4725489881401358, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.6775067750677507, |
|
"grad_norm": 4.122500419616699, |
|
"learning_rate": 0.00020542105263157893, |
|
"loss": 0.6462, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.6775067750677507, |
|
"eval_loss": 0.5950499773025513, |
|
"eval_runtime": 148.2511, |
|
"eval_samples_per_second": 38.151, |
|
"eval_steps_per_second": 4.769, |
|
"eval_wer": 0.4716823674792573, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.6968641114982579, |
|
"eval_loss": 0.5902624726295471, |
|
"eval_runtime": 149.3094, |
|
"eval_samples_per_second": 37.881, |
|
"eval_steps_per_second": 4.735, |
|
"eval_wer": 0.4602237165187527, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.716221447928765, |
|
"eval_loss": 0.5864866375923157, |
|
"eval_runtime": 149.6434, |
|
"eval_samples_per_second": 37.797, |
|
"eval_steps_per_second": 4.725, |
|
"eval_wer": 0.47267737638619184, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.7355787843592722, |
|
"eval_loss": 0.5820363759994507, |
|
"eval_runtime": 148.886, |
|
"eval_samples_per_second": 37.989, |
|
"eval_steps_per_second": 4.749, |
|
"eval_wer": 0.459036125242734, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.7549361207897793, |
|
"eval_loss": 0.6025602221488953, |
|
"eval_runtime": 148.9627, |
|
"eval_samples_per_second": 37.969, |
|
"eval_steps_per_second": 4.746, |
|
"eval_wer": 0.48296448460143476, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.7742934572202865, |
|
"grad_norm": 5.146019458770752, |
|
"learning_rate": 0.0001896315789473684, |
|
"loss": 0.6193, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.7742934572202865, |
|
"eval_loss": 0.5807139277458191, |
|
"eval_runtime": 147.9966, |
|
"eval_samples_per_second": 38.217, |
|
"eval_steps_per_second": 4.777, |
|
"eval_wer": 0.44963168621912664, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.7936507936507936, |
|
"eval_loss": 0.5620962977409363, |
|
"eval_runtime": 148.8391, |
|
"eval_samples_per_second": 38.001, |
|
"eval_steps_per_second": 4.75, |
|
"eval_wer": 0.44857248318916404, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.8130081300813008, |
|
"eval_loss": 0.5730157494544983, |
|
"eval_runtime": 148.9808, |
|
"eval_samples_per_second": 37.965, |
|
"eval_steps_per_second": 4.746, |
|
"eval_wer": 0.4593410473271172, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.832365466511808, |
|
"eval_loss": 0.5592055916786194, |
|
"eval_runtime": 147.8897, |
|
"eval_samples_per_second": 38.245, |
|
"eval_steps_per_second": 4.781, |
|
"eval_wer": 0.43741875431304267, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.8517228029423152, |
|
"eval_loss": 0.5621338486671448, |
|
"eval_runtime": 148.7799, |
|
"eval_samples_per_second": 38.016, |
|
"eval_steps_per_second": 4.752, |
|
"eval_wer": 0.42387379435412686, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.8710801393728222, |
|
"grad_norm": 2.8218295574188232, |
|
"learning_rate": 0.0001738421052631579, |
|
"loss": 0.59, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.8710801393728222, |
|
"eval_loss": 0.545798659324646, |
|
"eval_runtime": 150.2397, |
|
"eval_samples_per_second": 37.647, |
|
"eval_steps_per_second": 4.706, |
|
"eval_wer": 0.4304055463722296, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.8904374758033294, |
|
"eval_loss": 0.5406409502029419, |
|
"eval_runtime": 148.931, |
|
"eval_samples_per_second": 37.977, |
|
"eval_steps_per_second": 4.747, |
|
"eval_wer": 0.4270674519747717, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.9097948122338366, |
|
"eval_loss": 0.5268651247024536, |
|
"eval_runtime": 148.7725, |
|
"eval_samples_per_second": 38.018, |
|
"eval_steps_per_second": 4.752, |
|
"eval_wer": 0.41315337580844474, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.9291521486643438, |
|
"eval_loss": 0.5362106561660767, |
|
"eval_runtime": 147.8165, |
|
"eval_samples_per_second": 38.264, |
|
"eval_steps_per_second": 4.783, |
|
"eval_wer": 0.4214665147405755, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.948509485094851, |
|
"eval_loss": 0.5226009488105774, |
|
"eval_runtime": 149.4387, |
|
"eval_samples_per_second": 37.848, |
|
"eval_steps_per_second": 4.731, |
|
"eval_wer": 0.41626679077530454, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.9678668215253581, |
|
"grad_norm": 7.241621017456055, |
|
"learning_rate": 0.00015808421052631577, |
|
"loss": 0.5636, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.9678668215253581, |
|
"eval_loss": 0.5297274589538574, |
|
"eval_runtime": 149.3726, |
|
"eval_samples_per_second": 37.865, |
|
"eval_steps_per_second": 4.733, |
|
"eval_wer": 0.4148384715379307, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.9872241579558653, |
|
"eval_loss": 0.5225785970687866, |
|
"eval_runtime": 149.3039, |
|
"eval_samples_per_second": 37.882, |
|
"eval_steps_per_second": 4.735, |
|
"eval_wer": 0.413634831731155, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.0065814943863725, |
|
"eval_loss": 0.5239331722259521, |
|
"eval_runtime": 149.3008, |
|
"eval_samples_per_second": 37.883, |
|
"eval_steps_per_second": 4.735, |
|
"eval_wer": 0.4054179839835663, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.0259388308168795, |
|
"eval_loss": 0.5382751226425171, |
|
"eval_runtime": 148.0837, |
|
"eval_samples_per_second": 38.195, |
|
"eval_steps_per_second": 4.774, |
|
"eval_wer": 0.4057871001909775, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.0452961672473868, |
|
"eval_loss": 0.5125272274017334, |
|
"eval_runtime": 149.1205, |
|
"eval_samples_per_second": 37.929, |
|
"eval_steps_per_second": 4.741, |
|
"eval_wer": 0.4067179149748841, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.064653503677894, |
|
"grad_norm": 1.124423623085022, |
|
"learning_rate": 0.00014232631578947366, |
|
"loss": 0.4924, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.064653503677894, |
|
"eval_loss": 0.5029215812683105, |
|
"eval_runtime": 147.7988, |
|
"eval_samples_per_second": 38.268, |
|
"eval_steps_per_second": 4.784, |
|
"eval_wer": 0.39533950666816453, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.084010840108401, |
|
"eval_loss": 0.505442202091217, |
|
"eval_runtime": 149.3815, |
|
"eval_samples_per_second": 37.863, |
|
"eval_steps_per_second": 4.733, |
|
"eval_wer": 0.3932050520774823, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.1033681765389083, |
|
"eval_loss": 0.4968744218349457, |
|
"eval_runtime": 149.7956, |
|
"eval_samples_per_second": 37.758, |
|
"eval_steps_per_second": 4.72, |
|
"eval_wer": 0.3894015502880711, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.1227255129694154, |
|
"eval_loss": 0.49354633688926697, |
|
"eval_runtime": 148.4196, |
|
"eval_samples_per_second": 38.108, |
|
"eval_steps_per_second": 4.764, |
|
"eval_wer": 0.38508449551443563, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.1420828493999227, |
|
"eval_loss": 0.49766939878463745, |
|
"eval_runtime": 149.5302, |
|
"eval_samples_per_second": 37.825, |
|
"eval_steps_per_second": 4.728, |
|
"eval_wer": 0.3816501099324357, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.1614401858304297, |
|
"grad_norm": 1.9677255153656006, |
|
"learning_rate": 0.00012653684210526316, |
|
"loss": 0.4602, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.1614401858304297, |
|
"eval_loss": 0.4862758219242096, |
|
"eval_runtime": 150.9135, |
|
"eval_samples_per_second": 37.478, |
|
"eval_steps_per_second": 4.685, |
|
"eval_wer": 0.387395483943445, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.1807975222609368, |
|
"eval_loss": 0.4906172454357147, |
|
"eval_runtime": 148.5353, |
|
"eval_samples_per_second": 38.078, |
|
"eval_steps_per_second": 4.76, |
|
"eval_wer": 0.3776700743046974, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.2001548586914441, |
|
"eval_loss": 0.4891129434108734, |
|
"eval_runtime": 149.6289, |
|
"eval_samples_per_second": 37.8, |
|
"eval_steps_per_second": 4.725, |
|
"eval_wer": 0.3763861918441367, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.2195121951219512, |
|
"eval_loss": 0.488125741481781, |
|
"eval_runtime": 148.288, |
|
"eval_samples_per_second": 38.142, |
|
"eval_steps_per_second": 4.768, |
|
"eval_wer": 0.3800934024490058, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.2388695315524583, |
|
"eval_loss": 0.48135778307914734, |
|
"eval_runtime": 147.6048, |
|
"eval_samples_per_second": 38.319, |
|
"eval_steps_per_second": 4.79, |
|
"eval_wer": 0.37266293270851053, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.2582268679829656, |
|
"grad_norm": 1.2907174825668335, |
|
"learning_rate": 0.00011074736842105263, |
|
"loss": 0.4407, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.2582268679829656, |
|
"eval_loss": 0.47142112255096436, |
|
"eval_runtime": 147.9064, |
|
"eval_samples_per_second": 38.24, |
|
"eval_steps_per_second": 4.78, |
|
"eval_wer": 0.37723676397425815, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.2775842044134726, |
|
"eval_loss": 0.47389352321624756, |
|
"eval_runtime": 146.8545, |
|
"eval_samples_per_second": 38.514, |
|
"eval_steps_per_second": 4.814, |
|
"eval_wer": 0.3705605751793423, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.29694154084398, |
|
"eval_loss": 0.4691925644874573, |
|
"eval_runtime": 146.6568, |
|
"eval_samples_per_second": 38.566, |
|
"eval_steps_per_second": 4.821, |
|
"eval_wer": 0.3713790502479498, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.316298877274487, |
|
"eval_loss": 0.4672953486442566, |
|
"eval_runtime": 146.8165, |
|
"eval_samples_per_second": 38.524, |
|
"eval_steps_per_second": 4.816, |
|
"eval_wer": 0.3728073694853236, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.3356562137049943, |
|
"eval_loss": 0.46098417043685913, |
|
"eval_runtime": 147.3051, |
|
"eval_samples_per_second": 38.397, |
|
"eval_steps_per_second": 4.8, |
|
"eval_wer": 0.36780022788913674, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.3550135501355014, |
|
"grad_norm": 0.8472552299499512, |
|
"learning_rate": 9.49578947368421e-05, |
|
"loss": 0.4284, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.3550135501355014, |
|
"eval_loss": 0.47299668192863464, |
|
"eval_runtime": 151.323, |
|
"eval_samples_per_second": 37.377, |
|
"eval_steps_per_second": 4.672, |
|
"eval_wer": 0.36531270562180035, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.3743708865660085, |
|
"eval_loss": 0.46056076884269714, |
|
"eval_runtime": 146.0139, |
|
"eval_samples_per_second": 38.736, |
|
"eval_steps_per_second": 4.842, |
|
"eval_wer": 0.36399672609972555, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.3937282229965158, |
|
"eval_loss": 0.4571812152862549, |
|
"eval_runtime": 146.7792, |
|
"eval_samples_per_second": 38.534, |
|
"eval_steps_per_second": 4.817, |
|
"eval_wer": 0.3620067082858564, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.4130855594270229, |
|
"eval_loss": 0.45746785402297974, |
|
"eval_runtime": 146.8097, |
|
"eval_samples_per_second": 38.526, |
|
"eval_steps_per_second": 4.816, |
|
"eval_wer": 0.362969620131277, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.43244289585753, |
|
"eval_loss": 0.45778077840805054, |
|
"eval_runtime": 146.9433, |
|
"eval_samples_per_second": 38.491, |
|
"eval_steps_per_second": 4.811, |
|
"eval_wer": 0.3590216815650527, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.4518002322880372, |
|
"grad_norm": 0.9635696411132812, |
|
"learning_rate": 7.916842105263156e-05, |
|
"loss": 0.4299, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.4518002322880372, |
|
"eval_loss": 0.4477390646934509, |
|
"eval_runtime": 146.7454, |
|
"eval_samples_per_second": 38.543, |
|
"eval_steps_per_second": 4.818, |
|
"eval_wer": 0.3569193240358845, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.4711575687185443, |
|
"eval_loss": 0.4441732168197632, |
|
"eval_runtime": 147.4263, |
|
"eval_samples_per_second": 38.365, |
|
"eval_steps_per_second": 4.796, |
|
"eval_wer": 0.3551700341833705, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.4905149051490514, |
|
"eval_loss": 0.4420062303543091, |
|
"eval_runtime": 146.725, |
|
"eval_samples_per_second": 38.548, |
|
"eval_steps_per_second": 4.819, |
|
"eval_wer": 0.3546083356068752, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.5098722415795587, |
|
"eval_loss": 0.4436999559402466, |
|
"eval_runtime": 145.7818, |
|
"eval_samples_per_second": 38.798, |
|
"eval_steps_per_second": 4.85, |
|
"eval_wer": 0.3482531174270995, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.5292295780100658, |
|
"eval_loss": 0.43728071451187134, |
|
"eval_runtime": 146.721, |
|
"eval_samples_per_second": 38.549, |
|
"eval_steps_per_second": 4.819, |
|
"eval_wer": 0.3485740880422397, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.5485869144405728, |
|
"grad_norm": 1.1358890533447266, |
|
"learning_rate": 6.341052631578946e-05, |
|
"loss": 0.408, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.5485869144405728, |
|
"eval_loss": 0.4335756301879883, |
|
"eval_runtime": 146.7599, |
|
"eval_samples_per_second": 38.539, |
|
"eval_steps_per_second": 4.817, |
|
"eval_wer": 0.3464075363900435, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.5679442508710801, |
|
"eval_loss": 0.4347936511039734, |
|
"eval_runtime": 146.7423, |
|
"eval_samples_per_second": 38.544, |
|
"eval_steps_per_second": 4.818, |
|
"eval_wer": 0.34475453772207154, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.5873015873015874, |
|
"eval_loss": 0.42762240767478943, |
|
"eval_runtime": 151.2432, |
|
"eval_samples_per_second": 37.397, |
|
"eval_steps_per_second": 4.675, |
|
"eval_wer": 0.34180160806278187, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.6066589237320945, |
|
"eval_loss": 0.42939648032188416, |
|
"eval_runtime": 146.0228, |
|
"eval_samples_per_second": 38.734, |
|
"eval_steps_per_second": 4.842, |
|
"eval_wer": 0.3399078814334548, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.6260162601626016, |
|
"eval_loss": 0.42716294527053833, |
|
"eval_runtime": 145.901, |
|
"eval_samples_per_second": 38.766, |
|
"eval_steps_per_second": 4.846, |
|
"eval_wer": 0.3387523872189501, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.645373596593109, |
|
"grad_norm": 1.037522792816162, |
|
"learning_rate": 4.762105263157894e-05, |
|
"loss": 0.3964, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.645373596593109, |
|
"eval_loss": 0.4310940206050873, |
|
"eval_runtime": 145.7507, |
|
"eval_samples_per_second": 38.806, |
|
"eval_steps_per_second": 4.851, |
|
"eval_wer": 0.3408707932788753, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.664730933023616, |
|
"eval_loss": 0.4260464608669281, |
|
"eval_runtime": 146.3966, |
|
"eval_samples_per_second": 38.635, |
|
"eval_steps_per_second": 4.829, |
|
"eval_wer": 0.3381264945194267, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.684088269454123, |
|
"eval_loss": 0.4260489046573639, |
|
"eval_runtime": 146.6331, |
|
"eval_samples_per_second": 38.572, |
|
"eval_steps_per_second": 4.822, |
|
"eval_wer": 0.3370672914894641, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.7034456058846303, |
|
"eval_loss": 0.4259546101093292, |
|
"eval_runtime": 146.1762, |
|
"eval_samples_per_second": 38.693, |
|
"eval_steps_per_second": 4.837, |
|
"eval_wer": 0.33636115613615575, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.7228029423151374, |
|
"eval_loss": 0.42149877548217773, |
|
"eval_runtime": 147.5316, |
|
"eval_samples_per_second": 38.338, |
|
"eval_steps_per_second": 4.792, |
|
"eval_wer": 0.335109370737109, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.7421602787456445, |
|
"grad_norm": 1.4968059062957764, |
|
"learning_rate": 3.186315789473684e-05, |
|
"loss": 0.3866, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.7421602787456445, |
|
"eval_loss": 0.4234353303909302, |
|
"eval_runtime": 146.8779, |
|
"eval_samples_per_second": 38.508, |
|
"eval_steps_per_second": 4.814, |
|
"eval_wer": 0.3330391102694548, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.7615176151761518, |
|
"eval_loss": 0.4210032522678375, |
|
"eval_runtime": 146.0169, |
|
"eval_samples_per_second": 38.735, |
|
"eval_steps_per_second": 4.842, |
|
"eval_wer": 0.3318515189934362, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 1.7808749516066589, |
|
"eval_loss": 0.41560646891593933, |
|
"eval_runtime": 145.9957, |
|
"eval_samples_per_second": 38.741, |
|
"eval_steps_per_second": 4.843, |
|
"eval_wer": 0.3300540835486511, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.800232288037166, |
|
"eval_loss": 0.41584905982017517, |
|
"eval_runtime": 147.0182, |
|
"eval_samples_per_second": 38.471, |
|
"eval_steps_per_second": 4.809, |
|
"eval_wer": 0.33032690857152025, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.8195896244676733, |
|
"eval_loss": 0.41545388102531433, |
|
"eval_runtime": 147.1819, |
|
"eval_samples_per_second": 38.429, |
|
"eval_steps_per_second": 4.804, |
|
"eval_wer": 0.32944423937988476, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.8389469608981805, |
|
"grad_norm": 0.9967782497406006, |
|
"learning_rate": 1.6073684210526313e-05, |
|
"loss": 0.37, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.8389469608981805, |
|
"eval_loss": 0.41372692584991455, |
|
"eval_runtime": 146.2893, |
|
"eval_samples_per_second": 38.663, |
|
"eval_steps_per_second": 4.833, |
|
"eval_wer": 0.32921955994928664, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.8583042973286876, |
|
"eval_loss": 0.4120025932788849, |
|
"eval_runtime": 146.1391, |
|
"eval_samples_per_second": 38.703, |
|
"eval_steps_per_second": 4.838, |
|
"eval_wer": 0.3284492304729502, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.8776616337591947, |
|
"eval_loss": 0.4108966886997223, |
|
"eval_runtime": 146.9334, |
|
"eval_samples_per_second": 38.494, |
|
"eval_steps_per_second": 4.812, |
|
"eval_wer": 0.3300701320794081, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 1.897018970189702, |
|
"eval_loss": 0.4100329577922821, |
|
"eval_runtime": 146.8452, |
|
"eval_samples_per_second": 38.517, |
|
"eval_steps_per_second": 4.815, |
|
"eval_wer": 0.32785543483494084, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.916376306620209, |
|
"eval_loss": 0.4094770848751068, |
|
"eval_runtime": 146.5252, |
|
"eval_samples_per_second": 38.601, |
|
"eval_steps_per_second": 4.825, |
|
"eval_wer": 0.3266999406204362, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 1.9357336430507162, |
|
"grad_norm": 0.7779282927513123, |
|
"learning_rate": 2.842105263157894e-07, |
|
"loss": 0.371, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.9357336430507162, |
|
"eval_loss": 0.409473717212677, |
|
"eval_runtime": 146.0247, |
|
"eval_samples_per_second": 38.733, |
|
"eval_steps_per_second": 4.842, |
|
"eval_wer": 0.3270690568278474, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.9357336430507162, |
|
"step": 10000, |
|
"total_flos": 1.1255918428180738e+19, |
|
"train_loss": 0.7365739318847656, |
|
"train_runtime": 19473.5524, |
|
"train_samples_per_second": 4.108, |
|
"train_steps_per_second": 0.514 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 10000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 400, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.1255918428180738e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|