rosyvs
/

benchmark_utils.py

Model card Files Files and versions Community

rosyvs committed on Jan 31

Commit

61c613f

•

1 Parent(s): 4f9e433

Upload https://huggingface.co/levicu/LEVI_whisper_medium.en/tree/main with huggingface_hub

Browse files

Files changed (1) hide show

https:/huggingface.co/levicu/LEVI_whisper_medium.en/tree/main +47 -0

https:/huggingface.co/levicu/LEVI_whisper_medium.en/tree/main ADDED Viewed

	@@ -0,0 +1,47 @@

+#%% imports
+import os
+from benchmark_utils import ASRmanifest, wer_from_csv
+#%% setup paths
+# Default configuration for the benchmark run: data locations, output location,
+# which test manifest to use and which model to evaluate.
+corpora_root = '/shared/corpora/forSAGA/' # root path where audio files are, inserted in place of $DATAROOT in manifest
+manif_root =  '/shared/corpora/forSAGA/data_manifests/' # path to dir containing data manifest csvs
+output_dir = './ASR_output/' # where to save ASR output
+manifest='LEVI_LoFi_v2_TEST_norm_wer_isat' # name of test manifest (without the '.csv' extension, which is appended later)
+model_name= 'LEVI_whisper_medium.en' # name of save directory of model you want to evaluate; also used to name the output csv
+# presumably the Hugging Face Hub organisation that hosts the model - confirm
+hf_org = 'levicu'
+# model identifier passed to ASRmanifest, built as '<hf_org>/<model_name>'
+model_path = f'{hf_org}/{model_name}'
+#%% setup paths for Rosy TESTING:
+# NOTE: running this cell reassigns corpora_root, manif_root, output_dir,
+# manifest, model_name and model_path, replacing the values set in the
+# previous cell. Skip this cell to keep the default configuration.
+corpora_root = '/shared/corpora/' # root path where audio files are, inserted in place of $DATAROOT in manifest
+manif_root =  '/shared/corpora/data_manifests/ASR/' # path to dir containing data manifest csvs
+output_dir = '/home/rosy/whisat-output/' # where to save ASR output
+manifest= 'LEVI_LoFi_v2_TEST_punc+cased' # name of test manifest
+model_name= 'LEVI_LoFi_v2_MediumEN_Lora_Int8' # name of save directory of model you want to evaluate
+# model_path is assigned twice below; only the second assignment is in effect
+# when the cell finishes. model_name is NOT updated to match, so the output
+# csv is still named after 'LEVI_LoFi_v2_MediumEN_Lora_Int8'.
+model_path='/shared/models/LEVI_LoFi_v2_MediumEN_Lora_Int8/final/'
+# NOTE(review): this id uses an underscore ('whisper_medium.en'); the checkpoint
+# on the Hugging Face Hub appears to be 'openai/whisper-medium.en' (hyphen).
+# Confirm how benchmark_utils resolves model_path before relying on this value.
+model_path='openai/whisper_medium.en'
+#%%
+# generate paths
+# Full path of the input manifest: '<manif_root>/<manifest>.csv'.
+manifest_csv=os.path.join(manif_root, f'{manifest}.csv')
+# Full path of the results file: '<output_dir>/<model_name>_on_<manifest>.csv'.
+# The name is built from model_name, not model_path, so keep the two in sync.
+out_csv=os.path.join(output_dir,f'{model_name}_on_{manifest}.csv')
+#%% Inference
+# Call ASRmanifest (imported from benchmark_utils) with the manifest, output
+# path, corpora root and model path configured above.
+# Presumably this transcribes the audio listed in manifest_csv with the model
+# at model_path and writes the results to out_csv - confirm in benchmark_utils.
+ASRmanifest(
+manifest_csv=manifest_csv,
+out_csv=out_csv,
+corpora_root=corpora_root,
+model_path=model_path,
+)
+#%% Evaluation
+# Report which results file and which model/manifest pair is being scored.
+print(f'reading results from {out_csv}')
+print(f'{model_name} on {manifest}')
+# Score the results file with wer_from_csv (imported from benchmark_utils).
+# 'transcript' is passed as the reference column and 'asr' as the hypothesis
+# column. Presumably returns word-error-rate measures and prints them when
+# printout=True; the 'levi' text normalisation is defined in benchmark_utils
+# - confirm there.
+wer_meas=wer_from_csv(
+    out_csv,
+    refcol='transcript',
+    hypcol='asr',
+    printout=True,
+    text_norm_method='levi'
+    )