levicu
/

LEVI_whisper_medium.en

Automatic Speech Recognition

Inference Endpoints

Model card Files Files and versions Community

rosyvs committed on Feb 14

Commit

d2b43f3

•

1 Parent(s): 8a14c0d

Remove testing paths

Files changed (1) hide show

LEVI_whisper_benchmark.py +11 -19

LEVI_whisper_benchmark.py CHANGED Viewed

@@ -4,33 +4,24 @@ from benchmark_utils import ASRmanifest, wer_from_csv
 #%% setup paths
-corpora_root = '/shared/corpora/forSAGA/' # root path where audio files are, inserted in palce of $DATAROOT in manifest
-manif_root =  '/shared/corpora/forSAGA/data_manifests/' # path to dir containing data manifest csvs
 output_dir = './ASR_output/' # where to save ASR output
-manifest='LEVI_LoFi_v2_TEST_norm_wer_isat' # name of test manifest
-model_name= 'LEVI_whisper_medium.en' # name of save directory of model you want to evaluate
 hf_org = 'levicu'
 model_path = f'{hf_org}/{model_name}'
-#%% setup paths for Rosy TESTING:
-corpora_root = '/shared/corpora/' # root path where audio files are, inserted in palce of $DATAROOT in manifest
-manif_root =  '/shared/corpora/data_manifests/ASR/' # path to dir containing data manifest csvs
-output_dir = '/home/rosy/whisat-output/' # where to save ASR output
-manifest= 'LEVI_LoFi_v2_TEST_punc+cased' # name of test manifest
-model_name= 'LEVI_LoFi_v2_MediumEN_Lora_Int8' # name of save directory of model you want to evaluate
-model_path='/shared/models/LEVI_LoFi_v2_MediumEN_Lora_Int8/final/'
-model_path='openai/whisper_medium.en'
-#%%
-# generate paths
 manifest_csv=os.path.join(manif_root, f'{manifest}.csv')
 out_csv=os.path.join(output_dir,f'{model_name}_on_{manifest}.csv')
 #%% Inference
 ASRmanifest(
-manifest_csv=manifest_csv,
-out_csv=out_csv,
-corpora_root=corpora_root,
-model_path=model_path,
 )
 #%% Evaluation
@@ -41,7 +32,8 @@ wer_meas=wer_from_csv(
     refcol='transcript',
     hypcol='asr',
     printout=True,
-    text_norm_method='levi'
     )

 #%% setup paths
+corpora_root = '~/corpora/forSAGA/' # root path where audio files are, inserted in place of $DATAROOT in manifest
+manif_root =  '~/corpora/forSAGA/data_manifests/' # path to dir containing data manifest csvs
 output_dir = './ASR_output/' # where to save ASR output
+manifest='LEVI_LoFi_v2_TEST_punc+cased' # name of test manifest
+model_name= 'LEVI_whisper_medium.en' # name of model you want to evaluate
 hf_org = 'levicu'
 model_path = f'{hf_org}/{model_name}'
+#%% generate paths
 manifest_csv=os.path.join(manif_root, f'{manifest}.csv')
 out_csv=os.path.join(output_dir,f'{model_name}_on_{manifest}.csv')
 #%% Inference
 ASRmanifest(
+    manifest_csv=manifest_csv,
+    out_csv=out_csv,
+    corpora_root=corpora_root,
+    model_path=model_path,
 )
 #%% Evaluation
     refcol='transcript',
     hypcol='asr',
     printout=True,
+    text_norm_method='levi' # 'whisper','levi','none'
     )