File size: 1,666 Bytes
6d504a5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
#%% imports
import os
from benchmark_utils import ASRmanifest, wer_from_csv
#%% setup paths
# Root path where the audio files live; inserted in place of $DATAROOT in the manifest.
corpora_root = '/shared/corpora/forSAGA/'
# Directory containing the data manifest CSVs.
manif_root = '/shared/corpora/forSAGA/data_manifests/'
# Directory where the ASR output is saved.
output_dir = './ASR_output/'
# Name of the test manifest (the '.csv' extension is appended later in this script).
manifest = 'LEVI_LoFi_v2_TEST_norm_wer_isat'
# Name of the save directory of the model to evaluate.
model_name = 'LEVI_whisper_medium.en'
# Organisation prefix for the model identifier (presumably a Hugging Face org -- confirm).
hf_org = 'levicu'
# Model identifier in '<org>/<model name>' form.
model_path = '/'.join((hf_org, model_name))
#%% setup paths for Rosy TESTING:
# Root path where the audio files live; inserted in place of $DATAROOT in the manifest.
corpora_root = '/shared/corpora/'
# Directory containing the data manifest CSVs.
manif_root = '/shared/corpora/data_manifests/ASR/'
# Directory where the ASR output is saved.
output_dir = '/home/rosy/whisat-output/'
# Name of the test manifest (the '.csv' extension is appended later in this script).
manifest = 'LEVI_LoFi_v2_TEST_punc+cased'
# Name of the save directory of the model to evaluate; also used to name the output CSV.
model_name = 'LEVI_LoFi_v2_MediumEN_Lora_Int8'
# Local fine-tuned checkpoint. NOTE: this value is overridden by the assignment
# directly below, so it only takes effect if that line is removed or commented out.
model_path = '/shared/models/LEVI_LoFi_v2_MediumEN_Lora_Int8/final/'
# Baseline Whisper model. The Hugging Face Hub id is spelled with a hyphen
# ("whisper-medium.en"); the previous underscore spelling does not name a Hub repo.
# NOTE(review): model_name above still refers to the LoRA model, so the output CSV
# is labelled with that name even though this baseline is what runs -- confirm intent.
model_path = 'openai/whisper-medium.en'
#%%
# Derive the input manifest path and the output CSV path from the settings above.
# The output file is named '<model_name>_on_<manifest>.csv' inside output_dir.
manifest_csv = os.path.join(manif_root, manifest + '.csv')
out_csv = os.path.join(output_dir, model_name + '_on_' + manifest + '.csv')
#%% Inference
# Run the model at model_path over the manifest; presumably this writes the
# transcriptions to out_csv, which the evaluation step reads back -- confirm
# against benchmark_utils.ASRmanifest.
ASRmanifest(
    manifest_csv=manifest_csv,
    out_csv=out_csv,
    corpora_root=corpora_root,
    model_path=model_path,
)
#%% Evaluation
# Report which results file and which model/manifest pair are being scored.
print(f'reading results from {out_csv}')
print(f'{model_name} on {manifest}')
# Score the ASR output CSV. Presumably 'transcript' is the reference column and
# 'asr' the hypothesis column, with 'levi' selecting a project-specific text
# normalisation -- confirm against benchmark_utils.wer_from_csv.
wer_meas = wer_from_csv(
    out_csv,
    refcol='transcript',
    hypcol='asr',
    printout=True,
    text_norm_method='levi',
)
|