File size: 1,067 Bytes
286bce6
 
 
 
 
 
02b7b88
 
286bce6
02b7b88
 
286bce6
 
 
02b7b88
286bce6
 
 
 
 
02b7b88
 
 
 
286bce6
 
 
 
 
 
 
 
 
 
02b7b88
286bce6
02b7b88
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#%% imports
import os
from benchmark_utils import ASRmanifest, wer_from_csv


#%% setup paths
# '~' is expanded up front: neither os.path.join nor plain string substitution
# expands it, so everything built from these roots is an absolute path.
corpora_root = os.path.expanduser('~/corpora/')  # root path where audio files are, inserted in place of $DATAROOT in manifest
manif_root = os.path.expanduser('~/data_manifests/')  # path to dir containing data manifest csvs
output_dir = './ASR_output/'  # where to save ASR output
manifest = 'LEVI_LoFi_v2_TEST_punc+cased'  # name of test manifest
model_name = 'LEVI_whisper_medium.en'  # name of model you want to evaluate
hf_org = 'levicu'  # prefix joined with model_name below (presumably the Hugging Face org -- confirm)
model_path = f'{hf_org}/{model_name}'

#%% generate paths
# Input manifest: <manif_root>/<manifest>.csv
manifest_csv = os.path.join(manif_root, f'{manifest}.csv')
# Output file is named after both the model and the manifest it was run on.
out_csv = os.path.join(output_dir, f'{model_name}_on_{manifest}.csv')

#%% Inference
# Ensure the directory that will hold the output CSV exists before starting.
# NOTE(review): ASRmanifest may already create it; exist_ok makes this a no-op then.
_out_parent = os.path.dirname(out_csv)
if _out_parent:  # empty when out_csv is a bare filename; os.makedirs('') would raise
    os.makedirs(_out_parent, exist_ok=True)

# Run the model at model_path over the entries of manifest_csv.
# Presumably the results are written to out_csv, which the evaluation step
# reads back -- confirm against benchmark_utils.ASRmanifest.
ASRmanifest(
    manifest_csv=manifest_csv,
    out_csv=out_csv,
    corpora_root=corpora_root,
    model_path=model_path,
)

#%% Evaluation
# Announce which results file is being scored and for which model/manifest pair.
print(f'reading results from {out_csv}')
print(f'{model_name} on {manifest}')

# Scoring options passed to wer_from_csv; refcol/hypcol name the reference and
# hypothesis columns of the results CSV.
_wer_options = {
    'refcol': 'transcript',
    'hypcol': 'asr',
    'printout': True,
    'text_norm_method': 'levi',  # one of: 'whisper', 'levi', 'none'
}
wer_meas = wer_from_csv(out_csv, **_wer_options)