levicu
/

LEVI_whisper_medium

Automatic Speech Recognition

Inference Endpoints

Model card Files Files and versions Community

LEVI_whisper_medium / LEVI_whisper_benchmark.py

rosyvs's picture

update paths

02b7b88 verified 9 months ago

1.07 kB

	#%% imports
	import os
	from benchmark_utils import ASRmanifest, wer_from_csv


	#%% setup paths
	# User-editable configuration for the benchmark run.
	# `~` is expanded explicitly: os.path.join and plain file opens treat it as
	# a literal directory name, so an unexpanded path can silently point nowhere.
	corpora_root = os.path.expanduser('~/corpora/')  # root path where audio files are, inserted in place of $DATAROOT in manifest
	manif_root = os.path.expanduser('~/data_manifests/')  # path to dir containing data manifest csvs
	output_dir = './ASR_output/'  # where to save ASR output
	manifest = 'LEVI_LoFi_v2_TEST_punc+cased'  # name of test manifest
	# NOTE(review): the hosting repo appears to be named 'LEVI_whisper_medium'
	# (no '.en' suffix) -- confirm that this model name resolves on the Hub.
	model_name = 'LEVI_whisper_medium.en'  # name of model you want to evaluate
	hf_org = 'levicu'  # Hugging Face organisation that owns the model
	model_path = f'{hf_org}/{model_name}'  # '<org>/<model>' identifier handed to ASRmanifest

	#%% generate paths
	# Input manifest: <manif_root>/<manifest>.csv
	manifest_csv = os.path.join(manif_root, f'{manifest}.csv')
	# Output file: <output_dir>/<model_name>_on_<manifest>.csv
	out_csv = os.path.join(output_dir, f'{model_name}_on_{manifest}.csv')
	# Make sure the output directory exists before the (potentially long)
	# inference step, so a missing directory cannot cause a failure at write time.
	os.makedirs(output_dir, exist_ok=True)

	#%% Inference
	# Run the model over the manifest. Presumably ASRmanifest writes its
	# hypotheses to out_csv, since the evaluation step reads that file next --
	# confirm against benchmark_utils.
	ASRmanifest(
		manifest_csv=manifest_csv,
		out_csv=out_csv,
		corpora_root=corpora_root,
		model_path=model_path,
	)

	#%% Evaluation
	# Report which results file is being scored and for which model/manifest pair.
	print(
		f'reading results from {out_csv}',
		f'{model_name} on {manifest}',
		sep='\n',
	)
	# Score the 'asr' column (hypothesis) against the 'transcript' column
	# (reference) of the results CSV; printout=True asks the helper to print
	# its report. Presumably the measure is word error rate -- see benchmark_utils.
	wer_meas = wer_from_csv(
		out_csv,
		refcol='transcript',
		hypcol='asr',
		printout=True,
		text_norm_method='levi',  # 'whisper','levi','none'
	)