# Captured from a Hugging Face Space page; its status banner at capture time read "Runtime error".
from pprint import pformat
from typing import Optional, Tuple

import gradio as gr
import librosa
from huggingface_hub import hf_hub_download

from pipeline import PreTrainedPipeline
# Hugging Face Hub repository id. It is used twice below: as the source of the
# language-model file and as the model path handed to the pipeline.
HF_HUB_URL = 'ales/wav2vec2-cv-be'

# Location of the language-model binary inside that repository.
LM_HUB_FP = 'language_model/cv8be_5gram.bin'

# Sampling rate (Hz) at which audio is loaded before recognition: 16 kHz.
MODEL_SAMPLING_RATE = 16_000

# Fetch the language model from the Hub at import time; `lm_fp` is whatever
# `hf_hub_download` returns (per huggingface_hub docs, a local file path).
lm_fp = hf_hub_download(
    filename=LM_HUB_FP,
    repo_id=HF_HUB_URL,
)

# Build the speech-recognition pipeline once, at module import, so every
# request handled by the app reuses the same instance.
pipeline = PreTrainedPipeline(
    language_model_fp=lm_fp,
    model_path=HF_HUB_URL,
)
def main(
    recorded_audio_fp: Optional[str],
    uploaded_audio_fp: Optional[str],
) -> Tuple[str, str]:
    """Recognize speech in one audio file and describe how it was processed.

    When both files are supplied, the recorded one takes precedence.

    Args:
        recorded_audio_fp: Path to the audio recorded in the browser, or
            ``None`` when nothing was recorded.
        uploaded_audio_fp: Path to the uploaded audio file, or ``None`` when
            nothing was uploaded.

    Returns:
        A ``(text, tech_data_str)`` pair: the recognized text and a
        pretty-printed dump of the remaining pipeline output together with
        diagnostics about the input. When neither file is supplied, a pair of
        error messages (Belarusian, English) is returned instead and the
        pipeline is not run.
    """
    if recorded_audio_fp is not None:
        audio_fp, used_audiofile = recorded_audio_fp, 'recorded'
    elif uploaded_audio_fp is not None:
        audio_fp, used_audiofile = uploaded_audio_fp, 'uploaded'
    else:
        return (
            'Памылка! Вы мусіце альбо запісаць, альбо запампаваць аўдыяфайл.',
            'Error! You have to either record or upload an audiofile.',
        )

    # read audio file; librosa.load returns a tuple and only its first
    # element (the samples) is used here
    inputs = librosa.load(audio_fp, sr=MODEL_SAMPLING_RATE, mono=True)[0]

    # recognize speech
    pipeline_res = pipeline(inputs=inputs)
    text = pipeline_res['text'][0]  # unpack batch of size 1

    # Collect the technical information in a fresh dict instead of aliasing
    # `pipeline_res`, so the object returned by the pipeline is left untouched.
    # NOTE(review): assumes the pipeline returns a dict-like mapping — confirm.
    tech_data = {
        key: value for key, value in pipeline_res.items() if key != 'text'
    }
    tech_data.update({
        'used_audiofile': used_audiofile,
        'recorded_file_present': recorded_audio_fp is not None,
        'uploaded_file_present': uploaded_audio_fp is not None,
        'audiofile_path': audio_fp,
        'model_sampling_rate': MODEL_SAMPLING_RATE,
        'inputs_shape': inputs.shape,
        'inputs_max': inputs.max().item(),
        'inputs_min': inputs.min().item(),
    })

    return text, pformat(tech_data)
# Markdown passed to the interface as its `article`: a link to the model
# repository and a page-visit badge.
article = """
The model used can be found here: [ales/wav2vec2-cv-be](https://huggingface.co/ales/wav2vec2-cv-be)
![Page Visits](https://visitor-badge.glitch.me/badge?page_id=huggingface.co/spaces/ales/wav2vec2-cv-be-lm&left_color=darkgray&right_color=crimson&left_text=Page%20Visits)
"""

# NOTE(review): `gr.inputs` / `gr.outputs`, the `optional=` argument and
# `launch(enable_queue=...)` look like legacy Gradio API — confirm they are
# still supported by the Gradio version this app is deployed with.

# Two optional audio sources, both delivered as file paths. Their order in the
# `inputs` list below matches the parameter order of `main`.
_recorded_audio_input = gr.inputs.Audio(
    source='microphone',
    type='filepath',
    label='Запішыце аўдыяфайл, каб распазнаць маўленьне',
    optional=True,
)
_uploaded_audio_input = gr.inputs.Audio(
    source='upload',
    type='filepath',
    label='Альбо загрузіце ўжо запісаны аўдыяфайл сюды',
    optional=True,
)

# Two text boxes, matching the two strings returned by `main`.
_recognized_text_output = gr.outputs.Textbox(type='str', label='Распазнаны тэкст')
_tech_info_output = gr.outputs.Textbox(type='str', label='Тэхнічная інфармацыя')

iface = gr.Interface(
    fn=main,
    inputs=[_recorded_audio_input, _uploaded_audio_input],
    outputs=[_recognized_text_output, _tech_info_output],
    title='wav2vec2 fine-tuned on CommonVoice 8 Be + Language Model',
    description=(
        'Мадэль распазнаваньня беларускага маўленьня, навучаная на датсэце Common Voice 8.\n'
        'Акустычная мадэль + моўная мадэль.'
    ),
    article=article,
)

# Start the web app with request queueing enabled.
iface.launch(enable_queue=True)