comodoro's picture
Simplest speech to text example
07d71a1
raw
history blame
1.05 kB
from transformers import pipeline, AutoFeatureExtractor, AutoTokenizer, Wav2Vec2ForCTC
import gradio as gr
import time
# Checkpoint identifier shared by every component loaded below.
model_id = 'comodoro/wav2vec2-xls-r-300m-cs-250'

# Load the tokenizer, feature extractor and CTC model from the same
# checkpoint so they can be passed to the pipeline explicitly.
tokenizer = AutoTokenizer.from_pretrained(model_id)
feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)
model = Wav2Vec2ForCTC.from_pretrained(model_id)

# Speech-recognition pipeline used by transcribe(); chunk_length_s=5 is
# forwarded to the pipeline unchanged.
p = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=tokenizer,
    feature_extractor=feature_extractor,
    chunk_length_s=5,
)
def transcribe(audio, state: str = "") -> str:
    """Transcribe one recording and append the text to the transcript.

    Args:
        audio: Input handed to the module-level speech-recognition pipeline
            ``p``. The UI in this file is configured to supply a file path;
            ``None`` is treated as "nothing was recorded".
        state: Transcript accumulated so far.

    Returns:
        ``state`` followed by the newly recognised text and a trailing
        space, or ``state`` unchanged when ``audio`` is ``None``.
    """
    # Clicking the button before recording anything delivers no audio;
    # return early instead of sleeping and then failing inside the pipeline.
    if audio is None:
        return state

    # NOTE(review): fixed 2 s pause kept from the original; presumably it
    # lets the recording finish being written before it is read -- confirm.
    time.sleep(2)
    text = p(audio)["text"]
    return state + text + " "
# Page layout: a microphone recorder, a trigger button and a text box
# that receives the transcription.
with gr.Blocks() as blocks:
    audio = gr.Audio(
        source="microphone",
        type="filepath",
        # The label doubles as the on-page usage instructions.
        label=(
            'Pokud je to třeba, povolte mikrofon pro tuto stránku, '
            'klikněte na Record from microphone, po dokončení nahrávání '
            'na Stop recording a poté na Rozpoznat'
        ),
    )
    btn = gr.Button('Rozpoznat')
    output = gr.Textbox(show_label=False)

    # Clicking the button passes the recording to transcribe() and writes
    # the returned text into the textbox.
    btn.click(fn=transcribe, inputs=[audio], outputs=[output])

blocks.launch(enable_queue=True, debug=True)