test / app.py
mskov's picture
Update app.py
e4a4e02
raw
history blame
940 Bytes
import transformers
from transformers import pipeline
import gradio as gr
import os
import sys
# Best-effort install of the packages imported just below. Going through
# `sys.executable -m pip` targets the interpreter that is running this script
# (a bare `pip` on PATH may belong to a different Python), and the argument
# list avoids spawning a shell.
import subprocess

for _package in ("evaluate", "datasets"):
    _completed = subprocess.run(
        [sys.executable, "-m", "pip", "install", _package],
        check=False,
    )
    if _completed.returncode != 0:
        # Do not abort here: the package may already be importable. If it is
        # genuinely missing, the import that follows raises ImportError.
        print(
            f"warning: 'pip install {_package}' exited with status "
            f"{_completed.returncode}",
            file=sys.stderr,
        )
from evaluate import evaluator
from datasets import load_dataset
# Speech-recognition pipeline used by the live demo's `transcribe` callback.
# No model is named here, so the library's default checkpoint for the task is
# used.
p = pipeline("automatic-speech-recognition")

# One-off evaluation at startup: score a fine-tuned checkpoint with word error
# rate on the first 40 rows of the dataset's test split and print the result.
task_evaluator = evaluator("automatic-speech-recognition")
data = load_dataset("mskov/miso_test", "en", split="test[:40]")
results = task_evaluator.compute(
    # Hub models are addressed by repo id ("owner/name"); the web page URL
    # (https://huggingface.co/...) is not a loadable model identifier.
    model_or_pipeline="mskov/whisper_miso",
    data=data,
    input_column="path",
    # NOTE(review): WER compares predictions against reference transcripts;
    # confirm that "category" is the column holding them in this dataset.
    label_column="category",
    metric="wer",
)
print(results)
def transcribe(audio, state=""):
    """Transcribe one audio chunk and append it to the running transcript.

    Args:
        audio: Value delivered by the audio input, handed unchanged to the
            module-level pipeline ``p``. ``None`` means there is nothing to
            transcribe (presumably sent when the recording is stopped or
            cleared -- confirm against the UI framework's behavior).
        state: Transcript accumulated by earlier calls. ``None`` is treated
            as an empty transcript.

    Returns:
        A 2-tuple containing the updated transcript twice: once for the
        visible text output and once for the state carried into the next
        call.
    """
    if state is None:
        state = ""
    if audio is None:
        # Nothing new to transcribe; keep what has been recognized so far
        # instead of letting the pipeline raise on a missing input.
        return state, state
    text = p(audio)["text"]
    state += text + " "
    return state, state
# Build and start the live demo: streamed microphone audio goes to
# `transcribe`, whose two return values feed a text box and the session state
# that is passed back in on the next call.
mic_input = gr.Audio(source="microphone", type="filepath", streaming=True)
demo_inputs = [mic_input, "state"]
demo_outputs = ["textbox", "state"]

demo = gr.Interface(
    fn=transcribe,
    inputs=demo_inputs,
    outputs=demo_outputs,
    live=True,
)
demo.launch()