File size: 972 Bytes
0b5b7f4 973bb39 de07127 a8d3864 0b5b7f4 973bb39 74f5766 e7380d9 e4a4e02 f903e58 e4a4e02 74f5766 973bb39 74f5766 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
import transformers
from transformers import pipeline
import gradio as gr
import os
import sys
import subprocess

# Install the runtime-only dependencies before they are imported below.
# Running pip as a module of the current interpreter guarantees the packages
# land in the same environment this script imports from, and the list-form
# argv avoids going through a shell.
# NOTE(review): "spicy" looks like a typo for "scipy" -- confirm before changing.
for _package in ("evaluate", "datasets", "spicy"):
    _completed = subprocess.run(
        [sys.executable, "-m", "pip", "install", _package],
        check=False,  # stay best-effort, like the original os.system calls
    )
    if _completed.returncode != 0:
        # Surface the failure instead of silently continuing to a confusing
        # ImportError further down.
        print(
            f"warning: 'pip install {_package}' exited with code "
            f"{_completed.returncode}",
            file=sys.stderr,
        )
from evaluate import evaluator
from datasets import load_dataset
# Speech-recognition pipeline used by transcribe() for the live demo.
# NOTE(review): this loads the library's default ASR checkpoint, while the
# evaluation below scores "mskov/whisper_miso" -- confirm that is intended.
p = pipeline("automatic-speech-recognition")

# Score the fine-tuned model on the first 40 test examples using word error
# rate, and print the resulting metrics.
task_evaluator = evaluator("automatic-speech-recognition")
data = load_dataset("mskov/miso_test", "en", split="test[:40]")
results = task_evaluator.compute(
    # A string model is resolved as a Hub repo id ("user/model"); a full
    # https:// URL to the model page is not a valid identifier.
    model_or_pipeline="mskov/whisper_miso",
    data=data,
    input_column="audio",
    # NOTE(review): WER compares predictions against reference transcripts;
    # verify that "category" is the column holding them in this dataset.
    label_column="category",
    metric="wer",
)
print(results)
def transcribe(audio, state=""):
    """Transcribe one audio chunk and append it to the running transcript.

    Args:
        audio: Path to the recorded audio chunk (the interface's Audio input
            is configured with ``type="filepath"``). ``None`` is accepted and
            treated as "nothing to transcribe".
        state: Transcript accumulated by previous calls. ``None`` is treated
            as an empty transcript.

    Returns:
        A ``(transcript, transcript)`` tuple: the same updated text is
        returned twice, once for the textbox output and once for the state
        output.
    """
    if state is None:
        state = ""
    if audio is None:
        # No audio to process (e.g. the recording was cleared); keep the
        # transcript unchanged instead of failing inside the pipeline.
        return state, state
    text = p(audio)["text"]
    state += text + " "
    return state, state
# Live demo: microphone audio is streamed to transcribe() as file paths, and
# the "state" input/output pair carries the accumulated transcript from one
# call to the next while the textbox shows it.
demo = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(source="microphone", type="filepath", streaming=True),
        "state",
    ],
    outputs=["textbox", "state"],
    live=True,
)
demo.launch()
|