"""Speech-recognition demo script.

On start-up the script:

1. installs a few extra packages with pip,
2. computes the word error rate (WER) of the ``mskov/whisper_miso`` model on
   the first 40 rows of the ``mskov/miso_test`` test split and prints it,
3. launches a live Gradio interface that transcribes microphone audio and
   appends every new piece of text to a running transcript.
"""
import transformers
from transformers import pipeline
import gradio as gr
import os
import subprocess
import sys


def _pip_install(package):
    """Install *package* using the pip of the interpreter running this script.

    A failed installation is reported on stderr but does not abort the
    script, which keeps the best-effort behaviour of the previous
    ``os.system`` calls.
    """
    # An argument list (no shell) plus ``sys.executable -m pip`` guarantees
    # the package lands in the environment this script is executing in.
    completed = subprocess.run(
        [sys.executable, "-m", "pip", "install", package],
        check=False,
    )
    if completed.returncode != 0:
        print(
            f"warning: 'pip install {package}' exited with status "
            f"{completed.returncode}",
            file=sys.stderr,
        )


# NOTE(review): "spicy" looks like a typo for "scipy"; nothing in this script
# imports either one, so the name is left untouched -- confirm and fix.
for _package in ("evaluate", "datasets", "spicy"):
    _pip_install(_package)

# These imports must come after the installs above, which may be what makes
# the modules available in the first place.
from evaluate import evaluator
from datasets import load_dataset

# Pipeline used by the live demo (library default model for the task).
p = pipeline("automatic-speech-recognition")

task_evaluator = evaluator("automatic-speech-recognition")
data = load_dataset("mskov/miso_test", "en", split="test[:40]")
results = task_evaluator.compute(
    # The loader expects a Hub repo id ("namespace/name"), not a web URL.
    model_or_pipeline="mskov/whisper_miso",
    data=data,
    input_column="audio",
    # NOTE(review): WER compares predictions with reference transcripts;
    # confirm that the "category" column really holds them.
    label_column="category",
    metric="wer",
)
print(results)


def transcribe(audio, state=""):
    """Transcribe one audio chunk and append it to the running transcript.

    Args:
        audio: Path of the recorded audio file handed over by Gradio, or
            ``None`` when no audio is available for this call.
        state: Transcript accumulated by the previous calls.

    Returns:
        A ``(transcript, transcript)`` pair: the first item feeds the
        textbox output, the second is stored as the new state.
    """
    # Treat a missing state as an empty transcript instead of failing on
    # ``None += str``.
    if state is None:
        state = ""
    # Nothing to transcribe: keep the transcript as it is.
    if audio is None:
        return state, state
    text = p(audio)["text"]
    state += text + " "
    return state, state


gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(source="microphone", type="filepath", streaming=True),
        "state",
    ],
    outputs=[
        "textbox",
        "state",
    ],
    live=True,
).launch()