"""Evaluate a speech-recognition model and serve a live transcription demo.

On start-up this script:

1. installs its evaluation dependencies with pip (best effort),
2. computes the word error rate (WER) of ``mskov/whisper_miso`` on an
   audio-folder dataset and prints the result,
3. launches a Gradio interface that transcribes streamed microphone audio.
"""
import os
import subprocess
import sys

import gradio as gr
import transformers
from transformers import pipeline

# Packages installed at start-up, before the imports that depend on them.
# NOTE(review): the original installed "spicy", which is an unrelated PyPI
# package; "scipy" is presumably what was intended -- confirm.
_RUNTIME_PACKAGES = (
    "evaluate",
    "datasets",
    "scipy",
    "soundfile",
    "datasets[audio]",
)


def _pip_install(package):
    """Install *package* into the running interpreter, ignoring failures.

    An argument list is used instead of a shell string, and pip is invoked
    through ``sys.executable`` so the package lands in the environment that
    is actually executing this script. ``check=False`` keeps the original
    best-effort behaviour: a failed install does not abort start-up.
    """
    subprocess.run(
        [sys.executable, "-m", "pip", "install", package],
        check=False,
    )


for _package in _RUNTIME_PACKAGES:
    _pip_install(_package)

# These imports must come after the installs above.
from evaluate import evaluator  # noqa: E402
from datasets import load_dataset, Audio  # noqa: E402

# Default ASR pipeline used by the interactive demo.
p = pipeline("automatic-speech-recognition")

task_evaluator = evaluator("automatic-speech-recognition")

# url = {"test": "https://huggingface.co/datasets/mskov/miso_test/blob/main/test_set.parquet"}
# NOTE(review): data_dir is resolved as a local directory by the
# "audiofolder" builder; this value looks like a Hub repository path --
# confirm the files are actually present at this relative path.
data = load_dataset("audiofolder", data_dir="mskov/miso_test/test_set")

# Without ``split=`` load_dataset returns a DatasetDict (a dict of splits),
# whereas the evaluator expects a single Dataset. Prefer the "test" split
# and otherwise fall back to the first split available.
if isinstance(data, dict):
    data = data["test"] if "test" in data else next(iter(data.values()))

# NOTE(review): "file_name" / "category" are kept from the original; verify
# they match the column names the loaded dataset really exposes.
results = task_evaluator.compute(
    # A Hub repo id, not a web URL, is what the pipeline loader expects.
    model_or_pipeline="mskov/whisper_miso",
    data=data,
    input_column="file_name",
    label_column="category",
    metric="wer",
)
print(results)


def transcribe(audio, state=""):
    """Transcribe one audio chunk and append it to the running transcript.

    Args:
        audio: Path of the recorded audio file, or ``None`` when no audio
            has been captured yet.
        state: Transcript accumulated by previous calls.

    Returns:
        A ``(transcript, transcript)`` pair: the first item is shown in the
        output textbox, the second is stored as the new session state.
    """
    # Treat a missing state as an empty transcript.
    state = state or ""
    # Nothing to transcribe: return the transcript unchanged instead of
    # handing None to the pipeline.
    if audio is None:
        return state, state
    text = p(audio)["text"]
    state += text + " "
    return state, state


gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(source="microphone", type="filepath", streaming=True),
        "state",
    ],
    outputs=[
        "textbox",
        "state",
    ],
    live=True,
).launch()