File size: 1,082 Bytes
0b5b7f4
973bb39
 
de07127
 
 
 
a8d3864
2996449
0b5b7f4
 
973bb39
590e8a9
74f5766
e7380d9
e4a4e02
8fdbf39
 
e4a4e02
 
 
674d0ae
e4a4e02
 
 
 
 
 
74f5766
 
 
 
973bb39
74f5766
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import transformers
from transformers import pipeline
import gradio as gr
import os
import subprocess
import sys

# Packages installed at startup, before the `evaluate` / `datasets` imports
# below. NOTE(review): the original list requested `spicy`, which is a
# different PyPI project; `scipy` is presumably what was intended - confirm.
_RUNTIME_PACKAGES = ("evaluate", "datasets", "scipy", "soundfile")

for _package in _RUNTIME_PACKAGES:
    # Invoke pip through the running interpreter so the packages are installed
    # into the environment this script imports from, rather than whichever
    # `pip` happens to be first on PATH. check=False keeps a failed install
    # non-fatal at this point (as with the previous os.system calls); a
    # package that is still missing surfaces as an ImportError just below.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", _package],
        check=False,
    )

from evaluate import evaluator
from datasets import load_dataset


# Speech-recognition pipeline used by transcribe(); no model is named here, so
# the library's default checkpoint for the task is used.
p = pipeline("automatic-speech-recognition")

task_evaluator = evaluator("automatic-speech-recognition")

# load_dataset() without `split` returns a DatasetDict (one entry per split),
# but Evaluator.compute() only accepts a dataset name or a single Dataset.
# Pick one split explicitly: prefer "test", otherwise the first one available.
dataset_splits = load_dataset("mskov/miso_test")
eval_split = "test" if "test" in dataset_splits else next(iter(dataset_splits))
data = dataset_splits[eval_split]

results = task_evaluator.compute(
    # Models are addressed by their Hub repo id ("<user>/<name>"), not by the
    # web URL of the model page.
    model_or_pipeline="mskov/whisper_miso",
    data=data,
    # NOTE(review): column names are taken as-is from the original call;
    # verify they match the dataset schema (audio input / reference text).
    input_column="file_name",
    label_column="category",
    metric="wer",
)
print(results)


def transcribe(audio, state=""):
    """Transcribe one audio input and append it to the running transcript.

    Args:
        audio: Audio input handed to the module-level pipeline ``p``.
            ``None`` means there is nothing to transcribe for this call.
        state: Transcript accumulated by previous calls. ``None`` is treated
            as an empty transcript.

    Returns:
        A ``(transcript, transcript)`` tuple: the same updated string twice,
        one for each of the two outputs the caller consumes.
    """
    if state is None:
        # `None += str` would raise TypeError; start from an empty transcript.
        state = ""
    if audio is None:
        # No audio to process: leave the transcript unchanged instead of
        # passing None to the pipeline.
        return state, state

    text = p(audio)["text"]
    state += text + " "
    return state, state

# Build and start the Gradio UI around transcribe(): a streaming microphone
# input plus a "state" slot go in, a textbox plus the updated "state" come out.
microphone_input = gr.Audio(source="microphone", type="filepath", streaming=True)
demo = gr.Interface(
    fn=transcribe,
    inputs=[microphone_input, "state"],
    outputs=["textbox", "state"],
    live=True,
)
demo.launch()