File size: 1,024 Bytes
0b5b7f4
973bb39
 
de07127
 
 
 
a8d3864
0b5b7f4
 
973bb39
590e8a9
74f5766
e7380d9
e4a4e02
590e8a9
 
e4a4e02
 
 
590e8a9
e4a4e02
 
 
 
 
 
74f5766
 
 
 
973bb39
74f5766
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import transformers
from transformers import pipeline
import gradio as gr
import os 
import sys 
# Install runtime dependencies before the imports that follow need them.
# pip is invoked as "<this interpreter> -m pip" so the packages land in the
# environment that is executing this script; a bare `pip` on PATH may belong
# to a different Python installation.
import subprocess

for _package in ("evaluate", "datasets", "spicy"):
    # NOTE(review): "spicy" is kept verbatim from the original command; it may
    # be a typo for "scipy" -- confirm which package is actually required.
    # check=False: a failed install is not raised, matching the original
    # best-effort os.system calls whose return codes were ignored.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", _package],
        check=False,
    )
from evaluate import evaluator
from datasets import load_dataset


# Speech-recognition pipeline used by the live `transcribe` demo. No model is
# passed, so the library's default checkpoint for this task is used.
p = pipeline("automatic-speech-recognition")

# --- One-off evaluation: word error rate ("wer") of the fine-tuned model ----
task_evaluator = evaluator("automatic-speech-recognition")
# NOTE(review): with the generic "parquet" builder this path is presumably
# resolved relative to the working directory -- confirm the file is present
# there (or load it from the Hub dataset repo instead).
url = {"test": "mskov/miso_test/test_set.parquet"}
data = load_dataset("parquet", data_files=url)
results = task_evaluator.compute(
    # A Hub model id is expected here, not the model page's web URL.
    model_or_pipeline="mskov/whisper_miso",
    # load_dataset with a data_files mapping returns a DatasetDict keyed by
    # split; compute() accepts only a dataset name or a single Dataset.
    data=data["test"],
    # NOTE(review): the evaluator's documented signature takes ONE column name
    # (a str) for the audio input. The original two-element list is kept
    # because SOURCE does not show which column holds the audio path --
    # confirm and narrow this to a single column.
    input_column=["filepath", "file_name"],
    # NOTE(review): "wer" compares transcriptions against this column, so it
    # presumably holds reference text -- verify against the dataset schema.
    label_column="category",
    metric="wer",
)
print(results)


def transcribe(audio, state=""):
    """Transcribe one audio chunk and append it to the running transcript.

    Args:
        audio: Input handed directly to the module-level speech-recognition
            pipeline ``p``.
        state: Transcript accumulated by earlier calls; empty on the first
            call.

    Returns:
        A 2-tuple holding the updated transcript twice: once for display and
        once to be fed back in as ``state`` on the next call.
    """
    recognized = p(audio)
    # Each chunk is followed by a single space so consecutive chunks do not
    # run together.
    transcript = state + (recognized["text"] + " ")
    return transcript, transcript

# Streaming microphone demo: every audio update is passed to `transcribe`,
# and the "state" input/output pair carries the accumulated transcript from
# one call to the next while the textbox shows it.
mic_input = gr.Audio(source="microphone", type="filepath", streaming=True)
demo = gr.Interface(
    fn=transcribe,
    inputs=[mic_input, "state"],
    outputs=["textbox", "state"],
    live=True,
)
demo.launch()