File size: 899 Bytes
6b72e76
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
01a1731
 
 
 
 
6b72e76
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
from transformers import pipeline
import gradio as gr


# Speech-recognition pipeline, built once at import time and reused by every
# call to transcribe_audio().
# NOTE(review): chunk_length_s=30 presumably makes the pipeline process long
# recordings in 30-second windows -- confirm against the transformers docs.
model = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-large-v3",
    # Ask the model to transcribe (task value forwarded to generation).
    generate_kwargs={"task": "transcribe"},
    chunk_length_s=30,
)

def transcribe_audio(mic=None, file=None, return_timestamps=False):
    """Run the module-level ``model`` on a mic recording or an uploaded file.

    The microphone input takes precedence when both inputs are supplied.

    Args:
        mic: Microphone recording, or ``None`` when nothing was recorded.
            The interface in this file supplies it as a file path.
        file: Uploaded audio, or ``None`` when nothing was uploaded.
            The interface in this file supplies it as a file path.
        return_timestamps: Forwarded to the model. When truthy, the
            ``"chunks"`` entry of the model output is returned instead of
            the ``"text"`` entry.

    Returns:
        A fixed explanatory message when neither input is given; otherwise
        ``result["chunks"]`` or ``result["text"]`` from the model output,
        depending on ``return_timestamps``.
    """
    # Prefer the microphone; fall back to the upload (which may also be None).
    source = mic if mic is not None else file
    if source is None:
        return "You must either provide a mic recording or a file"

    output = model(source, batch_size=8, return_timestamps=return_timestamps)
    return output["chunks" if return_timestamps else "text"]
        

# Build the web UI and start serving it. The three inputs map positionally
# onto transcribe_audio(mic, file, return_timestamps).
gr.Interface(
    inputs=[
        gr.Audio(type="filepath", sources=["microphone"]),  # -> mic
        gr.Audio(type="filepath", sources=["upload"]),  # -> file
        gr.Checkbox(label="Add timestamps?"),  # -> return_timestamps
    ],
    outputs="text",
    fn=transcribe_audio,
).launch()