File size: 1,181 Bytes
b357c71
 
dfc9440
b357c71
 
 
 
 
dfc9440
b357c71
 
dfc9440
 
 
 
 
b357c71
 
 
 
 
 
 
 
 
dfc9440
 
 
 
 
b357c71
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import gradio as gr
import torch
from transformers import WhisperTokenizer, pipeline

# Tokenizer is loaded from "openai/whisper-small" and handed explicitly to the
# pipeline below. No language/task is set here; language="marathi",
# task="transcribe" is the option that was left switched off.
tokenizer = WhisperTokenizer.from_pretrained("openai/whisper-small")

# Speech-recognition pipeline used by the demo.
# Alternative checkpoint kept for reference: "thak123/whisper-small-gom".
# To use another model, change this to "your-username/the-name-you-picked".
pipe = pipeline(
    task="automatic-speech-recognition",
    model="thak123/whisper-small-LDC-V1",
    tokenizer=tokenizer,
)

# Disabled: forcing the decoder prompt to Marathi transcription.
# pipe.model.config.forced_decoder_ids = (
#         pipe.tokenizer.get_decoder_prompt_ids(
#             language="marathi", task="transcribe"
#         )
#     )

def transcribe(audio):
    """Transcribe one audio input with the module-level ``pipe``.

    Args:
        audio: The value to pass to ``pipe``, or ``None`` when there is
            nothing to transcribe (e.g. the form was submitted without a
            recording).

    Returns:
        The ``"text"`` entry of the pipeline result, or an empty string
        when ``audio`` is ``None``.
    """
    # Nothing was recorded/uploaded: skip the pipeline instead of letting it
    # fail on a None input and surface an error in the UI.
    if audio is None:
        return ""
    return pipe(audio)["text"]

# Gradio UI: record from the microphone, pass the recording to `transcribe`
# as a file path (type="filepath"), and display the returned text.
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs="text",
    # Sample clips given as plain relative paths (resolved against the
    # working directory). Written as literals so no `os` import is needed;
    # os.path.join(os.path.dirname("."), p) evaluates to p anyway because
    # dirname(".") is the empty string.
    examples=[
        ["audio/chalyaami.mp3"],
        ["audio/ekdonteen.flac"],
        ["audio/heyatachadjaale.mp3"],
    ],
    title="Whisper Small Konkani",
    description="Realtime demo for Konkani speech recognition using a fine-tuned Whisper small model.",
)


# Start the web app; runs at import/execution time of this script.
iface.launch()