|
import warnings |
|
warnings.filterwarnings("ignore") |
|
import os |
|
import re |
|
import gradio as gr |
|
import numpy as np |
|
import torchaudio |
|
import nbimporter |
|
from transformers import pipeline |
|
from transformers import AutoProcessor |
|
from pyctcdecode import build_ctcdecoder |
|
from transformers import Wav2Vec2ProcessorWithLM |
|
from text2int import text_to_int |
|
from isNumber import is_number |
|
from Text2List import text_to_list |
|
from convert2list import convert_to_list |
|
from processDoubles import process_doubles |
|
from replaceWords import replace_words |
|
|
|
# Speech-recognition pipeline and the processor for the same checkpoint.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model="cdactvm/w2v-bert-2.0-hindi_v1",
)
processor = AutoProcessor.from_pretrained("cdactvm/w2v-bert-2.0-hindi_v1")

# Tokenizer vocabulary (token -> id), re-keyed with lower-cased tokens and
# ordered by ascending token id so the label order matches the id order.
vocab_dict = processor.tokenizer.get_vocab()
sorted_vocab_dict = dict(
    (token.lower(), token_id)
    for token, token_id in sorted(vocab_dict.items(), key=lambda entry: entry[1])
)

# CTC decoder over those labels, backed by the KenLM model file "lm.binary"
# (a relative path, resolved against the current working directory).
decoder = build_ctcdecoder(
    labels=list(sorted_vocab_dict),
    kenlm_model_path="lm.binary",
)

# Processor bundling the checkpoint's feature extractor and tokenizer with the
# LM-backed decoder.
# NOTE(review): `transcriber` above is created without `processor_with_lm`;
# confirm whether the LM decoder is meant to be wired into the pipeline.
processor_with_lm = Wav2Vec2ProcessorWithLM(
    feature_extractor=processor.feature_extractor,
    tokenizer=processor.tokenizer,
    decoder=decoder,
)

# Record the LM processor class name on the feature extractor.
# Presumably so saved configs point at Wav2Vec2ProcessorWithLM -- TODO confirm.
processor.feature_extractor._processor_class = "Wav2Vec2ProcessorWithLM"
|
|
|
|
|
def transcribe(audio):
    """Transcribe an audio recording and post-process the recognised text.

    The audio is passed to the module-level ``transcriber`` pipeline; the
    resulting text is then fed, in order, through ``process_doubles``,
    ``convert_to_list`` (together with the result of ``text_to_list()``),
    ``replace_words`` and ``text_to_int``.

    Args:
        audio: Value forwarded to ``transcriber``. The Gradio interface in
            this file supplies a file path, or ``None`` when nothing was
            recorded.

    Returns:
        The output of ``text_to_int`` for the processed transcript, or an
        empty string when ``audio`` is ``None``.
    """
    # Gradio hands over None when the user submits without recording; the
    # pipeline cannot process that, so answer with empty text instead of
    # raising inside the UI callback.
    if audio is None:
        return ""

    transcript = transcriber(audio)
    text_value = transcript['text']
    # Echo the raw transcript to the console before any post-processing.
    print(text_value)

    processed_doubles = process_doubles(text_value)
    converted_to_list = convert_to_list(processed_doubles, text_to_list())
    replaced_words = replace_words(converted_to_list)
    converted_text = text_to_int(replaced_words)
    return converted_text
|
|
|
|
|
# Gradio UI: microphone recording (delivered to the callback as a file path)
# goes to `transcribe`, whose return value is shown as text.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources="microphone", type="filepath"),
    outputs="text",
)

# Start the web app as soon as this module is executed.
demo.launch()