cdactvm committed
Commit 06cd898
1 Parent(s): 79a00a8

Update app.py

Files changed (1)
  1. app.py  +56 -0
app.py CHANGED
@@ -0,0 +1,56 @@
+ import warnings
+ warnings.filterwarnings("ignore")
+ import os
+ import re
+ import gradio as gr
+ import numpy as np
+ import torchaudio
+ import nbimporter
+ from transformers import pipeline
+ from transformers import AutoProcessor
+ from pyctcdecode import build_ctcdecoder
+ from transformers import Wav2Vec2ProcessorWithLM
+ from text2int import text_to_int
+ from isNumber import is_number
+ from Text2List import text_to_list
+ from convert2list import convert_to_list
+ from processDoubles import process_doubles
+ from replaceWords import replace_words
+
+ # Load the ASR pipeline and the matching processor for the Hindi w2v-bert-2.0 checkpoint
+ transcriber = pipeline(task="automatic-speech-recognition", model="cdactvm/w2v-bert-2.0-hindi_v1")
+ processor = AutoProcessor.from_pretrained("cdactvm/w2v-bert-2.0-hindi_v1")
+
+ # Pull the tokenizer vocabulary so the CTC decoder labels match the model outputs
+ vocab_dict = processor.tokenizer.get_vocab()
+
+ # Sort the vocabulary by token id, lowercase the labels, and build a
+ # pyctcdecode beam-search decoder backed by the KenLM model in lm.binary
+ sorted_vocab_dict = {k.lower(): v for k, v in sorted(vocab_dict.items(), key=lambda item: item[1])}
+ decoder = build_ctcdecoder(
+     labels=list(sorted_vocab_dict.keys()),
+     kenlm_model_path="lm.binary",
+ )
+ # Bundle the feature extractor, tokenizer and LM decoder into a single processor
+ processor_with_lm = Wav2Vec2ProcessorWithLM(
+     feature_extractor=processor.feature_extractor,
+     tokenizer=processor.tokenizer,
+     decoder=decoder
+ )
+ processor.feature_extractor._processor_class = "Wav2Vec2ProcessorWithLM"
+
+
+ def transcribe(audio):
+     # Run the ASR pipeline on the recorded audio file
+     transcript = transcriber(audio)
+     text_value = transcript['text']
+     print(text_value)
+     # Post-process the raw transcript with the local helper modules:
+     # collapse doubled words, replace number words, then convert them to digits
+     processed_doubles = process_doubles(text_value)
+     converted_to_list = convert_to_list(processed_doubles, text_to_list())
+     replaced_words = replace_words(converted_to_list)
+     converted_text = text_to_int(replaced_words)
+     return converted_text
+
+
+ # Gradio UI: record from the microphone, pass the file path to transcribe(), show the text
+ demo = gr.Interface(
+     transcribe,
+     gr.Audio(sources="microphone", type="filepath"),
+     "text",
+ )
+
+ demo.launch()
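
Note: this commit builds processor_with_lm but transcribe() above still decodes through the plain pipeline, so the KenLM decoder is never exercised. Below is a minimal sketch of how the LM-boosted processor could be used instead; AutoModelForCTC, the 16 kHz resampling step, and the transcribe_with_lm name are assumptions for illustration, not part of the commit.

import torch
import torchaudio
from transformers import AutoModelForCTC

# Assumption: the checkpoint exposes a CTC head loadable via AutoModelForCTC
model = AutoModelForCTC.from_pretrained("cdactvm/w2v-bert-2.0-hindi_v1")

def transcribe_with_lm(audio_path):
    # Load the recording and resample to the 16 kHz rate the model expects
    waveform, sr = torchaudio.load(audio_path)
    waveform = torchaudio.functional.resample(waveform, sr, 16_000).mean(dim=0)
    inputs = processor_with_lm(waveform.numpy(), sampling_rate=16_000, return_tensors="pt")
    with torch.no_grad():
        logits = model(**inputs).logits
    # Wav2Vec2ProcessorWithLM.batch_decode runs pyctcdecode's beam search
    # with the KenLM model wired in above
    return processor_with_lm.batch_decode(logits.numpy()).text[0]

The output of transcribe_with_lm could then be fed through the same process_doubles / replace_words / text_to_int chain used in transcribe().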