Sajjo committed on
Commit 6d4ade5
1 Parent(s): 3f2215b

Update app.py

Files changed (1): app.py +178 -330
app.py CHANGED
@@ -1,273 +1,135 @@
 
 
  import warnings
  import gradio as gr
- from transformers import pipeline
- from transformers import AutoProcessor
  from pyctcdecode import build_ctcdecoder
  from transformers import Wav2Vec2ProcessorWithLM

- import os
- import re
- #import torchaudio
-
- # Initialize the speech recognition pipeline and transliterator
- p1 = pipeline(task="automatic-speech-recognition", model="cdactvm/w2v-bert-odia_v1")
- p2 = pipeline(task="automatic-speech-recognition", model="cdactvm/w2v-bert-2.0-hindi_v1")
- #p3 = pipeline(task="automatic-speech-recognition", model="cdactvm/kannada_w2v-bert_model")
- #p4 = pipeline(task="automatic-speech-recognition", model="cdactvm/telugu_w2v-bert_model")
- #p5 = pipeline(task="automatic-speech-recognition", model="Sajjo/w2v-bert-2.0-bangala-gpu-CV16.0_v2")
- #p6 = pipeline(task="automatic-speech-recognition", model="cdactvm/hf-open-assames")
- p7 = pipeline(task="automatic-speech-recognition", model="cdactvm/w2v-assames")
  processor = AutoProcessor.from_pretrained("cdactvm/w2v-assames")
  vocab_dict = processor.tokenizer.get_vocab()
  sorted_vocab_dict = {k.lower(): v for k, v in sorted(vocab_dict.items(), key=lambda item: item[1])}
- decoder = build_ctcdecoder(
-     labels=list(sorted_vocab_dict.keys()),
-     kenlm_model_path="lm.binary",
- )
- processor_with_lm = Wav2Vec2ProcessorWithLM(
-     feature_extractor=processor.feature_extractor,
-     tokenizer=processor.tokenizer,
-     decoder=decoder
- )
- processor.feature_extractor._processor_class = "Wav2Vec2ProcessorWithLM"
- p8 = pipeline("automatic-speech-recognition", model="cdactvm/w2v-assames", tokenizer=processor_with_lm, feature_extractor=processor_with_lm.feature_extractor, decoder=processor_with_lm.decoder)
-
-
- os.system('git clone https://github.com/irshadbhat/indic-trans.git')
- os.system('pip install ./indic-trans/.')
-
- #HF_TOKEN = os.getenv('HF_TOKEN')
- #hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN, "asr_demo")
-
- from indictrans import Transliterator
- trn = Transliterator(source='ori', target='eng', build_lookup=True)
- def transcribe_odiya(speech):
-     text = p1(speech)["text"]
-     if text is None:
-         return "Error: ASR returned None"
-     return text
-
-
  def cleanhtml(raw_html):
-     cleantext = re.sub(r'<.*?>', '', raw_html)
-     return cleantext

- def transcribe_hindi(speech):
-     text = p2(speech)["text"]
-     if text is None:
-         return "Error: ASR returned None"
-     return text
- def transcribe_kannada(speech):
-     text = p3(speech)["text"]
-     if text is None:
-         return "Error: ASR returned None"
-     return text
- def transcribe_telugu(speech):
-     text = p4(speech)["text"]
-     if text is None:
-         return "Error: ASR returned None"
-     return text
-
- def transcribe_bangala(speech):
-     text = p5(speech)["text"]
-     if text is None:
-         return "Error: ASR returned None"
-     return text

- def transcribe_assamese_LM(speech):
-     text = p8(speech)["text"]
-     text = cleanhtml(text)
      if text is None:
          return "Error: ASR returned None"
-     return text
-
- def transcribe_assamese_model2(speech):
-     text = p7(speech)["text"]
-     text = cleanhtml(text)
-     if text is None:
-         return "Error: ASR returned None"
-     return text
-
- def transcribe_odiya_eng(speech):
-     trn = Transliterator(source='ori', target='eng', build_lookup=True)
-     text = p1(speech)["text"]
-     if text is None:
-         return "Error: ASR returned None"
-     sentence = trn.transform(text)
-     if sentence is None:
-         return "Error: Transliteration returned None"
-     replaced_words = replace_words(sentence)
-     processed_sentence = process_doubles(replaced_words)
-     return process_transcription(processed_sentence)
-
- def transcribe_ban_eng(speech):
-     trn = Transliterator(source='ben', target='eng', build_lookup=True)
-     text = p5(speech)["text"]
-     if text is None:
-         return "Error: ASR returned None"
-     sentence = trn.transform(text)
-     if sentence is None:
-         return "Error: Transliteration returned None"
-     replaced_words = replace_words(sentence)
-     processed_sentence = process_doubles(replaced_words)
-     return process_transcription(processed_sentence)

- def transcribe_hin_eng(speech):
-     trn = Transliterator(source='hin', target='eng', build_lookup=True)
-     text = p2(speech)["text"]
-     if text is None:
-         return "Error: ASR returned None"
-     sentence = trn.transform(text)
-     if sentence is None:
-         return "Error: Transliteration returned None"
-     replaced_words = replace_words(sentence)
-     processed_sentence = process_doubles(replaced_words)
-     return process_transcription(processed_sentence)

- def transcribe_kan_eng(speech):
-     trn = Transliterator(source='kan', target='eng', build_lookup=True)
-     text = p3(speech)["text"]
-     if text is None:
-         return "Error: ASR returned None"
-     sentence = trn.transform(text)
-     if sentence is None:
-         return "Error: Transliteration returned None"
-     replaced_words = replace_words(sentence)
-     processed_sentence = process_doubles(replaced_words)
-     return process_transcription(processed_sentence)
-
- def transcribe_tel_eng(speech):
-     trn = Transliterator(source='tel', target='eng', build_lookup=True)
-     text = p4(speech)["text"]
-     if text is None:
-         return "Error: ASR returned None"
-     sentence = trn.transform(text)
-     if sentence is None:
-         return "Error: Transliteration returned None"
-     replaced_words = replace_words(sentence)
-     processed_sentence = process_doubles(replaced_words)
-     return process_transcription(processed_sentence)
-
-
- def process_transcription(input_sentence):
-     word_to_code_map = {}
-     code_to_word_map = {}

-     transcript_1 = sentence_to_transcript(input_sentence, word_to_code_map)
-     if transcript_1 is None:
-         return "Error: Transcript conversion returned None"

-     numbers = text2int(transcript_1)
-     if numbers is None:
-         return "Error: Text to number conversion returned None"

-     code_to_word_map = {v: k for k, v in word_to_code_map.items()}
-     text = transcript_to_sentence(numbers, code_to_word_map)
-     return text
-
- def sel_lng(lng, mic=None, file=None):
-     if mic is not None:
-         audio = mic
-     elif file is not None:
-         audio = file
-     else:
-         return "You must either provide a mic recording or a file"
-
-     if lng == "Odiya":
-         return transcribe_odiya(audio)
-     elif lng == "Odiya-trans":
-         return transcribe_odiya_eng(audio)
-     elif lng == "Hindi-trans":
-         return transcribe_hin_eng(audio)
-     elif lng == "Hindi":
-         return transcribe_hindi(audio)
-     elif lng == "Kannada-trans":
-         return transcribe_kan_eng(audio)
-     elif lng == "Kannada":
-         return transcribe_kannada(audio)
-     elif lng == "Telugu-trans":
-         return transcribe_tel_eng(audio)
-     elif lng == "Telugu":
-         return transcribe_telugu(audio)
-     elif lng == "Bangala-trans":
-         return transcribe_ban_eng(audio)
-     elif lng == "Bangala":
-         return transcribe_bangala(audio)
-     elif lng == "Assamese-LM":
-         return transcribe_assamese_LM(audio)
-     elif lng == "Assamese-Model2":
-         return transcribe_assamese_model2(audio)
-
-
- # Function to replace incorrectly spelled words
  def replace_words(sentence):
      replacements = [
-         (r'\bjiro\b', 'zero'), (r'\bjero\b', 'zero'), (r'\bnn\b', 'one'),
-         (r'\bn\b', 'one'), (r'\bna\b', 'one'), (r'\btu\b', 'two'),
-         (r'\btoo\b', 'two'), (r'\bthiri\b', 'three'), (r'\bfor\b', 'four'),
-         (r'\bfore\b', 'four'), (r'\bfib\b', 'five'), (r'\bdublseven\b', 'double seven'),
-         (r'\bdubalathri\b', 'double three'), (r'\bnineeit\b', 'nine eight'),
          (r'\bfipeit\b', 'five eight'), (r'\bdubal\b', 'double'), (r'\bsevenatu\b', 'seven two'),
      ]
      for pattern, replacement in replacements:
          sentence = re.sub(pattern, replacement, sentence)
      return sentence

- # Function to process "double" followed by a number
  def process_doubles(sentence):
      tokens = sentence.split()
      result = []
      i = 0
      while i < len(tokens):
-         if tokens[i] in ("double", "dubal"):
-             if i + 1 < len(tokens):
-                 result.append(tokens[i + 1])
-                 result.append(tokens[i + 1])
-                 i += 2
-             else:
-                 result.append(tokens[i])
-                 i += 1
          else:
              result.append(tokens[i])
              i += 1
      return ' '.join(result)

- # Function to generate Soundex code for a word
- def soundex(word):
-     word = word.upper()
-     word = ''.join(filter(str.isalpha, word))
-     if not word:
-         return None
-     soundex_mapping = {
-         'B': '1', 'F': '1', 'P': '1', 'V': '1',
-         'C': '2', 'G': '2', 'J': '2', 'K': '2', 'Q': '2', 'S': '2', 'X': '2', 'Z': '2',
-         'D': '3', 'T': '3', 'L': '4', 'M': '5', 'N': '5', 'R': '6'
-     }
-     soundex_code = word[0]
-     for char in word[1:]:
-         if char not in ('H', 'W'):
-             soundex_code += soundex_mapping.get(char, '0')
-     soundex_code = soundex_code[0] + ''.join(c for i, c in enumerate(soundex_code[1:]) if c != soundex_code[i])
-     soundex_code = soundex_code.replace('0', '') + '000'
-     return soundex_code[:4]
-
- # Function to convert text to numerical representation
- def is_number(x):
-     if type(x) == str:
-         x = x.replace(',', '')
-     try:
-         float(x)
-     except:
-         return False
-     return True

  def text2int(textnum, numwords={}):
-     units = ['Z600', 'O500','T000','T600','F600','F100','S220','S150','E300','N500',
-              'T500', 'E415', 'T410', 'T635', 'F635', 'F135', 'S235', 'S153', 'E235','N535']
      tens = ['', '', 'T537', 'T637', 'F637', 'F137', 'S230', 'S153', 'E230', 'N530']
      scales = ['H536', 'T253', 'M450', 'C600']
      ordinal_words = {'oh': 'Z600', 'first': 'O500', 'second': 'T000', 'third': 'T600', 'fourth': 'F600', 'fifth': 'F100',
-                      'sixth': 'S200','seventh': 'S150','eighth': 'E230', 'ninth': 'N500', 'twelfth': 'T410'}
      ordinal_endings = [('ieth', 'y'), ('th', '')]
      if not numwords:
          numwords['and'] = (1, 0)
@@ -304,18 +166,20 @@ def text2int(textnum, numwords={}):
              if scale > 100:
                  result += current
                  current = 0
-             onnumber = True
-             lastunit = False
              lastscale = False
          else:
              for ending, replacement in ordinal_endings:
                  if word.endswith(ending):
                      word = "%s%s" % (word[:-len(ending)], replacement)

-             if (not is_numword(word)) or (word == 'and' and not lastscale):
                  if onnumber:
                      curstring += repr(result + current) + " "
-                 curstring += word + " "
                  result = current = 0
                  onnumber = False
                  lastunit = False
@@ -323,118 +187,102 @@ def text2int(textnum, numwords={}):
          else:
              scale, increment = from_numword(word)
              onnumber = True
-
-             if lastunit and (word not in scales):
                  curstring += repr(result + current)
                  result = current = 0
-
              if scale > 1:
                  current = max(1, current)
-
              current = current * scale + increment
              if scale > 100:
                  result += current
                  current = 0
-
-             lastscale = False
-             lastunit = False
-             if word in scales:
-                 lastscale = True
-             elif word in units:
-                 lastunit = True

      if onnumber:
          curstring += repr(result + current)

      return curstring

  # Convert sentence to transcript using Soundex
  def sentence_to_transcript(sentence, word_to_code_map):
-     words = sentence.split()
-     transcript_codes = []
-
-     for word in words:
-         if word not in word_to_code_map:
-             word_to_code_map[word] = soundex(word)
-         transcript_codes.append(word_to_code_map[word])
-
-     transcript = ' '.join(transcript_codes)
-     return transcript
-
- # Convert transcript back to sentence using mapping
- def transcript_to_sentence(transcript, code_to_word_map):
-     codes = transcript.split()
-     sentence_words = []

-     for code in codes:
-         sentence_words.append(code_to_word_map.get(code, code))
-
-     sentence = ' '.join(sentence_words)
-     return sentence

- # # Process the audio file
- # transcript = pipe("./odia_recorded/AUD-20240614-WA0004.wav")
- # text_value = transcript['text']
- # sentence = trn.transform(text_value)
- # replaced_words = replace_words(sentence)
- # processed_sentence = process_doubles(replaced_words)
-
- # input_sentence_1 = processed_sentence
-
- # Create empty mappings
- word_to_code_map = {}
- code_to_word_map = {}
-
- # Convert sentence to transcript
- # transcript_1 = sentence_to_transcript(input_sentence_1, word_to_code_map)
-
- # Convert transcript to numerical representation
- # numbers = text2int(transcript_1)
-
- # Create reverse mapping
- code_to_word_map = {v: k for k, v in word_to_code_map.items()}
-
- # Convert transcript back to sentence
- # reconstructed_sentence_1 = transcript_to_sentence(numbers, code_to_word_map)
-
- # demo=gr.Interface(
- #     fn=sel_lng,
-
- #     inputs=[
-
- #         gr.Dropdown(["Hindi","Hindi-trans","Odiya","Odiya-trans"],value="Hindi",label="Select Language"),
- #         gr.Audio(source="microphone", type="filepath"),
- #         gr.Audio(source= "upload", type="filepath"),
- #         #gr.Audio(sources="upload", type="filepath"),
- #         #"state"
- #     ],
- #     outputs=[
- #         "textbox"
- #     #     #"state"
- #     ],
- #     title="Automatic Speech Recognition",
- #     description = "Demo for Automatic Speech Recognition. Use microphone to record speech. Please press Record button. Initially it will take some time to load the model. The recognized text will appear in the output textbox",
- # ).launch()
-
- ######################################################
- demo=gr.Interface(
-     fn=sel_lng,
-
-     inputs=[
-
-         #gr.Dropdown(["Hindi","Hindi-trans","Odiya","Odiya-trans","Kannada","Kannada-trans","Telugu","Telugu-trans","Bangala","Bangala-trans"],value="Hindi",label="Select Language"),
-         gr.Dropdown(["Hindi","Hindi-trans","Odiya","Odiya-trans","Assamese-LM","Assamese-Model2"],value="Hindi",label="Select Language"),
-         gr.Audio(sources=["microphone","upload"], type="filepath"),
-         #gr.Audio(sources="upload", type="filepath"),
-         #"state"
-     ],
-     outputs=[
-         "textbox"
-         # #"state"
-     ],
-     allow_flagging="auto",
-     #flagging_options=["Language error", "English transliteration error", "Other"],
-     #flagging_callback=hf_writer,
-     title="Automatic Speech Recognition",
-     description = "Demo for Automatic Speech Recognition. Use microphone to record speech. Please press Record button. Initially it will take some time to load the model. The recognized text will appear in the output textbox",
- ).launch()
 
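The rewritten file below collapses the per-language transcribe_* helpers into an asr_models dictionary with a single transcribe() entry point, and swaps the hand-rolled soundex() for Double Metaphone codes from the metaphone package. For reference, a minimal self-contained sketch of that word-to-code-to-word round-trip; the import and the sample words are illustrative assumptions, not part of the commit:

    # Sketch only: round-trip words through Double Metaphone codes,
    # mirroring sentence_to_transcript / transcript_to_sentence below.
    from metaphone import doublemetaphone

    word_to_code = {}
    codes = []
    for w in "nine eight seven".split():
        primary, secondary = doublemetaphone(w)
        code = primary or secondary
        word_to_code.setdefault(w, code)
        codes.append(code)

    code_to_word = {v: k for k, v in word_to_code.items()}
    recovered = " ".join(code_to_word.get(c, c) for c in codes)
    print(recovered)  # expected to equal "nine eight seven" while the codes stay distinct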
+ import os
+ import re
  import warnings
  import gradio as gr
+ from transformers import pipeline, AutoProcessor
  from pyctcdecode import build_ctcdecoder
  from transformers import Wav2Vec2ProcessorWithLM
+ from indictrans import Transliterator

+ # Initialize ASR pipelines
+ asr_models = {
+     "Odiya": pipeline(task="automatic-speech-recognition", model="cdactvm/w2v-bert-2.0-odia_v1"),
+     "Odiya-trans": pipeline(task="automatic-speech-recognition", model="cdactvm/w2v-bert-2.0-odia_v1"),
+     "Hindi": pipeline(task="automatic-speech-recognition", model="cdactvm/w2v-bert-2.0-hindi_v1"),
+     "Hindi-trans": pipeline(task="automatic-speech-recognition", model="cdactvm/w2v-bert-2.0-hindi_v1"),
+     # Add other models similarly
+     # "Kannada": pipeline(...),
+     # "Telugu": pipeline(...),
+     # "Bangala": pipeline(...),
+     "Assamese-Model2": pipeline(task="automatic-speech-recognition", model="cdactvm/w2v-assames"),
+ }
+
+ # Initialize Assamese model with Language Model
  processor = AutoProcessor.from_pretrained("cdactvm/w2v-assames")
  vocab_dict = processor.tokenizer.get_vocab()
  sorted_vocab_dict = {k.lower(): v for k, v in sorted(vocab_dict.items(), key=lambda item: item[1])}
+ decoder = build_ctcdecoder(labels=list(sorted_vocab_dict.keys()), kenlm_model_path="lm.binary")
+ processor_with_lm = Wav2Vec2ProcessorWithLM(feature_extractor=processor.feature_extractor,
+                                             tokenizer=processor.tokenizer,
+                                             decoder=decoder)
+ asr_models["Assamese-LM"] = pipeline("automatic-speech-recognition", model="cdactvm/w2v-assames",
+                                      tokenizer=processor_with_lm,
+                                      feature_extractor=processor_with_lm.feature_extractor,
+                                      decoder=processor_with_lm.decoder)
+
+ # Initialize Transliterator
+ transliterators = {
+     "Odiya-trans": Transliterator(source='ori', target='eng', build_lookup=True),
+     "Hindi-trans": Transliterator(source='hin', target='eng', build_lookup=True),
+     # Add other transliterators similarly
+ }
+
+ # Function to clean HTML tags from text
  def cleanhtml(raw_html):
+     return re.sub(r'<.*?>', '', raw_html)

+ # Transcribe audio using the selected model
+ def transcribe(lng, speech, transliterate=False):
+     model = asr_models.get(lng)
+     if not model:
+         return f"Unsupported language: {lng}"
+
+     result = model(speech)
+     text = result.get("text")
      if text is None:
          return "Error: ASR returned None"
+
+     if transliterate:
+         trn = transliterators.get(lng)  # lng already carries the "-trans" suffix
+         if not trn:
+             return f"Transliterator not available for: {lng}"
+
+         sentence = trn.transform(text)
+         if sentence is None:
+             return "Error: Transliteration returned None"
+         return process_transcription(sentence)
+
+     return cleanhtml(text)

+ # Function to process and correct transcriptions
+ def process_transcription(sentence):
+     replaced_words = replace_words(sentence)
+     processed_sentence = process_doubles(replaced_words)
+     return convert_to_text(processed_sentence)
+
+ # Replace incorrectly spelled words
  def replace_words(sentence):
      replacements = [
+         (r'\bjiro\b', 'zero'), (r'\bjero\b', 'zero'),
+         (r'\bnn\b', 'one'), (r'\bn\b', 'one'), (r'\bvan\b', 'one'), (r'\bna\b', 'one'), (r'\bek\b', 'one'),
+         (r'\btu\b', 'two'), (r'\btoo\b', 'two'), (r'\bdo\b', 'two'),
+         (r'\bthiri\b', 'three'), (r'\btiri\b', 'three'), (r'\bdubalathri\b', 'double three'), (r'\btin\b', 'three'),
+         (r'\bfor\b', 'four'), (r'\bfore\b', 'four'),
+         (r'\bfib\b', 'five'), (r'\bpaanch\b', 'five'),
+         (r'\bchha\b', 'six'), (r'\bchhah\b', 'six'), (r'\bchau\b', 'six'),
+         (r'\bdublseven\b', 'double seven'), (r'\bsath\b', 'seven'),
+         (r'\baath\b', 'eight'),
+         (r'\bnau\b', 'nine'),
+         (r'\bdas\b', 'ten'),
+         (r'\bnineeit\b', 'nine eight'),
          (r'\bfipeit\b', 'five eight'), (r'\bdubal\b', 'double'), (r'\bsevenatu\b', 'seven two'),
      ]
      for pattern, replacement in replacements:
          sentence = re.sub(pattern, replacement, sentence)
      return sentence

+ # Process "double" followed by a number
  def process_doubles(sentence):
      tokens = sentence.split()
      result = []
      i = 0
      while i < len(tokens):
+         if tokens[i] in ("double", "dubal") and i + 1 < len(tokens):
+             result.extend([tokens[i + 1]] * 2)
+             i += 2
          else:
              result.append(tokens[i])
              i += 1
      return ' '.join(result)

+ # Convert Soundex code back to text
+ def convert_to_text(input_sentence):
+     word_to_code_map = {}
+     transcript = sentence_to_transcript(input_sentence, word_to_code_map)
+     if transcript is None:
+         return "Error: Transcript conversion returned None"
+
+     numbers = text2int(transcript)
+     if numbers is None:
+         return "Error: Text to number conversion returned None"
+
+     code_to_word_map = {v: k for k, v in word_to_code_map.items()}
+     return transcript_to_sentence(numbers, code_to_word_map)
+
+ # Convert text to numerical representation
  def text2int(textnum, numwords={}):
+     units = ['Z600', 'O500', 'T000', 'T600', 'F600', 'F100', 'S220', 'S150', 'E300', 'N500',
+              'T500', 'E415', 'T410', 'T635', 'F635', 'F135', 'S235', 'S153', 'E235', 'N535']
      tens = ['', '', 'T537', 'T637', 'F637', 'F137', 'S230', 'S153', 'E230', 'N530']
      scales = ['H536', 'T253', 'M450', 'C600']
      ordinal_words = {'oh': 'Z600', 'first': 'O500', 'second': 'T000', 'third': 'T600', 'fourth': 'F600', 'fifth': 'F100',
+                      'sixth': 'S200', 'seventh': 'S150', 'eighth': 'E230', 'ninth': 'N500', 'twelfth': 'T410'}
      ordinal_endings = [('ieth', 'y'), ('th', '')]
      if not numwords:
          numwords['and'] = (1, 0)

              if scale > 100:
                  result += current
                  current = 0
+             lastunit = True
              lastscale = False
+             onnumber = True
          else:
              for ending, replacement in ordinal_endings:
                  if word.endswith(ending):
                      word = "%s%s" % (word[:-len(ending)], replacement)

+             if not is_numword(word) or (word == 'and' and not lastscale):
                  if onnumber:
                      curstring += repr(result + current) + " "
+                 curstring += word
+                 if word[-1] != '-':
+                     curstring += " "
                  result = current = 0
                  onnumber = False
                  lastunit = False
          else:
              scale, increment = from_numword(word)
              onnumber = True
+             if lastunit and (word in units or word in ordinal_words):
                  curstring += repr(result + current)
                  result = current = 0
              if scale > 1:
                  current = max(1, current)
              current = current * scale + increment
              if scale > 100:
                  result += current
                  current = 0
+             lastunit = word in units
+             lastscale = word in scales

      if onnumber:
          curstring += repr(result + current)

      return curstring

+ # Check if a word is a number
+ def is_number(s):
+     try:
+         float(s.replace(',', ''))
+         return True
+     except ValueError:
+         return False
+
  # Convert sentence to transcript using Soundex
  def sentence_to_transcript(sentence, word_to_code_map):
+     with warnings.catch_warnings():
+         warnings.simplefilter("ignore")
+         from metaphone import doublemetaphone
+
+     transcript = []
+     for word in sentence.split():
+         codes = doublemetaphone(word)
+         word_code = next((code for code in codes if code), None)

+         if not word_code:
+             continue
+
+         if word not in word_to_code_map:
+             word_to_code_map[word] = word_code
+         transcript.append(word_code)
+
+     return ' '.join(transcript)
+
+ # Convert transcript back to sentence using Soundex
+ def transcript_to_sentence(transcript, code_to_word_map):
+     sentence = []
+     for code in transcript.split():
+         word = code_to_word_map.get(code, '')
+         if word:
+             sentence.append(word)
+     return ' '.join(sentence)
+
+ # Set theme colors for Gradio interface
+ theme_colors = {
+     "bg_color": "#0E1117",
+     "bg_secondary_color": "#161A25",
+     "input_color": "#161A25",
+     "input_text_color": "#C0C0BF",
+     "button_color": "#4A6AF2",
+     "button_primary_text_color": "#FFFFFF",
+     "button_secondary_color": "#A0A0A0",
+     "button_secondary_text_color": "#000000"
+ }
+
+ # Apply theme to Gradio blocks
+ def apply_theme(demo):
+     demo.set_theme({
+         "background_color": theme_colors["bg_color"],
+         "secondary_background_color": theme_colors["bg_secondary_color"],
+         "input_background_color": theme_colors["input_color"],
+         "input_text_color": theme_colors["input_text_color"],
+         "button_primary_background_color": theme_colors["button_color"],
+         "button_primary_text_color": theme_colors["button_primary_text_color"],
+         "button_secondary_background_color": theme_colors["button_secondary_color"],
+         "button_secondary_text_color": theme_colors["button_secondary_text_color"]
+     })
+
+ # Create Gradio interface
+ with gr.Blocks() as demo:
+     apply_theme(demo)
+
+     gr.Markdown("<h1><center>Test</center></h1>")
+     with gr.Row():
+         language = gr.Dropdown(list(asr_models.keys()), label="Language", value="Hindi")
+         speech_input = gr.Audio(source="microphone", type="filepath", label="Speech")
+
+     text_output = gr.Textbox(label="Output")
+     submit_btn = gr.Button("Submit")
+
+     def process_audio(lang, speech):
+         transliterate = lang.endswith("-trans")
+         return transcribe(lang, speech, transliterate)
+
+     submit_btn.click(process_audio, inputs=[language, speech_input], outputs=text_output)
+
+ # Launch the Gradio app on a different port
+ demo.launch(server_port=7861)
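
Given the replace_words and process_doubles definitions added above, a romanized spoken-digit hypothesis normalizes in two steps. The input string is a hypothetical example, and the snippet assumes those two functions are already in scope (importing app.py directly would also load the models and launch the demo):

    raw = "jiro dubal tu nau"        # hypothetical romanized ASR output
    step1 = replace_words(raw)       # -> "zero double two nine"
    step2 = process_doubles(step1)   # -> "zero two two nine"
    print(step2)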