Hindi_ASR / processDoubles.py
cdactvm's picture
Upload 12 files
bfde6e2 verified
raw
history blame
1.51 kB
#!/usr/bin/env python
# coding: utf-8
# In[2]:
# # Function to process "double" followed by a number
# def process_doubles(sentence):
# tokens = sentence.split()
# result = []
# i = 0
# while i < len(tokens):
# if tokens[i] == "डबल":
# if i + 1 < len(tokens):
# result.append(tokens[i + 1])
# result.append(tokens[i + 1])
# i += 2
# else:
# result.append(tokens[i])
# i += 1
# else:
# result.append(tokens[i])
# i += 1
# return ' '.join(result)
# In[ ]:
import re
def process_doubles(sentence):
    """Expand the spoken marker "डबल" ("double") by repeating the next word.

    The sentence is split on whitespace. Every token that is exactly "डबल"
    and has a successor is dropped, and the successor is emitted twice
    ("डबल वन" -> "वन वन"). A marker glued to the following word is handled
    the same way ("डबलवन" -> "वन वन"). A "डबल" that is the last token has
    nothing to repeat and is kept unchanged.

    Args:
        sentence: Input text; tokens are separated by whitespace.

    Returns:
        The processed tokens joined by single spaces. Runs of whitespace in
        the input are therefore collapsed and leading/trailing whitespace is
        removed.
    """
    # Detach a glued marker, but only when:
    #   * it starts a token (not preceded by a non-space character), so a
    #     longer word that merely contains "डबल" is not cut in two, and
    #   * the next character is not a Devanagari dependent sign (virama,
    #     matra, nukta, anusvara, ...). Such a sign attaches to the final
    #     "ल", meaning the token is a different word (e.g. "डबल्यू"), and
    #     splitting there would yield a token that begins with a dangling
    #     combining mark.
    sentence = re.sub(
        r'(?<!\S)डबल'
        r'(?=[^\s\u0900-\u0903\u093A-\u093C\u093E-\u094F\u0951-\u0957\u0962\u0963])',
        'डबल ',
        sentence,
    )

    tokens = sentence.split()
    result = []
    i = 0
    while i < len(tokens):
        token = tokens[i]
        if token == "डबल" and i + 1 < len(tokens):
            # Replace the marker and its successor with the successor twice.
            result.extend([tokens[i + 1]] * 2)
            i += 2  # the successor has been consumed as well
        else:
            # Ordinary token, or a trailing marker with nothing to repeat.
            result.append(token)
            i += 1
    return ' '.join(result)