Hindi_ASR / processDoubles.py
cdactvm's picture
Update processDoubles.py
c9e64c7 verified
raw
history blame
827 Bytes
import re
def process_doubles(sentence):
    """Expand each "डबल" (double) marker by repeating the word that follows it.

    The marker may stand alone ("डबल वन") or be glued to the front of the
    next word ("डबलवन"); in both cases the marker is dropped and the
    following word is emitted twice ("वन वन").

    Args:
        sentence: Whitespace-separated text to process.

    Returns:
        The processed text with tokens joined by single spaces. A marker
        that is the last token has nothing to duplicate and is kept as-is.
    """
    # Detach a marker glued to the next word ("डबलवन" -> "डबल वन").
    # The (?<!\S) lookbehind restricts the match to the start of a token, so
    # the letters "डबल" appearing inside some other word do not get that word
    # split apart. A lookbehind is used instead of \b because Devanagari
    # vowel signs are not \w characters in Python's re module.
    sentence = re.sub(r'(?<!\S)(डबल)(\S+)', r'\1 \2', sentence)
    tokens = sentence.split()
    result = []
    i = 0
    while i < len(tokens):
        if tokens[i] == "डबल" and i + 1 < len(tokens):
            # Emit the following word twice and consume both tokens.
            result.extend((tokens[i + 1], tokens[i + 1]))
            i += 2
        else:
            # Ordinary token, or a trailing marker with nothing after it.
            result.append(tokens[i])
            i += 1
    return ' '.join(result)