Spaces:

cdactvm
/

Hindi_ASR

Running

Hindi_ASR / processDoubles.py

Upload 12 files

bfde6e2 verified about 1 month ago

1.51 kB

	#!/usr/bin/env python
	# coding: utf-8

	# In[2]:


	# # Function to process "double" followed by a number
	# def process_doubles(sentence):
	# tokens = sentence.split()
	# result = []
	# i = 0
	# while i < len(tokens):
	# if tokens[i] == "डबल":
	# if i + 1 < len(tokens):
	# result.append(tokens[i + 1])
	# result.append(tokens[i + 1])
	# i += 2
	# else:
	# result.append(tokens[i])
	# i += 1
	# else:
	# result.append(tokens[i])
	# i += 1
	# return ' '.join(result)


	# In[ ]:


	import re

	def process_doubles(sentence):
	    """Expand each "डबल" marker into two copies of the word that follows it.

	    A marker glued directly to the following text (e.g. "डबलवन") is first
	    separated into two whitespace-delimited words.  The sentence is then
	    split on whitespace; every standalone "डबल" word is dropped and the
	    word after it is emitted twice.  A "डबल" that is the last word has
	    nothing to duplicate and is kept unchanged.

	    Args:
	        sentence: Input text to process.

	    Returns:
	        The processed words joined by single spaces (leading, trailing and
	        repeated whitespace in the input is therefore collapsed).
	    """
	    # Detach the marker from whatever non-space text immediately follows it.
	    spaced = re.sub(r'(डबल)(\S+)', r'\1 \2', sentence)
	    words = iter(spaced.split())
	    expanded = []
	    for word in words:
	        if word != "डबल":
	            expanded.append(word)
	            continue
	        # Pull the next word off the same iterator so the outer loop does
	        # not see it again; None signals that the marker was the last word.
	        following = next(words, None)
	        if following is None:
	            expanded.append(word)
	        else:
	            expanded.extend((following, following))
	    return ' '.join(expanded)