cdactvm committed on
Commit
c9e64c7
1 Parent(s): af7ecee

Update processDoubles.py

Browse files
Files changed (1) hide show
  1. processDoubles.py +24 -25
processDoubles.py CHANGED
@@ -1,25 +1,24 @@
1
- #!/usr/bin/env python
2
- # coding: utf-8
3
-
4
- # In[ ]:
5
-
6
-
7
# Expand each spoken "डबल <token>" pair into the token written twice.
def process_doubles(sentence):
    """Replace every "डबल X" pair in *sentence* with "X X".

    The sentence is split on whitespace. Whenever a token is exactly
    "डबल", it is dropped and the token that follows it is emitted twice.
    A "डबल" that is the last token has nothing to duplicate and is kept
    unchanged. The resulting tokens are joined with single spaces.
    """
    words = iter(sentence.split())
    expanded = []
    for word in words:
        if word != "डबल":
            expanded.append(word)
            continue
        # Pull the token after the marker; None means the marker was last.
        following = next(words, None)
        if following is None:
            expanded.append(word)
        else:
            expanded += [following, following]
    return ' '.join(expanded)
25
-
 
1
+ import re
2
+
3
def process_doubles(sentence):
    """Replace every "डबल X" in *sentence* with "X X".

    A "डबल" that is glued to the start of the next word (e.g. "डबलवन")
    is first detached so that it is treated like "डबल वन". The sentence is
    then split on whitespace; each token that is exactly "डबल" is dropped
    and the token after it is emitted twice. A trailing "डबल" with nothing
    after it is kept unchanged. Tokens are re-joined with single spaces.

    Args:
        sentence: Whitespace-separated text to process.

    Returns:
        The processed sentence as a single space-joined string.
    """
    # Detach a 'डबल' prefix glued to the following word.
    #  * The lookbehind restricts the split to the start of a token, so a
    #    word that merely contains 'डबल' does not get a stray space.
    #  * The lookahead refuses to split in front of a Devanagari dependent
    #    sign (candrabindu/anusvara/visarga, nukta, vowel signs, virama,
    #    stress marks); such a sign belongs to the preceding consonant, so
    #    splitting there would tear a word like "डबलिंग" apart.
    sentence = re.sub(
        r'(?<!\S)डबल'
        r'(?=[^\s\u0900-\u0903\u093A-\u093C\u093E-\u094F\u0951-\u0957\u0962\u0963])',
        'डबल ',
        sentence,
    )

    tokens = sentence.split()
    result = []
    i = 0

    while i < len(tokens):
        if tokens[i] == "डबल" and i + 1 < len(tokens):
            # Emit the following token twice and skip past it.
            result.extend((tokens[i + 1], tokens[i + 1]))
            i += 2
        else:
            # Ordinary token, or a trailing 'डबल' with nothing to duplicate.
            result.append(tokens[i])
            i += 1

    return ' '.join(result)