Spaces:

cdactvm
/

Hindi_ASR

Running

cdactvm committed on Sep 27

Commit

c9e64c7

•

1 Parent(s): af7ecee

Update processDoubles.py

Files changed (1) hide show

processDoubles.py CHANGED Viewed

@@ -1,25 +1,24 @@
-#!/usr/bin/env python
-# coding: utf-8
-# In[ ]:
-# Function to process "double" followed by a number
-def process_doubles(sentence):
-    tokens = sentence.split()
-    result = []
-    i = 0
-    while i < len(tokens):
-        if tokens[i] == "डबल":
-            if i + 1 < len(tokens):
-                result.append(tokens[i + 1])
-                result.append(tokens[i + 1])
-                i += 2
-            else:
-                result.append(tokens[i])
-                i += 1
-        else:
-            result.append(tokens[i])
-            i += 1
-    return ' '.join(result)

+import re
+def process_doubles(sentence):
+    """Expand the spoken marker "डबल" (double) into a repeated word.
+
+    Each "डबल" token is dropped and the token after it is emitted twice,
+    e.g. "डबल वन" -> "वन वन". A word fused onto the marker ("डबलवन") is
+    first separated from it and then handled the same way. A "डबल" with
+    nothing after it is kept as-is.
+
+    Args:
+        sentence: Whitespace-separated text to process.
+
+    Returns:
+        The processed tokens joined by single spaces; runs of whitespace
+        in the input are therefore collapsed.
+    """
+    # Detach a word glued to the marker. The lookbehind restricts this to
+    # tokens that *start* with "डबल"; without it a token that merely
+    # contains the marker was split in two and never doubled.
+    sentence = re.sub(r'(?<!\S)डबल(?=\S)', 'डबल ', sentence)
+    tokens = iter(sentence.split())
+    result = []
+    for token in tokens:
+        if token != "डबल":
+            result.append(token)
+            continue
+        # Consume the word that follows the marker, if there is one.
+        following = next(tokens, None)
+        if following is None:
+            result.append(token)  # trailing marker: nothing to duplicate
+        else:
+            result.extend((following, following))
+    return ' '.join(result)