update clean text code
Browse files
utils.py
CHANGED
@@ -27,7 +27,7 @@ def clean_text(x):
|
|
27 |
# x = re.sub(r"\w*\d+\w*", "", x) # numbers
|
28 |
x = re.sub(r"\s{2,}", " ", x) # over spaces
|
29 |
x = emoji_pattern.sub(r"", x) # emojis
|
30 |
-
|
31 |
|
32 |
return x
|
33 |
|
|
|
27 |
# x = re.sub(r"\w*\d+\w*", "", x) # numbers
|
28 |
x = re.sub(r"\s{2,}", " ", x) # over spaces
|
29 |
x = emoji_pattern.sub(r"", x) # emojis
|
30 |
+
x = re.sub("[^.,!?A-Za-z0-9]+", " ", x) # special characters except .,!?
|
31 |
|
32 |
return x
|
33 |
|