Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -74,9 +74,11 @@ stop_words.update('ask','much','thank','etc.', 'e', 'We', 'In', 'ed','pa', 'This
|
|
74 |
|
75 |
def clean_text(text):
|
76 |
'''
|
77 |
-
|
78 |
'''
|
79 |
text = text.encode("ascii", errors="ignore").decode("ascii") # remove non-ASCII characters
|
|
|
|
|
80 |
text = re.sub(r"\n", " ", text)
|
81 |
text = re.sub(r"\n\n", " ", text)
|
82 |
text = re.sub(r"\t", " ", text)
|
@@ -84,7 +86,7 @@ def clean_text(text):
|
|
84 |
text = text.strip(" ")
|
85 |
text = re.sub(" +", " ", text).strip() # get rid of multiple spaces and replace with a single space
|
86 |
|
87 |
-
text = [word for word in text.split() if word not in
|
88 |
text = ' '.join(text)
|
89 |
return text
|
90 |
|
|
|
74 |
|
75 |
def clean_text(text):
|
76 |
'''
|
77 |
+
Return the text with non-ASCII characters, extra whitespace, and stop words removed.
|
78 |
'''
|
79 |
text = text.encode("ascii", errors="ignore").decode("ascii") # remove non-ASCII characters
|
80 |
+
text=unidecode.unidecode(text) # remove diacritics
|
81 |
+
text=contractions.fix(text) # expand contractions (e.g. "don't" -> "do not")
|
82 |
text = re.sub(r"\n", " ", text)
|
83 |
text = re.sub(r"\n\n", " ", text)
|
84 |
text = re.sub(r"\t", " ", text)
|
|
|
86 |
text = text.strip(" ")
|
87 |
text = re.sub(" +", " ", text).strip() # get rid of multiple spaces and replace with a single space
|
88 |
|
89 |
+
text = [word for word in text.split() if word not in stop_words]
|
90 |
text = ' '.join(text)
|
91 |
return text
|
92 |
|