Spaces:
Running
Running
File size: 339 Bytes
19c634e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 |
import re
# Substitution table: each entry is [pattern, replacement].
mappings = [
    # "i" followed by U+0307 (combining dot above) is two separate code
    # points that display as a dotted i; collapse the pair to a plain "i".
    ["\u0069\u0307", "i"],
]
def preprocess(text):
    """Return *text* lower-cased with every ``mappings`` substitution applied.

    The lower-cased text is wrapped in one leading and one trailing space
    while the substitutions run; the first and last characters of the result
    are dropped again before returning.
    NOTE(review): presumably the padding lets a pattern match on a space at
    the very start or end of the text -- confirm against the patterns in use.

    Each ``mappings`` entry is read as ``[pattern, replacement]`` and passed
    to ``re.sub``, so the pattern is interpreted as a regular expression.
    Entries are applied in list order.
    """
    padded = " " + text.lower() + " "
    for entry in mappings:
        pattern, replacement = entry[0], entry[1]
        padded = re.sub(pattern, replacement, padded)
    # Strip the first and last character (the padding added above).
    return padded[1:-1]
|