Spaces:
Running
Running
import re
# Ordered [pattern, replacement] pairs. preprocess() hands each pattern to
# re.sub, so patterns are interpreted as regular expressions.
mappings = [
    # "i" (U+0069) followed by U+0307 COMBINING DOT ABOVE: two separate code
    # points that render as a single dotted "i". Collapse them to a plain "i".
    ["\u0069\u0307", "i"],
]
def preprocess(text: str) -> str:
    """Lowercase *text* and apply every substitution listed in ``mappings``.

    Each entry of the module-level ``mappings`` list is a
    ``[pattern, replacement]`` pair. The pattern is passed to ``re.sub`` and
    is therefore interpreted as a regular expression; entries are applied in
    list order, each one operating on the result of the previous one.

    Args:
        text: The string to normalize.

    Returns:
        The lowercased string with all mappings applied.
    """
    text = text.lower()  # always work on lowercase text
    # Pad with one space on each side for the duration of the substitutions;
    # the padding is sliced off again before returning.
    # NOTE(review): presumably this lets patterns anchor on surrounding
    # spaces at the very start/end of the input -- confirm with the author.
    text = " " + text + " "
    for pattern, replacement in mappings:
        text = re.sub(pattern, replacement, text)
    return text[1:-1]