Yurii Paniv
Handle numbers
5d459a9
raw
history blame
671 Bytes
import re
mappings = [
["\u0069\u0307", "i"], # ["i̇", "i"], handle i with dot, first occurence is two separate symbols
]
def preprocess(text):
text = text.lower() # always treat lowercase
text = " " + text + " "
for mapping in mappings:
text = re.sub(mapping[0], mapping[1], text)
numbers = {
"0": "sıfır",
"1": "bir",
"2": "eki",
"3": "üç",
"4": "dört",
"5": "beş",
"6": "altı",
"7": "yedi",
"8": "sekiz",
"9": "doquz",
}
for number in numbers.keys():
text = text.replace(number, numbers[number] + " ")
return text[1:-1]