File size: 671 Bytes
19c634e
 
 
 
 
 
 
 
 
 
 
 
 
5d459a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19c634e
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import re

mappings = [
    # "i" followed by U+0307 COMBINING DOT ABOVE -> plain "i".
    # str.lower() turns U+0130 (capital I with dot above) into exactly this
    # two-code-point sequence, so it is collapsed back to a single "i".
    ["\u0069\u0307", "i"],
]

# Spelled-out word for each ASCII digit.
# NOTE(review): spellings such as "eki" / "doquz" look like Crimean Tatar,
# not misspelled Turkish -- confirm with the owner before "correcting" them.
_DIGIT_WORDS = {
    "0": "sıfır",
    "1": "bir",
    "2": "eki",
    "3": "üç",
    "4": "dört",
    "5": "beş",
    "6": "altı",
    "7": "yedi",
    "8": "sekiz",
    "9": "doquz",
}

# Translation table built once at import time. Every digit becomes its word
# followed by one space, so adjacent digits come out as separate words.
_DIGIT_TABLE = str.maketrans(
    {digit: word + " " for digit, word in _DIGIT_WORDS.items()}
)


def preprocess(text):
    """Lowercase *text*, apply ``mappings``, and spell out ASCII digits.

    Steps, in order:

    1. ``text.lower()``.
    2. The text is wrapped in one leading and one trailing space, and each
       ``[pattern, replacement]`` pair in ``mappings`` is applied with
       ``re.sub`` (so a pattern is interpreted as a regular expression).
    3. Every digit ``0``-``9`` is replaced by its word plus a single space.
       Digits are handled one by one: ``"12"`` becomes ``"bir eki "``, and
       no space is inserted *before* a digit (``"a9b"`` -> ``"adoquz b"``).
    4. The first and last characters (the padding added in step 2) are
       dropped.

    Args:
        text: Input string.

    Returns:
        The normalized string. A trailing space is present whenever the
        input ends with a digit.
    """
    # Padding is kept while mappings run; presumably so that a mapping
    # pattern can anchor on a surrounding space -- TODO confirm intent.
    padded = " " + text.lower() + " "

    for pattern, replacement in mappings:
        padded = re.sub(pattern, replacement, padded)

    # Single pass over the string; equivalent to ten chained .replace()
    # calls because none of the replacement words contains a digit.
    spelled = padded.translate(_DIGIT_TABLE)

    return spelled[1:-1]