File size: 940 Bytes
19c634e
 
4468072
 
 
 
 
 
 
 
 
 
 
 
 
 
19c634e
 
 
 
 
4468072
 
19c634e
f5aefe9
 
 
 
5d459a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19c634e
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import re

# Substitution table applied to lower-cased text, one entry after another in
# the order listed here.  Most entries fold a base letter followed by a
# combining mark into the single precomposed letter; the last two respell
# letters as other letters.
mapping = dict(
    [
        ("n\u0303", "\xf1"),    # n + combining tilde       -> U+00F1 (n with tilde)
        ("g\u0306", "\u011f"),  # g + combining breve       -> U+011F (g with breve)
        ("i\u0307", "i"),       # i + combining dot above   -> plain i
        ("u\u0308", "\xfc"),    # u + combining diaeresis   -> U+00FC (u with diaeresis)
        ("o\u0308", "\xf6"),    # o + combining diaeresis   -> U+00F6 (o with diaeresis)
        # NOTE(review): both cedilla forms of "c" map to U+04AB, which is the
        # Cyrillic small letter es with descender, not Latin U+00E7.
        # Presumably the downstream symbol set expects that code point;
        # confirm before changing.
        ("\xe7", "\u04ab"),     # precomposed c with cedilla -> U+04AB
        ("c\u0327", "\u04ab"),  # c + combining cedilla      -> U+04AB
        ("s\u0327", "\u015f"),  # s + combining cedilla     -> U+015F (s with cedilla)
        ("a\u0302", "\xe2"),    # a + combining circumflex  -> U+00E2 (a with circumflex)
        ("w", "v"),             # respell w as v
        ("x", "ks"),            # respell x as ks
    ]
)
 

def preprocess(text):
    """Normalise raw text into the lower-case form used further down the pipeline.

    The steps run in this order:

    1. Lower-case the text, turning capital "I" into dotless "ı".
    2. Apply every substitution in the module-level ``mapping`` table.
    3. Replace the sentence terminators "?" and "!" with ".".
    4. Spell out each ASCII digit separately as a word followed by one
       space (multi-digit numbers are therefore read digit by digit, and
       a trailing digit leaves a trailing space in the result).

    Args:
        text: The input string.

    Returns:
        The normalised string.
    """
    # str.lower() is locale-independent and lower-cases "I" to dotted "i".
    # This function already repairs "İ" (via the "i\u0307" rule in `mapping`)
    # and emits dotless "ı" in its own digit words, so capital "I" must become
    # "ı" rather than "i".
    # NOTE(review): assumes all input is written in that orthography; an
    # embedded foreign word such as "IBM" would come out as "ıbm" — confirm.
    text = text.replace("I", "ı").lower()

    # Pad with one space on each side; the padding is stripped again on return.
    text = " " + text + " "

    # The keys are literal text, so use plain replacement instead of treating
    # them as regular expressions (a key containing a metacharacter would
    # otherwise be misread as a pattern).
    for source, target in mapping.items():
        text = text.replace(source, target)

    separators = "?!"  # TODO: add proper symbols to tts
    for mark in separators:
        text = text.replace(mark, ".")

    digit_words = {
        "0": "sıfır",
        "1": "bir",
        "2": "eki",
        "3": "üç",
        "4": "dört",
        "5": "beş",
        "6": "altı",
        "7": "yedi",
        "8": "sekiz",
        "9": "doquz",
    }

    # Each digit is replaced on its own; the trailing space keeps consecutive
    # digit words apart.
    for digit, word in digit_words.items():
        text = text.replace(digit, word + " ")

    # Drop the two padding spaces added above.
    return text[1:-1]