Spaces:
Running
Running
Yurii Paniv
committed on
Commit
•
8d24ba9
1
Parent(s):
ec8e88d
Add converter to latin
Browse files- .vscode/settings.json +3 -0
- converter.py +44 -1
- tests/test_converter.py +5 -3
.vscode/settings.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"python.formatting.provider": "black"
|
3 |
+
}
|
converter.py
CHANGED
@@ -3,4 +3,47 @@ def to_cyrillic(text):
|
|
3 |
|
4 |
|
5 |
def to_latin(text):
|
6 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
|
5 |
# Cyrillic -> Latin transliteration table. Keys may be one or two characters
# long; the digraphs ("гъ", "къ", "нъ", "дж") stand for a single Latin letter.
# NOTE(review): the letter choices look like the Crimean Tatar Latin
# alphabet -- confirm against the project's test dataset.
_CYRILLIC_TO_LATIN = {
    "а": "a",
    "б": "b",
    "в": "v",
    "г": "g",
    "гъ": "ğ",
    "д": "d",
    "е": "e",
    "ё": "ö",
    # Previously mapped to "", which silently deleted the letter.
    "ж": "j",
    "з": "z",
    "и": "i",
    "й": "y",
    "к": "k",
    "къ": "q",
    "л": "l",
    "м": "m",
    "н": "n",
    "нъ": "ñ",
    "о": "o",
    "п": "p",
    "р": "r",
    "с": "s",
    "т": "t",
    "у": "u",
    "ф": "f",
    "х": "h",
    # Previously mapped to "", which silently deleted the letter.
    "ц": "ts",
    "ч": "ç",
    "дж": "c",
    "ш": "ş",
    # Previously mapped to "", which silently deleted the letter.
    "щ": "şç",
    # A hard or soft sign that is not part of a digraph is dropped.
    "ъ": "",
    "ы": "ı",
    "ь": "",
    "э": "e",
    "ю": "yu",
    "я": "ya",
}

# Longest keys first, so that a digraph such as "къ" is replaced as a unit
# before its individual letters ("к", "ъ") are considered. Computed once at
# import time instead of on every call.
_CYRILLIC_KEYS_LONGEST_FIRST = sorted(_CYRILLIC_TO_LATIN, key=len, reverse=True)


def to_latin(text):
    """Transliterate Cyrillic *text* into Latin script.

    The input is lower-cased first, so the result is always lower case.
    Characters that have no entry in the mapping table (Latin letters,
    digits, punctuation, whitespace) are passed through unchanged.

    Args:
        text: The string to transliterate.

    Returns:
        The lower-cased, transliterated string.
    """
    text = text.lower()
    for key in _CYRILLIC_KEYS_LONGEST_FIRST:
        text = text.replace(key, _CYRILLIC_TO_LATIN[key])
    return text
|
tests/test_converter.py
CHANGED
@@ -11,7 +11,7 @@ def test_latin_converter():
|
|
11 |
cases = _read_test_cases()
|
12 |
print(cases)
|
13 |
for case in cases:
|
14 |
-
assert converter.to_latin(case[1]) == case[0]
|
15 |
|
16 |
|
17 |
def test_letter_coverage():
|
@@ -94,13 +94,15 @@ def test_letter_coverage():
|
|
94 |
cases = _read_test_cases()
|
95 |
missing_letters = []
|
96 |
latin_cases = " ".join([case[0] for case in cases]).lower()
|
97 |
-
for letter in latin_alphabet:
|
98 |
if letter not in latin_cases:
|
99 |
missing_letters.append(letter)
|
|
|
100 |
cyrillic_cases = " ".join([case[1] for case in cases]).lower()
|
101 |
-
for letter in cyrillic_alphabet:
|
102 |
if letter not in cyrillic_cases:
|
103 |
missing_letters.append(letter)
|
|
|
104 |
if len(missing_letters) > 0:
|
105 |
raise Exception(f"'{missing_letters}' not found in test dataset!")
|
106 |
|
|
|
11 |
cases = _read_test_cases()
|
12 |
print(cases)
|
13 |
for case in cases:
|
14 |
+
assert converter.to_latin(case[1]).lower() == case[0].lower()
|
15 |
|
16 |
|
17 |
def test_letter_coverage():
|
|
|
94 |
cases = _read_test_cases()
|
95 |
missing_letters = []
|
96 |
latin_cases = " ".join([case[0] for case in cases]).lower()
|
97 |
+
for letter in sorted(latin_alphabet, key=lambda x: len(x), reverse=True):
|
98 |
if letter not in latin_cases:
|
99 |
missing_letters.append(letter)
|
100 |
+
latin_cases = latin_cases.replace(letter, "")
|
101 |
cyrillic_cases = " ".join([case[1] for case in cases]).lower()
|
102 |
+
for letter in sorted(cyrillic_alphabet, key=lambda x: len(x), reverse=True):
|
103 |
if letter not in cyrillic_cases:
|
104 |
missing_letters.append(letter)
|
105 |
+
cyrillic_cases = cyrillic_cases.replace(letter, "")
|
106 |
if len(missing_letters) > 0:
|
107 |
raise Exception(f"'{missing_letters}' not found in test dataset!")
|
108 |
|