Yurii Paniv committed on
Commit
8d24ba9
1 Parent(s): ec8e88d

Add converter to latin

Browse files
Files changed (3) hide show
  1. .vscode/settings.json +3 -0
  2. converter.py +44 -1
  3. tests/test_converter.py +5 -3
.vscode/settings.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "python.formatting.provider": "black"
3
+ }
converter.py CHANGED
@@ -3,4 +3,47 @@ def to_cyrillic(text):
3
 
4
 
5
  def to_latin(text):
6
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
 
5
  def to_latin(text):
6
+ text = text.lower()
7
+ cyrillic_mapping = {
8
+ "а": "a",
9
+ "б": "b",
10
+ "в": "v",
11
+ "г": "g",
12
+ "гъ": "ğ",
13
+ "д": "d",
14
+ "е": "e",
15
+ "ё": "ö",
16
+ "ж": "",
17
+ "з": "z",
18
+ "и": "i",
19
+ "й": "y",
20
+ "к": "k",
21
+ "къ": "q",
22
+ "л": "l",
23
+ "м": "m",
24
+ "н": "n",
25
+ "нъ": "ñ",
26
+ "о": "o",
27
+ "п": "p",
28
+ "р": "r",
29
+ "с": "s",
30
+ "т": "t",
31
+ "у": "u",
32
+ "ф": "f",
33
+ "х": "h",
34
+ "ц": "",
35
+ "ч": "ç",
36
+ "дж": "c",
37
+ "ш": "ş",
38
+ "щ": "",
39
+ "ъ": "",
40
+ "ы": "ı",
41
+ "ь": "",
42
+ "э": "e",
43
+ "ю": "yu",
44
+ "я": "ya",
45
+ }
46
+
47
+ for key in sorted(cyrillic_mapping.keys(), key=lambda x: len(x), reverse=True):
48
+ text = text.replace(key, cyrillic_mapping[key])
49
+ return text
tests/test_converter.py CHANGED
@@ -11,7 +11,7 @@ def test_latin_converter():
11
  cases = _read_test_cases()
12
  print(cases)
13
  for case in cases:
14
- assert converter.to_latin(case[1]) == case[0]
15
 
16
 
17
  def test_letter_coverage():
@@ -94,13 +94,15 @@ def test_letter_coverage():
94
  cases = _read_test_cases()
95
  missing_letters = []
96
  latin_cases = " ".join([case[0] for case in cases]).lower()
97
- for letter in latin_alphabet:
98
  if letter not in latin_cases:
99
  missing_letters.append(letter)
 
100
  cyrillic_cases = " ".join([case[1] for case in cases]).lower()
101
- for letter in cyrillic_alphabet:
102
  if letter not in cyrillic_cases:
103
  missing_letters.append(letter)
 
104
  if len(missing_letters) > 0:
105
  raise Exception(f"'{missing_letters}' not found in test dataset!")
106
 
 
11
  cases = _read_test_cases()
12
  print(cases)
13
  for case in cases:
14
+ assert converter.to_latin(case[1]).lower() == case[0].lower()
15
 
16
 
17
  def test_letter_coverage():
 
94
  cases = _read_test_cases()
95
  missing_letters = []
96
  latin_cases = " ".join([case[0] for case in cases]).lower()
97
+ for letter in sorted(latin_alphabet, key=lambda x: len(x), reverse=True):
98
  if letter not in latin_cases:
99
  missing_letters.append(letter)
100
+ latin_cases = latin_cases.replace(letter, "")
101
  cyrillic_cases = " ".join([case[1] for case in cases]).lower()
102
+ for letter in sorted(cyrillic_alphabet, key=lambda x: len(x), reverse=True):
103
  if letter not in cyrillic_cases:
104
  missing_letters.append(letter)
105
+ cyrillic_cases = cyrillic_cases.replace(letter, "")
106
  if len(missing_letters) > 0:
107
  raise Exception(f"'{missing_letters}' not found in test dataset!")
108