Yurii Paniv committed on
Commit
9caae98
1 Parent(s): df56c7b

Add ukrainian-accentor support

Browse files
Files changed (6) hide show
  1. app.py +6 -4
  2. formatter.py +4 -4
  3. requirements.txt +2 -1
  4. stress.py +14 -9
  5. stress_with_model.py +33 -0
  6. ukrainian-accentor +1 -0
app.py CHANGED
@@ -12,7 +12,8 @@ import torch
12
 
13
 
14
  class StressOption(Enum):
15
- AutomaticStress = "Автоматичні наголоси"
 
16
 
17
 
18
  class VoiceOption(Enum):
@@ -66,9 +67,9 @@ def tts(text: str, voice: str, stress: str):
66
  print("Voice", voice)
67
  print("Stress:", stress)
68
  print("Time:", datetime.utcnow())
69
- autostress = True if stress == StressOption.AutomaticStress.value else False
70
  speaker_name = "male1" if voice == VoiceOption.MaleVoice.value else "female3"
71
- text = preprocess_text(text, autostress)
72
  text_limit = 1200
73
  text = (
74
  text if len(text) < text_limit else text[0:text_limit]
@@ -110,7 +111,8 @@ iface = gr.Interface(
110
  + "Github: [https://github.com/robinhad/ukrainian-tts](https://github.com/robinhad/ukrainian-tts) \n"
111
  + "Model training - [Yurii Paniv @robinhad](https://github.com/robinhad) \n"
112
  + "Mykyta and Olena dataset - [Yehor Smoliakov @egorsmkv](https://github.com/egorsmkv) \n"
113
- + "Autostress using [ukrainian-word-stress](https://github.com/lang-uk/ukrainian-word-stress) - [Oleksiy Syvokon @asivokon](https://github.com/asivokon) \n"
 
114
  + f'<center><img src="{badge}" alt="visitors badge"/></center>',
115
  examples=[
116
  [
 
12
 
13
 
14
  class StressOption(Enum):
15
+ AutomaticStress = "Автоматичні наголоси (за словником)"
16
+ AutomaticStressWithModel = "Автоматичні наголоси (за допомогою моделі)"
17
 
18
 
19
  class VoiceOption(Enum):
 
67
  print("Voice", voice)
68
  print("Stress:", stress)
69
  print("Time:", datetime.utcnow())
70
+ autostress_with_model = True if stress == StressOption.AutomaticStressWithModel.value else False
71
  speaker_name = "male1" if voice == VoiceOption.MaleVoice.value else "female3"
72
+ text = preprocess_text(text, autostress_with_model)
73
  text_limit = 1200
74
  text = (
75
  text if len(text) < text_limit else text[0:text_limit]
 
111
  + "Github: [https://github.com/robinhad/ukrainian-tts](https://github.com/robinhad/ukrainian-tts) \n"
112
  + "Model training - [Yurii Paniv @robinhad](https://github.com/robinhad) \n"
113
  + "Mykyta and Olena dataset - [Yehor Smoliakov @egorsmkv](https://github.com/egorsmkv) \n"
114
+ + "Autostress (with dictionary) using [ukrainian-word-stress](https://github.com/lang-uk/ukrainian-word-stress) - [Oleksiy Syvokon @asivokon](https://github.com/asivokon) \n"
115
+ + "Autostress (with model) using [ukrainian-accentor](https://github.com/egorsmkv/ukrainian-accentor) - [Bohdan Mykhailenko @NeonBohdan](https://github.com/NeonBohdan) + [Yehor Smoliakov @egorsmkv](https://github.com/egorsmkv) \n"
116
  + f'<center><img src="{badge}" alt="visitors badge"/></center>',
117
  examples=[
118
  [
formatter.py CHANGED
@@ -1,9 +1,10 @@
1
  import num2words
2
  import re
3
- from stress import sentence_to_stress
 
4
 
5
 
6
- def preprocess_text(text, autostress=False):
7
  # currencies
8
  text = text.replace("$", "долар")
9
  text = text.replace("₴", "гривня")
@@ -77,8 +78,7 @@ def preprocess_text(text, autostress=False):
77
  text = text.replace(english_char.upper(), english[english_char].upper())
78
  text = text.replace(english_char, english[english_char])
79
 
80
- if autostress:
81
- text = sentence_to_stress(text)
82
 
83
  return text
84
 
 
1
  import num2words
2
  import re
3
+ from stress import sentence_to_stress, stress_dict
4
+ from stress_with_model import stress_with_model
5
 
6
 
7
+ def preprocess_text(text, use_autostress_model=False):
8
  # currencies
9
  text = text.replace("$", "долар")
10
  text = text.replace("₴", "гривня")
 
78
  text = text.replace(english_char.upper(), english[english_char].upper())
79
  text = text.replace(english_char, english[english_char])
80
 
81
+ text = sentence_to_stress(text, stress_with_model if use_autostress_model else stress_dict)
 
82
 
83
  return text
84
 
requirements.txt CHANGED
@@ -1,2 +1,3 @@
1
  TTS==0.7.1
2
- ukrainian-word-stress==1.0.0
 
 
1
  TTS==0.7.1
2
+ ukrainian-word-stress==1.0.0
3
+ -r ukrainian-accentor/requirements.txt
stress.py CHANGED
@@ -4,15 +4,7 @@ from ukrainian_word_stress import Stressifier, StressSymbol
4
  stressify = Stressifier(stress_symbol=StressSymbol.CombiningAcuteAccent)
5
 
6
 
7
- def sentence_to_stress(sentence: str) -> str:
8
- # save custom stress positions
9
- all_stresses = []
10
- orig_words = sentence.split(" ")
11
- for i in range(0, len(orig_words)):
12
- if "+" in orig_words[i]:
13
- all_stresses.append(i)
14
-
15
- # add stress before vowel
16
  stressed = stressify(sentence.replace("+", "")).replace(StressSymbol.CombiningAcuteAccent, "+")
17
  new_stressed = ""
18
  start = 0
@@ -29,6 +21,19 @@ def sentence_to_stress(sentence: str) -> str:
29
  else:
30
  new_stressed += stressed[last:]
31
  break
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
  # replace already stressed words
34
  if len(all_stresses) > 0:
 
4
  stressify = Stressifier(stress_symbol=StressSymbol.CombiningAcuteAccent)
5
 
6
 
7
+ def stress_dict(sentence: str):
 
 
 
 
 
 
 
 
8
  stressed = stressify(sentence.replace("+", "")).replace(StressSymbol.CombiningAcuteAccent, "+")
9
  new_stressed = ""
10
  start = 0
 
21
  else:
22
  new_stressed += stressed[last:]
23
  break
24
+ return new_stressed
25
+
26
+
27
+ def sentence_to_stress(sentence: str, stress_function=stress_dict) -> str:
28
+ # save custom stress positions
29
+ all_stresses = []
30
+ orig_words = sentence.split(" ")
31
+ for i in range(0, len(orig_words)):
32
+ if "+" in orig_words[i]:
33
+ all_stresses.append(i)
34
+
35
+ # add stress before vowel
36
+ new_stressed = stress_function(sentence)
37
 
38
  # replace already stressed words
39
  if len(all_stresses) > 0:
stress_with_model.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+
3
+ # import
4
+ importer = torch.package.PackageImporter("ukrainian-accentor/accentor-lite.pt")
5
+ accentor = importer.load_pickle("uk-accentor", "model")
6
+
7
+ # run
8
def stress_with_model(text: str):
    """Add stress marks to ``text`` using the packaged accentor model.

    The input is lowercased first and then handed to ``accentor.process``
    with ``mode='plus'`` (presumably the stressed vowel is marked with a
    ``+`` sign -- confirm against the ukrainian-accentor documentation).

    Returns the model output, or the lowercased input unchanged when the
    model raises ``ValueError``.
    """
    lowered = text.lower()
    try:
        return accentor.process(lowered, mode='plus')
    except ValueError:  # TODO: apply fix for cases when there are no vowels
        # Best-effort fallback: hand back the (lowercased) text without stress.
        return lowered
15
+
16
+
17
if __name__ == "__main__":
    # Manual smoke test: print the model output for a handful of sentences,
    # including text that already carries "+" marks, a bare "+" between
    # words, and a sentence without vowels (listed twice, as in the
    # original script).
    for sentence in (
        "Кам'янець-Подільський - місто в Хмельницькій області України, центр Кам'янець-Подільської міської об'єднаної територіальної громади і Кам'янець-Подільського району.",
        "Привіт, як тебе звати?",
        "АННА - український панк-рок гурт",
        "Не тільки в Україні таке може бути.",
        "Не тільки в +Укра+їні т+аке може бути.",
        "два + два",
        "Н тльк в крн тк мж бт.",
        "Н тльк в крн тк мж бт.",
    ):
        print(stress_with_model(sentence))
ukrainian-accentor ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit 44f178282efd7eb3770fd082cab2b795351efe76