Spaces:

robinhad
/

ukrainian-tts

Running

App Files Community

Yurii Paniv committed on Jul 22, 2022

Commit

9caae98

•

1 Parent(s): df56c7b

Add ukrainian-accentor support

Browse files

Files changed (6) hide show

app.py +6 -4
formatter.py +4 -4
requirements.txt +2 -1
stress.py +14 -9
stress_with_model.py +33 -0
ukrainian-accentor +1 -0

app.py CHANGED Viewed

@@ -12,7 +12,8 @@ import torch
 class StressOption(Enum):
-    AutomaticStress = "Автоматичні наголоси"
 class VoiceOption(Enum):
@@ -66,9 +67,9 @@ def tts(text: str, voice: str, stress: str):
     print("Voice", voice)
     print("Stress:", stress)
     print("Time:", datetime.utcnow())
-    autostress = True if stress == StressOption.AutomaticStress.value else False
     speaker_name = "male1" if voice == VoiceOption.MaleVoice.value else "female3"
-    text = preprocess_text(text, autostress)
     text_limit = 1200
     text = (
         text if len(text) < text_limit else text[0:text_limit]
@@ -110,7 +111,8 @@ iface = gr.Interface(
     + "Github: [https://github.com/robinhad/ukrainian-tts](https://github.com/robinhad/ukrainian-tts)   \n"
     + "Model training - [Yurii Paniv @robinhad](https://github.com/robinhad)   \n"
     + "Mykyta and Olena dataset - [Yehor Smoliakov @egorsmkv](https://github.com/egorsmkv)   \n"
-    + "Autostress using [ukrainian-word-stress](https://github.com/lang-uk/ukrainian-word-stress) - [Oleksiy Syvokon @asivokon](https://github.com/asivokon)    \n"
     + f'<center><img src="{badge}" alt="visitors badge"/></center>',
     examples=[
         [

 class StressOption(Enum):
+    AutomaticStress = "Автоматичні наголоси (за словником)"
+    AutomaticStressWithModel = "Автоматичні наголоси (за допомогою моделі)"
 class VoiceOption(Enum):
     print("Voice", voice)
     print("Stress:", stress)
     print("Time:", datetime.utcnow())
+    autostress_with_model = True if stress == StressOption.AutomaticStressWithModel.value else False
     speaker_name = "male1" if voice == VoiceOption.MaleVoice.value else "female3"
+    text = preprocess_text(text, autostress_with_model)
     text_limit = 1200
     text = (
         text if len(text) < text_limit else text[0:text_limit]
     + "Github: [https://github.com/robinhad/ukrainian-tts](https://github.com/robinhad/ukrainian-tts)   \n"
     + "Model training - [Yurii Paniv @robinhad](https://github.com/robinhad)   \n"
     + "Mykyta and Olena dataset - [Yehor Smoliakov @egorsmkv](https://github.com/egorsmkv)   \n"
+    + "Autostress (with dictionary) using [ukrainian-word-stress](https://github.com/lang-uk/ukrainian-word-stress) - [Oleksiy Syvokon @asivokon](https://github.com/asivokon)    \n"
+    + "Autostress (with model) using [ukrainian-accentor](https://github.com/egorsmkv/ukrainian-accentor) - [Bohdan Mykhailenko @NeonBohdan](https://github.com/NeonBohdan) + [Yehor Smoliakov @egorsmkv](https://github.com/egorsmkv)    \n"
     + f'<center><img src="{badge}" alt="visitors badge"/></center>',
     examples=[
         [

formatter.py CHANGED Viewed

@@ -1,9 +1,10 @@
 import num2words
 import re
-from stress import sentence_to_stress
-def preprocess_text(text, autostress=False):
     # currencies
     text = text.replace("$", "долар")
     text = text.replace("₴", "гривня")
@@ -77,8 +78,7 @@ def preprocess_text(text, autostress=False):
         text = text.replace(english_char.upper(), english[english_char].upper())
         text = text.replace(english_char, english[english_char])
-    if autostress:
-        text = sentence_to_stress(text)
     return text

 import num2words
 import re
+from stress import sentence_to_stress, stress_dict
+from stress_with_model import stress_with_model
+def preprocess_text(text, use_autostress_model=False):
     # currencies
     text = text.replace("$", "долар")
     text = text.replace("₴", "гривня")
         text = text.replace(english_char.upper(), english[english_char].upper())
         text = text.replace(english_char, english[english_char])
+    text = sentence_to_stress(text, stress_with_model if use_autostress_model else stress_dict)
     return text

requirements.txt CHANGED Viewed

@@ -1,2 +1,3 @@
 TTS==0.7.1
-ukrainian-word-stress==1.0.0

 TTS==0.7.1
+ukrainian-word-stress==1.0.0
+-r ukrainian-accentor/requirements.txt

stress.py CHANGED Viewed

@@ -4,15 +4,7 @@ from ukrainian_word_stress import Stressifier, StressSymbol
 stressify = Stressifier(stress_symbol=StressSymbol.CombiningAcuteAccent)
-def sentence_to_stress(sentence: str) -> str:
-    # save custom stress positions
-    all_stresses = []
-    orig_words = sentence.split(" ")
-    for i in range(0, len(orig_words)):
-        if "+" in orig_words[i]:
-            all_stresses.append(i)
-    # add stress before vowel
     stressed = stressify(sentence.replace("+", "")).replace(StressSymbol.CombiningAcuteAccent, "+")
     new_stressed = ""
     start = 0
@@ -29,6 +21,19 @@ def sentence_to_stress(sentence: str) -> str:
         else:
             new_stressed += stressed[last:]
             break
     # replace already stressed words
     if len(all_stresses) > 0:

 stressify = Stressifier(stress_symbol=StressSymbol.CombiningAcuteAccent)
+def stress_dict(sentence: str):
     stressed = stressify(sentence.replace("+", "")).replace(StressSymbol.CombiningAcuteAccent, "+")
     new_stressed = ""
     start = 0
         else:
             new_stressed += stressed[last:]
             break
+    return new_stressed
+def sentence_to_stress(sentence: str, stress_function=stress_dict) -> str:
+    # save custom stress positions
+    all_stresses = []
+    orig_words = sentence.split(" ")
+    for i in range(0, len(orig_words)):
+        if "+" in orig_words[i]:
+            all_stresses.append(i)
+    # add stress before vowel
+    new_stressed = stress_function(sentence)
     # replace already stressed words
     if len(all_stresses) > 0:

stress_with_model.py ADDED Viewed

	@@ -0,0 +1,33 @@

+import torch
+# import
+importer = torch.package.PackageImporter("ukrainian-accentor/accentor-lite.pt")
+accentor = importer.load_pickle("uk-accentor", "model")
+# run
+def stress_with_model(text: str):
+    text = text.lower()
+    try:
+        result = accentor.process(text, mode='plus')
+    except ValueError: # TODO: apply fix for cases when there are no vowels
+        return text
+    return result
+if __name__ == "__main__":
+    sentence = "Кам'янець-Подільський - місто в Хмельницькій області України, центр Кам'янець-Подільської міської об'єднаної територіальної громади і Кам'янець-Подільського району."
+    print(stress_with_model(sentence))
+    sentence = "Привіт, як тебе звати?"
+    print(stress_with_model(sentence))
+    sentence = "АННА - український панк-рок гурт"
+    print(stress_with_model(sentence))
+    sentence = "Не тільки в Україні таке може бути."
+    print(stress_with_model(sentence))
+    sentence = "Не тільки в +Укра+їні т+аке може бути."
+    print(stress_with_model(sentence))
+    sentence = "два + два"
+    print(stress_with_model(sentence))
+    sentence = "Н тльк в крн тк мж бт."
+    print(stress_with_model(sentence))
+    sentence = "Н тльк в крн тк мж бт."
+    print(stress_with_model(sentence))

ukrainian-accentor ADDED Viewed

	@@ -0,0 +1 @@


1	+ Subproject commit 44f178282efd7eb3770fd082cab2b795351efe76