# Spaces: Running -- leftover page-header text, kept as a comment so the module parses.
from typing import List

import ukrainian_accentor as accentor
from ukrainian_word_stress import Stressifier, StressSymbol
# Dictionary-based stressifier, configured to mark stress with the combining
# acute accent symbol (stress_dict below swaps that symbol for "+").
stressify = Stressifier(stress_symbol=StressSymbol.CombiningAcuteAccent)

# Lower-case Ukrainian letters, split into vowels and consonants.
vowels = "аеєиіїоуюя"
consonants = "бвгґджзйклмнпрстфхцчшщь"
# Characters that may appear inside a word without ending it.
special = "'-"
# Every character treated as part of a word; "+" is the stress marker.
alphabet = vowels + consonants + special + "+"
def _shift_stress(stressed): | |
new_stressed = "" | |
start = 0 | |
last = 0 | |
# shift stress symbol by one "при+віт" -> "пр+ивіт" | |
while True: | |
plus_position = stressed.find("+", start) | |
if plus_position != -1: | |
new_stressed += ( | |
stressed[last : plus_position - 1] + "+" + stressed[plus_position - 1] | |
) | |
start = plus_position + 1 | |
last = start | |
else: | |
new_stressed += stressed[last:] | |
break | |
return new_stressed | |
def stress_with_model(text: str):
    """Stress ``text`` with the accentor model.

    The input is lower-cased first, so the result does not keep the
    original capitalisation. It is then passed to ``accentor.process``
    with ``mode="plus"`` and that call's result is returned as is.

    NOTE(review): presumably ``mode="plus"`` makes the model mark stress with
    "+" before the stressed vowel -- confirm against ukrainian_accentor.
    """
    return accentor.process(text.lower(), mode="plus")
def stress_dict(sentence: str):
    """Stress ``sentence`` with the dictionary-based stressifier.

    Any "+" already present in the input is removed before the lookup. The
    combining acute accents produced by ``stressify`` are replaced with "+",
    and ``_shift_stress`` then moves each "+" one character to the left.

    NOTE(review): this assumes the stressifier puts the accent right after
    the stressed vowel, so the shift places "+" in front of it -- confirm
    against ukrainian_word_stress.
    """
    # drop existing markers so only the dictionary's marks remain
    unmarked = sentence.replace("+", "")
    accented = stressify(unmarked)
    plus_marked = accented.replace(StressSymbol.CombiningAcuteAccent, "+")
    return _shift_stress(plus_marked)
def sentence_to_stress(sentence: str, stress_function=stress_dict) -> str:
    """Add "+" stress markers to the words of ``sentence``.

    Steps:
      1. Remember which space-separated words already contain a "+" so the
         caller's own stress placement can be restored at the end.
      2. Run ``stress_function`` over the whole sentence.
      3. Split the result into word tokens and single-character separators
         (any character whose lower-case form is not in ``alphabet``).
      4. For each token:
           - already marked: keep only the first "+";
           - no vowels: leave unchanged;
           - exactly one vowel: put "+" in front of it;
           - several vowels and no mark: use ``stress_with_model``.
      5. Put the caller's pre-stressed words back at their positions.

    Args:
        sentence: input text; "+" inside a word marks a custom stress.
        stress_function: callable taking the sentence and returning it with
            "+" markers added; defaults to ``stress_dict``.

    Returns:
        The sentence with "+" stress markers.
    """
    # save custom stress positions
    orig_words = sentence.split(" ")
    all_stresses = [i for i, word in enumerate(orig_words) if "+" in word]

    # add stress before vowel
    new_stressed = stress_function(sentence)

    # split into tokens: if a letter is not in the alphabet, consider it the
    # end of the word and keep it as a separate one-character token
    new_list = []
    previous = 0  # start index of the word currently being collected
    for i, letter in enumerate(new_stressed):
        if letter.lower() in alphabet:
            continue
        if previous != i:
            new_list.append(new_stressed[previous:i])
        new_list.append(letter)
        previous = i + 1
    # add remainder
    if previous != len(new_stressed):
        new_list.append(new_stressed[previous:])

    # add stress to words the stress function left unmarked
    for word_index, element in enumerate(new_list):
        if "+" in element:
            if element.count("+") > 1:
                # keep only the first marker in the word
                first = element.find("+")
                new_list[word_index] = element[: first + 1] + element[
                    first + 1 :
                ].replace("+", "")
            continue

        vowel_flags = [letter in vowels for letter in element.lower()]
        vowel_count = sum(vowel_flags)
        if vowel_count == 1:
            # single-vowel word: the only vowel carries the stress
            vowel_index = vowel_flags.index(True)
            new_list[word_index] = (
                element[:vowel_index] + "+" + element[vowel_index:]
            )
        elif vowel_count > 1:
            # fall back to the model (note: it lower-cases the word)
            new_list[word_index] = stress_with_model(element)
    new_stressed = "".join(new_list)

    # replace already stressed words with the caller's original spelling;
    # relies on the processed text having the same space-separated word
    # positions as the input
    if all_stresses:
        words = new_stressed.split(" ")
        for stressed in all_stresses:
            words[stressed] = orig_words[stressed]
        return " ".join(words)
    return new_stressed