Spaces:

robinhad
/

ukrainian-tts

Running

File size: 3,292 Bytes

from typing import List
from ukrainian_word_stress import Stressifier, StressSymbol
import ukrainian_accentor as accentor

# Shared Stressifier instance, configured to mark stress with
# StressSymbol.CombiningAcuteAccent; stress_dict() below swaps that symbol
# for "+" after calling it.
stressify = Stressifier(stress_symbol=StressSymbol.CombiningAcuteAccent)

# Lowercase Ukrainian vowel letters.
vowels = "аеєиіїоуюя"
# Lowercase Ukrainian consonant letters, plus the soft sign "ь".
consonants = "бвгґджзйклмнпрстфхцчшщь"
# Apostrophe and hyphen are treated as in-word characters.
special = "'-"
# Every character that may occur inside a word token; "+" is the stress mark.
alphabet = "".join((vowels, consonants, special, "+"))


def _shift_stress(stressed):
    new_stressed = ""
    start = 0
    last = 0

    # shift stress symbol by one "при+віт" -> "пр+ивіт"
    while True:
        plus_position = stressed.find("+", start)
        if plus_position != -1:
            new_stressed += (
                stressed[last : plus_position - 1] + "+" + stressed[plus_position - 1]
            )
            start = plus_position + 1
            last = start
        else:
            new_stressed += stressed[last:]
            break
    return new_stressed


def stress_with_model(text: str):
    """Lower-case ``text`` and return ``accentor.process`` of it in "plus" mode.

    NOTE(review): "plus" mode presumably marks stress with "+", matching the
    rest of this module; confirm against the ukrainian_accentor docs.
    """
    return accentor.process(text.lower(), mode="plus")


def stress_dict(sentence: str):
    """Stress ``sentence`` with the module-level ``stressify`` instance.

    Any "+" already in the sentence is removed first. The combining acute
    accents in the result are then replaced with "+", and each "+" is moved
    one character to the left by ``_shift_stress``.
    """
    without_marks = sentence.replace("+", "")
    accented = stressify(without_marks)
    plus_marked = accented.replace(StressSymbol.CombiningAcuteAccent, "+")
    return _shift_stress(plus_marked)


def sentence_to_stress(sentence: str, stress_function=stress_dict) -> str:
    """Return ``sentence`` with a "+" stress mark placed in its words.

    Steps:
      1. Remember which space-separated words already contain a "+".
      2. Run ``stress_function`` over the whole sentence.
      3. Split the result into tokens: runs of ``alphabet`` characters, and
         every other character as a token of its own.
      4. For each token:
         - with "+" marks: keep only the first one;
         - with exactly one vowel: put "+" directly before that vowel;
         - with several vowels and no mark: replace the token with
           ``stress_with_model(token)``;
         - with no vowels: leave it unchanged.
      5. Put the words remembered in step 1 back exactly as the caller wrote
         them, matched by their position after splitting on " ".

    Args:
        sentence: Input text; words may already carry a "+" stress mark.
        stress_function: Callable applied to the whole sentence in step 2.

    Returns:
        The stressed sentence.
    """
    # step 1: positions of words the caller stressed by hand
    source_words = sentence.split(" ")
    manual_positions = [
        position for position, word in enumerate(source_words) if "+" in word
    ]

    # step 2: sentence-level stressing
    marked = stress_function(sentence)

    # step 3: any character outside the alphabet ends the current word
    tokens = []
    token_start = 0
    for position, char in enumerate(marked):
        if char.lower() in alphabet:
            continue
        if token_start != position:
            tokens.append(marked[token_start:position])
        tokens.append(char)
        token_start = position + 1
    # trailing word, if the text does not end with a delimiter
    if token_start != len(marked):
        tokens.append(marked[token_start:])

    # step 4: make sure every word with vowels carries exactly one mark
    for position, token in enumerate(tokens):
        if "+" in token:
            if token.count("+") > 1:
                # keep the first mark, drop the rest
                head, mark, tail = token.partition("+")
                tokens[position] = head + mark + tail.replace("+", "")
            continue

        vowel_offsets = [
            offset for offset, char in enumerate(token.lower()) if char in vowels
        ]
        if len(vowel_offsets) == 1:
            only_vowel = vowel_offsets[0]
            tokens[position] = token[:only_vowel] + "+" + token[only_vowel:]
        elif len(vowel_offsets) > 1:
            tokens[position] = stress_with_model(token)

    result = "".join(tokens)

    # step 5: caller-provided stresses win over everything computed above
    if not manual_positions:
        return result
    result_words = result.split(" ")
    for position in manual_positions:
        result_words[position] = source_words[position]
    return " ".join(result_words)