File size: 3,292 Bytes
de21232
1a75086
c49c056
f9e5028
1a75086
f9e5028
de21232
 
956ea88
1db5fd0
de21232
f9e5028
c49c056
1a75086
 
 
a4249a1
de21232
1a75086
 
 
 
 
 
 
 
f9e5028
1a75086
 
9caae98
 
 
c49c056
 
 
 
 
 
 
 
 
 
 
 
 
9caae98
 
 
 
 
 
 
 
 
 
de21232
 
1db5fd0
 
 
 
 
 
 
 
 
 
 
 
 
 
2ffc7e7
1db5fd0
de21232
0e41b64
de21232
 
0e41b64
 
03e1349
 
 
de21232
 
 
 
 
 
0e41b64
 
03e1349
1db5fd0
de21232
a4249a1
 
 
 
 
 
2ffc7e7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
from typing import List
from ukrainian_word_stress import Stressifier, StressSymbol
import ukrainian_accentor as accentor

# Dictionary-based stresser from ``ukrainian_word_stress``, configured to use
# the combining acute accent as its stress symbol (stress_dict later swaps
# that symbol for "+").
stressify = Stressifier(stress_symbol=StressSymbol.CombiningAcuteAccent)

# Lowercase Ukrainian vowel letters; used to count vowels inside a word.
vowels = "аеєиіїоуюя"
# Lowercase Ukrainian consonant letters; the soft sign "ь" is listed here too
# so that it is treated as part of a word.
consonants = "бвгґджзйклмнпрстфхцчшщь"
# Apostrophe and hyphen are treated as word-internal characters.
special = "'-"
# Every character that may appear inside a word, including the "+" stress
# marker. sentence_to_stress treats any character outside this set as a
# word boundary.
alphabet = vowels + consonants + special + "+"


def _shift_stress(stressed):
    new_stressed = ""
    start = 0
    last = 0

    # shift stress symbol by one "при+віт" -> "пр+ивіт"
    while True:
        plus_position = stressed.find("+", start)
        if plus_position != -1:
            new_stressed += (
                stressed[last : plus_position - 1] + "+" + stressed[plus_position - 1]
            )
            start = plus_position + 1
            last = start
        else:
            new_stressed += stressed[last:]
            break
    return new_stressed


def stress_with_model(text: str):
    """Lower-case *text* and return ``accentor.process`` output in "plus" mode."""
    return accentor.process(text.lower(), mode="plus")


def stress_dict(sentence: str):
    """Stress *sentence* with the dictionary-based ``stressify`` callable.

    Any "+" already in the input is dropped first. The combining acute
    accents produced by ``stressify`` are rewritten as "+", and each "+" is
    then shifted one character left by ``_shift_stress``.
    """
    without_marks = sentence.replace("+", "")
    accented = stressify(without_marks)
    plus_marked = accented.replace(StressSymbol.CombiningAcuteAccent, "+")
    return _shift_stress(plus_marked)


def sentence_to_stress(sentence: str, stress_function=stress_dict) -> str:
    """Return *sentence* with a "+" stress marker placed in its words.

    Steps:
      1. Remember which space-separated words already contain a "+".
      2. Run ``stress_function`` over the whole sentence.
      3. Split the result into tokens: runs of ``alphabet`` characters, and
         every other character as a token of its own.
      4. For tokens without a marker: a single-vowel token gets "+" before
         its vowel, a multi-vowel token is passed to ``stress_with_model``.
         Tokens with several markers keep only the first one.
      5. Words remembered in step 1 are restored verbatim from the input.

    Args:
        sentence: Input text; "+" inside a word marks a user-supplied stress.
        stress_function: Callable that takes the sentence and returns it
            with "+" markers; defaults to ``stress_dict``.

    Returns:
        The stressed sentence.
    """
    # positions of words that carry a user-supplied stress
    orig_words = sentence.split(" ")
    all_stresses = [idx for idx, word in enumerate(orig_words) if "+" in word]

    new_stressed = stress_function(sentence)

    # tokenise: any character outside the alphabet ends the current word and
    # becomes a token of its own
    tokens = []
    token_start = 0
    for pos, char in enumerate(new_stressed):
        if char.lower() in alphabet:
            continue
        if token_start < pos:
            tokens.append(new_stressed[token_start:pos])
        tokens.append(char)
        token_start = pos + 1
    # trailing word, if the text does not end with a separator
    if token_start != len(new_stressed):
        tokens.append(new_stressed[token_start:])

    for idx, token in enumerate(tokens):
        if "+" in token:
            # already stressed: keep the first marker only
            if token.count("+") > 1:
                head, plus, tail = token.partition("+")
                tokens[idx] = head + plus + tail.replace("+", "")
            continue

        vowel_flags = [char in vowels for char in token.lower()]
        vowel_count = vowel_flags.count(True)
        if vowel_count == 1:
            # single-vowel word: the stress can only fall on that vowel
            vowel_at = vowel_flags.index(True)
            tokens[idx] = token[:vowel_at] + "+" + token[vowel_at:]
        elif vowel_count > 1:
            # the stress function gave no marker for this word; ask the model
            tokens[idx] = stress_with_model(token)

    new_stressed = "".join(tokens)

    if not all_stresses:
        return new_stressed

    # put back the words whose stress was supplied by the caller
    words = new_stressed.split(" ")
    for idx in all_stresses:
        words[idx] = orig_words[idx]
    return " ".join(words)