Spaces:
Running
Running
File size: 3,292 Bytes
de21232 1a75086 c49c056 f9e5028 1a75086 f9e5028 de21232 956ea88 1db5fd0 de21232 f9e5028 c49c056 1a75086 a4249a1 de21232 1a75086 f9e5028 1a75086 9caae98 c49c056 9caae98 de21232 1db5fd0 2ffc7e7 1db5fd0 de21232 0e41b64 de21232 0e41b64 03e1349 de21232 0e41b64 03e1349 1db5fd0 de21232 a4249a1 2ffc7e7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
from typing import List
from ukrainian_word_stress import Stressifier, StressSymbol
import ukrainian_accentor as accentor
# Shared stressifier instance, configured to mark stress with the combining
# acute accent symbol; stress_dict() later swaps that symbol for "+".
stressify = Stressifier(stress_symbol=StressSymbol.CombiningAcuteAccent)
# Lowercase Ukrainian vowel letters.
vowels = "аеєиіїоуюя"
# Lowercase Ukrainian consonant letters (the soft sign "ь" is included here).
consonants = "бвгґджзйклмнпрстфхцчшщь"
# Non-letter characters that are still treated as part of a word.
special = "'-"
# Every character that may appear inside a word token, including the "+"
# stress mark; any other character is treated as a word boundary.
alphabet = "".join((vowels, consonants, special, "+"))
def _shift_stress(stressed):
    """Move every "+" stress mark one character to the left.

    The input carries the mark *after* the stressed character; the output
    carries it *before* that character, e.g. "при+віт" -> "пр+ивіт".

    A "+" that has no unprocessed character in front of it (a "+" at the very
    start of the string, or one directly following another "+") cannot be
    shifted and is kept where it is.

    Args:
        stressed: Text in which "+" follows each stressed character.

    Returns:
        The text with each "+" swapped with the character preceding it.
    """
    pieces = []
    last = 0  # index of the first character not yet copied to the output
    while True:
        plus_position = stressed.find("+", last)
        if plus_position == -1:
            # No more marks: copy the remainder unchanged.
            pieces.append(stressed[last:])
            break
        if plus_position > last:
            # Copy everything up to the character before the mark, then emit
            # the mark followed by that character (i.e. swap the two).
            pieces.append(stressed[last : plus_position - 1])
            pieces.append("+")
            pieces.append(stressed[plus_position - 1])
        else:
            # Nothing precedes the mark, so `plus_position - 1` would either
            # be -1 (wrapping around to the end of the string) or point at a
            # character that was already emitted. Keep the mark in place.
            pieces.append("+")
        last = plus_position + 1
    return "".join(pieces)
def stress_with_model(text: str):
    """Stress ``text`` using the ``ukrainian_accentor`` model.

    The text is lower-cased before it is handed to ``accentor.process``,
    which is called with ``mode="plus"`` (presumably so that stress is
    marked with "+" -- confirm against the accentor documentation).

    Args:
        text: Text to stress.

    Returns:
        Whatever ``accentor.process`` returns for the lower-cased text.
    """
    return accentor.process(text.lower(), mode="plus")
def stress_dict(sentence: str):
    """Stress ``sentence`` with the dictionary-based stressifier.

    Any "+" already present in the input is removed first. The stressifier's
    combining acute accents are then replaced with "+", and each "+" is moved
    one character to the left by ``_shift_stress``.

    Args:
        sentence: Text to stress.

    Returns:
        The sentence with "+" marks as produced by ``_shift_stress``.
    """
    unmarked = sentence.replace("+", "")
    accented = stressify(unmarked)
    plus_marked = accented.replace(StressSymbol.CombiningAcuteAccent, "+")
    return _shift_stress(plus_marked)
def sentence_to_stress(sentence: str, stress_function=stress_dict) -> str:
    """Return ``sentence`` with "+" stress marks added to its words.

    Processing steps:

    1. Remember which space-separated words of the input already contain a
       "+" (stress supplied by the caller).
    2. Run ``stress_function`` over the whole sentence.
    3. Split the result into tokens: runs of characters from ``alphabet``
       are words, every other character is a one-character token.
    4. For each token: keep only the first "+" if it has several; if it has
       no "+", put one before the vowel when there is exactly one vowel, or
       hand the token to ``stress_with_model`` when there are several.
    5. Re-join the tokens and put the caller-stressed words from step 1 back
       at their original space-separated positions.

    Args:
        sentence: Text to stress; words may already carry a "+" mark.
        stress_function: Callable applied to the full sentence in step 2.

    Returns:
        The stressed sentence.
    """
    # Step 1: positions of words the caller has stressed by hand.
    orig_words = sentence.split(" ")
    all_stresses = [
        position for position, word in enumerate(orig_words) if "+" in word
    ]

    # Step 2: first pass over the whole sentence.
    marked = stress_function(sentence)

    # Step 3: tokenise; a character outside the alphabet ends the current word
    # and becomes a token of its own.
    tokens = []
    token_start = 0
    for position, char in enumerate(marked):
        if char.lower() in alphabet:
            continue
        if token_start != position:
            tokens.append(marked[token_start:position])
        tokens.append(char)
        token_start = position + 1
    # Trailing word that was not closed by a separator.
    if token_start != len(marked):
        tokens.append(marked[token_start:])

    # Step 4: make sure every token with vowels carries exactly one mark.
    for index, token in enumerate(tokens):
        if "+" in token:
            if token.count("+") > 1:
                # Keep the first mark and drop all later ones.
                head, mark, tail = token.partition("+")
                tokens[index] = head + mark + tail.replace("+", "")
            continue

        vowel_flags = [char in vowels for char in token.lower()]
        vowel_count = vowel_flags.count(True)
        if vowel_count == 1:
            # A single vowel is necessarily the stressed one.
            vowel_index = vowel_flags.index(True)
            tokens[index] = token[:vowel_index] + "+" + token[vowel_index:]
        elif vowel_count > 1:
            # Still unstressed after the first pass: fall back to the model.
            tokens[index] = stress_with_model(token)

    restressed = "".join(tokens)

    # Step 5: caller-provided stress always wins.
    if not all_stresses:
        return restressed
    words = restressed.split(" ")
    for position in all_stresses:
        words[position] = orig_words[position]
    return " ".join(words)
|