voicevox / test /test_kana_parser.py
2ndelement's picture
init
f1f433f
raw
history blame
25.2 kB
from typing import List
from unittest import TestCase
from voicevox_engine import kana_parser
from voicevox_engine.kana_parser import create_kana
from voicevox_engine.model import AccentPhrase, Mora, ParseKanaError, ParseKanaErrorCode
def parse_kana(text: str) -> List[AccentPhrase]:
    """Parse *text* via ``kana_parser.parse_kana`` and return its result.

    A thin module-level shortcut so the tests below can call ``parse_kana``
    directly.
    """
    return kana_parser.parse_kana(text)
class TestParseKana(TestCase):
    """Tests for parsing kana notation strings into accent phrases.

    Expected fixtures are assembled with the small ``_mora`` / ``_phrase``
    builders below instead of being spelled out field by field, and runs of
    independent cases use ``subTest`` so that one failing input does not
    hide the results of the others.
    """

    # ------------------------------------------------------------------
    # Fixture builders
    # ------------------------------------------------------------------
    @staticmethod
    def _mora(text: str, consonant, vowel: str) -> Mora:
        """Build the ``Mora`` the parser is expected to produce.

        Lengths and pitch are always ``0.0`` in the expected values;
        ``consonant_length`` is ``None`` when there is no consonant.
        """
        return Mora(
            text=text,
            consonant=consonant,
            consonant_length=None if consonant is None else 0.0,
            vowel=vowel,
            vowel_length=0.0,
            pitch=0.0,
        )

    @classmethod
    def _phrase(
        cls,
        text: str,
        consonant,
        vowel: str,
        *,
        pause: bool = False,
        interrogative: bool = False,
    ) -> AccentPhrase:
        """Build a single-mora ``AccentPhrase`` with ``accent=1``.

        Args:
            text: Kana text of the only mora.
            consonant: Consonant of the mora, or ``None``.
            vowel: Vowel of the mora.
            pause: When true, attach a ``、`` pause mora (vowel ``"pau"``).
            interrogative: When true, pass ``is_interrogative=True``.
        """
        # ``is_interrogative`` is only passed when it is True, so that the
        # non-interrogative fixtures keep relying on AccentPhrase's own
        # default rather than on an explicit value.
        extra = {"is_interrogative": True} if interrogative else {}
        return AccentPhrase(
            moras=[cls._mora(text, consonant, vowel)],
            accent=1,
            pause_mora=cls._mora("、", None, "pau") if pause else None,
            **extra,
        )

    @classmethod
    def _a(cls, **flags) -> AccentPhrase:
        """Expected phrase for ``ア'`` (flags as in ``_phrase``)."""
        return cls._phrase("ア", None, "a", **flags)

    @classmethod
    def _gye(cls, **flags) -> AccentPhrase:
        """Expected phrase for ``ギェ'`` (flags as in ``_phrase``)."""
        return cls._phrase("ギェ", "gy", "e", **flags)

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------
    def test_phrase_length(self):
        """``/`` and ``、`` each split the text into separate accent phrases."""
        cases = [
            ("ア'/ア'", 2),
            ("ア'、ア'", 2),
            ("ア'/ア'/ア'/ア'/ア'", 5),
            ("ス'", 1),
            ("_ス'", 1),
            ("ギェ'", 1),
            ("ギェ'、ギェ'/ギェ'", 3),
        ]
        for text, expected_count in cases:
            with self.subTest(text=text):
                self.assertEqual(len(parse_kana(text)), expected_count)

    def test_accent(self):
        """``accent`` is the mora position of the ``'`` marker; ``_`` does not shift it."""
        cases = [
            ("シャ'シシュシェショ", 1),
            ("シャ'_シシュシェショ", 1),
            ("シャシ'シュシェショ", 2),
            ("シャ_シ'シュシェショ", 2),
            ("シャシシュ'シェショ", 3),
            ("シャ_シシュ'シェショ", 3),
            ("シャシシュシェショ'", 5),
            ("シャ_シシュシェショ'", 5),
        ]
        for text, expected_accent in cases:
            with self.subTest(text=text):
                self.assertEqual(parse_kana(text)[0].accent, expected_accent)

    def test_mora_length(self):
        """Neither ``'`` nor ``_`` counts as a mora: every input has 5 moras."""
        texts = [
            "シャ'シシュシェショ",
            "シャ'_シシュシェショ",
            "シャシ'シュシェショ",
            "シャ_シ'シュシェショ",
            "シャシシュシェショ'",
            "シャ_シシュシェショ'",
        ]
        for text in texts:
            with self.subTest(text=text):
                self.assertEqual(len(parse_kana(text)[0].moras), 5)

    def test_pause(self):
        """Only a phrase followed by ``、`` gets a ``pause_mora``."""
        # Parse each input once and inspect both phrases of the result.
        slash_separated = parse_kana("ア'/ア'")
        self.assertIsNone(slash_separated[0].pause_mora)
        self.assertIsNone(slash_separated[1].pause_mora)

        comma_separated = parse_kana("ア'、ア'")
        self.assertIsNotNone(comma_separated[0].pause_mora)
        self.assertIsNone(comma_separated[1].pause_mora)

    def test_unvoice(self):
        """A ``_`` prefix yields the upper-case vowel ``U`` instead of ``u``."""
        self.assertEqual(parse_kana("ス'")[0].moras[0].vowel, "u")
        self.assertEqual(parse_kana("_ス'")[0].moras[0].vowel, "U")

    def test_roundtrip(self):
        """``create_kana(parse_kana(text))`` reproduces ``text`` unchanged."""
        texts = [
            "コンニチワ'",
            "ワタシワ'/シャチョオデ'_ス",
            "トテモ'、エラ'インデス",
            "ヲ'",
            "ェ'",
        ]
        for text in texts:
            with self.subTest(text=text):
                self.assertEqual(create_kana(parse_kana(text)), text)

    def _accent_phrase_marks_base(
        self, text: str, expected_accent_phrases: List[AccentPhrase]
    ) -> None:
        """Parse *text* and assert the result equals *expected_accent_phrases*."""
        accent_phrases = kana_parser.parse_kana(text)
        self.assertEqual(expected_accent_phrases, accent_phrases)

    def test_accent_phrase_marks(self):
        """Full structural comparison for the ``/``, ``、`` and ``_`` marks."""
        a, gye = self._a, self._gye
        cases = [
            ("ア'/ア'", [a(), a()]),
            ("ア'、ア'", [a(pause=True), a()]),
            ("ア'/ア'/ア'/ア'/ア'", [a(), a(), a(), a(), a()]),
            ("ス'", [self._phrase("ス", "s", "u")]),
            ("_ス'", [self._phrase("ス", "s", "U")]),
            ("ギェ'", [gye()]),
            ("ギェ'、ギェ'/ギェ'", [gye(pause=True), gye(), gye()]),
        ]
        for text, expected_accent_phrases in cases:
            with self.subTest(text=text):
                self._accent_phrase_marks_base(
                    text=text,
                    expected_accent_phrases=expected_accent_phrases,
                )

    def test_interrogative_accent_phrase_marks(self):
        """A trailing ``?`` on a phrase sets ``is_interrogative`` on that phrase."""
        a, gye = self._a, self._gye
        cases = [
            ("ア'?", [a(interrogative=True)]),
            (
                "ギェ'、ギェ'/ギェ'?",
                [gye(pause=True), gye(), gye(interrogative=True)],
            ),
            (
                "ア'、ア'?、ア'?/ア'?",
                [
                    a(pause=True),
                    a(pause=True, interrogative=True),
                    a(interrogative=True),
                    a(interrogative=True),
                ],
            ),
        ]
        for text, expected_accent_phrases in cases:
            with self.subTest(text=text):
                self._accent_phrase_marks_base(
                    text=text,
                    expected_accent_phrases=expected_accent_phrases,
                )
class TestParseKanaException(TestCase):
    """Checks the ``ParseKanaError`` raised for malformed kana text."""

    def _assert_error_code(self, kana: str, code: ParseKanaErrorCode):
        """Assert that parsing *kana* raises ``ParseKanaError`` with *code*."""
        with self.assertRaises(ParseKanaError) as caught:
            parse_kana(kana)
        self.assertEqual(caught.exception.errcode, code)

    def test_exceptions(self):
        """Each malformed input maps to its specific error code and details."""
        # Inputs for which only the error code is checked.
        code_only_cases = (
            ("アクセント", ParseKanaErrorCode.ACCENT_NOTFOUND),
            ("'アクセント", ParseKanaErrorCode.ACCENT_TOP),
            ("ア'ク'セント", ParseKanaErrorCode.ACCENT_TWICE),
            ("ひ'らがな", ParseKanaErrorCode.UNKNOWN_TEXT),
            ("__ス'", ParseKanaErrorCode.UNKNOWN_TEXT),
            ("ア'/", ParseKanaErrorCode.EMPTY_PHRASE),
            ("/ア'", ParseKanaErrorCode.EMPTY_PHRASE),
            ("", ParseKanaErrorCode.EMPTY_PHRASE),
        )
        for kana, code in code_only_cases:
            self._assert_error_code(kana, code)

        # The error also carries the offending phrase text.
        with self.assertRaises(ParseKanaError) as caught:
            parse_kana("ヒト'ツメ/フタツメ")
        missing_accent = caught.exception
        self.assertEqual(missing_accent.errcode, ParseKanaErrorCode.ACCENT_NOTFOUND)
        self.assertEqual(missing_accent.kwargs, {"text": "フタツメ"})

        # The error also carries the position of the empty phrase.
        with self.assertRaises(ParseKanaError) as caught:
            parse_kana("ア'/")
        empty_phrase = caught.exception
        self.assertEqual(empty_phrase.errcode, ParseKanaErrorCode.EMPTY_PHRASE)
        self.assertEqual(empty_phrase.kwargs, {"position": "2"})

        # An interrogation mark anywhere but the end of a phrase is rejected.
        with self.assertRaises(ParseKanaError) as caught:
            kana_parser.parse_kana("ア?ア'")
        misplaced_mark = caught.exception
        self.assertEqual(
            misplaced_mark.errcode,
            ParseKanaErrorCode.INTERROGATION_MARK_NOT_AT_END,
        )
class TestCreateKana(TestCase):
    """Checks the kana text that ``create_kana`` builds from accent phrases."""

    def test_create_kana_interrogative(self):
        """The output gains a trailing ``?`` once the last phrase is interrogative."""

        def mora(text, consonant, vowel):
            # Every ordinary mora in these fixtures uses 2.5 for its lengths
            # and pitch; consonant_length is None when there is no consonant.
            return Mora(
                text=text,
                consonant=consonant,
                consonant_length=None if consonant is None else 2.5,
                vowel=vowel,
                vowel_length=2.5,
                pitch=2.5,
            )

        def check(make_phrases, plain_kana, interrogative_kana):
            # First the fixture as built (is_interrogative=False everywhere).
            self.assertEqual(create_kana(make_phrases()), plain_kana)
            # Then a fresh fixture with only the last phrase flagged.
            flagged = make_phrases()
            flagged[-1].is_interrogative = True
            self.assertEqual(create_kana(flagged), interrogative_kana)

        def koreha_arimasuka_accent_phrases():
            return [
                AccentPhrase(
                    moras=[
                        mora("コ", "k", "o"),
                        mora("レ", "r", "e"),
                        mora("ワ", "w", "a"),
                    ],
                    accent=3,
                    pause_mora=None,
                    is_interrogative=False,
                ),
                AccentPhrase(
                    moras=[
                        mora("ア", None, "a"),
                        mora("リ", "r", "i"),
                        mora("マ", "m", "a"),
                        mora("ス", "s", "U"),
                        mora("カ", "k", "a"),
                    ],
                    accent=3,
                    pause_mora=None,
                    is_interrogative=False,
                ),
            ]

        check(
            koreha_arimasuka_accent_phrases,
            "コレワ'/アリマ'_スカ",
            "コレワ'/アリマ'_スカ?",
        )

        def kya_accent_phrases():
            return [
                AccentPhrase(
                    moras=[
                        mora("キャ", "ky", "a"),
                        # This mora has its own length/pitch values, so it is
                        # written out in full instead of using mora().
                        Mora(
                            text="ッ",
                            consonant=None,
                            consonant_length=None,
                            vowel="cl",
                            vowel_length=0.1,
                            pitch=0,
                        ),
                    ],
                    accent=1,
                    pause_mora=None,
                    is_interrogative=False,
                ),
            ]

        check(kya_accent_phrases, "キャ'ッ", "キャ'ッ?")