Spaces:
Sleeping
Sleeping
from typing import List | |
from unittest import TestCase | |
from voicevox_engine import kana_parser | |
from voicevox_engine.kana_parser import create_kana | |
from voicevox_engine.model import AccentPhrase, Mora, ParseKanaError, ParseKanaErrorCode | |
def parse_kana(text: str) -> List[AccentPhrase]:
    """Return whatever ``kana_parser.parse_kana`` produces for ``text``.

    Module-level shortcut so the tests below can call ``parse_kana(...)``
    directly; exceptions raised by the parser propagate unchanged.
    """
    return kana_parser.parse_kana(text)
class TestParseKana(TestCase):
    """Tests for ``parse_kana`` and for its round trip through ``create_kana``.

    Expected values are built with the private ``_mora`` / ``_phrase``
    helpers so each case reads as one line, and every input runs in its own
    ``subTest`` so one failing text does not hide the others.
    """

    # (text, consonant, vowel) specs for the moras used in the expected values.
    _A = ("ア", None, "a")
    _SU = ("ス", "s", "u")
    _SU_UNVOICED = ("ス", "s", "U")
    _GYE = ("ギェ", "gy", "e")

    @staticmethod
    def _mora(text, consonant, vowel) -> Mora:
        """Build a ``Mora`` with ``vowel_length`` and ``pitch`` of ``0.0``.

        ``consonant`` may be ``None``; ``consonant_length`` is then ``None``
        too, otherwise it is ``0.0``.
        """
        return Mora(
            text=text,
            consonant=consonant,
            consonant_length=None if consonant is None else 0.0,
            vowel=vowel,
            vowel_length=0.0,
            pitch=0.0,
        )

    @classmethod
    def _phrase(
        cls, text, consonant, vowel, *, pause=False, interrogative=False
    ) -> AccentPhrase:
        """Build a single-mora ``AccentPhrase`` with ``accent=1``.

        Args:
            text, consonant, vowel: forwarded to ``_mora`` for the only mora.
            pause: when true, ``pause_mora`` is a "、" mora with vowel "pau";
                otherwise it is ``None``.
            interrogative: when true, ``is_interrogative=True`` is passed;
                otherwise the argument is omitted so the model's own default
                applies.
        """
        extra = {"is_interrogative": True} if interrogative else {}
        return AccentPhrase(
            moras=[cls._mora(text, consonant, vowel)],
            accent=1,
            pause_mora=cls._mora("、", None, "pau") if pause else None,
            **extra,
        )

    def test_phrase_length(self):
        """Check how many accent phrases each text is parsed into."""
        cases = [
            ("ア'/ア'", 2),
            ("ア'、ア'", 2),
            ("ア'/ア'/ア'/ア'/ア'", 5),
            ("ス'", 1),
            ("_ス'", 1),
            ("ギェ'", 1),
            ("ギェ'、ギェ'/ギェ'", 3),
        ]
        for text, expected_count in cases:
            with self.subTest(text=text):
                self.assertEqual(len(parse_kana(text)), expected_count)

    def test_accent(self):
        """Check the ``accent`` value of the first phrase for each text."""
        cases = [
            ("シャ'シシュシェショ", 1),
            ("シャ'_シシュシェショ", 1),
            ("シャシ'シュシェショ", 2),
            ("シャ_シ'シュシェショ", 2),
            ("シャシシュ'シェショ", 3),
            ("シャ_シシュ'シェショ", 3),
            ("シャシシュシェショ'", 5),
            ("シャ_シシュシェショ'", 5),
        ]
        for text, expected_accent in cases:
            with self.subTest(text=text):
                self.assertEqual(parse_kana(text)[0].accent, expected_accent)

    def test_mora_length(self):
        """Check that the first phrase of each text holds five moras."""
        texts = [
            "シャ'シシュシェショ",
            "シャ'_シシュシェショ",
            "シャシ'シュシェショ",
            "シャ_シ'シュシェショ",
            "シャシシュシェショ'",
            "シャ_シシュシェショ'",
        ]
        for text in texts:
            with self.subTest(text=text):
                self.assertEqual(len(parse_kana(text)[0].moras), 5)

    def test_pause(self):
        """Check ``pause_mora``: set only on a phrase followed by "、"."""
        slash_split = parse_kana("ア'/ア'")
        self.assertIsNone(slash_split[0].pause_mora)
        self.assertIsNone(slash_split[1].pause_mora)

        comma_split = parse_kana("ア'、ア'")
        self.assertIsNotNone(comma_split[0].pause_mora)
        self.assertIsNone(comma_split[1].pause_mora)

    def test_unvoice(self):
        """Check the vowel of "ス" with and without the "_" prefix."""
        self.assertEqual(parse_kana("ス'")[0].moras[0].vowel, "u")
        self.assertEqual(parse_kana("_ス'")[0].moras[0].vowel, "U")

    def test_roundtrip(self):
        """Check that ``create_kana(parse_kana(text))`` gives back ``text``."""
        texts = [
            "コンニチワ'",
            "ワタシワ'/シャチョオデ'_ス",
            "トテモ'、エラ'インデス",
            "ヲ'",
            "ェ'",
        ]
        for text in texts:
            with self.subTest(text=text):
                self.assertEqual(create_kana(parse_kana(text)), text)

    def _accent_phrase_marks_base(
        self, text: str, expected_accent_phrases: List[AccentPhrase]
    ) -> None:
        """Assert that ``kana_parser.parse_kana(text)`` equals the expected list."""
        accent_phrases = kana_parser.parse_kana(text)
        self.assertEqual(expected_accent_phrases, accent_phrases)

    def test_accent_phrase_marks(self):
        """Compare the full parse result for texts using "/", "、" and "_"."""
        phrase = self._phrase
        cases = [
            ("ア'/ア'", [phrase(*self._A), phrase(*self._A)]),
            ("ア'、ア'", [phrase(*self._A, pause=True), phrase(*self._A)]),
            ("ア'/ア'/ア'/ア'/ア'", [phrase(*self._A) for _ in range(5)]),
            ("ス'", [phrase(*self._SU)]),
            ("_ス'", [phrase(*self._SU_UNVOICED)]),
            ("ギェ'", [phrase(*self._GYE)]),
            (
                "ギェ'、ギェ'/ギェ'",
                [
                    phrase(*self._GYE, pause=True),
                    phrase(*self._GYE),
                    phrase(*self._GYE),
                ],
            ),
        ]
        for text, expected_accent_phrases in cases:
            with self.subTest(text=text):
                self._accent_phrase_marks_base(
                    text=text,
                    expected_accent_phrases=expected_accent_phrases,
                )

    def test_interrogative_accent_phrase_marks(self):
        """Compare the full parse result for texts containing "？"."""
        phrase = self._phrase
        cases = [
            ("ア'？", [phrase(*self._A, interrogative=True)]),
            (
                "ギェ'、ギェ'/ギェ'？",
                [
                    phrase(*self._GYE, pause=True),
                    phrase(*self._GYE),
                    phrase(*self._GYE, interrogative=True),
                ],
            ),
            (
                "ア'、ア'？、ア'？/ア'？",
                [
                    phrase(*self._A, pause=True),
                    phrase(*self._A, pause=True, interrogative=True),
                    phrase(*self._A, interrogative=True),
                    phrase(*self._A, interrogative=True),
                ],
            ),
        ]
        for text, expected_accent_phrases in cases:
            with self.subTest(text=text):
                self._accent_phrase_marks_base(
                    text=text,
                    expected_accent_phrases=expected_accent_phrases,
                )
class TestParseKanaException(TestCase):
    """Checks the ``ParseKanaError`` code (and kwargs) raised for bad input."""

    def _assert_error_code(self, kana: str, code: ParseKanaErrorCode):
        """Assert ``parse_kana(kana)`` raises ``ParseKanaError`` with ``errcode == code``."""
        with self.assertRaises(ParseKanaError) as raised:
            parse_kana(kana)
        # Checked after the ``with`` block: nothing inside it runs past the raise.
        self.assertEqual(raised.exception.errcode, code)

    def test_exceptions(self):
        """Run each malformed text and check the reported error details."""
        expected_codes = (
            ("アクセント", ParseKanaErrorCode.ACCENT_NOTFOUND),
            ("'アクセント", ParseKanaErrorCode.ACCENT_TOP),
            ("ア'ク'セント", ParseKanaErrorCode.ACCENT_TWICE),
            ("ひ'らがな", ParseKanaErrorCode.UNKNOWN_TEXT),
            ("__ス'", ParseKanaErrorCode.UNKNOWN_TEXT),
            ("ア'/", ParseKanaErrorCode.EMPTY_PHRASE),
            ("/ア'", ParseKanaErrorCode.EMPTY_PHRASE),
            ("", ParseKanaErrorCode.EMPTY_PHRASE),
        )
        for kana, code in expected_codes:
            self._assert_error_code(kana, code)

        # A phrase without an accent mark: kwargs carry that phrase's text.
        with self.assertRaises(ParseKanaError) as raised:
            parse_kana("ヒト'ツメ/フタツメ")
        missing_accent = raised.exception
        self.assertEqual(missing_accent.errcode, ParseKanaErrorCode.ACCENT_NOTFOUND)
        self.assertEqual(missing_accent.kwargs, {"text": "フタツメ"})

        # An empty phrase after "/": kwargs carry its position as a string.
        with self.assertRaises(ParseKanaError) as raised:
            parse_kana("ア'/")
        empty_phrase = raised.exception
        self.assertEqual(empty_phrase.errcode, ParseKanaErrorCode.EMPTY_PHRASE)
        self.assertEqual(empty_phrase.kwargs, {"position": "2"})

        # "？" somewhere other than the end of the phrase.
        with self.assertRaises(ParseKanaError) as raised:
            kana_parser.parse_kana("ア？ア'")
        misplaced_mark = raised.exception
        self.assertEqual(
            misplaced_mark.errcode,
            ParseKanaErrorCode.INTERROGATION_MARK_NOT_AT_END,
        )
class TestCreateKana(TestCase):
    """Tests for the text ``create_kana`` builds from accent phrases."""

    def test_create_kana_interrogative(self):
        """Check ``create_kana`` output with and without ``is_interrogative``.

        Each fixture is rendered once as built and once after setting
        ``is_interrogative = True`` on its last phrase; the second rendering
        is expected to be the first one followed by "？".
        """

        def timed_mora(text, consonant, vowel):
            # Lengths and pitch are all 2.5; without a consonant the
            # consonant length is None.
            return Mora(
                text=text,
                consonant=consonant,
                consonant_length=2.5 if consonant is not None else None,
                vowel=vowel,
                vowel_length=2.5,
                pitch=2.5,
            )

        def build_koreha_arimasuka():
            # Fresh objects on every call: the caller mutates the last phrase.
            kore_wa = AccentPhrase(
                moras=[
                    timed_mora("コ", "k", "o"),
                    timed_mora("レ", "r", "e"),
                    timed_mora("ワ", "w", "a"),
                ],
                accent=3,
                pause_mora=None,
                is_interrogative=False,
            )
            arimasuka = AccentPhrase(
                moras=[
                    timed_mora("ア", None, "a"),
                    timed_mora("リ", "r", "i"),
                    timed_mora("マ", "m", "a"),
                    timed_mora("ス", "s", "U"),
                    timed_mora("カ", "k", "a"),
                ],
                accent=3,
                pause_mora=None,
                is_interrogative=False,
            )
            return [kore_wa, arimasuka]

        def build_kya():
            # Fresh objects on every call: the caller mutates the last phrase.
            sokuon = Mora(
                text="ッ",
                consonant=None,
                consonant_length=None,
                vowel="cl",
                vowel_length=0.1,
                pitch=0,
            )
            return [
                AccentPhrase(
                    moras=[timed_mora("キャ", "ky", "a"), sokuon],
                    accent=1,
                    pause_mora=None,
                    is_interrogative=False,
                ),
            ]

        fixtures = (
            (build_koreha_arimasuka, "コレワ'/アリマ'_スカ"),
            (build_kya, "キャ'ッ"),
        )
        for build, plain_kana in fixtures:
            declarative = build()
            self.assertEqual(create_kana(declarative), plain_kana)

            question = build()
            question[-1].is_interrogative = True
            self.assertEqual(create_kana(question), plain_kana + "？")