from typing import List
from unittest import TestCase

from voicevox_engine import kana_parser
from voicevox_engine.kana_parser import create_kana
from voicevox_engine.model import AccentPhrase, Mora, ParseKanaError, ParseKanaErrorCode


def parse_kana(text: str) -> List[AccentPhrase]:
    """Return the accent phrases ``kana_parser.parse_kana`` yields for ``text``."""
    return kana_parser.parse_kana(text)


def _build_mora(text, consonant, vowel, length=0.0, pitch=0.0) -> Mora:
    """Build a ``Mora`` fixture.

    ``length`` is used for the vowel length and, when ``consonant`` is not
    ``None``, for the consonant length as well; a mora without a consonant
    gets ``consonant_length=None``.
    """
    consonant_length = None if consonant is None else length
    return Mora(
        text=text,
        consonant=consonant,
        consonant_length=consonant_length,
        vowel=vowel,
        vowel_length=length,
        pitch=pitch,
    )


def _build_pause_mora() -> Mora:
    """Build the pause mora (text ``、``, vowel ``pau``) used by the fixtures."""
    return _build_mora("、", None, "pau")


def _build_phrase(moras, accent=1, pause_mora=None, **extra) -> AccentPhrase:
    """Build an ``AccentPhrase`` fixture.

    ``extra`` is forwarded untouched so that optional fields such as
    ``is_interrogative`` are only passed when a fixture asks for them.
    """
    return AccentPhrase(moras=moras, accent=accent, pause_mora=pause_mora, **extra)


def _single_mora_phrase(text, consonant, vowel, pause=False, **extra) -> AccentPhrase:
    """Build a one-mora phrase with accent 1, optionally followed by a pause."""
    trailing_pause = _build_pause_mora() if pause else None
    return _build_phrase(
        [_build_mora(text, consonant, vowel)], pause_mora=trailing_pause, **extra
    )


def _a_phrase(**options) -> AccentPhrase:
    """Build the expected phrase for a single accented ``ア``."""
    return _single_mora_phrase("ア", None, "a", **options)


def _gye_phrase(**options) -> AccentPhrase:
    """Build the expected phrase for a single accented ``ギェ``."""
    return _single_mora_phrase("ギェ", "gy", "e", **options)


class TestParseKana(TestCase):
    """Checks on the accent phrases produced by ``parse_kana``."""

    def test_phrase_length(self):
        """The number of parsed phrases matches the number of delimited parts."""
        expected_counts = [
            ("ア'/ア'", 2),
            ("ア'、ア'", 2),
            ("ア'/ア'/ア'/ア'/ア'", 5),
            ("ス'", 1),
            ("_ス'", 1),
            ("ギェ'", 1),
            ("ギェ'、ギェ'/ギェ'", 3),
        ]
        for kana, count in expected_counts:
            self.assertEqual(len(parse_kana(kana)), count)

    def test_accent(self):
        """The accent index follows the position of the ``'`` mark."""
        expected_accents = [
            ("シャ'シシュシェショ", 1),
            ("シャ'_シシュシェショ", 1),
            ("シャシ'シュシェショ", 2),
            ("シャ_シ'シュシェショ", 2),
            ("シャシシュ'シェショ", 3),
            ("シャ_シシュ'シェショ", 3),
            ("シャシシュシェショ'", 5),
            ("シャ_シシュシェショ'", 5),
        ]
        for kana, accent in expected_accents:
            self.assertEqual(parse_kana(kana)[0].accent, accent)

    def test_mora_length(self):
        """Accent and ``_`` marks do not change the number of moras."""
        five_mora_texts = [
            "シャ'シシュシェショ",
            "シャ'_シシュシェショ",
            "シャシ'シュシェショ",
            "シャ_シ'シュシェショ",
            "シャシシュシェショ'",
            "シャ_シシュシェショ'",
        ]
        for kana in five_mora_texts:
            self.assertEqual(len(parse_kana(kana)[0].moras), 5)

    def test_pause(self):
        """Only the phrase directly before ``、`` carries a pause mora."""
        slash_first = parse_kana("ア'/ア'")[0]
        self.assertIsNone(slash_first.pause_mora)
        slash_second = parse_kana("ア'/ア'")[1]
        self.assertIsNone(slash_second.pause_mora)
        comma_first = parse_kana("ア'、ア'")[0]
        self.assertIsNotNone(comma_first.pause_mora)
        comma_second = parse_kana("ア'、ア'")[1]
        self.assertIsNone(comma_second.pause_mora)

    def test_unvoice(self):
        """A leading ``_`` yields the upper-case vowel for that mora."""
        plain_vowel = parse_kana("ス'")[0].moras[0].vowel
        self.assertEqual(plain_vowel, "u")
        marked_vowel = parse_kana("_ス'")[0].moras[0].vowel
        self.assertEqual(marked_vowel, "U")

    def test_roundtrip(self):
        """``create_kana(parse_kana(text))`` reproduces ``text``."""
        text_groups = (
            ["コンニチワ'", "ワタシワ'/シャチョオデ'_ス", "トテモ'、エラ'インデス"],
            ["ヲ'", "ェ'"],
        )
        for group in text_groups:
            for kana in group:
                self.assertEqual(create_kana(parse_kana(kana)), kana)

    def _accent_phrase_marks_base(
        self, text: str, expected_accent_phrases: List[AccentPhrase]
    ) -> None:
        """Assert that parsing ``text`` yields exactly ``expected_accent_phrases``."""
        parsed = kana_parser.parse_kana(text)
        self.assertEqual(expected_accent_phrases, parsed)

    def test_accent_phrase_marks(self):
        """Full structural comparison for ``/`` and ``、`` delimited input."""
        cases = [
            ("ア'/ア'", [_a_phrase(), _a_phrase()]),
            ("ア'、ア'", [_a_phrase(pause=True), _a_phrase()]),
            ("ア'/ア'/ア'/ア'/ア'", [_a_phrase() for _ in range(5)]),
            ("ス'", [_single_mora_phrase("ス", "s", "u")]),
            ("_ス'", [_single_mora_phrase("ス", "s", "U")]),
            ("ギェ'", [_gye_phrase()]),
            (
                "ギェ'、ギェ'/ギェ'",
                [_gye_phrase(pause=True), _gye_phrase(), _gye_phrase()],
            ),
        ]
        for kana, expected in cases:
            self._accent_phrase_marks_base(
                text=kana,
                expected_accent_phrases=expected,
            )

    def test_interrogative_accent_phrase_marks(self):
        """Full structural comparison for input containing question marks."""
        cases = [
            ("ア'?", [_a_phrase(is_interrogative=True)]),
            (
                "ギェ'、ギェ'/ギェ'?",
                [
                    _gye_phrase(pause=True),
                    _gye_phrase(),
                    _gye_phrase(is_interrogative=True),
                ],
            ),
            (
                "ア'、ア'?、ア'?/ア'?",
                [
                    _a_phrase(pause=True),
                    _a_phrase(pause=True, is_interrogative=True),
                    _a_phrase(is_interrogative=True),
                    _a_phrase(is_interrogative=True),
                ],
            ),
        ]
        for kana, expected in cases:
            self._accent_phrase_marks_base(
                text=kana,
                expected_accent_phrases=expected,
            )


class TestParseKanaException(TestCase):
    """Checks on the errors ``parse_kana`` raises for malformed input."""

    def _assert_error_code(self, kana: str, code: ParseKanaErrorCode):
        """Assert that parsing ``kana`` raises ``ParseKanaError`` with ``code``."""
        with self.assertRaises(ParseKanaError) as raised:
            parse_kana(kana)
        self.assertEqual(raised.exception.errcode, code)

    def test_exceptions(self):
        """Each kind of malformed input maps to its error code and kwargs."""
        code_cases = [
            ("アクセント", ParseKanaErrorCode.ACCENT_NOTFOUND),
            ("'アクセント", ParseKanaErrorCode.ACCENT_TOP),
            ("ア'ク'セント", ParseKanaErrorCode.ACCENT_TWICE),
            ("ひ'らがな", ParseKanaErrorCode.UNKNOWN_TEXT),
            ("__ス'", ParseKanaErrorCode.UNKNOWN_TEXT),
            ("ア'/", ParseKanaErrorCode.EMPTY_PHRASE),
            ("/ア'", ParseKanaErrorCode.EMPTY_PHRASE),
            ("", ParseKanaErrorCode.EMPTY_PHRASE),
        ]
        for kana, code in code_cases:
            self._assert_error_code(kana, code)

        # The error also reports which phrase text lacked an accent.
        with self.assertRaises(ParseKanaError) as raised:
            parse_kana("ヒト'ツメ/フタツメ")
        failure = raised.exception
        self.assertEqual(failure.errcode, ParseKanaErrorCode.ACCENT_NOTFOUND)
        self.assertEqual(failure.kwargs, {"text": "フタツメ"})

        # The error also reports the position of the empty phrase.
        with self.assertRaises(ParseKanaError) as raised:
            parse_kana("ア'/")
        failure = raised.exception
        self.assertEqual(failure.errcode, ParseKanaErrorCode.EMPTY_PHRASE)
        self.assertEqual(failure.kwargs, {"position": "2"})

        with self.assertRaises(ParseKanaError) as raised:
            kana_parser.parse_kana("ア?ア'")
        self.assertEqual(
            raised.exception.errcode,
            ParseKanaErrorCode.INTERROGATION_MARK_NOT_AT_END,
        )


class TestCreateKana(TestCase):
    """Checks on the kana text ``create_kana`` builds from accent phrases."""

    def test_create_kana_interrogative(self):
        """An interrogative final phrase adds a question mark to the output."""

        def timed_mora(text, consonant, vowel):
            # Moras with non-zero lengths and pitch, unlike the parser fixtures.
            return _build_mora(text, consonant, vowel, length=2.5, pitch=2.5)

        def koreha_arimasuka_accent_phrases():
            # Built fresh on every call because the test mutates the result.
            koreha = [
                timed_mora("コ", "k", "o"),
                timed_mora("レ", "r", "e"),
                timed_mora("ワ", "w", "a"),
            ]
            arimasuka = [
                timed_mora("ア", None, "a"),
                timed_mora("リ", "r", "i"),
                timed_mora("マ", "m", "a"),
                timed_mora("ス", "s", "U"),
                timed_mora("カ", "k", "a"),
            ]
            return [
                _build_phrase(koreha, accent=3, is_interrogative=False),
                _build_phrase(arimasuka, accent=3, is_interrogative=False),
            ]

        def kya_accent_phrases():
            # Built fresh on every call because the test mutates the result.
            kya = [
                timed_mora("キャ", "ky", "a"),
                _build_mora("ッ", None, "cl", length=0.1, pitch=0),
            ]
            return [_build_phrase(kya, accent=1, is_interrogative=False)]

        cases = [
            (koreha_arimasuka_accent_phrases, "コレワ'/アリマ'_スカ", "コレワ'/アリマ'_スカ?"),
            (kya_accent_phrases, "キャ'ッ", "キャ'ッ?"),
        ]
        for make_phrases, plain_kana, question_kana in cases:
            phrases = make_phrases()
            self.assertEqual(create_kana(phrases), plain_kana)

            phrases = make_phrases()
            phrases[-1].is_interrogative = True
            self.assertEqual(create_kana(phrases), question_kana)