Spaces:

2ndelement
/

voicevox

Build error

App Files Files Community

voicevox / test /test_synthesis_engine_base.py

2ndelement

init

f1f433f over 1 year ago

raw

history blame

12.6 kB

	from typing import List, Union
	from unittest import TestCase
	from unittest.mock import Mock

	import numpy

	from voicevox_engine.model import AccentPhrase, AudioQuery, Mora
	from voicevox_engine.synthesis_engine import SynthesisEngine


	def yukarin_s_mock(length: int, phoneme_list: numpy.ndarray, speaker_id: numpy.ndarray):
	    """Deterministic stand-in used as the side effect of ``yukarin_s_forward``.

	    The arithmetic is arbitrary and carries no particular meaning; it only
	    has to be reproducible so the tests can hard-code the expected numbers.

	    Args:
	        length: Number of leading entries of ``phoneme_list`` to process.
	        phoneme_list: Indexable sequence of numeric phoneme values.
	        speaker_id: Numeric value added to every output element.

	    Returns:
	        A 1-D ``numpy.ndarray`` holding, for each index below ``length``,
	        ``phoneme_list[i] * 0.0625 + speaker_id`` rounded to two decimals.
	    """
	    mock_values = [
	        round(float(phoneme_list[idx] * 0.0625 + speaker_id), 2)
	        for idx in range(length)
	    ]
	    return numpy.array(mock_values)


	def yukarin_sa_mock(
	    length: int,
	    vowel_phoneme_list: numpy.ndarray,
	    consonant_phoneme_list: numpy.ndarray,
	    start_accent_list: numpy.ndarray,
	    end_accent_list: numpy.ndarray,
	    start_accent_phrase_list: numpy.ndarray,
	    end_accent_phrase_list: numpy.ndarray,
	    speaker_id: numpy.ndarray,
	):
	    """Deterministic stand-in used as the side effect of ``yukarin_sa_forward``.

	    The arithmetic is arbitrary and carries no particular meaning. Each of
	    the six list arguments is read as ``arg[0][i]``, i.e. only its first row
	    is used.

	    Returns:
	        A ``numpy.ndarray`` of shape ``(1, length)`` where element ``i`` is
	        the sum of the six inputs at ``[0][i]`` times 0.0625, plus
	        ``speaker_id``, rounded to two decimals.
	    """

	    def mock_value(idx):
	        # Keep the original left-to-right addition order.
	        feature_sum = (
	            vowel_phoneme_list[0][idx]
	            + consonant_phoneme_list[0][idx]
	            + start_accent_list[0][idx]
	            + end_accent_list[0][idx]
	            + start_accent_phrase_list[0][idx]
	            + end_accent_phrase_list[0][idx]
	        )
	        return round(float(feature_sum * 0.0625 + speaker_id), 2)

	    mock_values = [mock_value(idx) for idx in range(length)]
	    # Prepend an axis so the output has the same leading dimension as the inputs.
	    return numpy.array(mock_values)[numpy.newaxis]


	def decode_mock(
	    length: int,
	    phoneme_size: int,
	    f0: numpy.ndarray,
	    phoneme: numpy.ndarray,
	    speaker_id: Union[numpy.ndarray, int],
	):
	    """Deterministic stand-in used as the side effect of ``decode_forward``.

	    The arithmetic is arbitrary and carries no particular meaning.

	    Args:
	        length: Number of frames to process.
	        phoneme_size: Divisor applied to the index of the entry equal to 1.
	        f0: Indexed as ``f0[i][0]`` for each frame ``i``.
	        phoneme: ``phoneme[i]`` is searched for the entry equal to 1.
	        speaker_id: Scalar or single-element array added to every value.

	    Returns:
	        A 1-D ``numpy.ndarray`` of ``length * 256`` floats; each frame's value
	        is repeated 256 times in a row.

	    Raises:
	        ValueError: If a frame's computed value is not exactly one element,
	            e.g. ``phoneme[i]`` does not contain exactly one entry equal to 1.
	    """
	    # decode forward yields 256 times as many samples as `length`, so each
	    # frame's value is emitted 256 times.
	    samples_per_frame = 256
	    result = []
	    for i in range(length):
	        hot_index = numpy.where(phoneme[i] == 1)[0]
	        frame_value = f0[i][0] * (hot_index / phoneme_size) + speaker_id
	        # `frame_value` is a 1-element array; calling float() on it directly
	        # is deprecated in recent NumPy, so extract the element explicitly.
	        sample = float(numpy.asarray(frame_value).item())
	        # The value does not depend on the repeat index: compute it once.
	        result.extend([sample] * samples_per_frame)
	    return numpy.array(result)


	def koreha_arimasuka_base_expected():
	    """Build the baseline expectation for the "これはありますか" test cases.

	    Returns:
	        A freshly constructed list of two ``AccentPhrase`` objects (three and
	        five moras, both with ``accent=3``, no pause mora and
	        ``is_interrogative=False``). A new list is built on every call, so
	        callers can mutate the result freely.
	    """

	    def build_moras(rows):
	        return [
	            Mora(
	                text=text,
	                consonant=consonant,
	                consonant_length=consonant_length,
	                vowel=vowel,
	                vowel_length=vowel_length,
	                pitch=pitch,
	            )
	            for text, consonant, consonant_length, vowel, vowel_length, pitch in rows
	        ]

	    # Columns: text, consonant, consonant_length, vowel, vowel_length, pitch
	    first_phrase_rows = [
	        ("コ", "k", 2.44, "o", 2.88, 4.38),
	        ("レ", "r", 3.06, "e", 1.88, 4.0),
	        ("ワ", "w", 3.62, "a", 1.44, 4.19),
	    ]
	    second_phrase_rows = [
	        ("ア", None, None, "a", 1.44, 1.44),
	        ("リ", "r", 3.06, "i", 2.31, 4.44),
	        ("マ", "m", 2.62, "a", 1.44, 3.12),
	        ("ス", "s", 3.19, "U", 1.38, 0.0),
	        ("カ", "k", 2.44, "a", 1.44, 2.94),
	    ]

	    return [
	        AccentPhrase(
	            moras=build_moras(rows),
	            accent=3,
	            pause_mora=None,
	            is_interrogative=False,
	        )
	        for rows in (first_phrase_rows, second_phrase_rows)
	    ]


	def create_mock_query(accent_phrases):
	    """Wrap ``accent_phrases`` in an ``AudioQuery`` with fixed settings.

	    Every field other than ``accent_phrases`` is a constant chosen for the
	    tests (scales, pre/post phoneme lengths, sampling rate, mono output and
	    an empty ``kana``).
	    """
	    fixed_settings = {
	        "speedScale": 1,
	        "pitchScale": 0,
	        "intonationScale": 1,
	        "volumeScale": 1,
	        "prePhonemeLength": 0.1,
	        "postPhonemeLength": 0.1,
	        "outputSamplingRate": 24000,
	        "outputStereo": False,
	        "kana": "",
	    }
	    return AudioQuery(accent_phrases=accent_phrases, **fixed_settings)


	class MockCore:
	    """Minimal core double passed to ``SynthesisEngine`` in these tests.

	    The three ``*_forward`` attributes are Mock objects defined on the class,
	    so all instances share them together with their recorded calls.
	    """

	    yukarin_s_forward = Mock(side_effect=yukarin_s_mock)
	    yukarin_sa_forward = Mock(side_effect=yukarin_sa_mock)
	    decode_forward = Mock(side_effect=decode_mock)

	    def metas(self) -> str:
	        """Return an empty string in place of real metadata."""
	        return ""

	    def supported_devices(self) -> str:
	        """Return an empty string in place of real device information."""
	        return ""

	    def is_model_loaded(self, speaker_id) -> bool:
	        """Report every speaker's model as loaded, whatever ``speaker_id`` is."""
	        return True


	class TestSynthesisEngineBase(TestCase):
	    """Tests of ``SynthesisEngine`` interrogative handling against ``MockCore``."""

	    def setUp(self):
	        super().setUp()
	        self.synthesis_engine = SynthesisEngine(
	            core=MockCore(),
	        )
	        # Replace the synthesis step with a Mock so the tests can inspect the
	        # query handed to it.
	        self.synthesis_engine._synthesis_impl = Mock()

	    @staticmethod
	    def _single_mora_expected(**mora_fields):
	        """Return a one-phrase, one-mora expectation (accent=1, not interrogative)."""
	        return [
	            AccentPhrase(
	                moras=[Mora(**mora_fields)],
	                accent=1,
	                pause_mora=None,
	                is_interrogative=False,
	            )
	        ]

	    @staticmethod
	    def _append_upspeak_mora(accent_phrases, text, vowel):
	        """Append the extra mora expected when upspeak is applied.

	        The mora is added to the last accent phrase, has no consonant, a
	        vowel length of 0.15 and a pitch 0.3 above that phrase's last mora.
	        """
	        last_phrase = accent_phrases[-1]
	        last_phrase.moras += [
	            Mora(
	                text=text,
	                consonant=None,
	                consonant_length=None,
	                vowel=vowel,
	                vowel_length=0.15,
	                pitch=last_phrase.moras[-1].pitch + 0.3,
	            )
	        ]

	    def create_accent_phrases_test_base(self, text: str, expected: List[AccentPhrase]):
	        """Assert that ``create_accent_phrases(text, 1)`` equals ``expected``."""
	        actual = self.synthesis_engine.create_accent_phrases(text, 1)
	        self.assertEqual(
	            expected,
	            actual,
	            "case(text:" + text + ")",
	        )

	    def create_synthesis_test_base(
	        self,
	        text: str,
	        expected: List[AccentPhrase],
	        enable_interrogative_upspeak: bool,
	    ):
	        """Verify whether interrogative mora processing is applied at synthesis time.
	        (https://github.com/VOICEVOX/voicevox_engine/issues/272#issuecomment-1022610866)

	        The case runs inside ``subTest`` so that one failing case does not
	        hide the cases that follow it in the same test method.
	        """
	        synthesis_impl = self.synthesis_engine._synthesis_impl
	        with self.subTest(
	            text=text, enable_interrogative_upspeak=enable_interrogative_upspeak
	        ):
	            accent_phrases = self.synthesis_engine.create_accent_phrases(text, 1)
	            query = create_mock_query(accent_phrases=accent_phrases)
	            # Forget calls recorded by earlier cases; otherwise a case in which
	            # _synthesis_impl is never called would be checked against a
	            # previous case's query.
	            synthesis_impl.reset_mock()
	            self.synthesis_engine.synthesis(
	                query, 0, enable_interrogative_upspeak=enable_interrogative_upspeak
	            )
	            self.assertTrue(
	                synthesis_impl.called,
	                "case(text:" + text + ")",
	            )
	            # Check the query given as the first argument of _synthesis_impl.
	            actual = synthesis_impl.call_args[0][0].accent_phrases

	            self.assertEqual(
	                expected,
	                actual,
	                "case(text:" + text + ")",
	            )

	    def test_create_accent_phrases(self):
	        """Interrogative mora processing is not done when creating accent_phrases.
	        (https://github.com/VOICEVOX/voicevox_engine/issues/272#issuecomment-1022610866)
	        """
	        expected = koreha_arimasuka_base_expected()
	        expected[-1].is_interrogative = True
	        self.create_accent_phrases_test_base(text="これはありますか？", expected=expected)

	    def test_synthesis_interrogative(self):
	        """Check the query passed to synthesis for texts with and without "？".

	        Each text is checked in three variants: without "？", with "？" and
	        upspeak enabled, and with "？" and upspeak disabled.
	        """
	        # --- "これはありますか" ---
	        expected = koreha_arimasuka_base_expected()
	        expected[-1].is_interrogative = True
	        self._append_upspeak_mora(expected, text="ア", vowel="a")
	        self.create_synthesis_test_base(
	            text="これはありますか？",
	            expected=expected,
	            enable_interrogative_upspeak=True,
	        )

	        expected = koreha_arimasuka_base_expected()
	        expected[-1].is_interrogative = True
	        self.create_synthesis_test_base(
	            text="これはありますか？",
	            expected=expected,
	            enable_interrogative_upspeak=False,
	        )

	        expected = koreha_arimasuka_base_expected()
	        self.create_synthesis_test_base(
	            text="これはありますか",
	            expected=expected,
	            enable_interrogative_upspeak=True,
	        )

	        # --- "ん" ---
	        def nn_base_expected():
	            return self._single_mora_expected(
	                text="ン",
	                consonant=None,
	                consonant_length=None,
	                vowel="N",
	                vowel_length=1.25,
	                pitch=1.44,
	            )

	        expected = nn_base_expected()
	        self.create_synthesis_test_base(
	            text="ん",
	            expected=expected,
	            enable_interrogative_upspeak=True,
	        )

	        expected = nn_base_expected()
	        expected[-1].is_interrogative = True
	        self._append_upspeak_mora(expected, text="ン", vowel="N")
	        self.create_synthesis_test_base(
	            text="ん？",
	            expected=expected,
	            enable_interrogative_upspeak=True,
	        )

	        expected = nn_base_expected()
	        expected[-1].is_interrogative = True
	        self.create_synthesis_test_base(
	            text="ん？",
	            expected=expected,
	            enable_interrogative_upspeak=False,
	        )

	        # --- "っ": no extra mora is expected even with upspeak enabled ---
	        def ltu_base_expected():
	            return self._single_mora_expected(
	                text="ッ",
	                consonant=None,
	                consonant_length=None,
	                vowel="cl",
	                vowel_length=1.69,
	                pitch=0.0,
	            )

	        expected = ltu_base_expected()
	        self.create_synthesis_test_base(
	            text="っ",
	            expected=expected,
	            enable_interrogative_upspeak=True,
	        )

	        expected = ltu_base_expected()
	        expected[-1].is_interrogative = True
	        self.create_synthesis_test_base(
	            text="っ？",
	            expected=expected,
	            enable_interrogative_upspeak=True,
	        )

	        expected = ltu_base_expected()
	        expected[-1].is_interrogative = True
	        self.create_synthesis_test_base(
	            text="っ？",
	            expected=expected,
	            enable_interrogative_upspeak=False,
	        )

	        # --- "す" ---
	        def su_base_expected():
	            return self._single_mora_expected(
	                text="ス",
	                consonant="s",
	                consonant_length=3.19,
	                vowel="u",
	                vowel_length=3.5,
	                pitch=5.94,
	            )

	        expected = su_base_expected()
	        self.create_synthesis_test_base(
	            text="す",
	            expected=expected,
	            enable_interrogative_upspeak=True,
	        )

	        expected = su_base_expected()
	        expected[-1].is_interrogative = True
	        self._append_upspeak_mora(expected, text="ウ", vowel="u")
	        self.create_synthesis_test_base(
	            text="す？",
	            expected=expected,
	            enable_interrogative_upspeak=True,
	        )

	        expected = su_base_expected()
	        expected[-1].is_interrogative = True
	        self.create_synthesis_test_base(
	            text="す？",
	            expected=expected,
	            enable_interrogative_upspeak=False,
	        )