GPT-SoVITS-v2-VC

Running

App Files Files Community

GPT-SoVITS-v2-VC / GPT_SoVITS /text /g2pw /g2pw.py

kevinwang676

Upload folder using huggingface_hub

69cf514 verified 4 months ago

raw

history blame

4.89 kB

	# This code is modified from https://github.com/mozillazg/pypinyin-g2pW

	import pickle
	import os

	from pypinyin.constants import RE_HANS
	from pypinyin.core import Pinyin, Style
	from pypinyin.seg.simpleseg import simple_seg
	from pypinyin.converter import UltimateConverter
	from pypinyin.contrib.tone_convert import to_tone
	from .onnx_api import G2PWOnnxConverter

	# Directory containing this module; the polyphonic data files live beside it.
	current_file_path = os.path.dirname(__file__)

	# In order: the pickled cache, the main dictionary, and the "fix" dictionary.
	CACHE_PATH, PP_DICT_PATH, PP_FIX_DICT_PATH = (
	    os.path.join(current_file_path, data_file)
	    for data_file in ("polyphonic.pickle", "polyphonic.rep", "polyphonic-fix.rep")
	)


	class G2PWPinyin(Pinyin):
	    """``Pinyin`` subclass whose converter is driven by a G2PW ONNX model.

	    The constructor builds a ``G2PWOnnxConverter`` and wraps it in this
	    module's ``Converter``; the result is stored on ``self._converter``
	    directly (the base-class ``__init__`` is not invoked).

	    Args:
	        model_dir: Forwarded to ``G2PWOnnxConverter`` as ``model_dir``.
	        model_source: Forwarded to ``G2PWOnnxConverter`` as ``model_source``.
	        enable_non_tradional_chinese: Forwarded to ``G2PWOnnxConverter``
	            under the same (misspelled) keyword name.
	        v_to_u: Forwarded to ``Converter``.
	        neutral_tone_with_five: Forwarded to ``Converter``.
	        tone_sandhi: Forwarded to ``Converter``.
	        **kwargs: Accepted for signature compatibility; not used.
	    """

	    def __init__(self, model_dir='G2PWModel/', model_source=None,
	                 enable_non_tradional_chinese=True,
	                 v_to_u=False, neutral_tone_with_five=False, tone_sandhi=False, **kwargs):
	        g2pw_model = G2PWOnnxConverter(
	            model_dir=model_dir,
	            style='pinyin',
	            model_source=model_source,
	            enable_non_tradional_chinese=enable_non_tradional_chinese,
	        )
	        converter_options = dict(
	            v_to_u=v_to_u,
	            neutral_tone_with_five=neutral_tone_with_five,
	            tone_sandhi=tone_sandhi,
	        )
	        self._g2pw = g2pw_model
	        self._converter = Converter(g2pw_model, **converter_options)

	    def get_seg(self, **kwargs):
	        """Return ``simple_seg`` as the segmenter; keyword arguments are ignored."""
	        return simple_seg


	class Converter(UltimateConverter):
	    """``UltimateConverter`` that takes Han readings from a g2pw callable.

	    Wherever g2pw yields nothing (for the whole input or for a single
	    character) the inherited ``convert`` is used instead.
	    """

	    def __init__(self, g2pw_instance, v_to_u=False,
	                 neutral_tone_with_five=False,
	                 tone_sandhi=False, **kwargs):
	        """Store *g2pw_instance* and pass every other option to the base class."""
	        super().__init__(
	            v_to_u=v_to_u,
	            neutral_tone_with_five=neutral_tone_with_five,
	            tone_sandhi=tone_sandhi, **kwargs)

	        self._g2pw = g2pw_instance

	    def convert(self, words, style, heteronym, errors, strict, **kwargs):
	        """Convert *words* to a list of per-character pinyin lists.

	        Text matching ``RE_HANS`` goes through ``_to_pinyin``, the inherited
	        ``post_pinyin`` hook and ``convert_styles``; anything else is handed
	        to ``handle_nopinyin``.  The result is always passed through
	        ``_remove_dup_and_empty``.  ``**kwargs`` is accepted but not used.
	        """
	        if not RE_HANS.match(words):
	            collected = []
	            non_hans = self.handle_nopinyin(words, style=style, errors=errors,
	                                            heteronym=heteronym, strict=strict)
	            if non_hans:
	                collected.extend(non_hans)
	            return _remove_dup_and_empty(collected)

	        raw = self._to_pinyin(words, style=style, heteronym=heteronym,
	                              errors=errors, strict=strict)
	        # post_pinyin may return None, meaning "keep the readings as they are".
	        adjusted = self.post_pinyin(words, heteronym, raw)
	        if adjusted is None:
	            adjusted = raw
	        styled = self.convert_styles(
	            adjusted, words, style, heteronym, errors, strict)
	        return _remove_dup_and_empty(styled)

	    def _to_pinyin(self, han, style, heteronym, errors, strict, **kwargs):
	        """Return tone-marked readings for *han*, one list per character.

	        The ``style`` argument is not consulted here: fallbacks are requested
	        in ``Style.TONE`` and g2pw results are run through ``to_tone``.
	        """
	        predictions = self._g2pw(han)

	        # Characters g2pw does not support fall back to pypinyin's original logic.
	        if not predictions:
	            return super().convert(
	                han, Style.TONE, heteronym, errors, strict, **kwargs)

	        readings = []
	        for index, predicted in enumerate(predictions[0]):
	            if predicted is not None:
	                readings.append([to_tone(predicted)])
	                continue
	            # Same fallback, applied to the single unsupported character.
	            fallback = super().convert(
	                han[index], Style.TONE, heteronym, errors, strict, **kwargs)
	            readings.extend(fallback)

	        return readings


	def _remove_dup_items(lst, remove_empty=False):
	    """Return the items of *lst* in first-seen order with duplicates dropped.

	    When *remove_empty* is true, falsy items are dropped as well.
	    Membership is tested with ``in`` on a list, so items need not be hashable.
	    """
	    unique = []
	    for entry in lst:
	        if (remove_empty and not entry) or entry in unique:
	            continue
	        unique.append(entry)
	    return unique


	def _remove_dup_and_empty(lst_list):
	    """De-duplicate every inner list and drop its falsy items.

	    An inner list that ends up empty is replaced by ``['']`` so the outer
	    list keeps one entry per input entry.
	    """
	    return [
	        _remove_dup_items(group, remove_empty=True) or ['']
	        for group in lst_list
	    ]


	def cache_dict(polyphonic_dict, file_path):
	    """Pickle *polyphonic_dict* to *file_path*, overwriting any existing file."""
	    with open(file_path, "wb") as cache_file:
	        pickle.dump(polyphonic_dict, cache_file)


	def get_dict():
	    """Return the polyphonic dictionary, using the pickle cache when possible.

	    The cache at ``CACHE_PATH`` is tried first.  If it is missing, unreadable,
	    truncated or otherwise cannot be unpickled, the dictionary is rebuilt with
	    ``read_dict()`` and the cache is rewritten.  Writing the cache is
	    best-effort: an ``OSError`` (e.g. a read-only directory) is ignored,
	    because the cache is only an optimisation.

	    Returns:
	        Whatever the cache holds, or the dict produced by ``read_dict()``.
	    """
	    try:
	        with open(CACHE_PATH, "rb") as pickle_file:
	            # NOTE(security): unpickling can execute arbitrary code, so
	            # CACHE_PATH must only ever point at a file this module wrote.
	            return pickle.load(pickle_file)
	    except (OSError, EOFError, pickle.UnpicklingError,
	            AttributeError, ImportError, IndexError, ValueError):
	        # Missing or damaged cache: fall through and rebuild it.  The
	        # exception list follows the failures the pickle docs say load()
	        # may raise on bad data.
	        pass

	    polyphonic_dict = read_dict()
	    try:
	        cache_dict(polyphonic_dict, CACHE_PATH)
	    except OSError:
	        # The dictionary itself is fine; only the cache could not be saved.
	        pass

	    return polyphonic_dict


	def _merge_rep_file(file_path, polyphonic_dict):
	    """Merge the ``key: value`` lines of one ``.rep`` file into *polyphonic_dict*."""
	    # Decode as UTF-8 explicitly: the platform default encoding (notably on
	    # Windows) may fail on, or silently corrupt, non-ASCII keys.
	    with open(file_path, encoding="utf-8") as f:
	        for line in f:
	            if not line.strip():
	                continue  # tolerate blank lines, e.g. a stray newline at EOF
	            # Split on the first colon only, so a colon inside the value does
	            # not break the unpacking.  A line with no colon still raises
	            # ValueError, as before.
	            key, value_str = line.split(':', 1)
	            # NOTE(security): eval() runs whatever expression the file holds.
	            # Acceptable only while the .rep files are trusted, bundled data;
	            # consider ast.literal_eval if they can ever come from elsewhere.
	            polyphonic_dict[key.strip()] = eval(value_str.strip())


	def read_dict():
	    """Build the polyphonic dictionary from the two ``.rep`` text files.

	    ``PP_DICT_PATH`` is read first and ``PP_FIX_DICT_PATH`` second, so an
	    entry in the fix file replaces the entry with the same key from the
	    main file.

	    Returns:
	        dict mapping each stripped key to the evaluated value of its line.

	    Raises:
	        OSError: if either file cannot be opened.
	        ValueError: if a non-blank line contains no ``:`` separator.
	    """
	    polyphonic_dict = {}
	    for file_path in (PP_DICT_PATH, PP_FIX_DICT_PATH):
	        _merge_rep_file(file_path, polyphonic_dict)
	    return polyphonic_dict


	def correct_pronunciation(word,word_pinyins):
	    """Return the ``pp_dict`` entry for *word*, or *word_pinyins* if there is none."""
	    return pp_dict.get(word, word_pinyins)


	# Module-level lookup table read by correct_pronunciation(); it is built
	# once, at import time, by get_dict().
	pp_dict = get_dict()