Spaces:

laubonghaudoi
/

zoengjyutgaai_tts

Running

App Files Files Community

zoengjyutgaai_tts / text /cantonese.py

laubonghaudoi

MVP

60cec7b about 2 months ago

raw

history blame

5.28 kB

	# reference: https://huggingface.co/spaces/Naozumi0512/Bert-VITS2-Cantonese-Yue/blob/main/text/chinese.py

	import sys
	import re
	import cn2an

	from pyjyutping import jyutping
	from text.symbols import punctuation
	from text.zh_normalization.text_normlization import TextNormalizer

	def normalizer(x):
	    """Rewrite Arabic numerals in ``x`` as Chinese numerals.

	    Thin wrapper around ``cn2an.transform`` in ``"an2cn"`` mode, defined as a
	    regular function (rather than a lambda bound to a name) so that it has a
	    proper ``__name__`` in tracebacks and can carry this docstring.

	    Args:
	        x: Text to transform.

	    Returns:
	        Whatever ``cn2an.transform(x, "an2cn")`` returns for ``x``.
	    """
	    return cn2an.transform(x, "an2cn")

	# Ordered table of syllable-onset strings used by
	# jyuping_to_initials_finals_tones() to split a toneless Jyutping syllable
	# into two parts. Despite the name, it also holds entries that begin with a
	# vowel (e.g. "aa", "ong", "uk").
	#
	# Order is significant: the splitter takes the FIRST entry the syllable
	# starts with, which is why e.g. "ts" is listed before "t" and "ng" before
	# "n".
	#
	# NOTE(review): "aa" is listed before "aai"/"aak"/"aap"/"aat"/"aau", so those
	# longer entries can never be the first match. Confirm whether that is
	# intended before reordering -- reordering changes the emitted phones.
	INITIALS = [
	"aa",
	"aai",
	"aak",
	"aap",
	"aat",
	"aau",
	"ai",
	"au",
	"ap",
	"at",
	"ak",
	"a",
	"p",
	"b",
	"e",
	"ts",
	"t",
	"dz",
	"d",
	"kw",
	"k",
	"gw",
	"g",
	"f",
	"h",
	"l",
	"m",
	"ng",
	"n",
	"s",
	"y",
	"w",
	"c",
	"z",
	"j",
	"ong",
	"on",
	"ou",
	"oi",
	"ok",
	"o",
	"uk",
	"ung",
	]
	# Extra tokens appended after the syllable onsets.
	# NOTE(review): presumably pause/silence markers -- confirm against the
	# symbol table this module is used with.
	INITIALS += ["sp", "spl", "spn", "sil"]


	# Substitution table applied by replace_punctuation(): every key found in the
	# text is replaced by its mapped value.
	#   * full-width / CJK sentence punctuation -> ASCII "," "." "!" "?"
	#   * newline and "$"                        -> "."
	#   * three ASCII dots                       -> the single character "…"
	#   * every kind of quote or bracket         -> "'"
	#   * dashes and tildes                      -> "-"
	rep_map = {
	"：": ",",
	"；": ",",
	"，": ",",
	"。": ".",
	"！": "!",
	"？": "?",
	"\n": ".",
	"·": ",",
	"、": ",",
	"...": "…",
	"$": ".",
	"“": "'",
	"”": "'",
	'"': "'",
	"‘": "'",
	"’": "'",
	"（": "'",
	"）": "'",
	"(": "'",
	")": "'",
	"《": "'",
	"》": "'",
	"【": "'",
	"】": "'",
	"[": "'",
	"]": "'",
	"—": "-",
	"～": "-",
	"~": "-",
	"「": "'",
	"」": "'",
	}


	def replace_punctuation(text):
	    """Normalise punctuation in ``text`` and drop unsupported characters.

	    Two passes are applied:

	    1. Every key of ``rep_map`` found in ``text`` is replaced by its mapped
	       value.
	    2. Every remaining run of characters that is neither in the range
	       U+4E00-U+9FA5 nor one of the symbols in ``punctuation`` is deleted.

	    Args:
	        text: Input string.

	    Returns:
	        The string after both passes.
	    """
	    # The keys must be joined with a bare "|" so the pattern is an alternation
	    # of the escaped keys. Joining with "\|" yields an escaped, literal pipe,
	    # so the pattern would only match all keys chained together with "|"
	    # characters and no punctuation would ever be replaced.
	    pattern = re.compile("|".join(re.escape(key) for key in rep_map))
	    replaced_text = pattern.sub(lambda match: rep_map[match.group()], text)

	    # Escape each symbol so that characters such as "-", "]" or "^" are always
	    # literals inside the character class, whatever their position in
	    # ``punctuation``.
	    kept_symbols = "".join(re.escape(symbol) for symbol in punctuation)
	    return re.sub(
	        r"[^\u4e00-\u9fa5" + kept_symbols + r"]+", "", replaced_text
	    )


	def text_normalize(text):
	    """Normalise ``text`` and return it as one cleaned-up string.

	    The text is handed to a fresh ``TextNormalizer``; each sentence it
	    returns is passed through ``replace_punctuation`` and the results are
	    concatenated in order.

	    Args:
	        text: Raw input text.

	    Returns:
	        The concatenation of the punctuation-normalised sentences.
	    """
	    sentences = TextNormalizer().normalize(text)
	    return "".join(replace_punctuation(sentence) for sentence in sentences)


	# Set built from ``punctuation``; jyuping_to_initials_finals_tones() uses it
	# for membership tests when deciding whether a phone gets the "Y" prefix.
	punctuation_set=set(punctuation)
	def jyuping_to_initials_finals_tones(jyuping_syllables):
	    """Turn Jyutping tokens into "Y"-prefixed phones plus per-token counts.

	    Each token is handled as follows:

	    * A token found in ``punctuation``, or the token ``"_"``, is kept as a
	      single unit with tone 0 and counts as 1 phone.
	    * Any other token is treated as a syllable. If its last character parses
	      as an integer, that is the tone and it is stripped; otherwise the tone
	      is 0. The syllable is then split at the first ``INITIALS`` entry it
	      starts with into two units, and counts as 2 phones. The first unit is
	      tagged with tone -1, the second with the syllable's tone.

	    Finally every unit whose tone is neither -1 nor 0 gets the tone digit
	    appended, and every resulting string that is not in ``punctuation_set``
	    is prefixed with ``"Y"``.

	    NOTE(review): a syllable that starts with no ``INITIALS`` entry adds
	    nothing to either returned list, so ``word2ph`` can end up shorter than
	    the input. Confirm that callers tolerate this.

	    Args:
	        jyuping_syllables: Iterable of Jyutping tokens (syllables,
	            punctuation marks or ``"_"``).

	    Returns:
	        A ``(phones, word2ph)`` tuple: the list of phone strings, and a list
	        holding the number of phones produced for each handled token.
	    """
	    units = []
	    unit_tones = []
	    word2ph = []

	    for token in jyuping_syllables:
	        # Punctuation and the "_" placeholder pass through as one unit.
	        if token in punctuation or token == "_":
	            units.append(token)
	            unit_tones.append(0)
	            word2ph.append(1)
	            continue

	        # A trailing digit is the tone; without one the tone defaults to 0.
	        try:
	            tone = int(token[-1])
	        except ValueError:
	            tone = 0
	            toneless = token
	        else:
	            toneless = token[:-1]

	        # First matching prefix wins, so the order of INITIALS matters.
	        prefix = next(
	            (entry for entry in INITIALS if toneless.startswith(entry)), None
	        )
	        if prefix is None:
	            continue

	        if toneless.startswith("nga"):
	            head = toneless[:2]
	            tail = toneless[2:] or toneless[-1]
	        else:
	            head = prefix
	            # A syllable consisting only of the prefix reuses its last
	            # character as the second unit.
	            tail = toneless[len(prefix):] or prefix[-1]

	        units.extend([head, tail])
	        # Only the second unit carries the tone; the first is marked -1.
	        unit_tones.extend([-1, tone])
	        word2ph.append(2)

	    assert len(units) == len(unit_tones)

	    # Customised output format: consonant + vowel carrying the tone digit.
	    phones = []
	    for unit, unit_tone in zip(units, unit_tones):
	        if unit_tone in (-1, 0):
	            phone = unit
	        else:
	            phone = unit + str(unit_tone)
	        # Prefix "Y" so Cantonese phones do not collide with Mandarin ones;
	        # punctuation is left untouched.
	        if phone not in punctuation_set:
	            phone = "Y" + phone
	        phones.append(phone)

	    return phones, word2ph


	def get_jyutping(text):
	    """Return the Jyutping conversion of ``text`` as a list of tokens.

	    The output of ``jyutping.convert`` has every symbol from ``punctuation``
	    padded with a space on each side and is then split on whitespace, so each
	    punctuation mark becomes a token of its own.

	    Args:
	        text: Text to convert.

	    Returns:
	        List of whitespace-separated tokens.
	    """
	    romanised = jyutping.convert(text)
	    for mark in punctuation:
	        padded_mark = " " + mark + " "
	        romanised = romanised.replace(mark, padded_mark)
	    return romanised.split()


	def get_bert_feature(text, word2ph):
	    """Delegate to ``text.chinese_bert.get_bert_feature``.

	    ``chinese_bert`` is imported when this function is called rather than
	    when the module is loaded.

	    Args:
	        text: Passed through unchanged.
	        word2ph: Passed through unchanged.

	    Returns:
	        Whatever ``chinese_bert.get_bert_feature(text, word2ph)`` returns.
	    """
	    from text import chinese_bert as bert_backend

	    features = bert_backend.get_bert_feature(text, word2ph)
	    return features


	def g2p(text):
	    """Convert ``text`` to phones.

	    The text is tokenised with ``get_jyutping`` and the tokens are passed to
	    ``jyuping_to_initials_finals_tones``. No leading or trailing ``"_"``
	    padding is added to the result.

	    Args:
	        text: Text to convert.

	    Returns:
	        The ``(phones, word2ph)`` tuple produced by
	        ``jyuping_to_initials_finals_tones``.
	    """
	    tokens = get_jyutping(text)
	    return jyuping_to_initials_finals_tones(tokens)


	if __name__ == "__main__":
	    # Manual smoke test: normalise one sample sentence, convert it to phones
	    # and print the phones together with the per-token phone counts.
	    sample = text_normalize("佢個鋤頭太短啦。")
	    sample_phones, sample_word2ph = g2p(sample)
	    print(sample_phones, sample_word2ph)