Spaces:

kkvc-hf
/

Style-Bert-VITS2-SH

Running

TAKESHI0\ogawa

37f86d1 4 months ago

11.7 kB

	import pickle
	import os
	import re
	from g2p_en import G2p
	from transformers import DebertaV2Tokenizer

	from text import symbols
	from text.symbols import punctuation

	# Directory that contains this module; the dictionary and its cache sit beside it.
	current_file_path = os.path.dirname(__file__)
	# Pronunciation dictionary parsed by read_dict().
	CMU_DICT_PATH = os.path.join(current_file_path, "cmudict.rep")
	# Pickled form of the parsed dictionary, written and read by get_dict().
	CACHE_PATH = os.path.join(current_file_path, "cmudict_cache.pickle")
	# Fallback grapheme-to-phoneme converter; g2p() calls it for words that are
	# not found in the dictionary.
	_g2p = G2p()
	# NOTE: this is a relative path, so it is resolved against the process's
	# working directory rather than against this file's directory.
	LOCAL_PATH = "./bert/deberta-v3-large"
	tokenizer = DebertaV2Tokenizer.from_pretrained(LOCAL_PATH)

	# ARPAbet symbols recognised by g2p(): output of the fallback converter is
	# passed through refine_ph() only when it appears in this set.
	arpa = {
	    # Vowels carrying a trailing stress digit (0, 1 or 2).
	    "AA0", "AA1", "AA2",
	    "AE0", "AE1", "AE2",
	    "AH0", "AH1", "AH2",
	    "AO0", "AO1", "AO2",
	    "AW0", "AW1", "AW2",
	    "AY0", "AY1", "AY2",
	    "EH0", "EH1", "EH2",
	    "ER0", "ER1", "ER2",
	    "EY0", "EY1", "EY2",
	    "IH0", "IH1", "IH2",
	    "IY0", "IY1", "IY2",
	    "OW0", "OW1", "OW2",
	    "OY0", "OY1", "OY2",
	    "UH0", "UH1", "UH2",
	    "UW0", "UW1", "UW2",
	    # Vowels that are also listed without a stress digit.
	    "ER", "IH",
	    # Consonants.
	    "B", "CH", "D", "DH", "F", "G", "HH", "JH",
	    "K", "L", "M", "N", "NG", "P", "R", "S",
	    "SH", "T", "TH", "V", "W", "Y", "Z", "ZH",
	}


	# Substitutions applied to a single phone/punctuation token by
	# post_replace_ph().  Built once instead of on every call.
	_POST_REPLACE_MAP = {
	    "：": ",",
	    "；": ",",
	    "，": ",",
	    "。": ".",
	    "！": "!",
	    "？": "?",
	    "\n": ".",
	    "·": ",",
	    "、": ",",
	    "…": "...",
	    "···": "...",
	    "・・・": "...",
	    "v": "V",
	}


	def post_replace_ph(ph):
	    """Map one phone or punctuation token onto the symbol inventory.

	    The token is first passed through the substitution table (tokens not in
	    the table are left as they are).  The result is returned when it is a
	    member of the imported ``symbols`` collection; otherwise ``"UNK"`` is
	    returned.

	    Args:
	        ph: A single phone or punctuation string.

	    Returns:
	        The (possibly substituted) token, or ``"UNK"`` when it is not in
	        ``symbols``.
	    """
	    ph = _POST_REPLACE_MAP.get(ph, ph)
	    return ph if ph in symbols else "UNK"


	# Maps full-width, CJK and typographic punctuation (plus a few ASCII
	# brackets/quotes) onto the small punctuation set used after normalisation.
	rep_map = {
	    "：": ",",
	    "；": ",",
	    "，": ",",
	    "。": ".",
	    "！": "!",
	    "？": "?",
	    "\n": ".",
	    "．": ".",
	    "…": "...",
	    "···": "...",
	    "・・・": "...",
	    "·": ",",
	    "・": ",",
	    "、": ",",
	    "$": ".",
	    "“": "'",
	    "”": "'",
	    '"': "'",
	    "‘": "'",
	    "’": "'",
	    "（": "'",
	    "）": "'",
	    "(": "'",
	    ")": "'",
	    "《": "'",
	    "》": "'",
	    "【": "'",
	    "】": "'",
	    "[": "'",
	    "]": "'",
	    "—": "-",
	    "−": "-",
	    "～": "-",
	    "~": "-",
	    "「": "'",
	    "」": "'",
	}

	# One alternation over every key of rep_map.  The keys are joined with a bare
	# "|" (an escaped pipe would be matched literally and nothing would ever be
	# replaced).  Longer keys are listed first so that multi-character sequences
	# such as "···" win over their single-character prefix "·"; sorted() is
	# stable, so keys of equal length keep their dict order.
	_REP_MAP_PATTERN = re.compile(
	    "|".join(re.escape(p) for p in sorted(rep_map, key=len, reverse=True))
	)


	def replace_punctuation(text):
	    """Replace every occurrence of a ``rep_map`` key in ``text`` with its value.

	    Args:
	        text: Input string.

	    Returns:
	        A new string with all mapped punctuation substituted; characters that
	        are not keys of ``rep_map`` are left unchanged.
	    """
	    replaced_text = _REP_MAP_PATTERN.sub(lambda x: rep_map[x.group()], text)

	    # replaced_text = re.sub(
	    # r"[^\u3040-\u309F\u30A0-\u30FF\u4E00-\u9FFF\u3400-\u4DBF\u3005"
	    # + "".join(punctuation)
	    # + r"]+",
	    # "",
	    # replaced_text,
	    # )

	    return replaced_text


	def read_dict(path=None, start_line=49):
	    """Parse the pronunciation dictionary file into a word -> syllables mapping.

	    Each entry line holds a word, whitespace, and then its phones, with
	    syllables separated by " - ".  The word is mapped to a list of syllables,
	    each syllable being a list of phone strings.

	    Args:
	        path: File to read.  Defaults to ``CMU_DICT_PATH``.
	        start_line: 1-based number of the first line treated as an entry;
	            earlier lines are skipped (presumably a header -- confirm against
	            the shipped dictionary file).

	    Returns:
	        dict mapping each word to ``[[phone, ...], ...]``.  When a word occurs
	        more than once, the last occurrence wins.
	    """
	    if path is None:
	        path = CMU_DICT_PATH

	    g2p_dict = {}
	    with open(path) as f:
	        for line_index, line in enumerate(f, start=1):
	            if line_index < start_line:
	                continue

	            # Split the word from its pronunciation on the first run of
	            # whitespace.  Splitting on every single space and taking element
	            # 1 would keep only the token right after the first space and
	            # drop the rest of the pronunciation.
	            fields = line.split(maxsplit=1)
	            if len(fields) < 2:
	                # Blank line or a word with no phones: nothing to record.
	                continue

	            word, pronunciation = fields
	            g2p_dict[word] = [
	                syllable.split() for syllable in pronunciation.split(" - ")
	            ]

	    return g2p_dict


	def cache_dict(g2p_dict, file_path):
	    """Pickle ``g2p_dict`` into ``file_path``, replacing any existing file."""
	    with open(file_path, mode="wb") as cache_file:
	        pickle.dump(g2p_dict, cache_file)


	def get_dict():
	    """Return the pronunciation dictionary, creating the pickle cache if absent.

	    When ``CACHE_PATH`` exists its pickled contents are returned.  Otherwise
	    the dictionary is parsed with ``read_dict()``, written to ``CACHE_PATH``
	    with ``cache_dict()``, and returned.
	    """
	    if not os.path.exists(CACHE_PATH):
	        parsed = read_dict()
	        cache_dict(parsed, CACHE_PATH)
	        return parsed

	    # NOTE(review): unpickling runs whatever the file contains; this is only
	    # safe as long as the cache file is one this module wrote itself.
	    with open(CACHE_PATH, "rb") as cache_file:
	        return pickle.load(cache_file)


	# Loaded once at import time.  g2p() looks up the upper-cased form of each
	# word here before falling back to the G2p converter.
	eng_dict = get_dict()


	def refine_ph(phn):
	    """Split a phone into its lower-cased base symbol and a tone number.

	    A phone that ends in a digit yields ``(base, digit + 1)`` with the digit
	    removed from the base.  Any other phone yields ``(phone, 3)``.

	    Args:
	        phn: Phone string such as ``"AH0"`` or ``"NG"``.

	    Returns:
	        Tuple ``(lower-cased phone without trailing digit, tone)``.
	    """
	    ends_in_digit = re.search(r"\d$", phn) is not None
	    if not ends_in_digit:
	        return phn.lower(), 3
	    return phn[:-1].lower(), int(phn[-1]) + 1


	def refine_syllables(syllables):
	    """Flatten a list of syllables into parallel phone and tone lists.

	    Args:
	        syllables: Iterable of syllables, each an iterable of phone strings.

	    Returns:
	        Tuple ``(phonemes, tones)``: every phone of every syllable, in order,
	        passed through ``refine_ph()``, with the matching tone at the same
	        index of ``tones``.
	    """
	    phonemes = []
	    tones = []
	    for phn_list in syllables:
	        # Iterate the phones directly instead of indexing by position.
	        for phn in phn_list:
	            phoneme, tone = refine_ph(phn)
	            phonemes.append(phoneme)
	            tones.append(tone)
	    return phonemes, tones


	import re
	import inflect

	# Shared inflect engine; _expand_ordinal() and _expand_number() use its
	# number_to_words() to spell numbers out.
	_inflect = inflect.engine()
	# Patterns used by normalize_numbers(), which applies them in this order:
	# comma-grouped numbers, pounds, dollars, decimals, ordinals, plain integers.
	_comma_number_re = re.compile(r"([0-9][0-9\,]+[0-9])")
	_decimal_number_re = re.compile(r"([0-9]+\.[0-9]+)")
	_pounds_re = re.compile(r"£([0-9\,]*[0-9]+)")
	_dollars_re = re.compile(r"\$([0-9\.\,]*[0-9]+)")
	# The suffix alternation must use a bare "|"; an escaped pipe matches a
	# literal "|" character, so "2nd" or "13th" would never be seen as ordinals.
	_ordinal_re = re.compile(r"[0-9]+(st|nd|rd|th)")
	_number_re = re.compile(r"[0-9]+")

	# Abbreviation table: each entry pairs a case-insensitive pattern (the
	# abbreviation at a word boundary, followed by a period) with the text that
	# replaces it.
	_abbreviations = [
	    (re.compile(rf"\b{abbr}\.", re.IGNORECASE), expansion)
	    for abbr, expansion in (
	        ("mrs", "misess"),
	        ("mr", "mister"),
	        ("dr", "doctor"),
	        ("st", "saint"),
	        ("co", "company"),
	        ("jr", "junior"),
	        ("maj", "major"),
	        ("gen", "general"),
	        ("drs", "doctors"),
	        ("rev", "reverend"),
	        ("lt", "lieutenant"),
	        ("hon", "honorable"),
	        ("sgt", "sergeant"),
	        ("capt", "captain"),
	        ("esq", "esquire"),
	        ("ltd", "limited"),
	        ("col", "colonel"),
	        ("ft", "fort"),
	    )
	]


	# List of (ipa, lazy ipa) pairs:
	_lazy_ipa = [
	    (re.compile(ipa), lazy)
	    for ipa, lazy in (
	        ("r", "ɹ"),
	        ("æ", "e"),
	        ("ɑ", "a"),
	        ("ɔ", "o"),
	        ("ð", "z"),
	        ("θ", "s"),
	        ("ɛ", "e"),
	        ("ɪ", "i"),
	        ("ʊ", "u"),
	        ("ʒ", "ʥ"),
	        ("ʤ", "ʥ"),
	        ("ˈ", "↓"),
	    )
	]

	# List of (ipa, lazy ipa2) pairs:
	_lazy_ipa2 = [
	    (re.compile(ipa), lazy)
	    for ipa, lazy in (
	        ("r", "ɹ"),
	        ("ð", "z"),
	        ("θ", "s"),
	        ("ʒ", "ʑ"),
	        ("ʤ", "dʑ"),
	        ("ˈ", "↓"),
	    )
	]

	# List of (ipa, ipa2) pairs
	_ipa_to_ipa2 = [
	    (re.compile(ipa), ipa2)
	    for ipa, ipa2 in (
	        ("r", "ɹ"),
	        ("ʤ", "dʒ"),
	        ("ʧ", "tʃ"),
	    )
	]


	def _expand_dollars(m):
	    """Turn a matched dollar amount into "<n> dollar(s), <n> cent(s)" wording.

	    Args:
	        m: Regex match whose group 1 is the amount without the "$" sign.

	    Returns:
	        The amount with units spelled out (digits are kept as digits), or
	        ``"zero dollars"`` when both parts are zero.  An amount containing
	        more than one "." is returned unchanged with " dollars" appended.
	    """
	    amount = m.group(1)
	    pieces = amount.split(".")
	    if len(pieces) > 2:
	        # Unexpected format
	        return f"{amount} dollars"

	    dollars = int(pieces[0]) if pieces[0] else 0
	    cents = int(pieces[1]) if len(pieces) > 1 and pieces[1] else 0

	    spoken = []
	    if dollars:
	        spoken.append(f"{dollars} {'dollar' if dollars == 1 else 'dollars'}")
	    if cents:
	        spoken.append(f"{cents} {'cent' if cents == 1 else 'cents'}")
	    return ", ".join(spoken) if spoken else "zero dollars"


	def _remove_commas(m):
	    """Return group 1 of match ``m`` with every comma removed."""
	    grouped_digits = m.group(1)
	    return "".join(grouped_digits.split(","))


	def _expand_ordinal(m):
	    """Spell out the whole matched ordinal text via the inflect engine."""
	    ordinal_text = m.group(0)
	    return _inflect.number_to_words(ordinal_text)


	def _expand_number(m):
	    """Spell out a matched integer, with special wording between 1000 and 3000.

	    Numbers strictly between 1000 and 3000 are handled as follows:
	    2000 is "two thousand"; 2001-2009 are "two thousand <n>"; exact hundreds
	    are "<n> hundred"; everything else is read in two-digit groups with "oh"
	    for zero.  All other numbers are spelled out without "and".

	    Args:
	        m: Regex match whose full text is a run of digits.

	    Returns:
	        The number in words.
	    """
	    num = int(m.group(0))

	    if not 1000 < num < 3000:
	        return _inflect.number_to_words(num, andword="")

	    if num == 2000:
	        return "two thousand"
	    if 2000 < num < 2010:
	        return "two thousand " + _inflect.number_to_words(num % 100)
	    if num % 100 == 0:
	        return _inflect.number_to_words(num // 100) + " hundred"

	    grouped = _inflect.number_to_words(num, andword="", zero="oh", group=2)
	    return grouped.replace(", ", " ")


	def _expand_decimal_point(m):
	    """Return group 1 of match ``m`` with each "." replaced by " point "."""
	    number_text = m.group(1)
	    return " point ".join(number_text.split("."))


	def normalize_numbers(text):
	    """Rewrite numeric expressions in ``text`` as words.

	    The substitutions run in a fixed order, each on the output of the
	    previous one: strip commas inside numbers, pounds, dollars, decimal
	    points, ordinals, and finally any remaining integers.
	    """
	    passes = (
	        (_comma_number_re, _remove_commas),
	        (_pounds_re, r"\1 pounds"),
	        (_dollars_re, _expand_dollars),
	        (_decimal_number_re, _expand_decimal_point),
	        (_ordinal_re, _expand_ordinal),
	        (_number_re, _expand_number),
	    )
	    for pattern, replacement in passes:
	        text = pattern.sub(replacement, text)
	    return text


	def text_normalize(text):
	    """Normalise ``text``: numbers to words, then punctuation, then spacing.

	    After ``normalize_numbers()`` and ``replace_punctuation()``, a single
	    space is inserted between any of ``, ; . ? !`` and a word character that
	    immediately follows it.
	    """
	    with_numbers_spelled = normalize_numbers(text)
	    with_plain_punctuation = replace_punctuation(with_numbers_spelled)
	    return re.sub(r"([,;.\?\!])([\w])", r"\1 \2", with_plain_punctuation)


	def distribute_phone(n_phone, n_word):
	    """Spread ``n_phone`` phones over ``n_word`` slots as evenly as possible.

	    Every slot receives ``n_phone // n_word`` phones and the first
	    ``n_phone % n_word`` slots receive one extra.  This is the result of
	    handing phones out one at a time to the first least-loaded slot, computed
	    directly instead of rescanning the list for every phone.

	    Args:
	        n_phone: Number of phones to distribute; values below zero are
	            treated as zero.
	        n_word: Number of slots.

	    Returns:
	        List of length ``n_word`` whose entries sum to ``n_phone``.

	    Raises:
	        ValueError: If there are phones to place but no slot to put them in.
	    """
	    n_phone = max(n_phone, 0)
	    if n_word <= 0:
	        if n_phone:
	            raise ValueError("cannot distribute phones over zero words")
	        return []

	    base, extra = divmod(n_phone, n_word)
	    return [base + 1] * extra + [base] * (n_word - extra)


	def sep_text(text):
	    """Split ``text`` into words and separator characters.

	    The text is split on each single character out of ``, ; . ? !``,
	    whitespace and ``+``; the separators are kept as their own items, and
	    items that are empty or whitespace-only are discarded.
	    """
	    # NOTE(review): "\s+" sits inside the character class, so "+" is a
	    # literal separator here rather than a quantifier -- confirm intent.
	    pieces = re.split(r"([,;.\?\!\s+])", text)
	    return list(filter(str.strip, pieces))


	def text_to_words(text):
	    """Group the tokenizer's output for ``text`` into per-word token lists.

	    Returns a list of words, each word being a list of token strings:

	    * a token starting with "▁" opens a new word (the marker is stripped);
	    * a punctuation token becomes a word of its own when it is the last
	      token, or when the next token starts with "▁" or is itself
	      punctuation; otherwise it is appended to the current word;
	    * any other token is appended to the current word.
	    """
	    tokens = tokenizer.tokenize(text)
	    words = []
	    for idx, t in enumerate(tokens):
	        if t.startswith("▁"):
	            # Presumably the tokenizer's word-start marker -- confirm.
	            words.append([t[1:]])
	        else:
	            if t in punctuation:
	                if idx == len(tokens) - 1:
	                    words.append([f"{t}"])
	                else:
	                    if (
	                        not tokens[idx + 1].startswith("▁")
	                        and tokens[idx + 1] not in punctuation
	                    ):
	                        # Punctuation directly followed by a continuation
	                        # token stays inside the current word.
	                        if idx == 0:
	                            # No word has been opened yet; create one.
	                            words.append([])
	                        words[-1].append(f"{t}")
	                    else:
	                        words.append([f"{t}"])
	            else:
	                if idx == 0:
	                    # First token carries no marker; open the first word.
	                    words.append([])
	                words[-1].append(f"{t}")
	    return words


	def g2p(text):
	    """Convert ``text`` into phones, tones and a per-token phone count.

	    Returns:
	        Tuple ``(phones, tones, word2ph)``.  ``phones`` and ``tones`` are
	        parallel lists, each wrapped in a leading and trailing "_" / 0 entry.
	        ``word2ph`` holds one count per token returned by
	        ``text_to_words()`` plus a leading and trailing 1, and sums to
	        ``len(phones)``.

	    Raises:
	        AssertionError: If the final list lengths are inconsistent; the
	            input text is attached as the assertion message.
	    """
	    phones = []
	    tones = []
	    phone_len = []
	    # words = sep_text(text)
	    # tokens = [tokenizer.tokenize(i) for i in words]
	    words = text_to_words(text)

	    for word in words:
	        temp_phones, temp_tones = [], []
	        if len(word) > 1:
	            if "'" in word:
	                # Look the word up as a single string when it contains an
	                # apostrophe token.  Only the loop variable is rebound;
	                # `words` keeps the original tokens for word2ph below.
	                word = ["".join(word)]
	        for w in word:
	            if w in punctuation:
	                # Punctuation is emitted as its own phone with tone 0.
	                temp_phones.append(w)
	                temp_tones.append(0)
	                continue
	            if w.upper() in eng_dict:
	                phns, tns = refine_syllables(eng_dict[w.upper()])
	                temp_phones += [post_replace_ph(i) for i in phns]
	                temp_tones += tns
	                # w2ph.append(len(phns))
	            else:
	                # Not in the dictionary: use the G2p converter and drop the
	                # " " entries from its output.
	                phone_list = list(filter(lambda p: p != " ", _g2p(w)))
	                phns = []
	                tns = []
	                for ph in phone_list:
	                    if ph in arpa:
	                        ph, tn = refine_ph(ph)
	                        phns.append(ph)
	                        tns.append(tn)
	                    else:
	                        # Anything outside the ARPA set is kept as is with
	                        # tone 0.
	                        phns.append(ph)
	                        tns.append(0)
	                temp_phones += [post_replace_ph(i) for i in phns]
	                temp_tones += tns
	        phones += temp_phones
	        tones += temp_tones
	        phone_len.append(len(temp_phones))
	        # phones = [post_replace_ph(i) for i in phones]

	    word2ph = []
	    for token, pl in zip(words, phone_len):
	        # Number of tokens that make up this word.
	        word_len = len(token)

	        # Share the word's phone count across its tokens.
	        aaa = distribute_phone(pl, word_len)
	        word2ph += aaa

	    # Wrap all three sequences in a leading and trailing padding entry.
	    phones = ["_"] + phones + ["_"]
	    tones = [0] + tones + [0]
	    word2ph = [1] + word2ph + [1]
	    assert len(phones) == len(tones), text
	    assert len(phones) == sum(word2ph), text

	    return phones, tones, word2ph


	def get_bert_feature(text, word2ph):
	    """Delegate to ``text.english_bert_mock.get_bert_feature`` and return its result."""
	    # Imported inside the function, so the module is only loaded when this
	    # function is first called.
	    from text import english_bert_mock

	    return english_bert_mock.get_bert_feature(text, word2ph)


	if __name__ == "__main__":
	    # Manual check: print the (phones, tones, word2ph) tuple for a sample
	    # sentence when the module is run as a script.
	    # print(get_dict())
	    # print(eng_word_to_phoneme("hello"))
	    print(g2p("In this paper, we propose 1 DSPGAN, a GAN-based universal vocoder."))
	    # all_phones = set()
	    # for k, syllables in eng_dict.items():
	    # for group in syllables:
	    # for ph in group:
	    # all_phones.add(ph)
	    # print(all_phones)