maskgct

Running

maskgct / text /g2p.py

Upload 61 files

7ee3434 verified 14 days ago

1.01 kB

	# Copyright (c) 2023 Amphion.
	#
	# This source code is licensed under the MIT license found in the
	# LICENSE file in the root directory of this source tree.

	import re
	from g2p_en import G2p
	from string import punctuation


	def read_lexicon(lex_path):
	    """Load a pronunciation lexicon from a whitespace-delimited text file.

	    Each non-empty line is read as ``WORD PHONE PHONE ...``. Words are
	    lower-cased before being used as keys, and when a word appears more
	    than once only its first pronunciation is kept.

	    Args:
	        lex_path: Path to the lexicon file (read as UTF-8).

	    Returns:
	        dict mapping each lower-cased word to its list of phone strings.
	        A line holding only a word maps that word to an empty list.
	    """
	    lexicon = {}
	    # Explicit encoding keeps parsing independent of the platform locale.
	    with open(lex_path, encoding="utf-8") as f:
	        for line in f:
	            # str.split() collapses whitespace runs and ignores leading and
	            # trailing whitespace, so no empty "" word or phone is produced.
	            fields = line.split()
	            if not fields:
	                # Blank or whitespace-only line: nothing to record.
	                continue
	            word, phones = fields[0].lower(), fields[1:]
	            # setdefault keeps the first pronunciation seen for a word.
	            lexicon.setdefault(word, phones)
	    return lexicon


	def preprocess_english(text, lexicon):
	    """Convert English text into a space-separated phone string.

	    Trailing punctuation is stripped, then the text is split on the
	    characters ``, ; . - ? ! +`` and on whitespace; each delimiter is kept
	    as its own token. Tokens found in ``lexicon`` (case-insensitively) use
	    the lexicon pronunciation; every other token is passed to ``G2p``.

	    Args:
	        text: Input sentence.
	        lexicon: Mapping from lower-cased word to a list of phone strings,
	            e.g. as returned by ``read_lexicon``.

	    Returns:
	        The phones joined by single spaces. A phone that is a single
	        punctuation character and sits between two other phones is
	        replaced by ``sp``.
	    """
	    text = text.rstrip(punctuation)

	    # Built lazily: constructing G2p is only worth it when a token is
	    # actually missing from the lexicon.
	    g2p = None
	    phones = []
	    for token in re.split(r"([,;.\-\?\!\s+])", text):
	        # The capturing split emits "" between adjacent delimiters and one
	        # token per whitespace character; neither carries phones.
	        # NOTE(review): assumes G2p returns no phones for empty or
	        # whitespace-only input, so skipping them does not change output.
	        if not token.strip():
	            continue
	        key = token.lower()
	        if key in lexicon:
	            phones += lexicon[key]
	            continue
	        if g2p is None:
	            g2p = G2p()
	        # G2p output separates words with " "; drop those separators.
	        phones += [p for p in g2p(token) if p != " "]

	    # Joining with "}{" wraps every interior phone as "{phone}", so the
	    # regex below can spot interior punctuation (or empty) phones and turn
	    # them into "{sp}" before the "}{" separators become plain spaces.
	    phones = "}{".join(phones)
	    phones = re.sub(r"\{[^\w\s]?\}", "{sp}", phones)
	    return phones.replace("}{", " ")