Spaces:

skytnt
/

moe-tts

Running on CPU Upgrade

moe-tts / text /ngu_dialect.py

update model and add chinese_dialect_lexicons

be80077 about 2 years ago

1.09 kB

	import re
	import opencc


	# Two-letter dialect codes mapped to the lexicon names that are appended to
	# "chinese_dialect_lexicons/" when the per-dialect OpenCC converters are built.
	# Insertion order is kept as-is because the converters are created by
	# iterating over this dict's values.
	dialects = {
	    'SZ': 'suzhou',
	    'WX': 'wuxi',
	    'CZ': 'changzhou',
	    'HZ': 'hangzhou',
	    'SX': 'shaoxing',
	    'NB': 'ningbo',
	    'JJ': 'jingjiang',
	    'YX': 'yixing',
	    'JD': 'jiading',
	    'ZR': 'zhenru',
	    'PH': 'pinghu',
	    'TX': 'tongxiang',
	    'JS': 'jiashan',
	    'HN': 'xiashi',
	    'LP': 'linping',
	    'XS': 'xiaoshan',
	    'FY': 'fuyang',
	    'RA': 'ruao',
	    'CX': 'cixi',
	    'SM': 'sanmen',
	    'TT': 'tiantai',
	    'WZ': 'wenzhou',
	    'SC': 'suichang',
	    'YB': 'youbu',
	}

	# Lexicon name -> OpenCC converter, filled in at import time.
	converters = {}

	# Loading is deliberately best-effort: a lexicon that cannot be loaded is
	# skipped, so its name is simply absent from `converters` and a later lookup
	# for it raises KeyError.
	for dialect in dialects.values():
	    try:
	        converters[dialect] = opencc.OpenCC("chinese_dialect_lexicons/" + dialect)
	    except Exception:
	        # Catch Exception rather than using a bare `except:` so that
	        # KeyboardInterrupt / SystemExit raised during import are not
	        # silently swallowed along with genuine load failures.
	        continue


	def ngu_dialect_to_ipa(text, dialect):
	    """Convert `text` with the OpenCC converter for `dialect` and tidy the result.

	    `dialect` is a key of the module-level `dialects` mapping (e.g. 'SZ');
	    the matching converter is taken from `converters`. The converter output
	    is presumably IPA, going by the function name. After conversion:

	    * '-' characters are removed and '$' characters become spaces;
	    * '、', '；' and '：' are rewritten to '，';
	    * '，', '。', '？' and '！' that have one whitespace character on each
	      side become ', ', '. ', '? ' and '! ' respectively;
	    * trailing whitespace is removed.

	    Raises KeyError if `dialect` is not a known code or if no converter was
	    loaded for the corresponding lexicon.
	    """
	    lexicon = dialects[dialect]
	    converted = converters[lexicon].convert(text)

	    # One pass: delete '-' and map '$' to a space.
	    result = converted.translate(str.maketrans('$', ' ', '-'))

	    # Applied strictly in order; each pattern sees the output of the
	    # previous substitution, so these must not be merged into one regex.
	    substitutions = (
	        (r'[、；：]', '，'),
	        (r'\s，\s', ', '),
	        (r'\s。\s', '. '),
	        (r'\s？\s', '? '),
	        (r'\s！\s', '! '),
	        (r'\s*$', ''),
	    )
	    for pattern, replacement in substitutions:
	        result = re.sub(pattern, replacement, result)
	    return result