import re

from .text_normlization import *
|
|
# Map Chinese / full-width punctuation (plus a few stray symbols) onto the
# small punctuation set used by the rest of the pipeline.
rep_map = {
    "：": ",",
    "；": ",",
    "，": ",",
    "。": ".",
    "！": "!",
    "？": "?",
    "\n": ".",
    "·": ",",
    "、": ",",
    "...": "…",
    "$": ".",
    "/": ",",
    "—": "-",
    "~": "…",
    "～": "…",
}
|
|
|
|
|
def replace_punctuation(text):
    # Replace interjection characters with near-homophones that the pinyin
    # front end handles reliably.
    text = text.replace("嗯", "恩").replace("呣", "母")

    # Substitute every punctuation mark listed in rep_map in a single pass.
    pattern = re.compile("|".join(re.escape(p) for p in rep_map.keys()))
    replaced_text = pattern.sub(lambda x: rep_map[x.group()], text)

    # Strip everything that is neither a CJK character (U+4E00..U+9FA5)
    # nor one of the allowed punctuation marks.
    punctuation = ["!", "?", "…", ",", "."]
    replaced_text = re.sub(
        r"[^\u4e00-\u9fa5" + "".join(punctuation) + r"]+", "", replaced_text
    )

    return replaced_text
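
# Illustrative example (not from the original source): with the mapping above,
# replace_punctuation("你好，世界！") is expected to return "你好,世界!", i.e.
# full-width marks become ASCII and everything outside the CJK range and the
# allowed punctuation set is dropped.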
|
|
|
|
|
def text_normalize(text):
    # Expand numbers, dates, units, etc. into their spoken Chinese form,
    # then clean up the punctuation of each resulting sentence.
    tx = TextNormalizer()
    sentences = tx.normalize(text)
    dest_text = ""
    for sentence in sentences:
        dest_text += replace_punctuation(sentence)
    return dest_text
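

# Usage sketch (illustrative, not part of the original module). Because of the
# relative import above, this file must be run in its package context, e.g.
# `python -m <your_package>.<this_module>` (placeholder names), not as a
# standalone script.
if __name__ == "__main__":
    sample = "今天是2024年5月1日，气温25℃，真不错！"
    print(text_normalize(sample))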
|
|