Spaces:
Running
on
Zero
Running
on
Zero
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .text_normlization import * | |
# Punctuation folding table consumed by replace_punctuation(): each key found
# in the input is rewritten to its value, collapsing many punctuation marks
# onto the small set "!", "?", "…", "," and "." (plus "-").
#
# The full-width (U+FFxx) forms are written as \u escapes on purpose, so that
# a character-width conversion of this source file cannot fold them into their
# ASCII twins and leave duplicate or identity entries behind.
# NOTE(review): the previous table listed "~" twice and mapped ",", "!" and
# "?" to themselves, which suggests the full-width keys were lost that way;
# confirm against the upstream table.
rep_map = {
    ":": ",",
    ";": ",",
    ",": ",",
    "。": ".",
    "!": "!",
    "?": "?",
    "\n": ".",
    "·": ",",
    "、": ",",
    "...": "…",
    "$": ".",
    "/": ",",
    "—": "-",
    "~": "…",
    "\uff5e": "…",  # FULLWIDTH TILDE
    "\uff1a": ",",  # FULLWIDTH COLON
    "\uff1b": ",",  # FULLWIDTH SEMICOLON
    "\uff0c": ",",  # FULLWIDTH COMMA
    "\uff01": "!",  # FULLWIDTH EXCLAMATION MARK
    "\uff1f": "?",  # FULLWIDTH QUESTION MARK
}
def replace_punctuation(text):
    """Fold punctuation via ``rep_map`` and drop every unsupported character.

    Steps, in order:

    1. Replace "嗯" with "恩" and "呣" with "母".
    2. Replace every occurrence of a ``rep_map`` key with its mapped value.
    3. Delete every run of characters that is neither in the range
       U+4E00-U+9FA5 nor one of ``!``, ``?``, ``…``, ``,``, ``.``.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string. Anything outside the range and punctuation set
        listed above (Latin letters, digits, spaces, "-", ...) is removed.
    """
    # NOTE(review): presumably these two characters are swapped for
    # look-alikes that a later stage handles better; confirm with the caller.
    text = text.replace("嗯", "恩").replace("呣", "母")

    # One alternation over all keys; re.escape keeps "...", "$" and "?"
    # literal instead of being read as regex metacharacters.
    pattern = re.compile("|".join(re.escape(p) for p in rep_map))
    replaced_text = pattern.sub(lambda match: rep_map[match.group()], text)

    punctuation = ["!", "?", "…", ",", "."]
    # The result is returned without being echoed to stdout: this is a
    # library helper and must not pollute the caller's output.
    return re.sub(
        r"[^\u4e00-\u9fa5" + "".join(punctuation) + r"]+", "", replaced_text
    )
def text_normalize(text):
    """Normalize ``text`` and return it as one punctuation-folded string.

    The text is passed through ``TextNormalizer().normalize``; each sentence
    it yields is cleaned with ``replace_punctuation`` and the cleaned
    sentences are concatenated in order with no separator.

    Args:
        text: The raw input string.

    Returns:
        The concatenated cleaned sentences, or ``""`` when the normalizer
        yields no sentences.
    """
    # https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization
    # TextNormalizer is not defined in this file; presumably it arrives via
    # the wildcard import at the top of the module.
    tx = TextNormalizer()
    sentences = tx.normalize(text)
    # No debug echo here: it referenced the loop variable, which is unbound
    # when there are no sentences, and it wrote to stdout on every call.
    return "".join(replace_punctuation(sentence) for sentence in sentences)