Spaces:
Runtime error
Runtime error
File size: 7,594 Bytes
8a1292d 9116564 8a1292d 9116564 8a1292d 9116564 cabd1e3 8a1292d 9116564 8a1292d 0b7f713 8a1292d fec9963 0b7f713 171db4e 8654ef4 4cecaf4 221b8b8 1d45c44 0b7f713 4cecaf4 f59b38e fec9963 8a1292d 0627904 9116564 8a1292d d2565d9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 |
import sys, os
if sys.platform == "darwin":
    # Set before torch is imported below so the MPS fallback flag is already
    # present in the environment when PyTorch loads.
    os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
import logging

# Raise the threshold of chatty third-party loggers to WARNING.
for _noisy_logger in ("numba", "markdown_it", "urllib3", "matplotlib"):
    logging.getLogger(_noisy_logger).setLevel(logging.WARNING)

# Root logger configuration plus the module-level logger used by this script.
logging.basicConfig(
    level=logging.INFO,
    format="| %(name)s | %(levelname)s | %(message)s",
)
logger = logging.getLogger(__name__)
import torch
import argparse
import commons
import utils
from models import SynthesizerTrn
from text.symbols import symbols
from text import cleaned_text_to_sequence, get_bert
from text.cleaner import clean_text
import gradio as gr
import webbrowser
# Global synthesizer model: assigned in the ``__main__`` block and read by infer().
net_g = None
def get_text(text, language_str, hps):
    """Turn raw text into the tensors consumed by the synthesizer.

    Args:
        text: Raw input text.
        language_str: Language tag forwarded to ``clean_text``,
            ``cleaned_text_to_sequence`` and ``get_bert``.
        hps: Hyper-parameter object; only ``hps.data.add_blank`` is read here.

    Returns:
        A tuple ``(bert, phone, tone, language)`` where ``bert`` is whatever
        ``get_bert`` returns and the other three are ``torch.LongTensor``s
        built from the (optionally blank-interspersed) id sequences.

    Raises:
        AssertionError: if the last dimension of ``bert`` does not match the
            number of phone ids.
    """
    norm_text, phone, tone, word2ph = clean_text(text, language_str)
    phone, tone, language = cleaned_text_to_sequence(phone, tone, language_str)

    if hps.data.add_blank:
        # Insert a 0 between every id; the per-word phone counts therefore
        # double, and the first word absorbs the one extra leading blank.
        phone = commons.intersperse(phone, 0)
        tone = commons.intersperse(tone, 0)
        language = commons.intersperse(language, 0)
        # Build a new list instead of mutating the one returned by clean_text.
        word2ph = [count * 2 for count in word2ph]
        word2ph[0] += 1

    bert = get_bert(norm_text, word2ph, language_str)
    del word2ph

    assert bert.shape[-1] == len(phone), (
        f"BERT feature length {bert.shape[-1]} does not match "
        f"phone sequence length {len(phone)}"
    )

    phone = torch.LongTensor(phone)
    tone = torch.LongTensor(tone)
    language = torch.LongTensor(language)
    return bert, phone, tone, language
import soundfile as sf
def infer(text, sdp_ratio, noise_scale, noise_scale_w, length_scale, sid):
    """Synthesize speech for ``text`` and return the waveform.

    Relies on the module globals ``net_g``, ``hps`` and ``device`` that the
    ``__main__`` block sets up.

    Args:
        text: Input text; it is always processed with the ``"ZH"`` language tag.
        sdp_ratio: Forwarded unchanged to ``net_g.infer``.
        noise_scale: Forwarded unchanged to ``net_g.infer``.
        noise_scale_w: Forwarded unchanged to ``net_g.infer``.
        length_scale: Forwarded unchanged to ``net_g.infer``.
        sid: Speaker key looked up in ``hps.data.spk2id``.

    Returns:
        The element ``[0][0, 0]`` of the ``net_g.infer`` result, moved to the
        CPU, cast to float and converted to a NumPy array.

    Side effects:
        Writes the waveform to ``tmp.wav`` in the current working directory.
    """
    bert, phones, tones, lang_ids = get_text(text, "ZH", hps)
    with torch.no_grad():
        # Add a leading batch dimension of 1 to every input.
        x_tst = phones.to(device).unsqueeze(0)
        tones = tones.to(device).unsqueeze(0)
        lang_ids = lang_ids.to(device).unsqueeze(0)
        bert = bert.to(device).unsqueeze(0)
        x_tst_lengths = torch.LongTensor([phones.size(0)]).to(device)
        del phones
        speakers = torch.LongTensor([hps.data.spk2id[sid]]).to(device)
        audio = net_g.infer(
            x_tst,
            x_tst_lengths,
            speakers,
            tones,
            lang_ids,
            bert,
            sdp_ratio=sdp_ratio,
            noise_scale=noise_scale,
            noise_scale_w=noise_scale_w,
            length_scale=length_scale,
        )[0][0, 0].data.cpu().float().numpy()
        # Drop the input tensors as soon as the output has been copied out.
        del x_tst, tones, lang_ids, bert, x_tst_lengths, speakers

    # Use the configured sampling rate (the same value tts_fn reports to the
    # UI) instead of a hard-coded 44100, so the WAV header matches the model.
    sf.write("tmp.wav", audio, hps.data.sampling_rate)
    return audio
def convert_wav_to_ogg(wav_file):
    """Move a WAV file to ``in/in.wav`` and transcode it to ``out/out.ogg``.

    Args:
        wav_file: Object whose ``name`` attribute is the path of the WAV file
            (for example an open file handle). The file is moved, not copied.

    Returns:
        The output path ``out/out.ogg`` (joined with ``os.path.join``). The
        path is returned even when ffmpeg is missing or exits with an error;
        in that case a warning is logged and the file may be absent or stale.
    """
    import subprocess  # local import keeps this block self-contained

    output_path_ogg = os.path.join('out', "out.ogg")
    renamed_input_path = os.path.join('in', "in.wav")
    os.makedirs('out', exist_ok=True)
    os.makedirs('in', exist_ok=True)

    # os.replace overwrites a previous in.wav on every platform, whereas
    # os.rename raises on Windows when the destination already exists.
    os.replace(wav_file.name, renamed_input_path)

    command = ["ffmpeg", "-i", renamed_input_path, "-acodec", "libopus", "-y", output_path_ogg]
    try:
        # Argument list instead of a shell string; stdin is detached so
        # ffmpeg cannot consume the server's standard input.
        completed = subprocess.run(command, stdin=subprocess.DEVNULL, check=False)
    except OSError as exc:
        # Typically ffmpeg is not installed; stay best-effort like before.
        logging.getLogger(__name__).warning("Could not run ffmpeg: %s", exc)
    else:
        if completed.returncode != 0:
            logging.getLogger(__name__).warning(
                "ffmpeg exited with status %d", completed.returncode
            )
    return output_path_ogg
def tts_fn(text, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale):
    """Gradio click handler: synthesize ``text`` and also produce an OGG file.

    Args:
        text: Text to synthesize.
        speaker: Speaker key, passed to ``infer`` as ``sid``.
        sdp_ratio: Passed through to ``infer``.
        noise_scale: Passed through to ``infer``.
        noise_scale_w: Passed through to ``infer``.
        length_scale: Passed through to ``infer``.

    Returns:
        A 3-tuple of the status string ``"Success"``, a
        ``(hps.data.sampling_rate, audio)`` pair, and the path returned by
        ``convert_wav_to_ogg``.
    """
    synthesis_options = dict(
        sdp_ratio=sdp_ratio,
        noise_scale=noise_scale,
        noise_scale_w=noise_scale_w,
        length_scale=length_scale,
        sid=speaker,
    )
    with torch.no_grad():
        waveform = infer(text, **synthesis_options)

    # infer() has just written tmp.wav; hand it over for OGG conversion.
    with open('tmp.wav', 'rb') as wav_handle:
        ogg_path = convert_wav_to_ogg(wav_handle)

    audio_payload = (hps.data.sampling_rate, waveform)
    return "Success", audio_payload, ogg_path
if __name__ == "__main__":
    # Script entry point: parse CLI options, load the model, build the Gradio
    # UI and start the web server.

    def _parse_bool(token):
        """Interpret a command-line token such as "True" or "0" as a boolean."""
        return str(token).strip().lower() in {"1", "true", "t", "yes", "y", "on"}

    parser = argparse.ArgumentParser()
    parser.add_argument("--model_dir", default="./logs/bfy/bfy_a.pth", help="path of your model")
    parser.add_argument("--config_dir", default="./configs/config.json", help="path of your config file")
    # Accept both a bare ``--share`` and the older ``--share True`` form, and
    # turn the value into a real boolean rather than a non-empty string.
    parser.add_argument("--share", nargs="?", const=True, default=False, type=_parse_bool,
                        help="make link public")
    parser.add_argument("-d", "--debug", action="store_true", help="enable DEBUG-LEVEL log")
    args = parser.parse_args()
    if args.debug:
        logger.info("Enable DEBUG-LEVEL log")
        # logging.basicConfig() was already called at import time, so calling
        # it again would do nothing; set the root logger level directly.
        logging.getLogger().setLevel(logging.DEBUG)

    hps = utils.get_hparams_from_file(args.config_dir)

    # NOTE: only CUDA and CPU are selected. An "mps" branch (guarded by
    # sys.platform == "darwin" and torch.backends.mps.is_available()) used to
    # be sketched here but is disabled.
    device = "cuda:0" if torch.cuda.is_available() else "cpu"

    net_g = SynthesizerTrn(
        len(symbols),
        hps.data.filter_length // 2 + 1,
        hps.train.segment_size // hps.data.hop_length,
        n_speakers=hps.data.n_speakers,
        **hps.model).to(device)
    _ = net_g.eval()
    # Load weights only; no optimizer is passed and its state is skipped.
    _ = utils.load_checkpoint(args.model_dir, net_g, None, skip_optimizer=True)

    speaker_ids = hps.data.spk2id
    speakers = list(speaker_ids.keys())

    with gr.Blocks() as app:
        with gr.Row():
            # Left column: notice text and synthesis controls.
            with gr.Column():
                gr.Markdown(value="""
步非烟 Ver.a Bert-Vits2在线语音生成\n
1、模型作者:数字星瞳企划 https://t.me/xingtong25680 \n
2、原项目地址:https://github.com/Stardust-minus/Bert-VITS2\n
3、使用此模型进行二创请注明AI生成,以及原项目地址\n
4、素材来自散文朗读比赛,严禁将此项目用于一切违反《中华人民共和国宪法》,《中华人民共和国刑法》,《中华人民共和国治安管理处罚法》和《中华人民共和国民法典》之用途。严禁用于任何政治相关用途。 \n
""")
                text = gr.TextArea(label="Text", placeholder="Input Text Here",
                                   value="这里是数字星瞳企画,请在电报搜索星瞳全拼加二五六八零,获取最新更新进展。")
                speaker = gr.Dropdown(choices=speakers, value=speakers[0], label='Speaker')
                sdp_ratio = gr.Slider(minimum=0, maximum=1, value=0.2, step=0.01, label='语调变化')
                noise_scale = gr.Slider(minimum=0.1, maximum=1.5, value=0.6, step=0.01, label='感情变化')
                noise_scale_w = gr.Slider(minimum=0.1, maximum=1.4, value=0.8, step=0.01, label='音节发音长度变化')
                length_scale = gr.Slider(minimum=0.1, maximum=2, value=1, step=0.01, label='语速')
                btn = gr.Button("开启AI语音之旅吧!", variant="primary")
            # Right column: outputs and links to related models.
            with gr.Column():
                text_output = gr.Textbox(label="Message")
                audio_output = gr.Audio(label="Output Audio")
                ogg_output = gr.File(label="Converted OGG file")
                gr.Markdown(value="""
模型汇总:\n
星瞳整合 https://huggingface.co/spaces/digitalxingtong/Xingtong-All-in-One\n
步非烟 Ver.a https://huggingface.co/spaces/digitalxingtong/Bufeiyan-a-Bert-VITS2
步非烟 Ver.b https://huggingface.co/spaces/digitalxingtong/Bufeiyan-b-Bert-VITS2
步非烟 Ver.c https://huggingface.co/spaces/digitalxingtong/Bufeiyan-c-Bert-VITS2
男声朗读 https://huggingface.co/spaces/digitalxingtong/Kanghui-Read-Bert-VITS2 \n
男声朗读(长文本) https://huggingface.co/spaces/digitalxingtong/Kanghui-Longread-Bert-VITS2\n
IGN 中国 https://huggingface.co/spaces/digitalxingtong/Ign-Read-Bert-VITS2 \n
IGN 中国(长文本)https://huggingface.co/spaces/digitalxingtong/Ign-Longread-Bert-VITS2 \n
""")
        btn.click(tts_fn,
                  inputs=[text, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale],
                  outputs=[text_output, audio_output, ogg_output])

    # Forward --share so the flag actually controls public-link creation.
    app.launch(show_error=True, share=args.share)
|