Spaces:
Runtime error
Runtime error
File size: 8,181 Bytes
8a1292d 9116564 8a1292d 9116564 8a1292d 9116564 cabd1e3 8a1292d 9116564 8a1292d 3da2529 8a1292d fec9963 c656884 171db4e fec9963 8654ef4 221b8b8 44b78c2 76aa9ba 87c7688 171db4e 12de674 171db4e fec9963 8a1292d 0627904 9116564 8a1292d d2565d9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 |
# --- Platform setup ---------------------------------------------------------
import sys
import os

# Set ahead of the `import torch` below. PyTorch reads this variable to allow
# CPU fallback for operators that have no MPS implementation on macOS.
if sys.platform == "darwin":
    os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"

# --- Logging ----------------------------------------------------------------
import logging

# Keep chatty third-party loggers at WARNING so the app's INFO output stays
# readable.
for _noisy_logger_name in ("numba", "markdown_it", "urllib3", "matplotlib"):
    logging.getLogger(_noisy_logger_name).setLevel(logging.WARNING)
del _noisy_logger_name

logging.basicConfig(
    level=logging.INFO,
    format="| %(name)s | %(levelname)s | %(message)s",
)
logger = logging.getLogger(__name__)

# --- Model / UI dependencies (order kept as in the original file) ------------
import torch
import argparse
import commons
import utils
from models import SynthesizerTrn
from text.symbols import symbols
from text import cleaned_text_to_sequence, get_bert
from text.cleaner import clean_text
import gradio as gr
import webbrowser

# Synthesizer instance; stays None until the __main__ section builds it.
net_g = None
def get_text(text, language_str, hps):
    """Turn raw text into the model inputs used by ``infer``.

    Args:
        text: Input text to synthesize.
        language_str: Language code forwarded to ``clean_text``,
            ``cleaned_text_to_sequence`` and ``get_bert``.
        hps: Hyper-parameter object; only ``hps.data.add_blank`` is read here.

    Returns:
        A 4-tuple ``(bert, phone, tone, language)`` where ``bert`` is whatever
        ``get_bert`` returns and the other three are ``torch.LongTensor``s
        built from the cleaned sequences.

    Raises:
        AssertionError: If the last dimension of ``bert`` does not equal the
            number of phone ids.
    """
    norm_text, phone_ids, tone_ids, word2ph = clean_text(text, language_str)
    phone_ids, tone_ids, lang_ids = cleaned_text_to_sequence(
        phone_ids, tone_ids, language_str
    )

    if hps.data.add_blank:
        # Insert the id 0 into each sequence via commons.intersperse.
        phone_ids, tone_ids, lang_ids = (
            commons.intersperse(seq, 0)
            for seq in (phone_ids, tone_ids, lang_ids)
        )
        # Keep the word-to-phone counts in step with the lengthened phone
        # sequence: double every count, then add one to the first entry.
        # NOTE(review): presumably intersperse yields 2*n + 1 items - confirm.
        for idx, count in enumerate(word2ph):
            word2ph[idx] = count * 2
        word2ph[0] += 1

    bert = get_bert(norm_text, word2ph, language_str)
    del word2ph

    # The BERT features must line up one-to-one with the phone ids.
    assert bert.shape[-1] == len(phone_ids)

    phone_tensor = torch.LongTensor(phone_ids)
    tone_tensor = torch.LongTensor(tone_ids)
    lang_tensor = torch.LongTensor(lang_ids)
    return bert, phone_tensor, tone_tensor, lang_tensor
import soundfile as sf
def infer(text, sdp_ratio, noise_scale, noise_scale_w, length_scale, sid):
    """Synthesize ``text`` with the loaded model and save it to ``tmp.wav``.

    Relies on the module-level ``net_g``, ``hps`` and ``device`` that the
    ``__main__`` section sets up before the UI is launched.

    Args:
        text: Text to synthesize; processed with the language code ``"ZH"``.
        sdp_ratio: Forwarded unchanged to ``net_g.infer``.
        noise_scale: Forwarded unchanged to ``net_g.infer``.
        noise_scale_w: Forwarded unchanged to ``net_g.infer``.
        length_scale: Forwarded unchanged to ``net_g.infer``.
        sid: Speaker name; must be a key of ``hps.data.spk2id``.

    Returns:
        The generated audio as a float NumPy array, taken from element
        ``[0][0, 0]`` of the model output.

    Side effects:
        Writes the audio to ``tmp.wav`` in the current working directory,
        overwriting any previous file.
    """
    bert, phones, tones, lang_ids = get_text(text, "ZH", hps)
    with torch.no_grad():
        # Add a leading batch dimension of 1 to every model input.
        x_tst = phones.to(device).unsqueeze(0)
        tones = tones.to(device).unsqueeze(0)
        lang_ids = lang_ids.to(device).unsqueeze(0)
        bert = bert.to(device).unsqueeze(0)
        x_tst_lengths = torch.LongTensor([phones.size(0)]).to(device)
        del phones
        speakers = torch.LongTensor([hps.data.spk2id[sid]]).to(device)

        model_output = net_g.infer(
            x_tst,
            x_tst_lengths,
            speakers,
            tones,
            lang_ids,
            bert,
            sdp_ratio=sdp_ratio,
            noise_scale=noise_scale,
            noise_scale_w=noise_scale_w,
            length_scale=length_scale,
        )
        audio = model_output[0][0, 0].data.cpu().float().numpy()
        # Drop the input tensors promptly now that the audio is on the CPU.
        del x_tst, tones, lang_ids, bert, x_tst_lengths, speakers, model_output

    # Use the configured sampling rate rather than a hard-coded 44100 so the
    # file on disk agrees with the rate tts_fn reports for the same samples.
    sf.write("tmp.wav", audio, hps.data.sampling_rate)
    return audio
def convert_wav_to_ogg(wav_file):
    """Move a WAV file to ``in/in.wav`` and transcode it to ``out/out.ogg``.

    The conversion is best-effort: if ``ffmpeg`` is missing or exits with a
    non-zero status, a warning is logged and the output path is still
    returned, as the original implementation did.

    Args:
        wav_file: An object whose ``name`` attribute is the path of the WAV
            file (for example an open file handle).

    Returns:
        The relative path ``out/out.ogg``. Both the input and output paths are
        fixed, so every call overwrites the result of the previous one.

    Raises:
        OSError: If the directories cannot be created or the source file
            cannot be moved.
    """
    import subprocess  # local import: only this function needs it

    os.makedirs("out", exist_ok=True)
    os.makedirs("in", exist_ok=True)
    output_path_ogg = os.path.join("out", "out.ogg")
    renamed_input_path = os.path.join("in", "in.wav")

    # os.replace overwrites an existing destination on every platform, whereas
    # os.rename raises on Windows when in/in.wav is left over from a prior call.
    os.replace(wav_file.name, renamed_input_path)

    command = [
        "ffmpeg",
        "-i", renamed_input_path,
        "-acodec", "libopus",
        "-y", output_path_ogg,
    ]
    log = logging.getLogger(__name__)
    try:
        # Pass the argument list directly instead of a joined shell string so
        # no shell parsing is involved.
        completed = subprocess.run(command, check=False)
    except OSError as err:
        log.warning("Could not run ffmpeg: %s", err)
    else:
        if completed.returncode != 0:
            log.warning("ffmpeg exited with status %d", completed.returncode)
    return output_path_ogg
def tts_fn(text, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale):
    """Gradio click handler: synthesize speech and provide an OGG copy.

    Args:
        text: Text to synthesize.
        speaker: Speaker name, passed to ``infer`` as ``sid``.
        sdp_ratio: Passed through to ``infer``.
        noise_scale: Passed through to ``infer``.
        noise_scale_w: Passed through to ``infer``.
        length_scale: Passed through to ``infer``.

    Returns:
        A 3-tuple: the status string ``"Success"``, a
        ``(sampling_rate, audio)`` pair, and the path returned by
        ``convert_wav_to_ogg``.
    """
    synthesis_options = {
        "sdp_ratio": sdp_ratio,
        "noise_scale": noise_scale,
        "noise_scale_w": noise_scale_w,
        "length_scale": length_scale,
        "sid": speaker,
    }
    with torch.no_grad():
        waveform = infer(text, **synthesis_options)
        # infer() has just written tmp.wav; hand that file to the converter.
        with open('tmp.wav', 'rb') as wav_handle:
            ogg_path = convert_wav_to_ogg(wav_handle)
    audio_payload = (hps.data.sampling_rate, waveform)
    return "Success", audio_payload, ogg_path
if __name__ == "__main__":
    # Script entry point: parse options, load the model, build and launch the
    # Gradio UI. `hps`, `device` and `net_g` are assigned here at module level
    # because infer() and tts_fn() read them as globals.

    def _parse_bool(value):
        """Interpret a command-line string as a boolean."""
        return str(value).strip().lower() in {"1", "true", "yes", "y", "on"}

    parser = argparse.ArgumentParser()
    parser.add_argument("--model_dir", default="./logs/xingtong/xt_new.pth", help="path of your model")
    parser.add_argument("--config_dir", default="./configs/config.json", help="path of your config file")
    # Without a type, any supplied value (even "False") was a truthy string.
    # Accept both a bare `--share` and an explicit `--share true/false`.
    parser.add_argument("--share", nargs="?", const=True, default=False, type=_parse_bool,
                        help="make link public")
    parser.add_argument("-d", "--debug", action="store_true", help="enable DEBUG-LEVEL log")

    args = parser.parse_args()
    if args.debug:
        logger.info("Enable DEBUG-LEVEL log")
        # logging.basicConfig() was already called at import time, so calling
        # it again would be a no-op; raise the root logger level directly.
        logging.getLogger().setLevel(logging.DEBUG)

    hps = utils.get_hparams_from_file(args.config_dir)
    # Only CUDA or CPU is selected here; MPS is not used as a device.
    device = "cuda:0" if torch.cuda.is_available() else "cpu"

    net_g = SynthesizerTrn(
        len(symbols),
        hps.data.filter_length // 2 + 1,
        hps.train.segment_size // hps.data.hop_length,
        n_speakers=hps.data.n_speakers,
        **hps.model).to(device)
    net_g.eval()
    # Load weights only; no optimizer is passed and optimizer state is skipped.
    utils.load_checkpoint(args.model_dir, net_g, None, skip_optimizer=True)

    speaker_ids = hps.data.spk2id
    speakers = list(speaker_ids.keys())

    with gr.Blocks() as app:
        with gr.Row():
            # Left column: notes, text input and synthesis controls.
            with gr.Column():
                gr.Markdown(value="""
扇宝 Bert-Vits2在线语音生成\n
1、模型作者:数字星瞳企划 https://t.me/xingtong25680 \n
\n
2、原项目地址:https://github.com/Stardust-minus/Bert-VITS2\n
3、使用此模型进行二创请注明AI生成,以及该项目地址。\n
4、如果想生成超长txt文本的音频请使用colab。 https://colab.research.google.com/drive/13ek8_j1aknr-pbjj3NXxSM4vBIsracU3?usp=drive_link\n
""")
                text = gr.TextArea(label="Text", placeholder="Input Text Here",
                                   value="这里是数字星瞳企画,请在电报搜索星瞳全拼加二五六八零,获取最新更新进展。")
                speaker = gr.Dropdown(choices=speakers, value=speakers[0], label='Speaker')
                sdp_ratio = gr.Slider(minimum=0, maximum=1, value=0.2, step=0.01, label='语调变化')
                noise_scale = gr.Slider(minimum=0.1, maximum=1.5, value=0.6, step=0.01, label='感情变化')
                noise_scale_w = gr.Slider(minimum=0.1, maximum=1.4, value=0.8, step=0.01, label='音节发音长度变化')
                length_scale = gr.Slider(minimum=0.1, maximum=2, value=1, step=0.01, label='语速')
                btn = gr.Button("开启AI语音之旅吧!", variant="primary")
            # Right column: status message, audio player, OGG download, links.
            with gr.Column():
                text_output = gr.Textbox(label="Message")
                audio_output = gr.Audio(label="Output Audio")
                ogg_output = gr.File(label="Converted OGG file")
                gr.Markdown(value="""
模型汇总:\n
星瞳 https://huggingface.co/spaces/digitalxingtong/Xingtong-Bert-Vits2 \n
星瞳 朗读专用 https://huggingface.co/spaces/digitalxingtong/Xingtong-Read-Bert-VITS2 \n
星瞳 长文本专用 https://huggingface.co/spaces/digitalxingtong/Xingtong-Longread-Bert-VITS2 \n
七海 https://huggingface.co/spaces/digitalxingtong/Nanami-Bert-Vits2 \n
阿梓 https://huggingface.co/spaces/digitalxingtong/Azusa-Bert-Vits2 \n
东雪莲 https://huggingface.co/spaces/digitalxingtong/Azuma-Bert-Vits2 \n
嘉然 https://huggingface.co/spaces/digitalxingtong/Jiaran-Bert-Vits2 \n
奶绿 杂谈 https://huggingface.co/spaces/digitalxingtong/Nailv-Bert-Vits2 \n
奶绿 朗读 https://huggingface.co/spaces/digitalxingtong/Nailv-read-Bert-Vits2 \n
露米 https://huggingface.co/spaces/digitalxingtong/Lumi-Bert-Vits2 \n
扇宝 https://huggingface.co/spaces/digitalxingtong/Shanbao-Bert-Vits2 \n
吉诺儿kino https://huggingface.co/spaces/digitalxingtong/Kino-Bert-Vits2 \n
恬豆 https://huggingface.co/spaces/digitalxingtong/Dou-Bert-Vits2 \n
牧牧白 https://huggingface.co/spaces/digitalxingtong/Miiu-Bert-Vits2 \n
鹿鸣 https://huggingface.co/spaces/digitalxingtong/Luming-Bert-Vits2 \n
永雏塔菲 https://huggingface.co/spaces/digitalxingtong/Taffy-Bert-VITS2 \n
""")

        btn.click(tts_fn,
                  inputs=[text, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale],
                  outputs=[text_output, audio_output, ogg_output])

    # Honour --share, which was parsed but never passed to launch() before.
    app.launch(show_error=True, share=args.share)
|