Spaces:
Runtime error
Runtime error
import gradio as gr | |
import torch | |
import commons | |
import utils | |
from models import SynthesizerTrn | |
from text.symbols import symbols | |
from text import text_to_sequence | |
import random | |
import os | |
import datetime | |
import numpy as np | |
def get_text(text, hps):
    """Turn raw text into a ``torch.LongTensor`` of symbol ids.

    The text is converted by ``text_to_sequence`` using the cleaners named in
    ``hps.data.text_cleaners``. When ``hps.data.add_blank`` is truthy the
    resulting sequence is additionally passed through
    ``commons.intersperse(..., 0)``.
    """
    token_ids = text_to_sequence(text, hps.data.text_cleaners)
    token_ids = commons.intersperse(token_ids, 0) if hps.data.add_blank else token_ids
    return torch.LongTensor(token_ids)
def _load_emotion_embedding(emotion, random_emotion_root):
    """Load the emotion embedding selected by ``emotion`` as a float tensor."""
    reference_path = f"{emotion}.emo.npy"
    if os.path.exists(reference_path):
        emo_path = reference_path
    elif emotion == "random_sample":
        # Only consider .npy files so that stray files in the directory are
        # never handed to np.load.
        candidates = [
            entry for entry in os.listdir(random_emotion_root)
            if entry.endswith(".npy")
        ]
        if not candidates:
            raise ValueError(
                f"no .npy emotion embeddings found in {random_emotion_root!r}"
            )
        emo_path = f"{random_emotion_root}/{random.choice(candidates)}"
        print(emo_path)
    else:
        # Previously this case only printed a message and then crashed later
        # on the unbound ``emo`` variable; fail here with a clear error.
        raise ValueError(f"emotion参数不正确: {emotion!r}")
    # unsqueeze(0) adds a leading dimension of size 1 in front of the array.
    return torch.FloatTensor(np.load(emo_path)).unsqueeze(0)


def tts(txt, emotion, index, hps, net_g, random_emotion_root):
    """Synthesize ``txt`` and return the waveform as a NumPy array.

    Args:
        txt: Text to synthesize; converted to symbol ids via ``get_text``.
        emotion: Either the path prefix of a reference emotion clip (the
            embedding is read from ``f"{emotion}.emo.npy"``) or the literal
            ``"random_sample"`` to draw a random embedding file from
            ``random_emotion_root``.
        index: Speaker id passed to the model as ``sid``.
        hps: Hyperparameter object forwarded to ``get_text``.
        net_g: Model exposing ``infer(...)``.
        random_emotion_root: Directory holding ``.npy`` emotion embeddings,
            used only when ``emotion == "random_sample"``.

    Returns:
        The result of ``net_g.infer(...)[0][0, 0]`` converted to a float
        NumPy array.

    Raises:
        ValueError: If ``emotion`` is neither an existing reference nor
            ``"random_sample"``, or if ``random_emotion_root`` contains no
            ``.npy`` files.
    """
    stn_tst = get_text(txt, hps)
    emo = _load_emotion_embedding(emotion, random_emotion_root)
    with torch.no_grad():
        x_tst = stn_tst.unsqueeze(0)
        x_tst_lengths = torch.LongTensor([stn_tst.size(0)])
        sid = torch.LongTensor([index])  # selects the character/speaker
        output = net_g.infer(
            x_tst,
            x_tst_lengths,
            sid=sid,
            noise_scale=0.667,
            noise_scale_w=0.8,
            length_scale=1,
            emo=emo,
        )
        audio = output[0][0, 0].data.float().numpy()
    return audio
def random_generate(txt, index, hps, net_g, random_emotion_root):
    """Synthesize ``txt`` with a randomly chosen emotion embedding.

    Delegates to ``tts`` with ``emotion='random_sample'`` and returns its
    result unchanged.
    """
    return tts(
        txt,
        emotion='random_sample',
        index=index,
        hps=hps,
        net_g=net_g,
        random_emotion_root=random_emotion_root,
    )
# Character name -> (emotion-embedding directory, speaker index).
_CHARACTER_TABLE = {
    '九条都': ("./9nineEmo/my", 0),
    '新海天': ("./9nineEmo/sr", 1),
    '结城希亚': ("./9nineEmo/na", 2),
    '蕾娜': ("./9nineEmo/gt", 3),
    '索菲': ("./9nineEmo/sf", 4),
}


def charaterRoot(name):
    """Return ``(random_emotion_root, index)`` for the character ``name``.

    Also stores the selected directory in the module-level
    ``random_emotion_root`` variable, as the original implementation did.

    Raises:
        ValueError: If ``name`` is not one of the known characters (this
            includes ``None``). The earlier if/elif chain fell through in
            that case and failed on the unbound ``index`` variable, or
            returned a stale directory from a previous call.
    """
    global random_emotion_root
    try:
        root, index = _CHARACTER_TABLE[name]
    except (KeyError, TypeError):
        known = ", ".join(_CHARACTER_TABLE)
        raise ValueError(
            f"unknown character {name!r}; expected one of: {known}"
        ) from None
    random_emotion_root = root
    return random_emotion_root, index
# Config key -> (hyperparameter JSON file, generator checkpoint path).
_CONFIG_TABLE = {
    'mul': ("./configs/9nine_multi.json", "logs/9nineM/G_115600.pth"),
    'single': ("./configs/sora.json", "logs/sora/G_341200.pth"),
}


def configSelect(config):
    """Return ``(config_file, checkpoint)`` for the model key ``config``.

    Also stores both values in the module-level ``config_file`` and
    ``checkPonit`` variables, as the original implementation did.

    Raises:
        ValueError: If ``config`` is not ``'mul'`` or ``'single'``. The
            earlier if/elif chain fell through in that case and returned
            whatever the globals held from a previous call (or raised
            NameError on the first call).
    """
    global checkPonit, config_file
    try:
        selected_config, selected_checkpoint = _CONFIG_TABLE[config]
    except (KeyError, TypeError):
        known = ", ".join(_CONFIG_TABLE)
        raise ValueError(
            f"unknown config {config!r}; expected one of: {known}"
        ) from None
    config_file = selected_config
    checkPonit = selected_checkpoint
    return config_file, checkPonit
# (config_file, checkpoint_path) -> (hps, net_g). Keeps already-loaded models
# so repeated requests do not rebuild the network and re-read the checkpoint.
_MODEL_CACHE = {}


def _load_model(config_file, checkpoint_path):
    """Return the ``(hps, net_g)`` pair for a config, loading it on first use."""
    key = (config_file, checkpoint_path)
    if key not in _MODEL_CACHE:
        hps = utils.get_hparams_from_file(config_file)
        net_g = SynthesizerTrn(
            len(symbols),
            hps.data.filter_length // 2 + 1,
            hps.train.segment_size // hps.data.hop_length,
            n_speakers=hps.data.n_speakers,
            **hps.model)
        net_g.eval()
        utils.load_checkpoint(checkpoint_path, net_g, None)
        _MODEL_CACHE[key] = (hps, net_g)
    return _MODEL_CACHE[key]


def runVits(name, config, txt):
    """Synthesize ``txt`` in the voice of character ``name``.

    Args:
        name: Character name, resolved by ``charaterRoot``.
        config: Model key, resolved by ``configSelect``.
        txt: Text to synthesize.

    Returns:
        A ``(sampling_rate, audio)`` tuple, where ``sampling_rate`` comes from
        ``hps.data.sampling_rate`` and ``audio`` is whatever
        ``random_generate`` returns.
    """
    config_file, checkpoint_path = configSelect(config)
    random_emotion_root, index = charaterRoot(name=name)
    # The model is loaded once per (config, checkpoint) and reused afterwards.
    hps, net_g = _load_model(config_file, checkpoint_path)
    audio = random_generate(txt=txt, index=index,
                            random_emotion_root=random_emotion_root,
                            net_g=net_g, hps=hps)
    return hps.data.sampling_rate, audio
# Disabled draft of a tabbed gr.Blocks UI; the gr.Interface below is what runs.
# app = gr.Blocks()
# with app:
#     with gr.Tabs():
#         with gr.TabItem("9nine multiple model"):
#             character = gr.Radio(['九条都', '新海天', '结城希亚', '蕾娜', '索菲'], label='character',
#                                  info="select character you want")
#             model = 'mul'
#             text = gr.Textbox(label="input content", lines=4, visible=True)
#
#             submit = gr.Button("generate", variant='primary')
#             audio = gr.Audio(label="output")
#             submit.click(runVits, [character, model, text], audio)
#         with gr.TabItem("9nine single model"):
#             character = gr.Radio(['九条都'], label='character',
#                                  info="select character you want")
#             model = 'single'
#             text = gr.Textbox(label="input content", lines=4, visible=True)
#
#             submit = gr.Button("generate", variant='primary')
#             audio = gr.Audio(label="output")
#             submit.click(runVits, [character, model, text], audio)
# app.launch(share=True)
if __name__ == '__main__':
    # Input widgets, listed in the positional order of runVits(name, config, txt).
    character_radio = gr.Radio(
        ['九条都', '新海天', '结城希亚', '蕾娜', '索菲'],
        label='character',
        info="select character you want",
    )
    config_radio = gr.Radio(
        ['mul'],
        label='configfile',
        info="select models(recent multiple model only)",
    )
    text_box = gr.Textbox(label="input content", lines=4, visible=True)
    audio_out = gr.Audio(label='output')

    demo = gr.Interface(
        fn=runVits,
        inputs=[character_radio, config_radio, text_box],
        outputs=[audio_out],
    )
    demo.launch(share=True)