"""Gradio demo: emotional VITS TTS for 9-nine- characters."""

import os
import random

import gradio as gr
import numpy as np
import torch

import commons
import utils
from models import SynthesizerTrn
from text import text_to_sequence
from text.symbols import symbols


def get_text(text, hps):
    """Convert raw text to a tensor of symbol ids using the configured cleaners."""
    text_norm = text_to_sequence(text, hps.data.text_cleaners)
    if hps.data.add_blank:
        # Interleave a blank token (id 0) between symbols, as VITS expects.
        text_norm = commons.intersperse(text_norm, 0)
    text_norm = torch.LongTensor(text_norm)
    return text_norm
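
# For illustration: commons.intersperse(seq, 0) pads the sequence with the
# blank id 0 between and around every symbol, e.g. [5, 9, 3] becomes
# [0, 5, 0, 9, 0, 3, 0], which is the layout the model was trained on when
# add_blank is enabled.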


def tts(txt, emotion, index, hps, net_g, random_emotion_root):
    """Synthesize `txt` for speaker `index`.

    `emotion` is either a path to a reference emotion embedding (.emo.npy)
    or the string "random_sample" to draw one at random from
    `random_emotion_root`.
    """
    stn_tst = get_text(txt, hps)
    with torch.no_grad():
        x_tst = stn_tst.unsqueeze(0)
        x_tst_lengths = torch.LongTensor([stn_tst.size(0)])
        sid = torch.LongTensor([index])  # speaker id
        if os.path.exists(f"{emotion}"):
            # Use the given embedding file directly.
            emo = torch.FloatTensor(np.load(f"{emotion}")).unsqueeze(0)
            path = emotion
        elif emotion == "random_sample":
            # Pick a random embedding from the character's folder.
            rand_wav = random.choice(os.listdir(random_emotion_root))
            path = f"{random_emotion_root}/{rand_wav}"
            emo = torch.FloatTensor(np.load(path)).unsqueeze(0)
            print(path)
        else:
            raise ValueError("invalid `emotion`: expected an existing .emo.npy "
                             "path or 'random_sample'")

        audio = net_g.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=0.667,
                            noise_scale_w=0.8, length_scale=1,
                            emo=emo)[0][0, 0].data.float().numpy()
    return audio, path
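
# Usage sketch (hypothetical file names; assumes `hps` and `net_g` are already
# loaded as in runVits() below):
#
#     audio, ref = tts("こんにちは。", "random_sample", 1, hps, net_g, "9nineEmo/sr")
#     audio, ref = tts("こんにちは。", "9nineEmo/sr/sr0001.wav.emo.npy", 1, hps, net_g, "9nineEmo/sr")
#
# In both cases `ref` records which embedding conditioned the synthesis.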


def random_generate(txt, index, hps, net_g, random_emotion_root):
    audio, rand_wav = tts(txt, emotion='random_sample', index=index, hps=hps,
                          net_g=net_g, random_emotion_root=random_emotion_root)
    return audio, rand_wav


def characterRoot(name):
    """Map a character name to (emotion-embedding folder, speaker index)."""
    roots = {
        '九条都': ("9nineEmo/my", 0),
        '新海天': ("9nineEmo/sr", 1),
        '结城希亚': ("9nineEmo/na", 2),
        '蕾娜': ("9nineEmo/gt", 3),
        '索菲': ("9nineEmo/sf", 4),
    }
    random_emotion_root, index = roots[name]
    return random_emotion_root, index
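
# To add a character, place its .emo.npy files under 9nineEmo/<tag>/ and add a
# '<name>': ("9nineEmo/<tag>", <speaker index>) entry above; the index must
# match the speaker id the multi-speaker model was trained with.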


def configSelect(config):
    """Pick the config file and checkpoint for the multi- or single-speaker model."""
    if config == 'mul':
        config_file = "./configs/9nine_multi.json"
        checkpoint = "logs/9nineM/G_252000.pth"
    elif config == "single":
        config_file = "./configs/sora.json"
        checkpoint = "logs/sora/G_341200.pth"
    else:
        raise ValueError("config must be 'mul' or 'single'")
    return config_file, checkpoint


def runVits(name, config, txt, emotion):
    config_file, checkpoint = configSelect(config)
    random_emotion_root, index = characterRoot(name=name)
    hps = utils.get_hparams_from_file(config_file)
    net_g = SynthesizerTrn(
        len(symbols),
        hps.data.filter_length // 2 + 1,
        hps.train.segment_size // hps.data.hop_length,
        n_speakers=hps.data.n_speakers,
        **hps.model)
    _ = net_g.eval()
    _ = utils.load_checkpoint(checkpoint, net_g, None)
    audio, rand_wav = tts(txt, emotion=emotion, index=index, hps=hps, net_g=net_g,
                          random_emotion_root=random_emotion_root)
    return (hps.data.sampling_rate, audio), rand_wav
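
# Note: runVits() rebuilds the network and reloads the checkpoint on every
# request. A minimal memoization sketch (hypothetical helper, not wired into
# the UI below) that would avoid the repeated load cost:
_model_cache = {}


def load_model_cached(config):
    """Build (hps, net_g) once per config name and reuse it afterwards."""
    if config not in _model_cache:
        config_file, checkpoint = configSelect(config)
        hps = utils.get_hparams_from_file(config_file)
        net_g = SynthesizerTrn(
            len(symbols),
            hps.data.filter_length // 2 + 1,
            hps.train.segment_size // hps.data.hop_length,
            n_speakers=hps.data.n_speakers,
            **hps.model)
        net_g.eval()
        utils.load_checkpoint(checkpoint, net_g, None)
        _model_cache[config] = (hps, net_g)
    return _model_cache[config]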


def nineMul(name, txt):
    config = 'mul'
    audio, rand_wav = runVits(name, config, txt, 'random_sample')
    return "multiple model success", audio, rand_wav


def nineSingle(name, txt):
    config = 'single'
    audio, rand_wav = runVits(name, config, txt, 'random_sample')
    return "single model success", audio, rand_wav


def nineMul_select_emo(name, txt, emo):
    config = 'mul'
    print(emo)
    audio, _ = runVits(name, config, txt, emo)
    message = "emotion reference: " + emo + " synthesis success!"
    return message, audio


app = gr.Blocks()
with app:
    with gr.Tabs():
        with gr.TabItem("9nine multiple model"):
            character = gr.Radio(['九条都', '新海天', '结城希亚', '蕾娜', '索菲'], label='character',
                                 info="select the character you want")
            text = gr.TextArea(label="input content, Japanese support only",
                               value="祭りに行っただよね、知らない女の子と一緒にいて。")
            submit = gr.Button("generate", variant='primary')
            message = gr.Textbox(label="Message")
            audio = gr.Audio(label="output")
            emotion = gr.Textbox(label="emotion reference:")
            submit.click(nineMul, [character, text], [message, audio, emotion])
        with gr.TabItem("9nine single model"):
            character = gr.Radio(['新海天'], label='character',
                                 info="single model for 新海天 only")
            text = gr.TextArea(label="input content, Japanese support only",
                               value="祭りに行っただよね、知らない女の子と一緒にいて。")
            submit = gr.Button("generate", variant='primary')
            message = gr.Textbox(label="Message")
            audio = gr.Audio(label="output")
            emotion = gr.Textbox(label="emotion reference:")
            submit.click(nineSingle, [character, text], [message, audio, emotion])
        with gr.TabItem("Choose Emotion Embedding"):
            character = gr.Radio(['九条都', '新海天', '结城希亚', '蕾娜', '索菲'], label='character',
                                 info="select the character you want")
            text = gr.TextArea(label="input content, Japanese support only",
                               value="祭りに行っただよね、知らない女の子と一緒にいて。")
            emotion = gr.Textbox(label='emotion reference from the multi-speaker model, e.g. '
                                       '"./9nineEmo/sf/sf0207.wav.emo.npy"; prefer the character\'s '
                                       "own embeddings, as another character's emotion can bleed "
                                       "into the voice")
            submit = gr.Button("generate", variant='primary')
            message = gr.Textbox(label="Message")
            audio = gr.Audio(label="output")
            submit.click(nineMul_select_emo, [character, text, emotion], [message, audio])

app.launch()
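
# To expose the demo beyond localhost, launch() also accepts standard
# gradio options such as share=True (temporary public link) or
# server_name="0.0.0.0"; the original script uses the defaults.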