leo-emovits

Build error

App Files Files Community

leo-emovits / app.py

chilge

Update app.py

81470ae over 1 year ago

raw

history blame

3.47 kB

	import gradio as gr
	import os
	import random
	import IPython.display as ipd
	import commons
	import utils
	import json
	import torch
	import tempfile
	import numpy as np
	from models import SynthesizerTrn
	from text.symbols import symbols
	from text import text_to_sequence
	from scipy.io.wavfile import write

	def get_text(text, hps):
	    """Turn raw text into a ``torch.LongTensor`` of symbol ids.

	    The text is converted by ``text_to_sequence`` using the cleaners named
	    in ``hps.data.text_cleaners``. When ``hps.data.add_blank`` is truthy the
	    id sequence is additionally passed through
	    ``commons.intersperse(..., 0)`` (presumably inserting blank id 0
	    between symbols -- confirm against ``commons``).
	    """
	    token_ids = text_to_sequence(text, hps.data.text_cleaners)
	    padded_ids = commons.intersperse(token_ids, 0) if hps.data.add_blank else token_ids
	    return torch.LongTensor(padded_ids)

	def get_text_byroma(text, hps):
	    """Map already-romanised input straight to symbol ids.

	    Each element of ``text`` is looked up with ``symbols.index`` (no text
	    cleaning is applied), so an element that is not in ``symbols`` raises
	    ``ValueError``. When ``hps.data.add_blank`` is truthy the ids are
	    passed through ``commons.intersperse(..., 0)``. The result is returned
	    as a ``torch.LongTensor``.
	    """
	    symbol_ids = list(map(symbols.index, text))
	    if not hps.data.add_blank:
	        return torch.LongTensor(symbol_ids)
	    return torch.LongTensor(commons.intersperse(symbol_ids, 0))

	# Hyper-parameters for the "leo" voice, read from its JSON config.
	hps = utils.get_hparams_from_file("./configs/leo.json")

	# Build the synthesizer from the config. The two derived positional
	# arguments are presumably the spectrogram channel count and the training
	# segment length in frames -- confirm against models.SynthesizerTrn.
	net_g = SynthesizerTrn(
	    len(symbols),
	    hps.data.filter_length // 2 + 1,
	    hps.train.segment_size // hps.data.hop_length,
	    n_speakers=hps.data.n_speakers,
	    **hps.model
	)

	# Switch to evaluation mode, then load the generator checkpoint into the
	# model (no optimizer is passed, hence the trailing None).
	net_g.eval()
	utils.load_checkpoint("logs/leo/G_4000.pth", net_g, None)

	# Root directory from which emotion reference audio is randomly sampled.
	random_emotion_root = "wavs"

	# Parse the config inside a context manager so the file handle is closed
	# deterministically instead of being left to the garbage collector, and
	# read it as UTF-8 rather than the platform default encoding.
	# NOTE(review): emotion_dict is not referenced anywhere in the visible code.
	with open("configs/leo.json", "r", encoding="utf-8") as config_file:
	    emotion_dict = json.load(config_file)

	def _load_emotion_embedding(emotion):
	    """Resolve ``emotion`` to the embedding tensor handed to ``net_g.infer``.

	    Lookup order:
	      1. ``"<emotion>.emo.npy"`` exists: load it and add a leading axis.
	      2. ``emotion == "random_sample"``: pick at random among the entries of
	         ``random_emotion_root`` that end in ``wav`` and have a matching
	         ``.emo.npy`` file next to them.
	      3. ``emotion`` ends in ``wav``: compute the embedding with
	         ``emotion_extract.extract_wav``.

	    Raises:
	        ValueError: if ``emotion`` matches none of the cases above, or if
	            ``random_sample`` was requested but no candidate exists.
	    """
	    precomputed = f"{emotion}.emo.npy"
	    if os.path.exists(precomputed):
	        return torch.FloatTensor(np.load(precomputed)).unsqueeze(0)

	    if emotion == "random_sample":
	        # Collect the valid candidates up front: retrying random picks in a
	        # loop never terminates when the directory has no usable file.
	        candidates = [
	            name
	            for name in os.listdir(random_emotion_root)
	            if name.endswith("wav")
	            and os.path.exists(f"{random_emotion_root}/{name}.emo.npy")
	        ]
	        if not candidates:
	            raise ValueError(
	                f"no '*wav' file with a matching '.emo.npy' found in "
	                f"{random_emotion_root!r}"
	            )
	        rand_wav = random.choice(candidates)
	        emo = torch.FloatTensor(
	            np.load(f"{random_emotion_root}/{rand_wav}.emo.npy")
	        ).unsqueeze(0)
	        print(f"{random_emotion_root}/{rand_wav}")
	        return emo

	    # The isinstance guard covers a missing selection (None) from the UI.
	    if isinstance(emotion, str) and emotion.endswith("wav"):
	        # Imported lazily, as in the original: only this branch needs it.
	        import emotion_extract
	        return torch.FloatTensor(emotion_extract.extract_wav(emotion))

	    # Previously this case only printed the message and then crashed on the
	    # unbound ``emo`` variable; fail with an explicit error instead.
	    raise ValueError("emotion参数不正确")


	def tts(txt, emotion, temp_file_path, roma=False):
	    """Synthesize speech for ``txt`` and return the waveform.

	    Args:
	        txt: Text to synthesize.
	        emotion: Path of a reference emotion audio file, or
	            ``"random_sample"`` to draw one at random from
	            ``random_emotion_root``.
	        temp_file_path: Path the waveform is written to as a WAV file for
	            display; the file is removed again before returning.
	        roma: When true, ``txt`` is converted with ``get_text_byroma``
	            (direct symbol lookup) instead of ``get_text``. The original
	            body read an undefined ``roma`` name and raised ``NameError``
	            on every call; it is now an optional keyword defaulting to
	            ``False``.

	    Returns:
	        The generated audio as a float NumPy array.

	    Raises:
	        ValueError: if ``emotion`` cannot be resolved to an embedding.
	    """
	    with torch.no_grad():
	        # Resolve the emotion first so an invalid choice fails before any
	        # text processing or inference work is done.
	        emo = _load_emotion_embedding(emotion)

	        stn_tst = get_text_byroma(txt, hps) if roma else get_text(txt, hps)
	        x_tst = stn_tst.unsqueeze(0)
	        x_tst_lengths = torch.LongTensor([stn_tst.size(0)])
	        sid = torch.LongTensor([0])

	        audio = net_g.infer(
	            x_tst,
	            x_tst_lengths,
	            sid=sid,
	            noise_scale=0.667,
	            noise_scale_w=0.8,
	            length_scale=1.2,
	            emo=emo,
	        )[0][0, 0].data.float().numpy()

	    try:
	        # Save the waveform to the temporary file, then display it.
	        write(temp_file_path, hps.data.sampling_rate, audio)
	        ipd.display(
	            ipd.Audio(temp_file_path, rate=hps.data.sampling_rate, normalize=False)
	        )
	    finally:
	        # Always remove the temporary file, even if writing or displaying
	        # failed; the existence check keeps cleanup from masking that error.
	        if os.path.exists(temp_file_path):
	            os.remove(temp_file_path)

	    return audio

	def generate_audio(txt, emotion):
	    """Gradio callback: synthesize ``txt`` with the selected emotion.

	    Args:
	        txt: Text entered by the user.
	        emotion: Selected emotion reference, forwarded unchanged to ``tts``.

	    Returns:
	        A ``(sampling_rate, waveform)`` tuple, the form a numpy-typed
	        Gradio ``Audio`` output expects; a bare array carries no sample
	        rate.
	    """
	    # Only a unique path is needed. Close the handle straight away so it is
	    # not leaked and so the file can be reopened for writing on platforms
	    # that forbid opening a file that is still open (e.g. Windows).
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
	        temp_file_path = temp_file.name

	    try:
	        audio = tts(txt, emotion, temp_file_path)
	    finally:
	        # tts removes the file on success; clean up here in case it raised
	        # before getting that far.
	        if os.path.exists(temp_file_path):
	            os.remove(temp_file_path)

	    return hps.data.sampling_rate, audio

	# Offer only reference wav files, qualified with their directory. The raw
	# directory listing also contains the '.emo.npy' embeddings (which tts
	# rejects), and bare file names are resolved relative to the working
	# directory rather than random_emotion_root, so they could not be found.
	reference_wavs = sorted(
	    f"{random_emotion_root}/{name}"
	    for name in os.listdir(random_emotion_root)
	    if name.endswith("wav")
	)

	# NOTE(review): the gr.inputs / gr.outputs namespaces are believed to be
	# deprecated in Gradio 3.x and removed in 4.x -- confirm against the pinned
	# Gradio version before upgrading.
	input_text = gr.inputs.Textbox(label="输入文本")
	input_emotion = gr.inputs.Dropdown(choices=["random_sample"] + reference_wavs, label="参考情感音频")
	output_audio = gr.outputs.Audio(type="numpy", label="合成音频")

	iface = gr.Interface(fn=generate_audio, inputs=[input_text, input_emotion], outputs=output_audio)
	iface.launch()