# vits-for-ba / run.py

import IPython.display as ipd
import torch
from scipy.io.wavfile import write

import commons
import utils
from models import SynthesizerTrn
from text import text_to_sequence

def get_text(text, hps):
    """Convert raw text to a LongTensor of symbol IDs for the model."""
    text_norm = text_to_sequence(text, hps.data.text_cleaners)
    if hps.data.add_blank:
        # Intersperse blank tokens between symbols, matching training.
        text_norm = commons.intersperse(text_norm, 0)
    text_norm = torch.LongTensor(text_norm)
    return text_norm

# Local paths to the fine-tuned checkpoint and its config; adjust for your machine.
config_path = "C:\\Users\\zelda\\Documents\\GitHub\\vits-finetuning\\models\\kayoko\\config.json"
model_path = "C:\\Users\\zelda\\Documents\\GitHub\\vits-finetuning\\models\\kayoko\\hayoko.pth"
hps = utils.get_hparams_from_file(config_path)

# Build the synthesizer from the config and move it to the GPU (CUDA required).
net_g = SynthesizerTrn(
    len(hps.symbols),
    hps.data.filter_length // 2 + 1,
    hps.train.segment_size // hps.data.hop_length,
    n_speakers=hps.data.n_speakers,
    **hps.model).cuda()
net_g.eval()
_ = utils.load_checkpoint(model_path, net_g, None)

speaker_id = 10  #@param {type:"number"}
# Input text (Japanese): "Sigh... won't you calm down?"
text = "はぁ... 落ち着いてくれないか？"
# noise_scale controls prosody variation, noise_scale_w the stochastic
# duration predictor, and length_scale the speaking rate (>1 is slower).
noise_scale = 0.6  #@param {type:"number"}
noise_scale_w = 0.668  #@param {type:"number"}
length_scale = 1.0  #@param {type:"number"}

stn_tst = get_text(text, hps)
with torch.no_grad():
    x_tst = stn_tst.cuda().unsqueeze(0)
    x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).cuda()
    sid = torch.LongTensor([speaker_id]).cuda()
    audio = net_g.infer(
        x_tst,
        x_tst_lengths,
        sid=sid,
        noise_scale=noise_scale,
        noise_scale_w=noise_scale_w,
        length_scale=length_scale,
    )[0][0, 0].data.cpu().float().numpy()

# Play the result inline (only visible when running inside IPython/Jupyter).
ipd.display(ipd.Audio(audio, rate=hps.data.sampling_rate, normalize=False))
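
# ipd.display is a no-op outside a notebook, so when running this as a plain
# script it also helps to write the waveform to disk. A minimal sketch using
# the already-imported scipy.io.wavfile.write; the output filename here is
# arbitrary, and the float32 array is written as an IEEE-float WAV.
write("output.wav", hps.data.sampling_rate, audio)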