talktalkai-models

Running

talktalkai-models / infer_web.py

xJuuzouYTx

[REMOVE] binary files

20d05ae about 1 year ago

6.04 kB

	from vc_infer_pipeline import VC
	from myutils import Audio
	from infer_pack.models import (
	SynthesizerTrnMs256NSFsid,
	SynthesizerTrnMs256NSFsid_nono,
	SynthesizerTrnMs768NSFsid,
	SynthesizerTrnMs768NSFsid_nono,
	)
	from fairseq import checkpoint_utils
	from config import Config
	import torch
	import numpy as np
	import traceback
	import os
	import sys
	import warnings

	now_dir = os.getcwd()
	sys.path.append(now_dir)
	os.makedirs(os.path.join(now_dir, "audios"), exist_ok=True)
	os.makedirs(os.path.join(now_dir, "audio-outputs"), exist_ok=True)
	os.makedirs(os.path.join(now_dir, "weights"), exist_ok=True)
	warnings.filterwarnings("ignore")
	torch.manual_seed(114514)

	config = Config()

	hubert_model = None
	weight_root = "weights"

	def load_hubert():
	# Determinar si existe una tarjeta N que pueda usarse para entrenar y acelerar la inferencia.
	global hubert_model
	models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
	["hubert_base.pt"],
	suffix="",
	)
	hubert_model = models[0]
	hubert_model = hubert_model.to(config.device)
	if config.is_half:
	hubert_model = hubert_model.half()
	else:
	hubert_model = hubert_model.float()
	hubert_model.eval()

	def vc_single(
	sid,
	input_audio_path0,
	input_audio_path1,
	f0_up_key,
	f0_file,
	f0_method,
	file_index,
	file_index2,
	# file_big_npy,
	index_rate,
	filter_radius,
	resample_sr,
	rms_mix_rate,
	protect,
	crepe_hop_length,
	):
	global tgt_sr, net_g, vc, hubert_model, version
	if input_audio_path0 is None or input_audio_path0 is None:
	return "You need to upload an audio", None
	f0_up_key = int(f0_up_key)
	try:
	if input_audio_path0 == "":
	audio = Audio.load_audio(input_audio_path1, 16000)
	else:
	audio = Audio.load_audio(input_audio_path0, 16000)

	audio_max = np.abs(audio).max() / 0.95
	if audio_max > 1:
	audio /= audio_max
	times = [0, 0, 0]
	if not hubert_model:
	load_hubert()
	if_f0 = cpt.get("f0", 1)
	file_index = (
	(
	file_index.strip(" ")
	.strip('"')
	.strip("\n")
	.strip('"')
	.strip(" ")
	.replace("trained", "added")
	)
	if file_index != ""
	else file_index2
	)

	audio_opt = vc.pipeline(
	hubert_model,
	net_g,
	sid,
	audio,
	input_audio_path1,
	times,
	f0_up_key,
	f0_method,
	file_index,
	# file_big_npy,
	index_rate,
	if_f0,
	filter_radius,
	tgt_sr,
	resample_sr,
	rms_mix_rate,
	version,
	protect,
	crepe_hop_length,
	f0_file=f0_file,
	)
	if tgt_sr != resample_sr >= 16000:
	tgt_sr = resample_sr
	index_info = (
	"Using index:%s." % file_index
	if os.path.exists(file_index)
	else "Index not used."
	)
	print(index_info)
	return "Success.\n %s\nTime:\n npy:%ss, f0:%ss, infer:%ss" % (
	index_info,
	times[0],
	times[1],
	times[2],
	), (tgt_sr, audio_opt)
	except:
	info = traceback.format_exc()
	print(info)
	return info, (None, None)

	def get_vc(model_name):
	global tgt_sr, net_g, vc, cpt, version

	# Comprobar si se pasó uno o varios modelos
	if model_name == "" or model_name == []:
	global hubert_model
	if hubert_model is not None: # 考虑到轮询, 需要加个判断看是否 sid 是由有模型切换到无模型的
	print("Limpiar caché")
	del net_g, vc, hubert_model, tgt_sr # ,cpt
	hubert_model = net_g = vc = hubert_model = tgt_sr = None

	# Si hay una GPU disponible, libera la memoria de la GPU
	if torch.cuda.is_available():
	torch.cuda.empty_cache()

	# Bloque de abajo no limpia completamente
	if_f0 = cpt.get("f0", 1)
	version = cpt.get("version", "v1")
	if version == "v1":
	if if_f0 == 1:
	net_g = SynthesizerTrnMs256NSFsid(
	*cpt["config"], is_half=config.is_half
	)
	else:
	net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
	elif version == "v2":
	if if_f0 == 1:
	net_g = SynthesizerTrnMs768NSFsid(
	*cpt["config"], is_half=config.is_half
	)
	else:
	net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])

	del net_g, cpt
	if torch.cuda.is_available():
	torch.cuda.empty_cache()
	cpt = None
	return {"success": False, "message": "No se proporcionó un sid"}

	person = "%s/%s" % (weight_root, model_name)
	print("Cargando %s" % person)
	cpt = torch.load(person, map_location="cpu")
	tgt_sr = cpt["config"][-1]
	cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]
	if_f0 = cpt.get("f0", 1)
	version = cpt.get("version", "v1")

	if version == "v1":
	if if_f0 == 1:
	net_g = SynthesizerTrnMs256NSFsid(
	*cpt["config"], is_half=config.is_half)
	else:
	net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
	elif version == "v2":
	if if_f0 == 1:
	net_g = SynthesizerTrnMs768NSFsid(
	*cpt["config"], is_half=config.is_half)
	else:
	net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
	del net_g.enc_q

	print(net_g.load_state_dict(cpt["weight"], strict=False))
	net_g.eval().to(config.device)
	if config.is_half:
	net_g = net_g.half()
	else:
	net_g = net_g.float()
	vc = VC(tgt_sr, config)