chattts

Running

chattts / modules /repos_static /resemble_enhance /enhancer /univnet /univnet.py

zhzluke96

update

d2b7e94 6 months ago

2.58 kB

	from typing import Union

	import numpy as np
	import torch
	import torch.nn.functional as F
	from torch import Tensor, nn
	from torch.nn.utils.parametrizations import weight_norm

	from ..hparams import HParams
	from .lvcnet import LVCBlock
	from .mrstft import MRSTFTLoss


	class UnivNet(nn.Module):
	@property
	def d_noise(self):
	return 128

	@property
	def strides(self):
	return [7, 5, 4, 3]

	@property
	def dilations(self):
	return [1, 3, 9, 27]

	@property
	def nc(self):
	return self.hp.univnet_nc

	@property
	def scale_factor(self) -> int:
	return self.hp.hop_size

	def __init__(self, hp: HParams, d_input):
	super().__init__()
	self.d_input = d_input

	self.hp = hp

	self.blocks = nn.ModuleList(
	[
	LVCBlock(
	self.nc,
	d_input,
	stride=stride,
	dilations=self.dilations,
	cond_hop_length=hop_length,
	kpnet_conv_size=3,
	)
	for stride, hop_length in zip(self.strides, np.cumprod(self.strides))
	]
	)

	self.conv_pre = weight_norm(
	nn.Conv1d(self.d_noise, self.nc, 7, padding=3, padding_mode="reflect")
	)

	self.conv_post = nn.Sequential(
	nn.LeakyReLU(0.2),
	weight_norm(nn.Conv1d(self.nc, 1, 7, padding=3, padding_mode="reflect")),
	nn.Tanh(),
	)

	self.mrstft = MRSTFTLoss(hp)

	@property
	def eps(self):
	return 1e-5

	def forward(self, x: Tensor, y: Union[Tensor, None] = None, npad=10):
	"""
	Args:
	x: (b c t), acoustic features
	y: (b t), waveform
	Returns:
	z: (b t), waveform
	"""
	assert x.ndim == 3, "x must be 3D tensor"
	assert y is None or y.ndim == 2, "y must be 2D tensor"
	assert (
	x.shape[1] == self.d_input
	), f"x.shape[1] must be {self.d_input}, but got {x.shape}"
	assert npad >= 0, "npad must be positive or zero"

	x = F.pad(x, (0, npad), "constant", 0)
	z = torch.randn(x.shape[0], self.d_noise, x.shape[2]).to(x)
	z = self.conv_pre(z) # (b c t)

	for block in self.blocks:
	z = block(z, x) # (b c t)

	z = self.conv_post(z) # (b 1 t)
	z = z[..., : -self.scale_factor * npad]
	z = z.squeeze(1) # (b t)

	if y is not None:
	self.losses = self.mrstft(z, y)

	return z