rvc-ui / rvc /layers /nsf.py
Blane187's picture
Upload folder using huggingface_hub
7ff2ba3 verified
raw
history blame
8.18 kB
from typing import Optional, List
import math
import torch
from torch import nn
from torch.nn import Conv1d, ConvTranspose1d
from torch.nn import functional as F
from torch.nn.utils import remove_weight_norm, weight_norm
from .generators import SineGenerator
from .residuals import ResBlock1, ResBlock2, LRELU_SLOPE
from .utils import call_weight_data_normal_if_Conv
class SourceModuleHnNSF(torch.nn.Module):
"""SourceModule for hn-nsf
SourceModule(sampling_rate, harmonic_num=0, sine_amp=0.1,
add_noise_std=0.003, voiced_threshod=0)
sampling_rate: sampling_rate in Hz
harmonic_num: number of harmonic above F0 (default: 0)
sine_amp: amplitude of sine source signal (default: 0.1)
add_noise_std: std of additive Gaussian noise (default: 0.003)
note that amplitude of noise in unvoiced is decided
by sine_amp
voiced_threshold: threhold to set U/V given F0 (default: 0)
Sine_source, noise_source = SourceModuleHnNSF(F0_sampled)
F0_sampled (batchsize, length, 1)
Sine_source (batchsize, length, 1)
noise_source (batchsize, length 1)
uv (batchsize, length, 1)
"""
def __init__(
self,
sampling_rate: int,
harmonic_num: int = 0,
sine_amp: float = 0.1,
add_noise_std: float = 0.003,
voiced_threshod: int = 0,
):
super(SourceModuleHnNSF, self).__init__()
self.sine_amp = sine_amp
self.noise_std = add_noise_std
# to produce sine waveforms
self.l_sin_gen = SineGenerator(
sampling_rate, harmonic_num, sine_amp, add_noise_std, voiced_threshod
)
# to merge source harmonics into a single excitation
self.l_linear = torch.nn.Linear(harmonic_num + 1, 1)
self.l_tanh = torch.nn.Tanh()
def __call__(self, x: torch.Tensor, upp: int = 1) -> torch.Tensor:
return super().__call__(x, upp=upp)
def forward(self, x: torch.Tensor, upp: int = 1) -> torch.Tensor:
sine_wavs, _, _ = self.l_sin_gen(x, upp)
sine_wavs = sine_wavs.to(dtype=self.l_linear.weight.dtype)
sine_merge: torch.Tensor = self.l_tanh(self.l_linear(sine_wavs))
return sine_merge # , None, None # noise, uv
class NSFGenerator(torch.nn.Module):
def __init__(
self,
initial_channel: int,
resblock: str,
resblock_kernel_sizes: List[int],
resblock_dilation_sizes: List[List[int]],
upsample_rates: List[int],
upsample_initial_channel: int,
upsample_kernel_sizes: List[int],
gin_channels: int,
sr: int,
):
super(NSFGenerator, self).__init__()
self.num_kernels = len(resblock_kernel_sizes)
self.num_upsamples = len(upsample_rates)
self.f0_upsamp = torch.nn.Upsample(scale_factor=math.prod(upsample_rates))
self.m_source = SourceModuleHnNSF(sampling_rate=sr, harmonic_num=0)
self.noise_convs = nn.ModuleList()
self.conv_pre = Conv1d(
initial_channel, upsample_initial_channel, 7, 1, padding=3
)
resblock = ResBlock1 if resblock == "1" else ResBlock2
self.ups = nn.ModuleList()
for i, (u, k) in enumerate(zip(upsample_rates, upsample_kernel_sizes)):
c_cur = upsample_initial_channel // (2 ** (i + 1))
self.ups.append(
weight_norm(
ConvTranspose1d(
upsample_initial_channel // (2**i),
upsample_initial_channel // (2 ** (i + 1)),
k,
u,
padding=(k - u) // 2,
)
)
)
if i + 1 < len(upsample_rates):
stride_f0 = math.prod(upsample_rates[i + 1 :])
self.noise_convs.append(
Conv1d(
1,
c_cur,
kernel_size=stride_f0 * 2,
stride=stride_f0,
padding=stride_f0 // 2,
)
)
else:
self.noise_convs.append(Conv1d(1, c_cur, kernel_size=1))
self.resblocks = nn.ModuleList()
for i in range(len(self.ups)):
ch: int = upsample_initial_channel // (2 ** (i + 1))
for j, (k, d) in enumerate(
zip(resblock_kernel_sizes, resblock_dilation_sizes)
):
self.resblocks.append(resblock(ch, k, d))
self.conv_post = Conv1d(ch, 1, 7, 1, padding=3, bias=False)
self.ups.apply(call_weight_data_normal_if_Conv)
if gin_channels != 0:
self.cond = nn.Conv1d(gin_channels, upsample_initial_channel, 1)
self.upp = math.prod(upsample_rates)
self.lrelu_slope = LRELU_SLOPE
def __call__(
self,
x: torch.Tensor,
f0: torch.Tensor,
g: Optional[torch.Tensor] = None,
n_res: Optional[int] = None,
) -> torch.Tensor:
return super().__call__(x, f0, g=g, n_res=n_res)
def forward(
self,
x: torch.Tensor,
f0: torch.Tensor,
g: Optional[torch.Tensor] = None,
n_res: Optional[int] = None,
) -> torch.Tensor:
har_source = self.m_source(f0, self.upp)
har_source = har_source.transpose(1, 2)
if n_res is not None:
n_res = int(n_res)
if n_res * self.upp != har_source.shape[-1]:
har_source = F.interpolate(
har_source, size=n_res * self.upp, mode="linear"
)
if n_res != x.shape[-1]:
x = F.interpolate(x, size=n_res, mode="linear")
x = self.conv_pre(x)
if g is not None:
x = x + self.cond(g)
# torch.jit.script() does not support direct indexing of torch modules
# That's why I wrote this
for i, (ups, noise_convs) in enumerate(zip(self.ups, self.noise_convs)):
if i < self.num_upsamples:
x = F.leaky_relu(x, self.lrelu_slope)
x = ups(x)
x_source = noise_convs(har_source)
x = x + x_source
xs: Optional[torch.Tensor] = None
l = [i * self.num_kernels + j for j in range(self.num_kernels)]
for j, resblock in enumerate(self.resblocks):
if j in l:
if xs is None:
xs = resblock(x)
else:
xs += resblock(x)
# This assertion cannot be ignored! \
# If ignored, it will cause torch.jit.script() compilation errors
assert isinstance(xs, torch.Tensor)
x = xs / self.num_kernels
x = F.leaky_relu(x)
x = self.conv_post(x)
x = torch.tanh(x)
return x
def remove_weight_norm(self):
for l in self.ups:
remove_weight_norm(l)
for l in self.resblocks:
l.remove_weight_norm()
def __prepare_scriptable__(self):
for l in self.ups:
for hook in l._forward_pre_hooks.values():
# The hook we want to remove is an instance of WeightNorm class, so
# normally we would do `if isinstance(...)` but this class is not accessible
# because of shadowing, so we check the module name directly.
# https://github.com/pytorch/pytorch/blob/be0ca00c5ce260eb5bcec3237357f7a30cc08983/torch/nn/utils/__init__.py#L3
if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(l)
for l in self.resblocks:
for hook in self.resblocks._forward_pre_hooks.values():
if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(l)
return self