# -*- coding: utf-8 -*- # Copyright 2019 Tomoki Hayashi # MIT License (https://opensource.org/licenses/MIT) """STFT-based Loss modules.""" import librosa import torch from text_to_speech.modules.vocoder.parallel_wavegan.losses import LogSTFTMagnitudeLoss, SpectralConvergengeLoss, stft class STFTLoss(torch.nn.Module): """STFT loss module.""" def __init__(self, fft_size=1024, shift_size=120, win_length=600, window="hann_window", use_mel_loss=False): """Initialize STFT loss module.""" super(STFTLoss, self).__init__() self.fft_size = fft_size self.shift_size = shift_size self.win_length = win_length self.window = getattr(torch, window)(win_length) self.spectral_convergenge_loss = SpectralConvergengeLoss() self.log_stft_magnitude_loss = LogSTFTMagnitudeLoss() self.use_mel_loss = use_mel_loss self.mel_basis = None def forward(self, x, y): """Calculate forward propagation. Args: x (Tensor): Predicted signal (B, T). y (Tensor): Groundtruth signal (B, T). Returns: Tensor: Spectral convergence loss value. Tensor: Log STFT magnitude loss value. """ if self.window.device != x.device: self.window = self.window.to(x.device) x_mag = stft(x, self.fft_size, self.shift_size, self.win_length, self.window) y_mag = stft(y, self.fft_size, self.shift_size, self.win_length, self.window) if self.use_mel_loss: if self.mel_basis is None: self.mel_basis = torch.from_numpy(librosa.filters.mel(22050, self.fft_size, 80)).cuda().T x_mag = x_mag @ self.mel_basis y_mag = y_mag @ self.mel_basis sc_loss = self.spectral_convergenge_loss(x_mag, y_mag) mag_loss = self.log_stft_magnitude_loss(x_mag, y_mag) return sc_loss, mag_loss class MultiResolutionSTFTLoss(torch.nn.Module): """Multi resolution STFT loss module.""" def __init__(self, fft_sizes=[1024, 2048, 512], hop_sizes=[120, 240, 50], win_lengths=[600, 1200, 240], window="hann_window", use_mel_loss=False): """Initialize Multi resolution STFT loss module. Args: fft_sizes (list): List of FFT sizes. hop_sizes (list): List of hop sizes. win_lengths (list): List of window lengths. window (str): Window function type. """ super(MultiResolutionSTFTLoss, self).__init__() assert len(fft_sizes) == len(hop_sizes) == len(win_lengths) self.stft_losses = torch.nn.ModuleList() for fs, ss, wl in zip(fft_sizes, hop_sizes, win_lengths): self.stft_losses += [STFTLoss(fs, ss, wl, window, use_mel_loss)] def forward(self, x, y): """Calculate forward propagation. Args: x (Tensor): Predicted signal (B, T). y (Tensor): Groundtruth signal (B, T). Returns: Tensor: Multi resolution spectral convergence loss value. Tensor: Multi resolution log STFT magnitude loss value. """ sc_loss = 0.0 mag_loss = 0.0 for f in self.stft_losses: sc_l, mag_l = f(x, y) sc_loss += sc_l mag_loss += mag_l sc_loss /= len(self.stft_losses) mag_loss /= len(self.stft_losses) return sc_loss, mag_loss