Spaces:
Running
Running
# -*- coding: utf-8 -*- | |
# Copyright 2019 Tomoki Hayashi | |
# MIT License (https://opensource.org/licenses/MIT) | |
"""STFT-based Loss modules.""" | |
import librosa | |
import torch | |
from modules.parallel_wavegan.losses import LogSTFTMagnitudeLoss, SpectralConvergengeLoss, stft | |
class STFTLoss(torch.nn.Module):
    """STFT loss module.

    Computes the spectral convergence loss and the log STFT magnitude loss
    between a predicted and a ground-truth signal at a single STFT
    resolution. When ``use_mel_loss`` is enabled, both magnitude
    spectrograms are projected onto a mel filterbank before the losses are
    evaluated.
    """

    def __init__(self, fft_size=1024, shift_size=120, win_length=600, window="hann_window",
                 use_mel_loss=False, sample_rate=22050, num_mels=80):
        """Initialize STFT loss module.

        Args:
            fft_size (int): FFT size.
            shift_size (int): Hop size forwarded to ``stft``.
            win_length (int): Window length.
            window (str): Name of a window factory in ``torch``
                (e.g. ``"hann_window"``); called with ``win_length``.
            use_mel_loss (bool): If True, compare mel-projected magnitudes
                instead of linear-frequency magnitudes.
            sample_rate (int): Sampling rate used to build the mel filterbank.
                Only used when ``use_mel_loss`` is True.
            num_mels (int): Number of mel bands in the filterbank.
                Only used when ``use_mel_loss`` is True.
        """
        super(STFTLoss, self).__init__()
        self.fft_size = fft_size
        self.shift_size = shift_size
        self.win_length = win_length
        # Plain tensor attribute (not a registered buffer), so it does not
        # appear in state_dict and is NOT moved by ``module.to(...)``;
        # ``forward`` aligns it with the input device instead.
        self.window = getattr(torch, window)(win_length)
        self.spectral_convergenge_loss = SpectralConvergengeLoss()
        self.log_stft_magnitude_loss = LogSTFTMagnitudeLoss()
        self.use_mel_loss = use_mel_loss
        self.sample_rate = sample_rate
        self.num_mels = num_mels
        # Built lazily on the first forward pass that needs it.
        self.mel_basis = None

    def _mel_basis_like(self, reference):
        """Return the cached mel projection matrix matching ``reference``.

        The filterbank returned by librosa is (n_mels, 1 + n_fft // 2); it is
        transposed so that it can right-multiply a magnitude spectrogram
        whose last axis holds the frequency bins.

        Args:
            reference (Tensor): Tensor whose device and dtype the basis
                must match.

        Returns:
            Tensor: Mel projection matrix on ``reference``'s device/dtype.
        """
        if self.mel_basis is None:
            # Keyword arguments: positional use is rejected by librosa >= 0.10.
            mel_filters = librosa.filters.mel(
                sr=self.sample_rate, n_fft=self.fft_size, n_mels=self.num_mels)
            self.mel_basis = torch.from_numpy(mel_filters).T
        if self.mel_basis.device != reference.device or self.mel_basis.dtype != reference.dtype:
            # Follow the input instead of assuming CUDA is available.
            self.mel_basis = self.mel_basis.to(device=reference.device, dtype=reference.dtype)
        return self.mel_basis

    def forward(self, x, y):
        """Calculate forward propagation.

        Args:
            x (Tensor): Predicted signal (B, T).
            y (Tensor): Groundtruth signal (B, T).

        Returns:
            Tensor: Spectral convergence loss value.
            Tensor: Log STFT magnitude loss value.
        """
        if self.window.device != x.device:
            # The window is not a buffer, so move it alongside the input.
            self.window = self.window.to(x.device)
        x_mag = stft(x, self.fft_size, self.shift_size, self.win_length, self.window)
        y_mag = stft(y, self.fft_size, self.shift_size, self.win_length, self.window)
        if self.use_mel_loss:
            # NOTE(review): the matmul requires ``stft`` to return magnitudes
            # with the frequency bins on the last axis — confirm against ``stft``.
            mel_basis = self._mel_basis_like(x_mag)
            x_mag = x_mag @ mel_basis
            y_mag = y_mag @ mel_basis
        sc_loss = self.spectral_convergenge_loss(x_mag, y_mag)
        mag_loss = self.log_stft_magnitude_loss(x_mag, y_mag)
        return sc_loss, mag_loss
class MultiResolutionSTFTLoss(torch.nn.Module):
    """Multi resolution STFT loss module.

    Holds one ``STFTLoss`` per (FFT size, hop size, window length) triple and
    averages their spectral convergence and log STFT magnitude losses.
    """

    def __init__(self,
                 fft_sizes=(1024, 2048, 512),
                 hop_sizes=(120, 240, 50),
                 win_lengths=(600, 1200, 240),
                 window="hann_window",
                 use_mel_loss=False):
        """Initialize Multi resolution STFT loss module.

        Args:
            fft_sizes (sequence of int): FFT sizes, one per resolution.
            hop_sizes (sequence of int): Hop sizes, one per resolution.
            win_lengths (sequence of int): Window lengths, one per resolution.
            window (str): Window function type.
            use_mel_loss (bool): Forwarded to every ``STFTLoss``.

        Raises:
            ValueError: If the three sequences differ in length or are empty.
        """
        super(MultiResolutionSTFTLoss, self).__init__()
        # Explicit check instead of ``assert``: asserts vanish under ``-O``
        # and ``zip`` would then silently drop the unmatched resolutions.
        if not (len(fft_sizes) == len(hop_sizes) == len(win_lengths)):
            raise ValueError(
                "fft_sizes, hop_sizes and win_lengths must have the same length, "
                "got {}, {} and {}".format(len(fft_sizes), len(hop_sizes), len(win_lengths)))
        if len(fft_sizes) == 0:
            # With no resolutions the average in ``forward`` would divide by zero.
            raise ValueError("at least one STFT resolution is required")
        self.stft_losses = torch.nn.ModuleList([
            STFTLoss(fft_size, hop_size, win_length, window, use_mel_loss)
            for fft_size, hop_size, win_length in zip(fft_sizes, hop_sizes, win_lengths)
        ])

    def forward(self, x, y):
        """Calculate forward propagation.

        Args:
            x (Tensor): Predicted signal (B, T).
            y (Tensor): Groundtruth signal (B, T).

        Returns:
            Tensor: Multi resolution spectral convergence loss value.
            Tensor: Multi resolution log STFT magnitude loss value.
        """
        sc_total = 0.0
        mag_total = 0.0
        for stft_loss in self.stft_losses:
            sc_value, mag_value = stft_loss(x, y)
            # Out-of-place accumulation avoids mutating a tensor that is
            # part of the autograd graph.
            sc_total = sc_total + sc_value
            mag_total = mag_total + mag_value
        num_resolutions = len(self.stft_losses)
        return sc_total / num_resolutions, mag_total / num_resolutions