Spaces:
Running
Running
# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import math | |
import torch | |
from torch import nn | |
from .fvq import FactorizedVectorQuantize | |
class ResidualVQ(nn.Module):
    """Residual vector quantizer built from a stack of factorized VQ layers.

    Follows Algorithm 1. in https://arxiv.org/pdf/2107.03312.pdf

    Every layer quantizes the residual left over by the layers before it, and
    the per-layer outputs are summed to form the final quantized tensor.

    Args:
        num_quantizers: Nominal number of quantizer layers. Used as the
            default depth in ``forward`` and as the range for quantizer
            dropout.
        codebook_size: Codebook size expressed as a power-of-two exponent;
            a layer built from ``size`` receives ``2 ** size`` entries.
            Either one int (repeated ``num_quantizers`` times) or a sequence
            of exponents, in which case one layer is created per entry.
        **kwargs: Forwarded unchanged to every ``FactorizedVectorQuantize``.
            Two keys are additionally read here:
            ``quantizer_dropout`` (fraction of each training batch that gets
            a randomly reduced number of quantizers, default ``0.0``) and
            ``dropout_type`` (``"linear"``, ``"exp"`` or ``None``).
    """

    def __init__(self, *, num_quantizers, codebook_size, **kwargs):
        super().__init__()
        VQ = FactorizedVectorQuantize
        if isinstance(codebook_size, int):
            codebook_size = [codebook_size] * num_quantizers
        self.layers = nn.ModuleList(
            [VQ(codebook_size=2**size, **kwargs) for size in codebook_size]
        )
        self.num_quantizers = num_quantizers
        self.quantizer_dropout = kwargs.get("quantizer_dropout", 0.0)
        self.dropout_type = kwargs.get("dropout_type", None)

    def _sample_n_quantizers(self, batch_size):
        """Draw the per-sample number of active quantizers for training.

        Returns a float tensor of length ``batch_size``. Entries default to
        ``num_quantizers + 1`` (every layer active); the first
        ``int(batch_size * quantizer_dropout)`` entries are replaced by a
        random count chosen according to ``dropout_type``.

        Raises:
            ValueError: If ``dropout_type`` is not ``None``, ``"linear"`` or
                ``"exp"``.
        """
        n_quantizers = torch.full((batch_size,), float(self.num_quantizers + 1))
        if self.dropout_type is None:
            # No dropout scheme configured: keep every layer active instead
            # of failing on an undefined ``dropout`` tensor.
            return n_quantizers

        if self.dropout_type == "linear":
            # Uniform over 1..num_quantizers (upper bound is exclusive).
            dropout = torch.randint(1, self.num_quantizers + 1, (batch_size,))
        elif self.dropout_type == "exp":
            # Powers of two 2**k with k drawn from [1, log2(num_quantizers)).
            # torch.randint needs low < high, so clamp the exclusive bound to
            # 2; this only affects num_quantizers < 4, which used to raise.
            high = max(int(math.log2(self.num_quantizers)), 2)
            dropout = torch.pow(2, torch.randint(1, high, (batch_size,)))
        else:
            raise ValueError(
                f"Unsupported dropout_type: {self.dropout_type!r} "
                "(expected 'linear', 'exp' or None)"
            )

        n_dropout = int(batch_size * self.quantizer_dropout)
        n_quantizers[:n_dropout] = dropout[:n_dropout]
        return n_quantizers

    def forward(self, x, n_quantizers=None):
        """Quantize ``x`` with the residual layer stack.

        Args:
            x: Input tensor; ``x.shape[0]`` is used as the batch size.
                NOTE(review): the ``mask[:, None, None]`` broadcast suggests
                a 3-D, batch-first layer output -- confirm against
                ``FactorizedVectorQuantize``.
            n_quantizers: Number of leading layers to apply in eval mode
                (defaults to ``self.num_quantizers``). In training mode this
                argument is ignored: all layers run and a per-sample count is
                sampled to mask their contributions.

        Returns:
            A tuple ``(quantized_out, all_indices, all_losses,
            all_quantized)``: the masked sum of layer outputs followed by the
            per-layer indices, losses and outputs, each stacked along a new
            leading dimension.
        """
        batch_size = x.shape[0]

        if n_quantizers is None:
            n_quantizers = self.num_quantizers
        if self.training:
            n_quantizers = self._sample_n_quantizers(batch_size).to(x.device)

        quantized_out = 0.0
        residual = x
        all_losses = []
        all_indices = []
        all_quantized = []

        for idx, layer in enumerate(self.layers):
            # In eval mode the remaining layers are skipped entirely; in
            # training mode every layer runs and the mask does the selection.
            if not self.training and idx >= n_quantizers:
                break
            quantized, indices, loss = layer(residual)

            # True for samples that still use layer ``idx``.
            mask = (
                torch.full((batch_size,), fill_value=idx, device=x.device)
                < n_quantizers
            )

            residual = residual - quantized
            quantized_out = quantized_out + quantized * mask[:, None, None]

            # Only samples that use this layer contribute to its loss.
            loss = (loss * mask).mean()

            all_indices.append(indices)
            all_losses.append(loss)
            all_quantized.append(quantized)

        all_losses, all_indices, all_quantized = map(
            torch.stack, (all_losses, all_indices, all_quantized)
        )
        return quantized_out, all_indices, all_losses, all_quantized

    def vq2emb(self, vq):
        """Decode stacked code indices back into a summed embedding.

        Args:
            vq: Per-layer codes indexed along the first dimension, i.e.
                ``[n_quantizers, B, T]``. It may hold fewer entries than
                there are layers (as ``forward`` returns in eval mode with a
                reduced ``n_quantizers``); only the leading layers are used.

        Returns:
            The sum of ``layer.vq2emb(codes)`` over the paired layers, or
            ``0.0`` when nothing was decoded.
        """
        quantized_out = 0.0
        # zip stops at the shorter of (layers, codes), so truncated code
        # stacks decode instead of raising IndexError.
        for layer, codes in zip(self.layers, vq):
            quantized_out = quantized_out + layer.vq2emb(codes)
        return quantized_out

    def get_emb(self):
        """Return a list with the result of ``get_emb()`` for every layer."""
        return [layer.get_emb() for layer in self.layers]