# --- File-viewer header (scrape residue, not gin syntax) preserved as comments ---
# author: Nithya — "updated parent repo and restructured things" (commit 98eb218)
# raw · history · blame · 4.42 kB
# Gin configuration for GaMaDHaNi's pitch-conditioned diffusion model
# (model_diffusion.UNetPitchConditioned): dataset task, optimizer and LR
# schedule, mel/audio conversion utilities, and model hyperparameters.
from __gin__ import dynamic_registration
from gamadhani import src
from gamadhani.src import dataset
from gamadhani.src import model_diffusion
from gamadhani.utils import pitch_to_audio_utils
from gamadhani.utils import utils
import torch
# Macros:
# ==============================================================================
AUDIO_SEQ_LEN = 750  # frames per training example; bound to UNetPitchConditioned.audio_seq_len
LR = 0.0001  # peak learning rate (bound to the scheduler's eta_max below)
NFFT = 1024  # STFT size shared by all mel/audio conversion bindings below
NUM_MELS = 192  # mel bins; also the UNet's input dimension (inp_dim)
SINGER_CONDITIONING = True  # enable singer-identity conditioning in data and model
SR = 16000  # audio sample rate in Hz
# Parameters for torch.optim.AdamW:
# ==============================================================================
torch.optim.AdamW.betas = (0.9, 0.99)
# NOTE(review): literal duplicates the LR macro's value; consider binding %LR
# here so the optimizer LR and the scheduler's eta_max cannot drift apart.
torch.optim.AdamW.lr = 0.0001
# Parameters for utils.build_warmed_exponential_lr_scheduler:
# ==============================================================================
# Presumably a linear warm-up to eta_max over peak_iteration steps followed by
# exponential decay over cycle_length steps — inferred from names; confirm in
# gamadhani.utils.utils.
utils.build_warmed_exponential_lr_scheduler.cycle_length = 480000
utils.build_warmed_exponential_lr_scheduler.eta_max = %LR
# NOTE(review): 0.1 looks large for an absolute minimum LR (1000x eta_max's
# 1e-4); this may instead be a fraction of eta_max — verify against the
# scheduler implementation.
utils.build_warmed_exponential_lr_scheduler.eta_min = 0.1
utils.build_warmed_exponential_lr_scheduler.peak_iteration = 10000
utils.build_warmed_exponential_lr_scheduler.start_factor = 0.01
# Parameters for model_diffusion.UNetPitchConditioned.configure_optimizers:
# ==============================================================================
# Classes (not instances) are injected; the module constructs the optimizer and
# scheduler itself (Lightning-style configure_optimizers hook — confirm).
model_diffusion.UNetPitchConditioned.configure_optimizers.optimizer_cls = @torch.optim.AdamW
model_diffusion.UNetPitchConditioned.configure_optimizers.scheduler_cls = \
@utils.build_warmed_exponential_lr_scheduler
# Parameters for pitch_to_audio_utils.from_mels:
# ==============================================================================
# Mel-spectrogram -> linear-spectrogram/audio conversion parameters; kept in
# sync with the macros used by the model and the other audio utilities.
pitch_to_audio_utils.from_mels.nfft = %NFFT
pitch_to_audio_utils.from_mels.num_mels = %NUM_MELS
pitch_to_audio_utils.from_mels.sr = %SR
# Parameters for pitch_to_audio_utils.normalized_mels_to_audio:
# ==============================================================================
pitch_to_audio_utils.normalized_mels_to_audio.n_iter = 100  # phase-reconstruction iterations (presumably Griffin-Lim — confirm)
pitch_to_audio_utils.normalized_mels_to_audio.nfft = %NFFT
pitch_to_audio_utils.normalized_mels_to_audio.num_mels = %NUM_MELS
pitch_to_audio_utils.normalized_mels_to_audio.sr = %SR
# Parameters for dataset.SequenceDataset:
# ==============================================================================
# @dataset.Task() (with parentheses) injects an evaluated Task instance,
# shared by every SequenceDataset gin constructs.
dataset.SequenceDataset.task = @dataset.Task()
# Parameters for dataset.Task:
# ==============================================================================
dataset.Task.read_fn = @dataset.load_cached_dataset
# kwargs forwarded to read_fn: segment length and whether to return singer ids
# (matching the model's singer_conditioning flag).
dataset.Task.kwargs = {"audio_len": %AUDIO_SEQ_LEN,
"return_singer": %SINGER_CONDITIONING}
# Parameters for pitch_to_audio_utils.torch_gl:
# ==============================================================================
pitch_to_audio_utils.torch_gl.n_iter = 200  # NOTE(review): 2x the n_iter used by normalized_mels_to_audio — confirm intentional
pitch_to_audio_utils.torch_gl.nfft = %NFFT
pitch_to_audio_utils.torch_gl.sr = %SR
# Parameters for pitch_to_audio_utils.torch_istft:
# ==============================================================================
pitch_to_audio_utils.torch_istft.nfft = %NFFT
# Parameters for model_diffusion.UNetPitchConditioned:
# ==============================================================================
model_diffusion.UNetPitchConditioned.audio_seq_len = %AUDIO_SEQ_LEN
# cfg = True together with cond_drop_prob = 0.2 suggests classifier-free
# guidance training (conditioning dropped 20% of the time) — confirm in
# model_diffusion.
model_diffusion.UNetPitchConditioned.cfg = True
model_diffusion.UNetPitchConditioned.cond_drop_prob = 0.2
model_diffusion.UNetPitchConditioned.dropout = 0.3
model_diffusion.UNetPitchConditioned.f0_dim = 128  # pitch (f0) embedding size
model_diffusion.UNetPitchConditioned.features = [512, 640, 1024]  # channel widths per UNet level; pairs with strides below
model_diffusion.UNetPitchConditioned.inp_dim = %NUM_MELS  # model consumes mel spectrograms
model_diffusion.UNetPitchConditioned.kernel_size = 5
model_diffusion.UNetPitchConditioned.log_samples_every = 10
model_diffusion.UNetPitchConditioned.log_wandb_samples_every = 50
model_diffusion.UNetPitchConditioned.loss_w_padding = True
model_diffusion.UNetPitchConditioned.nonlinearity = 'mish'
model_diffusion.UNetPitchConditioned.norm = False
model_diffusion.UNetPitchConditioned.num_attns = 4
model_diffusion.UNetPitchConditioned.num_convs = 4
model_diffusion.UNetPitchConditioned.num_heads = 8
model_diffusion.UNetPitchConditioned.project_dim = 256
model_diffusion.UNetPitchConditioned.singer_conditioning = %SINGER_CONDITIONING
model_diffusion.UNetPitchConditioned.singer_dim = 128  # singer embedding size
model_diffusion.UNetPitchConditioned.singer_vocab = 55  # number of distinct singer ids
model_diffusion.UNetPitchConditioned.sr = %SR
model_diffusion.UNetPitchConditioned.strides = [4, 2, 2]  # downsampling factor per UNet level (3 levels, matching features)
model_diffusion.UNetPitchConditioned.time_dim = 128  # diffusion timestep embedding size