{
  "exp_name": "facodec",
  "model_type": "FAcodec",
  "log_dir": "./runs/",
  "log_interval": 10,
  "save_interval": 1000,
  "device": "cuda",
  "epochs": 1000,
  "batch_size": 4,
  "batch_length": 100,
  "max_len": 80,
  "pretrained_model": "",
  "load_only_params": false,
  "F0_path": "modules/JDC/bst.t7",
  "dataset": "dummy",
  "preprocess_params": {
    "sr": 24000,
    "frame_rate": 80,
    "duration_range": [1.0, 25.0],
    "spect_params": {
      "n_fft": 2048,
      "win_length": 1200,
      "hop_length": 300,
      "n_mels": 80
    }
  },
  "train": {
    "gradient_accumulation_step": 1,
    "batch_size": 1,
    "save_checkpoint_stride": [20],
    "random_seed": 1234,
    "max_epoch": -1,
    "max_frame_len": 80,
    "tracker": ["tensorboard"],
    "run_eval": [false],
    "sampler": {"holistic_shuffle": true, "drop_last": true},
    "dataloader": {"num_worker": 0, "pin_memory": true}
  },
  "model_params": {
    "causal": true,
    "lstm": 2,
    "norm_f0": true,
    "use_gr_content_f0": false,
    "use_gr_prosody_phone": false,
    "use_gr_timbre_prosody": false,
    "separate_prosody_encoder": true,
    "n_c_codebooks": 2,
    "timbre_norm": true,
    "use_gr_content_global_f0": true,
    "DAC": {
      "encoder_dim": 64,
      "encoder_rates": [2, 5, 5, 6],
      "decoder_dim": 1536,
      "decoder_rates": [6, 5, 5, 2],
      "sr": 24000
    }
  },
  "loss_params": {
    "base_lr": 0.0001,
    "warmup_steps": 200,
    "discriminator_iter_start": 2000,
    "lambda_spk": 1.0,
    "lambda_mel": 45,
    "lambda_f0": 1.0,
    "lambda_uv": 1.0
  }
}
|