{
  "exp_name": "facodec",
  "model_type": "FAcodec",
  "log_dir": "./runs/",
  "log_interval": 10,
  "save_interval": 1000,
  "device": "cuda",
  "epochs": 1000,
  "batch_size": 4,
  "batch_length": 100,
  "max_len": 80,
  "pretrained_model": "",
  "load_only_params": false,
  "F0_path": "modules/JDC/bst.t7",
  "dataset": "/path/to/dataset",
  "preprocess_params": {
    "sr": 24000,
    "frame_rate": 80,
    "duration_range": [1.0, 25.0],
    "spect_params": {
      "n_fft": 2048,
      "win_length": 1200,
      "hop_length": 300,
      "n_mels": 80
    }
  },
  "train": {
    "gradient_accumulation_step": 1,
    "batch_size": 1,
    "save_checkpoint_stride": [20],
    "random_seed": 1234,
    "max_epoch": -1,
    "max_frame_len": 80,
    "tracker": ["tensorboard"],
    "run_eval": [false],
    "sampler": {
      "holistic_shuffle": true,
      "drop_last": true
    },
    "dataloader": {
      "num_worker": 0,
      "pin_memory": true
    }
  },
  "model_params": {
    "causal": true,
    "lstm": 2,
    "norm_f0": true,
    "use_gr_content_f0": false,
    "use_gr_prosody_phone": false,
    "use_gr_timbre_prosody": false,
    "separate_prosody_encoder": true,
    "n_c_codebooks": 2,
    "timbre_norm": true,
    "use_gr_content_global_f0": true,
    "DAC": {
      "encoder_dim": 64,
      "encoder_rates": [2, 5, 5, 6],
      "decoder_dim": 1536,
      "decoder_rates": [6, 5, 5, 2],
      "sr": 24000
    }
  },
  "loss_params": {
    "base_lr": 0.0001,
    "warmup_steps": 200,
    "discriminator_iter_start": 2000,
    "lambda_spk": 1.0,
    "lambda_mel": 45,
    "lambda_f0": 1.0,
    "lambda_uv": 1.0
  }
}