// Experiment configuration for a GAN vocoder that uses a HiFi-GAN generator.
// NOTE: this file contains `//` comments, so it must be read with a
// comment-tolerant loader (JSON5 / JSONC); a strict JSON parser rejects it.
// Keep it free of trailing commas so that comments remain the only
// non-standard syntax the loader has to accept.
{
    // Parent configuration file. Presumably supplies values for keys not set
    // here -- confirm against the config loader.
    "base_config": "egs/vocoder/gan/exp_config_base.json",
    "model_type": "GANVocoder",
    // Each dataset named here has a matching entry in "dataset_path" below.
    "dataset": [
        "ljspeech",
        "vctk",
        "libritts"
    ],
    "dataset_path": {
        // TODO: Fill in your dataset path
        "ljspeech": "[dataset path]",
        "vctk": "[dataset path]",
        "libritts": "[dataset path]"
    },
    // TODO: Fill in the output log path. The default value is "Amphion/ckpts/vocoder"
    "log_dir": "ckpts/vocoder",
    "preprocess": {
        // TODO: Fill in the output data path. The default value is "Amphion/data"
        "processed_dir": "data",
        // Acoustic features to extract
        "extract_mel": true,
        "extract_audio": true,
        "extract_pitch": false,
        "extract_uv": false,
        "extract_amplitude_phase": false,
        "pitch_extractor": "parselmouth",
        // Features used for model training
        "use_mel": true,
        "use_frame_pitch": false,
        "use_uv": false,
        "use_audio": true,
        "n_mel": 100,
        "sample_rate": 24000
    },
    "model": {
        "generator": "hifigan",
        // Only "mpd" has a settings block in this file; the other
        // discriminators presumably take their settings from the base
        // config -- TODO confirm.
        "discriminators": [
            "msd",
            "mpd",
            "mssbcqtd",
            "msstftd"
        ],
        "hifigan": {
            // Note: "resblock" is a string, not a number.
            "resblock": "1",
            // Product of the rates is 8 * 4 * 2 * 2 * 2 = 256; presumably this
            // must equal the hop size used for mel extraction -- TODO confirm
            // against the base config.
            "upsample_rates": [8, 4, 2, 2, 2],
            // One kernel size per upsample rate (same length as "upsample_rates").
            "upsample_kernel_sizes": [16, 8, 4, 4, 4],
            "upsample_initial_channel": 768,
            "resblock_kernel_sizes": [3, 5, 7],
            // One dilation list per entry in "resblock_kernel_sizes".
            "resblock_dilation_sizes": [
                [1, 3, 5],
                [1, 3, 5],
                [1, 3, 5]
            ]
        },
        "mpd": {
            "mpd_reshapes": [2, 3, 5, 7, 11, 17, 23, 37],
            "use_spectral_norm": false,
            "discriminator_channel_multi": 1
        }
    },
    "train": {
        "batch_size": 32,
        "adamw": {
            "lr": 2.0e-4,
            "adam_b1": 0.8,
            "adam_b2": 0.99
        },
        "exponential_lr": {
            "lr_decay": 0.999
        },
        "criterions": [
            "feature",
            "discriminator",
            "generator",
            "mel"
        ]
    },
    "inference": {
        "batch_size": 1
    }
}