maskgct/egs/vocoder/gan/exp_config_base.json
Hecheng0625's picture
Upload 167 files
8c92a11 verified
{
// Path of the config file this experiment config builds on — presumably its values are inherited and overridden by the keys below; confirm against the config loader.
"base_config": "config/vocoder.json",
// Model family name — presumably used by the training entry point to pick the trainer; TODO confirm.
"model_type": "GANVocoder",
// TODO: Choose your needed datasets.
// Remove the names you do not want to use. Every name listed here also has an
// entry under "dataset_path" whose placeholder must be filled in.
"dataset": [
    "csd",
    "kising",
    "m4singer",
    "nus48e",
    "opencpop",
    "opensinger",
    "opera",
    "pjs",
    "popbutfy",
    "popcs",
    "ljspeech",
    "vctk",
    "libritts"
],
"dataset_path": {
    // TODO: Fill in your dataset path.
    // Each key is a dataset name from the "dataset" list; replace its
    // "[dataset path]" placeholder with the location of that dataset.
    "csd": "[dataset path]",
    "kising": "[dataset path]",
    "m4singer": "[dataset path]",
    "nus48e": "[dataset path]",
    "opencpop": "[dataset path]",
    "opensinger": "[dataset path]",
    "opera": "[dataset path]",
    "pjs": "[dataset path]",
    "popbutfy": "[dataset path]",
    "popcs": "[dataset path]",
    "ljspeech": "[dataset path]",
    "vctk": "[dataset path]",
    "libritts": "[dataset path]"
},
// TODO: Fill in the output log path. The value below is a relative-path default; presumably it is resolved against the working directory of the run — confirm.
"log_dir": "ckpts/vocoder",
"preprocess": {
    // Acoustic features: the "extract_*" switches (mel and audio on, pitch and uv off)
    "extract_mel": true,
    "extract_audio": true,
    "extract_pitch": false,
    "extract_uv": false,
    "pitch_extractor": "parselmouth",

    // Features used for model training: the "use_*" switches mirror the extraction flags above
    "use_mel": true,
    "use_frame_pitch": false,
    "use_uv": false,
    "use_audio": true,

    // TODO: Fill in the output data path
    "processed_dir": "data/",

    // Feature size and audio rate (units are not stated here; sample_rate is presumably in Hz — confirm)
    "n_mel": 100,
    "sample_rate": 24000
},
"model": {
    // TODO: Choose your needed discriminators
    // NOTE(review): "msd" is listed but has no settings object in this block, while
    // "mrd" has settings but is not listed. Presumably intentional (defaults may
    // come from "base_config") — confirm before relying on either.
    "discriminators": [
        "msd",
        "mpd",
        "msstftd",
        "mssbcqtd"
    ],
    "mpd": {
        "mpd_reshapes": [2, 3, 5, 7, 11],
        "use_spectral_norm": false,
        "discriminator_channel_mult_factor": 1
    },
    "mrd": {
        // Three triples of integers; the meaning of each position is not stated here.
        "resolutions": [[1024, 120, 600], [2048, 240, 1200], [512, 50, 240]],
        "use_spectral_norm": false,
        "discriminator_channel_mult_factor": 1,
        "mrd_override": false
    },
    "msstftd": {
        "filters": 32
    },
    "mssbcqtd": {
        // Keys are double-quoted like every other key in this file, so the section
        // no longer depends on a parser that accepts bare identifiers as keys.
        // "hop_lengths", "n_octaves" and "bins_per_octaves" each hold three entries —
        // presumably one per sub-discriminator, so keep their lengths equal (confirm).
        "hop_lengths": [512, 256, 256],
        "filters": 32,
        "max_filters": 1024,
        "filters_scale": 1,
        "dilations": [1, 2, 4],
        "in_channels": 1,
        "out_channels": 1,
        "n_octaves": [9, 9, 9],
        "bins_per_octaves": [24, 36, 48]
    }
},
"train": {
    // TODO: Choose a suitable batch size, training epoch, and save stride
    "batch_size": 32,
    "max_epoch": 1000000,
    // Single-element list; the unit of the stride is not stated here.
    "save_checkpoint_stride": [20],
    // Settings grouped under "adamw" and "exponential_lr" — presumably the optimizer
    // and the learning-rate schedule; confirm against the trainer.
    "adamw": {
        "lr": 2.0e-4,
        "adam_b1": 0.8,
        "adam_b2": 0.99
    },
    "exponential_lr": {
        "lr_decay": 0.999
    }
}
}