// Training configuration for a "DiffusionVocoder" model.
// NOTE: this file contains `//` comments, so it must be loaded with a
// comment-tolerant parser (JSON5 / JSONC); a strict JSON parser will reject it.
// NOTE(review): "base_config" presumably supplies defaults that the values
// below override -- confirm against the config loader.
{
  "base_config": "config/vocoder.json",
  "model_type": "DiffusionVocoder",

  // TODO: Choose the datasets you need
  "dataset": [
    "csd",
    "kising",
    "m4singer",
    "nus48e",
    "opencpop",
    "opensinger",
    "opera",
    "pjs",
    "popbutfy",
    "popcs",
    "ljspeech",
    "vctk",
    "libritts"
  ],

  // TODO: Fill in the path of each dataset listed in "dataset"
  "dataset_path": {
    "csd": "[dataset path]",
    "kising": "[dataset path]",
    "m4singer": "[dataset path]",
    "nus48e": "[dataset path]",
    "opencpop": "[dataset path]",
    "opensinger": "[dataset path]",
    "opera": "[dataset path]",
    "pjs": "[dataset path]",
    "popbutfy": "[dataset path]",
    "popcs": "[dataset path]",
    "ljspeech": "[dataset path]",
    "vctk": "[dataset path]",
    "libritts": "[dataset path]"
  },

  // TODO: Fill in the output log path
  "log_dir": "ckpts/vocoder",

  "preprocess": {
    // Acoustic features to extract
    "extract_mel": true,
    "extract_audio": true,
    "extract_pitch": false,
    "extract_uv": false,
    "pitch_extractor": "parselmouth",

    // Features used for model training
    "use_mel": true,
    "use_frame_pitch": false,
    "use_uv": false,
    "use_audio": true,

    // TODO: Fill in the output data path
    "processed_dir": "data/",
    "n_mel": 100,
    "sample_rate": 24000
  },

  "train": {
    // TODO: Choose a suitable batch size, number of training epochs, and checkpoint save stride
    "batch_size": 32,
    "max_epoch": 1000000,
    "save_checkpoint_stride": [20],
    "adamw": {
      "lr": 2.0e-4,
      "adam_b1": 0.8,
      "adam_b2": 0.99
    },
    "exponential_lr": {
      "lr_decay": 0.999
    }
  }
}