{
  "base_config": "config/valle.json",
  "model_type": "VALLE",
  "dataset": [
    "libritts"
  ],
  "dataset_path": {
    "libritts": "[LibriTTS dataset path]"
  },
  "preprocess": {
    "extract_phone": true,
    "phone_extractor": "espeak",
    "extract_acoustic_token": true,
    "use_phone": true,
    "use_acoustic_token": true,
    "processed_dir": "Amphion/data/",
    "sample_rate": 24000,
    "codec_hop_size": 320,
    "valid_file": "test.json"
  },
  "model": {
    "prefix_mode": 1
  },
  "log_dir": "Amphion/ckpts/tts/valle",
  "train": {
    "batch_size": 4,
    "train_stage": 1,
    "max_epoch": 20,
    "use_dynamic_batchsize": true,
    "max_tokens": 4000,
    "max_sentences": 10
  }
}
|