{
  "base_config": "config/vits.json",
  "model_type": "VITS",
  "dataset": [
    "LJSpeech",
    //"hifitts"
  ],
  "dataset_path": {
    // TODO: Fill in your dataset path
    "LJSpeech": "[LJSpeech dataset path]",
    //"hifitts": "[Hi-Fi TTS dataset path]"
  },
  // TODO: Fill in the output log path. The default value is "Amphion/ckpts/tts"
  "log_dir": "ckpts/tts",
  "preprocess": {
    //"extract_audio":true,
    "use_phone": true,
    // linguistic features
    "extract_phone": true,
    "phone_extractor": "espeak", // one of: "espeak", "pypinyin", "pypinyin_initials_finals", "lexicon" ("lexicon" is only for language=en-us right now)
    // TODO: Fill in the output data path. The default value is "Amphion/data"
    "processed_dir": "data",
    "sample_rate": 22050, // target sampling rate
    "valid_file": "valid.json", // validation set
    //"use_spkid": true // use speaker IDs to train a multi-speaker TTS model
  },
  "model":{
    //"n_speakers": 10 // number of speakers; must be greater than or equal to the number of speakers in the dataset(s) used. Defaults to 0 if not specified.
  },
  "train": {
    "batch_size": 16,
    //"multi_speaker_training": true 
  }
}