{
  "base_config": "config/ns2.json",
  "model_type": "NaturalSpeech2",
  "dataset": [
    "libritts"
  ],
  "preprocess": {
    "use_mel": false,
    "use_code": true,
    "use_spkid": true,
    "use_pitch": true,
    "use_duration": true,
    "use_phone": true,
    "use_len": true,
    "use_cross_reference": true,
    "train_file": "train.json",
    "valid_file": "test.json",
    "melspec_dir": "mel",
    "code_dir": "code",
    "pitch_dir": "pitch",
    "duration_dir": "duration",
    "metadata_dir": "metadata",
    "read_metadata": true,
    "clip_mode": "start"
  },
  "model": {
    "latent_dim": 128,
    "prior_encoder": {
      "vocab_size": 100,
      "pitch_min": 50,
      "pitch_max": 1100,
      "pitch_bins_num": 512,
      "encoder": {
        "encoder_layer": 6,
        "encoder_hidden": 512,
        "encoder_head": 8,
        "conv_filter_size": 2048,
        "conv_kernel_size": 9,
        "encoder_dropout": 0.2,
        "use_cln": true
      },
      "duration_predictor": {
        "input_size": 512,
        "filter_size": 512,
        "kernel_size": 3,
        "conv_layers": 30,
        "cross_attn_per_layer": 3,
        "attn_head": 8,
        "drop_out": 0.5
      },
      "pitch_predictor": {
        "input_size": 512,
        "filter_size": 512,
        "kernel_size": 5,
        "conv_layers": 30,
        "cross_attn_per_layer": 3,
        "attn_head": 8,
        "drop_out": 0.5
      }
    },
    "diffusion": {
      "wavenet": {
        "input_size": 128,
        "hidden_size": 512,
        "out_size": 128,
        "num_layers": 40,
        "cross_attn_per_layer": 3,
        "dilation_cycle": 2,
        "attn_head": 8,
        "drop_out": 0.2
      },
      "beta_min": 0.05,
      "beta_max": 20,
      "sigma": 1.0,
      "noise_factor": 1.0,
      "ode_solver": "euler",
      "diffusion_type": "diffusion"
    },
    "prompt_encoder": {
      "encoder_layer": 6,
      "encoder_hidden": 512,
      "encoder_head": 8,
      "conv_filter_size": 2048,
      "conv_kernel_size": 9,
      "encoder_dropout": 0.2,
      "use_cln": false
    },
    "query_emb": {
      "query_token_num": 32,
      "hidden_size": 512,
      "head_num": 8
    },
    "inference_step": 500
  },
  "train": {
    "use_dynamic_batchsize": true,
    "max_tokens": 7500,
    "max_sentences": 32,
    "lr_warmup_steps": 5000,
    "lr_scheduler": "cosine",
    "num_train_steps": 800000,
    "adam": {
      "lr": 7.5e-5
    },
    "diff_ce_loss_lambda": 0.5,
    "diff_noise_loss_lambda": 1.0,
    "ddp": false,
    "random_seed": 114,
    "batch_size": 32,
    "epochs": 5000,
    "max_steps": 1000000,
    "total_training_steps": 800000,
    "save_summary_steps": 500,
    "save_checkpoints_steps": 2000,
    "valid_interval": 2000,
    "keep_checkpoint_max": 100
  }
}