Spaces:
Running
on
Zero
Running
on
Zero
{ | |
"base_config": "config/svc/base.json", | |
"model": { | |
"condition_encoder": { | |
"merge_mode": "add", | |
// Prosody Features | |
"use_f0": true, | |
"use_uv": true, | |
"use_energy": true, | |
// Quantization (0 means no quantization) | |
"input_melody_dim": 1, | |
"n_bins_melody": 256, | |
"output_melody_dim": 384, | |
"input_loudness_dim": 1, | |
"n_bins_loudness": 256, | |
"output_loudness_dim": 384, | |
// Semantic Features | |
"use_whisper": false, | |
"use_contentvec": false, | |
"use_wenet": false, | |
"use_mert": false, | |
"whisper_dim": 1024, | |
"contentvec_dim": 256, | |
"mert_dim": 256, | |
"wenet_dim": 512, | |
"content_encoder_dim": 384, | |
// Speaker Features | |
"output_singer_dim": 384, | |
"singer_table_size": 512, | |
"use_spkid": true | |
}, | |
"diffusion": { | |
"scheduler": "ddpm", | |
"scheduler_settings": { | |
"num_train_timesteps": 1000, | |
"beta_start": 1.0e-4, | |
"beta_end": 0.02, | |
"beta_schedule": "linear" | |
}, | |
// Diffusion steps encoder | |
"step_encoder": { | |
"dim_raw_embedding": 128, | |
"dim_hidden_layer": 512, | |
"activation": "SiLU", | |
"num_layer": 2, | |
"max_period": 10000 | |
}, | |
// Diffusion decoder | |
"model_type": "bidilconv", | |
// Options for "model_type": bidilconv, unet2d (TODO: unet1d) | |
"bidilconv": { | |
"base_channel": 384, | |
"n_res_block": 20, | |
"conv_kernel_size": 3, | |
"dilation_cycle_length": 4, | |
// "dilation_cycle_length": as a special case, 1 means no dilation | |
"conditioner_size": 384 | |
}, | |
"unet2d": { | |
"in_channels": 1, | |
"out_channels": 1, | |
"down_block_types": [ | |
"CrossAttnDownBlock2D", | |
"CrossAttnDownBlock2D", | |
"CrossAttnDownBlock2D", | |
"DownBlock2D" | |
], | |
"mid_block_type": "UNetMidBlock2DCrossAttn", | |
"up_block_types": [ | |
"UpBlock2D", | |
"CrossAttnUpBlock2D", | |
"CrossAttnUpBlock2D", | |
"CrossAttnUpBlock2D" | |
], | |
"only_cross_attention": false | |
} | |
} | |
}, | |
"train": { | |
// Basic settings | |
"batch_size": 64, | |
"gradient_accumulation_step": 1, | |
"max_epoch": -1, | |
// "max_epoch": -1 means no limit | |
"save_checkpoint_stride": [ | |
5, | |
20 | |
], | |
// "save_checkpoint_stride": the unit is epochs | |
"keep_last": [ | |
3, | |
-1 | |
], | |
// "keep_last": -1 means infinite; if only one number is given, it will be broadcast | |
"run_eval": [ | |
false, | |
true | |
], | |
// "run_eval": if only one value is given, it will be broadcast | |
// Fix the random seed | |
"random_seed": 10086, | |
// Batchsampler | |
"sampler": { | |
"holistic_shuffle": true, | |
"drop_last": true | |
}, | |
// Dataloader | |
"dataloader": { | |
"num_worker": 32, | |
"pin_memory": true | |
}, | |
// Trackers | |
"tracker": [ | |
"tensorboard" | |
// "wandb", | |
// "cometml", | |
// "mlflow", | |
], | |
// Optimizer | |
"optimizer": "AdamW", | |
"adamw": { | |
"lr": 4.0e-4 | |
// nn model lr | |
}, | |
// LR Scheduler | |
"scheduler": "ReduceLROnPlateau", | |
"reducelronplateau": { | |
"factor": 0.8, | |
"patience": 10, | |
// "patience": the unit is epochs | |
"min_lr": 1.0e-4 | |
} | |
}, | |
"inference": { | |
"diffusion": { | |
"scheduler": "pndm", | |
"scheduler_settings": { | |
"num_inference_timesteps": 1000 | |
} | |
} | |
} | |
} |