# Hyperparameters that build a UnitHifiganGenerator and a Pretrainer which
# loads it. Written for HyperPyYAML: `!new:` instantiates the named class
# with the nested mapping as keyword arguments, and `!ref <key>` reuses the
# value of another top-level key in this file.

# Values consumed by the generator's embedding-related arguments.
vocab_size: 6001
embedding_dim: 1024

# Channel counts passed to the generator.
in_channels: 1024
out_channels: 1

# Residual-block and upsampling configuration passed to the generator.
# `resblock_type` is quoted so it is loaded as the string "1", not an integer.
resblock_type: "1"
resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
resblock_kernel_sizes: [3, 7, 11]
upsample_kernel_sizes: [11, 8, 8, 4, 4]
upsample_initial_channel: 512
upsample_factors: [5, 4, 4, 2, 2]

# Remaining generator options.
inference_padding: 5
cond_channels: 0
conv_post_bias: True

# Generator instance. Every `!ref` below points at the same-named top-level
# key defined above, so each value is declared exactly once.
# NOTE(review): the last three arguments (duration_predictor, multi_speaker,
# skip_token_embedding) are assumed to be generator constructor arguments
# because of where they appear; confirm against the UnitHifiganGenerator
# signature.
generator: !new:speechbrain.lobes.models.HifiGAN.UnitHifiganGenerator
  in_channels: !ref <in_channels>
  out_channels: !ref <out_channels>
  resblock_type: !ref <resblock_type>
  resblock_dilation_sizes: !ref <resblock_dilation_sizes>
  resblock_kernel_sizes: !ref <resblock_kernel_sizes>
  upsample_kernel_sizes: !ref <upsample_kernel_sizes>
  upsample_initial_channel: !ref <upsample_initial_channel>
  upsample_factors: !ref <upsample_factors>
  inference_padding: !ref <inference_padding>
  cond_channels: !ref <cond_channels>
  conv_post_bias: !ref <conv_post_bias>
  vocab_size: !ref <vocab_size>
  embedding_dim: !ref <embedding_dim>
  duration_predictor: False
  multi_speaker: False
  skip_token_embedding: True

# Named modules exposed to the consumer of this file; reuses the generator
# object built above rather than constructing a second one.
modules:
  generator: !ref <generator>

# Pretrainer whose `loadables` entry is that same generator object.
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
  loadables:
    generator: !ref <generator>