# Scalar hyperparameters. Most of these are consumed further down through
# `!ref <name>` when the generator object is built.

# Discrete-unit vocabulary size and the width of the unit embedding.
vocab_size: 101
embedding_dim: 128

# Generator settings.
# NOTE(review): in_channels equals embedding_dim (both 128); presumably they
# must stay in sync -- confirm against the generator implementation.
in_channels: 128
out_channels: 1
# Quoted on purpose so the value stays the string "1" instead of the int 1.
resblock_type: "1"
resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
resblock_kernel_sizes: [3, 7, 11]
upsample_kernel_sizes: [11, 8, 8, 4, 4]
upsample_initial_channel: 512
upsample_factors: [5, 4, 4, 2, 2]
inference_padding: 5
cond_channels: 0
conv_post_bias: True

# var_pred_* values are forwarded to the generator entry.
var_pred_hidden_dim: 128
var_pred_kernel_size: 3
var_pred_dropout: 0.5
# NOTE(review): dur_prediction_weight is not referenced anywhere in the
# visible part of this file; presumably kept for parity with the training
# recipe -- confirm before removing.
dur_prediction_weight: 1.0
# Generator object. `!new:` instantiates the class named in the tag and uses
# the nested mapping below as its keyword arguments, so every argument MUST be
# indented under `generator`; left at top level they would become duplicate
# root keys and the constructor would receive nothing.
generator: !new:speechbrain.lobes.models.HifiGAN.UnitHifiganGenerator
  in_channels: !ref <in_channels>
  out_channels: !ref <out_channels>
  resblock_type: !ref <resblock_type>
  resblock_dilation_sizes: !ref <resblock_dilation_sizes>
  resblock_kernel_sizes: !ref <resblock_kernel_sizes>
  upsample_kernel_sizes: !ref <upsample_kernel_sizes>
  upsample_initial_channel: !ref <upsample_initial_channel>
  upsample_factors: !ref <upsample_factors>
  inference_padding: !ref <inference_padding>
  cond_channels: !ref <cond_channels>
  conv_post_bias: !ref <conv_post_bias>
  vocab_size: !ref <vocab_size>
  embedding_dim: !ref <embedding_dim>
  # NOTE(review): presumably switches on the duration-prediction branch that
  # the var_pred_* arguments configure -- confirm against the class.
  duration_predictor: True
  var_pred_hidden_dim: !ref <var_pred_hidden_dim>
  var_pred_kernel_size: !ref <var_pred_kernel_size>
  var_pred_dropout: !ref <var_pred_dropout>
  # The plain scalar `none` loads as the string "none", not as YAML null.
  pooling_type: none
# Named modules exposed by this config. The entry reuses the object defined
# under the top-level `generator` key (via `!ref`) rather than building a
# second instance.
modules:
  generator: !ref <generator>
# Pretrainer object. `loadables` maps a name to the object that should
# receive loaded parameters; here the single entry "generator" points at the
# object defined under the top-level `generator` key.
# NOTE(review): where the parameters are fetched from is not configured in
# the visible part of this file -- presumably supplied by the caller.
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
  loadables:
    generator: !ref <generator>