aioxlabs
/

tacotron-swahili

speech-synthesis

Model card Files Files and versions Community

nairaxo committed on Aug 22, 2022

Commit

4aa3330

•

1 Parent(s): 8fee85c

Create new file

Files changed (1) hide show

hyperparams.yaml +69 -0

hyperparams.yaml ADDED Viewed

	@@ -0,0 +1,69 @@

+# ################################
+# Model: Tacotron2 for TTS
+# Authors: Artem Ploujnikov, Yingzhi Wang
+# ################################
+mask_padding: True
+n_mel_channels: 80
+n_symbols: 148
+symbols_embedding_dim: 512
+encoder_kernel_size: 5
+encoder_n_convolutions: 3
+encoder_embedding_dim: 512
+attention_rnn_dim: 1024
+attention_dim: 128
+attention_location_n_filters: 32
+attention_location_kernel_size: 31
+n_frames_per_step: 1
+decoder_rnn_dim: 1024
+prenet_dim: 256
+max_decoder_steps: 1000
+gate_threshold: 0.5
+p_attention_dropout: 0.1
+p_decoder_dropout: 0.1
+postnet_embedding_dim: 512
+postnet_kernel_size: 5
+postnet_n_convolutions: 5
+decoder_no_early_stopping: False
+sample_rate: 22050
+# Model
+model: !new:speechbrain.lobes.models.Tacotron2.Tacotron2
+  mask_padding: !ref <mask_padding>
+  n_mel_channels: !ref <n_mel_channels>
+  # symbols
+  n_symbols: !ref <n_symbols>
+  symbols_embedding_dim: !ref <symbols_embedding_dim>
+  # encoder
+  encoder_kernel_size: !ref <encoder_kernel_size>
+  encoder_n_convolutions: !ref <encoder_n_convolutions>
+  encoder_embedding_dim: !ref <encoder_embedding_dim>
+  # attention
+  attention_rnn_dim: !ref <attention_rnn_dim>
+  attention_dim: !ref <attention_dim>
+  # attention location
+  attention_location_n_filters: !ref <attention_location_n_filters>
+  attention_location_kernel_size: !ref <attention_location_kernel_size>
+  # decoder
+  n_frames_per_step: !ref <n_frames_per_step>
+  decoder_rnn_dim: !ref <decoder_rnn_dim>
+  prenet_dim: !ref <prenet_dim>
+  max_decoder_steps: !ref <max_decoder_steps>
+  gate_threshold: !ref <gate_threshold>
+  p_attention_dropout: !ref <p_attention_dropout>
+  p_decoder_dropout: !ref <p_decoder_dropout>
+  # postnet
+  postnet_embedding_dim: !ref <postnet_embedding_dim>
+  postnet_kernel_size: !ref <postnet_kernel_size>
+  postnet_n_convolutions: !ref <postnet_n_convolutions>
+  decoder_no_early_stopping: !ref <decoder_no_early_stopping>
+# Function that converts the input text into a sequence of symbol IDs.
+text_to_sequence: !name:speechbrain.utils.text_to_sequence.text_to_sequence
+modules:
+    model: !ref <model>
+pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
+    loadables:
+        model: !ref <model>