Text-to-Speech
speechbrain
Swahili
TTS
speech-synthesis
Tacotron2
nairaxo committed on
Commit
4aa3330
1 Parent(s): 8fee85c

Create new file

Browse files
Files changed (1) hide show
  1. hyperparams.yaml +69 -0
hyperparams.yaml ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# ################################
# Model: Tacotron2 for TTS
# Authors: Artem Ploujnikov, Yingzhi Wang
# ################################

mask_padding: True
n_mel_channels: 80
n_symbols: 148
symbols_embedding_dim: 512
encoder_kernel_size: 5
encoder_n_convolutions: 3
encoder_embedding_dim: 512
attention_rnn_dim: 1024
attention_dim: 128
attention_location_n_filters: 32
attention_location_kernel_size: 31
n_frames_per_step: 1
decoder_rnn_dim: 1024
prenet_dim: 256
max_decoder_steps: 1000
gate_threshold: 0.5
p_attention_dropout: 0.1
p_decoder_dropout: 0.1
postnet_embedding_dim: 512
postnet_kernel_size: 5
postnet_n_convolutions: 5
decoder_no_early_stopping: False
sample_rate: 22050

# Model
model: !new:speechbrain.lobes.models.Tacotron2.Tacotron2
    mask_padding: !ref <mask_padding>
    n_mel_channels: !ref <n_mel_channels>
    # symbols
    n_symbols: !ref <n_symbols>
    symbols_embedding_dim: !ref <symbols_embedding_dim>
    # encoder
    encoder_kernel_size: !ref <encoder_kernel_size>
    encoder_n_convolutions: !ref <encoder_n_convolutions>
    encoder_embedding_dim: !ref <encoder_embedding_dim>
    # attention
    attention_rnn_dim: !ref <attention_rnn_dim>
    attention_dim: !ref <attention_dim>
    # attention location
    attention_location_n_filters: !ref <attention_location_n_filters>
    attention_location_kernel_size: !ref <attention_location_kernel_size>
    # decoder
    n_frames_per_step: !ref <n_frames_per_step>
    decoder_rnn_dim: !ref <decoder_rnn_dim>
    prenet_dim: !ref <prenet_dim>
    max_decoder_steps: !ref <max_decoder_steps>
    gate_threshold: !ref <gate_threshold>
    p_attention_dropout: !ref <p_attention_dropout>
    p_decoder_dropout: !ref <p_decoder_dropout>
    # postnet
    postnet_embedding_dim: !ref <postnet_embedding_dim>
    postnet_kernel_size: !ref <postnet_kernel_size>
    postnet_n_convolutions: !ref <postnet_n_convolutions>
    decoder_no_early_stopping: !ref <decoder_no_early_stopping>

# Function that converts the text into a sequence of valid characters.
text_to_sequence: !name:speechbrain.utils.text_to_sequence.text_to_sequence

modules:
    model: !ref <model>

pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
    loadables:
        model: !ref <model>