emiyasstar committed on
Commit
ac85292
1 Parent(s): 649d904

Upload train_conformer_100h.yaml

Browse files
Files changed (1) hide show
  1. train_conformer_100h.yaml +91 -0
train_conformer_100h.yaml ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # network architecture
2
+ # encoder related
3
+ encoder: conformer
4
+ encoder_conf:
5
+ output_size: 512 # dimension of attention
6
+ attention_heads: 8
7
+ linear_units: 2048 # the number of units of position-wise feed forward
8
+ num_blocks: 12 # the number of encoder blocks
9
+ dropout_rate: 0.1
10
+ positional_dropout_rate: 0.0
11
+ attention_dropout_rate: 0.0
12
+ input_layer: conv2d # encoder input type; you can choose conv2d, conv2d6 or conv2d8
13
+ normalize_before: true
14
+ cnn_module_kernel: 31
15
+ use_cnn_module: True
16
+ activation_type: 'swish'
17
+ pos_enc_layer_type: 'rel_pos'
18
+ selfattention_layer_type: 'rel_selfattn'
19
+
20
+ # decoder related
21
+ decoder: transformer
22
+ decoder_conf:
23
+ attention_heads: 2
24
+ linear_units: 512
25
+ num_blocks: 1
26
+ dropout_rate: 0.1
27
+ positional_dropout_rate: 0.0
28
+ self_attention_dropout_rate: 0.0
29
+ src_attention_dropout_rate: 0.0
30
+
31
+ # hybrid CTC/attention
32
+ model_conf:
33
+ ctc_weight: 0.7
34
+ lsm_weight: 0.1 # label smoothing option
35
+ length_normalized_loss: false
36
+
37
+ # use raw_wav or kaldi feature
38
+ raw_wav: true
39
+
40
+ # dataset related
41
+ dataset_conf:
42
+ filter_conf:
43
+ max_length: 2000
44
+ min_length: 50
45
+ token_max_length: 400
46
+ token_min_length: 1
47
+ resample_conf:
48
+ resample_rate: 16000
49
+ speed_perturb: true
50
+ fbank_conf:
51
+ num_mel_bins: 80
52
+ frame_shift: 10
53
+ frame_length: 25
54
+ dither: 1.0
55
+ spec_aug: true
56
+ spec_aug_conf:
57
+ num_t_mask: 3
58
+ num_f_mask: 2
59
+ max_t: 50
60
+ max_f: 10
61
+ shuffle: true
62
+ shuffle_conf:
63
+ shuffle_size: 1500
64
+ sort: true
65
+ sort_conf:
66
+ sort_size: 500 # sort_size should be less than shuffle_size
67
+ batch_conf:
68
+ batch_type: 'static' # static or dynamic
69
+ batch_size: 10
70
+
71
+ pretrain: False
72
+ wav2vec_conf:
73
+ pretrain: False
74
+ quantize_targets: True
75
+ project_targets: True
76
+ latent_vars: 320
77
+ latent_dim: 512
78
+ latent_groups: 2
79
+ mask: False
80
+
81
+ grad_clip: 5
82
+ accum_grad: 1
83
+ max_epoch: 120
84
+ log_interval: 100
85
+
86
+ optim: adam
87
+ optim_conf:
88
+ lr: 0.001
89
+ scheduler: warmuplr # pytorch v1.1.0+ required
90
+ scheduler_conf:
91
+ warmup_steps: 15000