emiyasstar committed on
Commit
9edce30
1 Parent(s): 089b429

Upload train_conformer_pretrain_w2v.yaml

Browse files
Files changed (1) hide show
  1. train_conformer_pretrain_w2v.yaml +95 -0
train_conformer_pretrain_w2v.yaml ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # network architecture
2
+ # encoder related
3
+ encoder: conformer
4
+ encoder_conf:
5
+ output_size: 512 # dimension of attention
6
+ attention_heads: 8
7
+ linear_units: 2048 # the number of units of position-wise feed forward
8
+ num_blocks: 12 # the number of encoder blocks
9
+ dropout_rate: 0.1
10
+ positional_dropout_rate: 0.0
11
+ attention_dropout_rate: 0.0
12
+ input_layer: conv2d # encoder input type; you can choose conv2d, conv2d6, or conv2d8
13
+ normalize_before: true
14
+ cnn_module_kernel: 31
15
+ use_cnn_module: True
16
+ activation_type: 'swish'
17
+ pos_enc_layer_type: 'rel_pos'
18
+ selfattention_layer_type: 'rel_selfattn'
19
+
20
+ # decoder related
21
+ decoder: transformer
22
+ decoder_conf:
23
+ attention_heads: 8
24
+ linear_units: 2048
25
+ num_blocks: 6
26
+ dropout_rate: 0.1
27
+ positional_dropout_rate: 0.0
28
+ self_attention_dropout_rate: 0.0
29
+ src_attention_dropout_rate: 0.0
30
+
31
+ # hybrid CTC/attention
32
+ model_conf:
33
+ ctc_weight: 1.0
34
+ lsm_weight: 0.1 # label smoothing option
35
+ length_normalized_loss: false
36
+
37
+ # use raw_wav or kaldi feature
38
+ raw_wav: true
39
+
40
+ # dataset related
41
+ dataset_conf:
42
+ filter_conf:
43
+ max_length: 2000
44
+ min_length: 50
45
+ token_max_length: 400
46
+ token_min_length: 1
47
+ resample_conf:
48
+ resample_rate: 16000
49
+ speed_perturb: false
50
+ fbank_conf:
51
+ num_mel_bins: 80
52
+ frame_shift: 10
53
+ frame_length: 25
54
+ dither: 1.0
55
+ spec_aug: false
56
+ spec_aug_conf:
57
+ num_t_mask: 3
58
+ num_f_mask: 2
59
+ max_t: 50
60
+ max_f: 10
61
+ shuffle: true
62
+ shuffle_conf:
63
+ shuffle_size: 1500
64
+ sort: true
65
+ sort_conf:
66
+ sort_size: 500 # sort_size should be less than shuffle_size
67
+ batch_conf:
68
+ batch_type: 'dynamic' # static or dynamic
69
+ max_frames_in_batch: 20000
70
+ batch_size: 3
71
+
72
+ pretrain: True
73
+ wav2vec_conf:
74
+ pretrain: True
75
+ quantize_targets: True
76
+ project_targets: True
77
+ latent_vars: 320
78
+ latent_dim: 512
79
+ latent_groups: 2
80
+ w2v_ext_loss: True
81
+ w2v_loss_weights: [1.5,0]
82
+ mask: True
83
+ mask_prob: 0.65
84
+
85
+ grad_clip: 5
86
+ accum_grad: 4
87
+ max_epoch: 280
88
+ log_interval: 100
89
+
90
+ optim: adam
91
+ optim_conf:
92
+ lr: 0.002
93
+ scheduler: warmuplr # pytorch v1.1.0+ required
94
+ scheduler_conf:
95
+ warmup_steps: 25000