# network architecture
# encoder related
encoder: conformer
encoder_conf:
    output_size: 512    # dimension of attention
    attention_heads: 8
    linear_units: 2048  # the number of units of position-wise feed forward
    num_blocks: 12      # the number of encoder blocks
    dropout_rate: 0.1
    positional_dropout_rate: 0.0
    attention_dropout_rate: 0.0
    input_layer: conv2d # encoder input type, choose one of conv2d (1/4 subsampling), conv2d6 (1/6) or conv2d8 (1/8)
    normalize_before: true
    cnn_module_kernel: 31
    use_cnn_module: true
    activation_type: 'swish'
    pos_enc_layer_type: 'rel_pos'
    selfattention_layer_type: 'rel_selfattn'
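    # Note: 'rel_pos' + 'rel_selfattn' give Transformer-XL style relative
    # positional attention, the standard pairing for Conformer blocks;
    # cnn_module_kernel is the depthwise kernel size of the conv module.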
# decoder related
decoder: transformer
decoder_conf:
    attention_heads: 8
    linear_units: 2048
    num_blocks: 6
    dropout_rate: 0.1
    positional_dropout_rate: 0.0
    self_attention_dropout_rate: 0.0
    src_attention_dropout_rate: 0.0
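    # In this recipe the decoder attention dimension follows the encoder
    # output_size (512), so each of the 8 heads attends over
    # 512 / 8 = 64 dimensions.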
# hybrid CTC/attention
model_conf:
    ctc_weight: 1.0
    lsm_weight: 0.1     # label smoothing option
    length_normalized_loss: false
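    # Combined objective: loss = ctc_weight * loss_ctc
    # + (1 - ctc_weight) * loss_att. With ctc_weight 1.0 this stage
    # trains on CTC alone; the attention decoder gets no loss gradient.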
# use raw_wav or kaldi feature
raw_wav: true
# dataset related
dataset_conf:
    filter_conf:
        max_length: 2000
        min_length: 50
        token_max_length: 400
        token_min_length: 1
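        # max_length / min_length filter utterances by frame count (with
        # the 10 ms frame shift below, 2000 frames is roughly 20 s of
        # audio); token_max/min_length filter by label token count.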
    resample_conf:
        resample_rate: 16000
    speed_perturb: false
    fbank_conf:
        num_mel_bins: 80
        frame_shift: 10
        frame_length: 25
        dither: 1.0
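        # 80-dim log-mel fbank from 25 ms windows every 10 ms; dither
        # adds small random noise (Kaldi-style) so silent frames do not
        # produce log-of-zero energies.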
    spec_aug: false
    spec_aug_conf:
        num_t_mask: 3
        num_f_mask: 2
        max_t: 50
        max_f: 10
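        # SpecAugment is disabled here (spec_aug: false); when enabled it
        # applies num_t_mask time masks of up to max_t frames and
        # num_f_mask frequency masks of up to max_f mel bins per utterance.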
    shuffle: true
    shuffle_conf:
        shuffle_size: 1500
    sort: true
    sort_conf:
        sort_size: 500  # sort_size should be less than shuffle_size
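    # Design note: shuffle over a 1500-utterance buffer for randomness,
    # then sort 500-utterance windows by length so each batch pads
    # similar-length utterances together.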
    batch_conf:
        batch_type: 'dynamic' # 'static' or 'dynamic'
        max_frames_in_batch: 20000
        batch_size: 3
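        # With batch_type 'dynamic', a batch is filled until it reaches
        # max_frames_in_batch total frames; batch_size only takes effect
        # with batch_type 'static'.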
pretrain: true
wav2vec_conf:
    pretrain: true
    quantize_targets: true
    project_targets: true
    latent_vars: 320
    latent_dim: 512
    latent_groups: 2
    w2v_ext_loss: true
    w2v_loss_weights: [1.5, 0]
    mask: true
    mask_prob: 0.65
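    # wav2vec 2.0 style pretraining: targets are quantized with
    # latent_groups x latent_vars codebooks (2 x 320 entries, latent_dim
    # 512) for the contrastive loss, and w2v_loss_weights scales the
    # auxiliary losses enabled by w2v_ext_loss; mask_prob 0.65 matches
    # the wav2vec 2.0 base setup. Field names follow this recipe and may
    # differ from upstream fairseq.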
grad_clip: 5
accum_grad: 4
max_epoch: 280
log_interval: 100
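# accum_grad 4 accumulates gradients over 4 mini-batches before each
# optimizer step (~4x the effective batch size); grad_clip caps the
# global gradient norm at 5.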
optim: adam
optim_conf:
    lr: 0.002
scheduler: warmuplr     # pytorch v1.1.0+ required
scheduler_conf:
    warmup_steps: 25000
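# warmuplr ramps the learning rate linearly from 0 to lr over the first
# warmup_steps steps, then decays it in proportion to the inverse square
# root of the step count (Noam-style schedule).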