# RNN-Transducer ASR configuration written in HyperPyYAML syntax:
#   !new:<class>  builds an instance of <class>, passing the nested keys as
#                 keyword arguments.
#   !ref <key>    reuses the value (or object) defined under <key> in this file.
#
# NOTE(review): this file had been collapsed onto a single line, and every
# angle-bracketed token (the `!ref` targets and the subword piece names) was
# missing. The nesting and the restored tokens below are reconstructed from
# the key names; confirm them against the uetasr constructors before training.

# Subword text featurizer.
text_encoder: !new:uetasr.featurizers.text.Subword
  model_prefix: vocabs/subword_vietnamese_500
  data_path: transcript_v3.1.txt
  character_coverage: 1.0
  model_type: bpe  # one of: word, bpe, unigram, char
  num_threads: 16
  unk_id: 1
  pad_id: 0
  eos_id: -1
  # NOTE(review): the three piece strings were blank; restored to the
  # SentencePiece default piece names. Confirm they match the trained vocab.
  unk_piece: "<unk>"
  pad_piece: "<pad>"
  eos_piece: "</s>"
  # NOTE(review): assumed to be an argument of Subword (it directly followed
  # the piece settings); the `!ref`s further down point here.
  vocab_size: 500

# Log-mel spectrogram audio featurizer.
audio_encoder: !new:uetasr.featurizers.audio.LogMelSpectrogram
  fs: 16000
  n_fft: 512
  win_length: 400
  hop_length: 160
  n_mels: 80
  fmin: 0
  fmax: 8000
  htk: false

# Shared width, referenced by the encoder, decoder and jointer below.
d_model: 256

# Conformer acoustic encoder.
encoder_model: !new:uetasr.models.encoders.Conformer
  num_features: 80
  window_size: 1
  d_model: !ref <d_model>
  input_layer: vgg2l
  pos_enc_layer_type: rel_pos
  dropout_rate_pos_enc: 0.2
  selfattention_layer_type: rel_selfattn
  attention_heads: 4
  dropout_rate_att: 0.1
  dropout_rate_pos_wise: 0.1
  dropout_rate: 0.1
  positionwise_layer_type: linear
  linear_units: 1024
  conv_mod_kernel: 31
  num_blocks: 18
  use_macaron: true
  use_cnn_module: true
  eps_layer_norm: 0.000000000001

# RNN (LSTM) decoder / prediction network.
decoder_model: !new:uetasr.models.decoders.RNNDecoder
  # NOTE(review): target restored by name; confirm.
  vocab_size: !ref <text_encoder.vocab_size>
  embedding_dim: 256
  num_layers: 1
  # NOTE(review): target restored as the shared width; confirm.
  hidden_dim: !ref <d_model>
  dropout_embed: 0.2
  dropout_rnn: 0.1
  rnn_type: LSTM

# Jointer combining encoder and decoder outputs.
jointer_model: !new:uetasr.layers.jointer.RNNTJointer
  # NOTE(review): all three targets restored by name; confirm.
  encoder_dim: !ref <d_model>
  decoder_dim: !ref <d_model>
  hidden_dim: 512
  output_dim: !ref <text_encoder.vocab_size>

# No auxiliary CTC layer is configured; referenced by `model` below.
ctc_lin: null

# Full RNN-T model assembled from the components defined above.
model: !new:uetasr.models.rnnt.RNNT
  encoder: !ref <encoder_model>
  decoder: !ref <decoder_model>
  jointer: !ref <jointer_model>
  ctc_lin: !ref <ctc_lin>
  ctc_dropout: 0.1
  # NOTE(review): assumed to be an RNNT argument because it directly followed
  # `ctc_dropout`; move to the top level if the training script reads it.
  use_cmvn: true