textless_sm_sl_es / config.yaml
cndn
programmatically upload /textless_sl_es
1ac22cd
raw
history blame
815 Bytes
# Speech model data/feature configuration.
#
# NOTE(review): the nesting below was reconstructed from a copy in which every
# line was at column 0. In that flattened form the section keys (`multitask`,
# `specaugment`, `transforms`, `vocoder`, `hub`, ...) parse as null and their
# settings become unrelated top-level keys. Section boundaries were inferred
# from the alphabetical key order within each level; confirm against the
# upstream file before relying on it.
data_root: N/A
input_channels: 1
input_feat_per_channel: 80

# One entry per named task; `source_unit` is the only one defined here.
multitask:
  source_unit:
    data: N/A
    decoder_type: transformer
    dict: N/A
    encoder_layer: 6
    loss_weight: 8.0
    target_type: text

output_channels: 1
output_feat_per_channel: 1
output_feat_reduction_rate: 0
output_sample_rate: 16000

# Parameters for the `specaugment` transform listed under `transforms._train`.
specaugment:
  freq_mask_F: 27
  freq_mask_N: 1
  time_mask_N: 1
  time_mask_T: 100
  time_mask_p: 1.0
  time_wrap_W: 0

# Transform names per split; `_train` adds `specaugment` on top of the
# `utterance_cmvn` step that `_eval` also uses.
transforms:
  _eval:
    - utterance_cmvn
  _train:
    - utterance_cmvn
    - specaugment

vocoder:
  dur_prediction: true
  model_path: N/A
  speaker: false
  type: code_hifigan

hub:
  input_type: fbank80_w_utt_cmvn
  tts_model_id: pytorch/fairseq:ust:unit_hifigan_mhubert_vp_en_es_fr_it3_400k_layer11_km1000_lj_dur
  unit_vocoder: true
  # NOTE(review): placed under `hub` on the assumption that the consumer reads
  # generation overrides from the hub section — confirm; if it expects a
  # top-level `generation_args`, dedent this mapping by one level.
  generation_args:
    beam: 10
    max_len_a: 1