voice-filter / config.json
nguyenvulebinh's picture
add model voice-filter
efb533c
raw
history blame contribute delete
No virus
2.18 kB
{
"architectures": [
"VoiceFilter"
],
"audio_max_lengh": 20,
"enh_args": {
"batch_size": 8,
"batch_type": "folded",
"best_model_criterion": [
[
"valid",
"si_snr",
"max"
],
[
"valid",
"loss",
"min"
]
],
"criterions": [
{
"conf": {
"eps": 1e-07
},
"name": "si_snr",
"wrapper": "pit",
"wrapper_conf": {
"independent_perm": false,
"weight": 1.0
}
}
],
"decoder": "stft",
"decoder_conf": {
"hop_length": 128,
"n_fft": 512
},
"encoder": "stft",
"encoder_conf": {
"hop_length": 128,
"n_fft": 512
},
"init": "xavier_uniform",
"keep_nbest_models": 1,
"max_epoch": 5,
"model_conf": {
"loss_type": "mask_mse",
"mask_type": "psm"
},
"num_workers": 4,
"optim": "adam",
"optim_conf": {
"eps": 1e-08,
"lr": 0.001,
"weight_decay": 1e-07
},
"patience": 10,
"scheduler": "reducelronplateau",
"scheduler_conf": {
"factor": 0.7,
"mode": "min",
"patience": 1
},
"separator": "conformer_voice_filter",
"separator_conf": {
"adim": 1024,
"aheads": 8,
"attention_dropout_rate": 0.1,
"concat_after": false,
"conformer_activation_type": "swish",
"conformer_enc_kernel_size": 5,
"conformer_pos_enc_layer_type": "rel_pos",
"conformer_self_attn_layer_type": "rel_selfattn",
"dropout_rate": 0.1,
"input_layer": "linear",
"layers": 4,
"linear_units": 896,
"nonlinear": "relu",
"normalize_before": false,
"num_spk": 1,
"positional_dropout_rate": 0.1,
"positionwise_conv_kernel_size": 1,
"positionwise_layer_type": "conv1d",
"use_cnn_in_conformer": true,
"use_macaron_style_in_conformer": true
},
"val_scheduler_criterion": [
"valid",
"loss"
],
"xvector_emb_dim": 512
},
"enh_chunk_size": 5,
"model_type": "voicefilter",
"sample_rate": 16000,
"torch_dtype": "float32",
"transformers_version": "4.25.1"
}