Fhrozen committed on
Commit
be831d2
1 Parent(s): e650c72
README.md CHANGED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - audio-source-separation
6
+ language: en
7
+ datasets:
8
+ - wsj0_2mix
9
+ license: cc-by-4.0
10
+ inference: false
11
+ ---
12
+
13
+ # ESPnet2 ENH pretrained model
14
+
15
+ ## `Chenda Li/wsj0_2mix_enh_train_enh_conv_tasnet_raw_valid.si_snr.ave, fs=8k, lang=en`
16
+
17
+ ♻️ Imported from <https://zenodo.org/record/4498562#.YOAOApozZH4>.
18
+
19
+ This model was trained by Chenda Li using the wsj0_2mix recipe in [espnet](https://github.com/espnet/espnet/).
20
+
21
+ ### Python API
22
+
23
+ ```text
24
+ See https://github.com/espnet/espnet_model_zoo
25
+ ```
26
+
27
+ ### Evaluate in the recipe
28
+
29
+ ```python
30
+ # coming soon
31
+ ```
32
+
33
+ ### Results
34
+
35
+ ```markdown
36
+ # RESULTS
37
+ ## Environments
38
+ - date: `Thu Feb 4 01:16:18 CST 2021`
39
+ - python version: `3.7.6 (default, Jan 8 2020, 19:59:22) [GCC 7.3.0]`
40
+ - espnet version: `espnet 0.9.7`
41
+ - pytorch version: `pytorch 1.5.0`
42
+ - Git hash: `a3334220b0352931677946d178fade3313cf82bb`
43
+ - Commit date: `Fri Jan 29 23:35:47 2021 +0800`
44
+
45
+
46
+ ## enh_train_enh_conv_tasnet_raw
47
+
48
+ config: ./conf/tuning/train_enh_conv_tasnet.yaml
49
+
50
+ |dataset|STOI|SAR|SDR|SIR|
51
+ |---|---|---|---|---|
52
+ |enhanced_cv_min_8k|0.949205|17.3785|16.8028|26.9785|
53
+ |enhanced_tt_min_8k|0.95349|16.6221|15.9494|25.9032|
54
+ ```
55
+
56
+ ### Training config
57
+
58
+ See full config in [`config.yaml`](./exp/enh_train_enh_conv_tasnet_raw/config.yaml)
59
+
60
+ ```yaml
61
+ config: ./conf/tuning/train_enh_conv_tasnet.yaml
62
+ print_config: false
63
+ log_level: INFO
64
+ dry_run: false
65
+ iterator_type: chunk
66
+ output_dir: exp/enh_train_enh_conv_tasnet_raw
67
+ ngpu: 1
68
+ seed: 0
69
+ num_workers: 4
70
+ num_att_plot: 3
71
+ dist_backend: nccl
72
+ dist_init_method: env://
73
+ dist_world_size: null
74
+ dist_rank: null
75
+ local_rank: 0
76
+ dist_master_addr: null
77
+ dist_master_port: null
78
+ dist_launcher: null
79
+ multiprocessing_distributed: false
80
+ cudnn_enabled: true
81
+ cudnn_benchmark: false
82
+ cudnn_deterministic: true
83
+ ```
exp/enh_stats_8k/train/feats_stats.npz ADDED
Binary file (778 Bytes). View file
 
exp/enh_train_enh_conv_tasnet_raw/66epoch.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a324193d6a4537be5711e00caf18e631df5e10c88896a388e5745626bfe22ae
3
+ size 34969477
exp/enh_train_enh_conv_tasnet_raw/RESULTS.md ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- Generated by ./scripts/utils/show_enh_score.sh -->
2
+ # RESULTS
3
+ ## Environments
4
+ - date: `Thu Feb 4 01:16:18 CST 2021`
5
+ - python version: `3.7.6 (default, Jan 8 2020, 19:59:22) [GCC 7.3.0]`
6
+ - espnet version: `espnet 0.9.7`
7
+ - pytorch version: `pytorch 1.5.0`
8
+ - Git hash: `a3334220b0352931677946d178fade3313cf82bb`
9
+ - Commit date: `Fri Jan 29 23:35:47 2021 +0800`
10
+
11
+
12
+ ## enh_train_enh_conv_tasnet_raw
13
+
14
+ config: ./conf/tuning/train_enh_conv_tasnet.yaml
15
+
16
+ |dataset|STOI|SAR|SDR|SIR|
17
+ |---|---|---|---|---|
18
+ |enhanced_cv_min_8k|0.949205|17.3785|16.8028|26.9785|
19
+ |enhanced_tt_min_8k|0.95349|16.6221|15.9494|25.9032|
20
+
exp/enh_train_enh_conv_tasnet_raw/config.yaml ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: ./conf/tuning/train_enh_conv_tasnet.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: chunk
6
+ output_dir: exp/enh_train_enh_conv_tasnet_raw
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 4
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: null
14
+ dist_rank: null
15
+ local_rank: 0
16
+ dist_master_addr: null
17
+ dist_master_port: null
18
+ dist_launcher: null
19
+ multiprocessing_distributed: false
20
+ cudnn_enabled: true
21
+ cudnn_benchmark: false
22
+ cudnn_deterministic: true
23
+ collect_stats: false
24
+ write_collected_feats: false
25
+ max_epoch: 100
26
+ patience: 4
27
+ val_scheduler_criterion:
28
+ - valid
29
+ - loss
30
+ early_stopping_criterion:
31
+ - valid
32
+ - loss
33
+ - min
34
+ best_model_criterion:
35
+ - - valid
36
+ - si_snr
37
+ - max
38
+ - - valid
39
+ - loss
40
+ - min
41
+ keep_nbest_models: 1
42
+ grad_clip: 5.0
43
+ grad_clip_type: 2.0
44
+ grad_noise: false
45
+ accum_grad: 1
46
+ no_forward_run: false
47
+ resume: true
48
+ train_dtype: float32
49
+ use_amp: false
50
+ log_interval: null
51
+ unused_parameters: false
52
+ use_tensorboard: true
53
+ use_wandb: false
54
+ wandb_project: null
55
+ wandb_id: null
56
+ pretrain_path: null
57
+ init_param: []
58
+ freeze_param: []
59
+ num_iters_per_epoch: null
60
+ batch_size: 8
61
+ valid_batch_size: null
62
+ batch_bins: 1000000
63
+ valid_batch_bins: null
64
+ train_shape_file:
65
+ - exp/enh_stats_8k/train/speech_mix_shape
66
+ - exp/enh_stats_8k/train/speech_ref1_shape
67
+ - exp/enh_stats_8k/train/speech_ref2_shape
68
+ valid_shape_file:
69
+ - exp/enh_stats_8k/valid/speech_mix_shape
70
+ - exp/enh_stats_8k/valid/speech_ref1_shape
71
+ - exp/enh_stats_8k/valid/speech_ref2_shape
72
+ batch_type: folded
73
+ valid_batch_type: null
74
+ fold_length:
75
+ - 80000
76
+ - 80000
77
+ - 80000
78
+ sort_in_batch: descending
79
+ sort_batch: descending
80
+ multiple_iterator: false
81
+ chunk_length: 32000
82
+ chunk_shift_ratio: 0.5
83
+ num_cache_chunks: 1024
84
+ train_data_path_and_name_and_type:
85
+ - - dump/raw/tr_min_8k/wav.scp
86
+ - speech_mix
87
+ - sound
88
+ - - dump/raw/tr_min_8k/spk1.scp
89
+ - speech_ref1
90
+ - sound
91
+ - - dump/raw/tr_min_8k/spk2.scp
92
+ - speech_ref2
93
+ - sound
94
+ valid_data_path_and_name_and_type:
95
+ - - dump/raw/cv_min_8k/wav.scp
96
+ - speech_mix
97
+ - sound
98
+ - - dump/raw/cv_min_8k/spk1.scp
99
+ - speech_ref1
100
+ - sound
101
+ - - dump/raw/cv_min_8k/spk2.scp
102
+ - speech_ref2
103
+ - sound
104
+ allow_variable_data_keys: false
105
+ max_cache_size: 0.0
106
+ max_cache_fd: 32
107
+ valid_max_cache_size: null
108
+ optim: adam
109
+ optim_conf:
110
+ lr: 0.001
111
+ eps: 1.0e-08
112
+ weight_decay: 0
113
+ scheduler: reducelronplateau
114
+ scheduler_conf:
115
+ mode: min
116
+ factor: 0.5
117
+ patience: 1
118
+ init: xavier_uniform
119
+ model_conf:
120
+ loss_type: si_snr
121
+ use_preprocessor: false
122
+ encoder: conv
123
+ encoder_conf:
124
+ channel: 256
125
+ kernel_size: 20
126
+ stride: 10
127
+ separator: tcn
128
+ separator_conf:
129
+ num_spk: 2
130
+ layer: 8
131
+ stack: 4
132
+ bottleneck_dim: 256
133
+ hidden_dim: 512
134
+ kernel: 3
135
+ causal: false
136
+ norm_type: gLN
137
+ nonlinear: relu
138
+ decoder: conv
139
+ decoder_conf:
140
+ channel: 256
141
+ kernel_size: 20
142
+ stride: 10
143
+ required:
144
+ - output_dir
145
+ version: 0.9.7
146
+ distributed: false
exp/enh_train_enh_conv_tasnet_raw/images/backward_time.png ADDED
exp/enh_train_enh_conv_tasnet_raw/images/forward_time.png ADDED
exp/enh_train_enh_conv_tasnet_raw/images/iter_time.png ADDED
exp/enh_train_enh_conv_tasnet_raw/images/loss.png ADDED
exp/enh_train_enh_conv_tasnet_raw/images/lr_0.png ADDED
exp/enh_train_enh_conv_tasnet_raw/images/optim_step_time.png ADDED
exp/enh_train_enh_conv_tasnet_raw/images/si_snr.png ADDED
exp/enh_train_enh_conv_tasnet_raw/images/train_time.png ADDED
meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ espnet: 0.9.7
2
+ files:
3
+ model_file: exp/enh_train_enh_conv_tasnet_raw/66epoch.pth
4
+ python: "3.7.6 (default, Jan 8 2020, 19:59:22) \n[GCC 7.3.0]"
5
+ timestamp: 1612372579.804363
6
+ torch: 1.5.0
7
+ yaml_files:
8
+ train_config: exp/enh_train_enh_conv_tasnet_raw/config.yaml