ESPnet
multilingual
audio
codec
ftshijt committed on
Commit
8efaae6
1 Parent(s): 66d5082

Update model

Browse files
Files changed (29) hide show
  1. README.md +322 -3
  2. exp/codec_mls_english_encodec_large_v1.1/186epoch.pth +3 -0
  3. exp/codec_mls_english_encodec_large_v1.1/config.yaml +247 -0
  4. exp/codec_mls_english_encodec_large_v1.1/images/adv_loss.png +0 -0
  5. exp/codec_mls_english_encodec_large_v1.1/images/codec_commit_loss.png +0 -0
  6. exp/codec_mls_english_encodec_large_v1.1/images/codec_loss.png +0 -0
  7. exp/codec_mls_english_encodec_large_v1.1/images/codec_quantization_loss.png +0 -0
  8. exp/codec_mls_english_encodec_large_v1.1/images/discriminator_backward_time.png +0 -0
  9. exp/codec_mls_english_encodec_large_v1.1/images/discriminator_forward_time.png +0 -0
  10. exp/codec_mls_english_encodec_large_v1.1/images/discriminator_loss.png +0 -0
  11. exp/codec_mls_english_encodec_large_v1.1/images/discriminator_optim_step_time.png +0 -0
  12. exp/codec_mls_english_encodec_large_v1.1/images/discriminator_train_time.png +0 -0
  13. exp/codec_mls_english_encodec_large_v1.1/images/fake_loss.png +0 -0
  14. exp/codec_mls_english_encodec_large_v1.1/images/feat_match_loss.png +0 -0
  15. exp/codec_mls_english_encodec_large_v1.1/images/generator_backward_time.png +0 -0
  16. exp/codec_mls_english_encodec_large_v1.1/images/generator_forward_time.png +0 -0
  17. exp/codec_mls_english_encodec_large_v1.1/images/generator_optim_step_time.png +0 -0
  18. exp/codec_mls_english_encodec_large_v1.1/images/generator_train_time.png +0 -0
  19. exp/codec_mls_english_encodec_large_v1.1/images/gpu_max_cached_mem_GB.png +0 -0
  20. exp/codec_mls_english_encodec_large_v1.1/images/iter_time.png +0 -0
  21. exp/codec_mls_english_encodec_large_v1.1/images/loss.png +0 -0
  22. exp/codec_mls_english_encodec_large_v1.1/images/mel_loss.png +0 -0
  23. exp/codec_mls_english_encodec_large_v1.1/images/mel_loss_real.png +0 -0
  24. exp/codec_mls_english_encodec_large_v1.1/images/optim0_lr0.png +0 -0
  25. exp/codec_mls_english_encodec_large_v1.1/images/optim1_lr0.png +0 -0
  26. exp/codec_mls_english_encodec_large_v1.1/images/real_loss.png +0 -0
  27. exp/codec_mls_english_encodec_large_v1.1/images/reconstruct_loss.png +0 -0
  28. exp/codec_mls_english_encodec_large_v1.1/images/train_time.png +0 -0
  29. meta.yaml +8 -0
README.md CHANGED
@@ -1,3 +1,322 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - codec
6
+ language: multilingual
7
+ datasets:
8
+ - amuse
9
+ license: cc-by-4.0
10
+ ---
11
+
12
+ ## ESPnet2 Codec model
13
+
14
+ ### `espnet/mls-english_encodec_16k`
15
+
16
+ This model was trained by ftshijt using the amuse recipe in [espnet](https://github.com/espnet/espnet/).
17
+
18
+ ### Demo: How to use in ESPnet2
19
+
20
+ Follow the [ESPnet installation instructions](https://espnet.github.io/espnet/installation.html)
21
+ if you haven't done that already.
22
+
23
+ ```bash
24
+ cd espnet
25
+ git checkout 9baec3a7b10b784cb721849e19caed19e8ac45bc
26
+ pip install -e .
27
+ cd egs2/amuse/codec1
28
+ ./run.sh --skip_data_prep false --skip_train true --download_model espnet/mls-english_encodec_16k
29
+ ```
30
+
31
+
32
+
33
+ ## Codec config
34
+
35
+ <details><summary>expand</summary>
36
+
37
+ ```
38
+ config: conf/train_encodec_large_v1.1.yaml
39
+ print_config: false
40
+ log_level: INFO
41
+ drop_last_iter: false
42
+ dry_run: false
43
+ iterator_type: chunk
44
+ valid_iterator_type: null
45
+ output_dir: exp/codec_mls_english_encodec_large_v1.1
46
+ ngpu: 1
47
+ seed: 777
48
+ num_workers: 1
49
+ num_att_plot: 0
50
+ dist_backend: nccl
51
+ dist_init_method: env://
52
+ dist_world_size: 2
53
+ dist_rank: 0
54
+ local_rank: 0
55
+ dist_master_addr: localhost
56
+ dist_master_port: 58245
57
+ dist_launcher: null
58
+ multiprocessing_distributed: true
59
+ unused_parameters: true
60
+ sharded_ddp: false
61
+ use_deepspeed: false
62
+ deepspeed_config: null
63
+ cudnn_enabled: true
64
+ cudnn_benchmark: false
65
+ cudnn_deterministic: false
66
+ use_tf32: false
67
+ collect_stats: false
68
+ write_collected_feats: false
69
+ max_epoch: 360
70
+ patience: null
71
+ val_scheduler_criterion:
72
+ - valid
73
+ - loss
74
+ early_stopping_criterion:
75
+ - valid
76
+ - loss
77
+ - min
78
+ best_model_criterion:
79
+ - - valid
80
+ - mel_loss
81
+ - min
82
+ - - train
83
+ - mel_loss
84
+ - min
85
+ - - train
86
+ - total_count
87
+ - max
88
+ keep_nbest_models: 5
89
+ nbest_averaging_interval: 0
90
+ grad_clip: -1
91
+ grad_clip_type: 2.0
92
+ grad_noise: false
93
+ accum_grad: 1
94
+ no_forward_run: false
95
+ resume: true
96
+ train_dtype: float32
97
+ use_amp: false
98
+ log_interval: 50
99
+ use_matplotlib: true
100
+ use_tensorboard: true
101
+ create_graph_in_tensorboard: false
102
+ use_wandb: false
103
+ wandb_project: null
104
+ wandb_id: null
105
+ wandb_entity: null
106
+ wandb_name: null
107
+ wandb_model_log_interval: -1
108
+ detect_anomaly: false
109
+ use_adapter: false
110
+ adapter: lora
111
+ save_strategy: all
112
+ adapter_conf: {}
113
+ pretrain_path: null
114
+ init_param: []
115
+ ignore_init_mismatch: false
116
+ freeze_param: []
117
+ num_iters_per_epoch: 5000
118
+ batch_size: 128
119
+ valid_batch_size: null
120
+ batch_bins: 1000000
121
+ valid_batch_bins: null
122
+ train_shape_file:
123
+ - exp/codec_stats_mls_english_raw/train/audio_shape
124
+ valid_shape_file:
125
+ - exp/codec_stats_mls_english_raw/valid/audio_shape
126
+ batch_type: unsorted
127
+ valid_batch_type: null
128
+ fold_length:
129
+ - 256000
130
+ sort_in_batch: descending
131
+ shuffle_within_batch: false
132
+ sort_batch: descending
133
+ multiple_iterator: false
134
+ chunk_length: 32000
135
+ chunk_shift_ratio: 0.5
136
+ num_cache_chunks: 128
137
+ chunk_excluded_key_prefixes: []
138
+ chunk_default_fs: null
139
+ chunk_max_abs_length: null
140
+ chunk_discard_short_samples: true
141
+ train_data_path_and_name_and_type:
142
+ - - dump/raw/mls_english/wav.scp
143
+ - audio
144
+ - kaldi_ark
145
+ valid_data_path_and_name_and_type:
146
+ - - dump/raw/dev-small/wav.scp
147
+ - audio
148
+ - kaldi_ark
149
+ multi_task_dataset: false
150
+ allow_variable_data_keys: false
151
+ max_cache_size: 0.0
152
+ max_cache_fd: 32
153
+ allow_multi_rates: false
154
+ valid_max_cache_size: null
155
+ exclude_weight_decay: false
156
+ exclude_weight_decay_conf: {}
157
+ optim: adamw
158
+ optim_conf:
159
+ lr: 0.0002
160
+ betas:
161
+ - 0.5
162
+ - 0.9
163
+ eps: 1.0e-09
164
+ weight_decay: 0.0
165
+ scheduler: exponentiallr
166
+ scheduler_conf:
167
+ gamma: 0.999875
168
+ optim2: adamw
169
+ optim2_conf:
170
+ lr: 0.0002
171
+ betas:
172
+ - 0.5
173
+ - 0.9
174
+ eps: 1.0e-09
175
+ weight_decay: 0.0
176
+ scheduler2: exponentiallr
177
+ scheduler2_conf:
178
+ gamma: 0.999875
179
+ generator_first: true
180
+ skip_discriminator_prob: 0.3
181
+ model_conf: {}
182
+ use_preprocessor: true
183
+ codec: encodec
184
+ codec_conf:
185
+ sampling_rate: 16000
186
+ generator_params:
187
+ hidden_dim: 512
188
+ encdec_channels: 1
189
+ encdec_n_filters: 32
190
+ encdec_n_residual_layers: 3
191
+ encdec_ratios:
192
+ - 8
193
+ - 5
194
+ - 4
195
+ - 2
196
+ encdec_activation: ELU
197
+ encdec_activation_params:
198
+ alpha: 1.0
199
+ encdec_norm: weight_norm
200
+ encdec_kernel_size: 7
201
+ encdec_residual_kernel_size: 7
202
+ encdec_last_kernel_size: 7
203
+ encdec_dilation_base: 2
204
+ encdec_causal: false
205
+ encdec_pad_mode: reflect
206
+ encdec_true_skip: false
207
+ encdec_compress: 2
208
+ encdec_lstm: 2
209
+ decoder_trim_right_ratio: 1.0
210
+ decoder_final_activation: null
211
+ decoder_final_activation_params: null
212
+ quantizer_n_q: 32
213
+ quantizer_bins: 1024
214
+ quantizer_decay: 0.99
215
+ quantizer_kmeans_init: true
216
+ quantizer_kmeans_iters: 50
217
+ quantizer_threshold_ema_dead_code: 2
218
+ quantizer_target_bandwidth:
219
+ - 2
220
+ - 4
221
+ - 8
222
+ - 16
223
+ - 32
224
+ sample_rate: 16000
225
+ discriminator_params:
226
+ msstft_discriminator_params:
227
+ filters: 32
228
+ in_channels: 1
229
+ out_channels: 1
230
+ norm: weight_norm
231
+ n_ffts:
232
+ - 1024
233
+ - 2048
234
+ - 512
235
+ - 256
236
+ - 128
237
+ hop_lengths:
238
+ - 256
239
+ - 512
240
+ - 128
241
+ - 64
242
+ - 32
243
+ win_lengths:
244
+ - 1024
245
+ - 2048
246
+ - 512
247
+ - 256
248
+ - 128
249
+ activation: LeakyReLU
250
+ activation_params:
251
+ negative_slope: 0.3
252
+ generator_adv_loss_params:
253
+ average_by_discriminators: false
254
+ loss_type: mse
255
+ discriminator_adv_loss_params:
256
+ average_by_discriminators: false
257
+ loss_type: mse
258
+ use_feat_match_loss: true
259
+ feat_match_loss_params:
260
+ average_by_discriminators: false
261
+ average_by_layers: false
262
+ include_final_outputs: true
263
+ use_mel_loss: true
264
+ mel_loss_params:
265
+ range_start: 6
266
+ range_end: 11
267
+ window: hann
268
+ n_mels: 80
269
+ fmin: 0
270
+ fmax: null
271
+ log_base: null
272
+ fs: 16000
273
+ lambda_quantization: 1.0
274
+ lambda_commit: 1.0
275
+ lambda_reconstruct: 1.0
276
+ lambda_adv: 1.0
277
+ lambda_mel: 45.0
278
+ lambda_feat_match: 2.0
279
+ cache_generator_outputs: true
280
+ use_loss_balancer: false
281
+ required:
282
+ - output_dir
283
+ version: '202402'
284
+ distributed: true
285
+ ```
286
+
287
+ </details>
288
+
289
+
290
+
291
+ ### Citing ESPnet
292
+
293
+ ```bibtex
294
+ @inproceedings{watanabe2018espnet,
295
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
296
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
297
+ year={2018},
298
+ booktitle={Proceedings of Interspeech},
299
+ pages={2207--2211},
300
+ doi={10.21437/Interspeech.2018-1456},
301
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
302
+ }
303
+
304
+
305
+
306
+
307
+
308
+
309
+ ```
310
+
311
+ or arXiv:
312
+
313
+ ```bibtex
314
+ @misc{watanabe2018espnet,
315
+ title={ESPnet: End-to-End Speech Processing Toolkit},
316
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
317
+ year={2018},
318
+ eprint={1804.00015},
319
+ archivePrefix={arXiv},
320
+ primaryClass={cs.CL}
321
+ }
322
+ ```
exp/codec_mls_english_encodec_large_v1.1/186epoch.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b08f6601ceaf0f6bba4bcad4606e09b9204bafa82a64d52181968bf0f8b9216b
3
+ size 215644121
exp/codec_mls_english_encodec_large_v1.1/config.yaml ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/train_encodec_large_v1.1.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ drop_last_iter: false
5
+ dry_run: false
6
+ iterator_type: chunk
7
+ valid_iterator_type: null
8
+ output_dir: exp/codec_mls_english_encodec_large_v1.1
9
+ ngpu: 1
10
+ seed: 777
11
+ num_workers: 1
12
+ num_att_plot: 0
13
+ dist_backend: nccl
14
+ dist_init_method: env://
15
+ dist_world_size: 2
16
+ dist_rank: 0
17
+ local_rank: 0
18
+ dist_master_addr: localhost
19
+ dist_master_port: 58245
20
+ dist_launcher: null
21
+ multiprocessing_distributed: true
22
+ unused_parameters: true
23
+ sharded_ddp: false
24
+ use_deepspeed: false
25
+ deepspeed_config: null
26
+ cudnn_enabled: true
27
+ cudnn_benchmark: false
28
+ cudnn_deterministic: false
29
+ use_tf32: false
30
+ collect_stats: false
31
+ write_collected_feats: false
32
+ max_epoch: 360
33
+ patience: null
34
+ val_scheduler_criterion:
35
+ - valid
36
+ - loss
37
+ early_stopping_criterion:
38
+ - valid
39
+ - loss
40
+ - min
41
+ best_model_criterion:
42
+ - - valid
43
+ - mel_loss
44
+ - min
45
+ - - train
46
+ - mel_loss
47
+ - min
48
+ - - train
49
+ - total_count
50
+ - max
51
+ keep_nbest_models: 5
52
+ nbest_averaging_interval: 0
53
+ grad_clip: -1
54
+ grad_clip_type: 2.0
55
+ grad_noise: false
56
+ accum_grad: 1
57
+ no_forward_run: false
58
+ resume: true
59
+ train_dtype: float32
60
+ use_amp: false
61
+ log_interval: 50
62
+ use_matplotlib: true
63
+ use_tensorboard: true
64
+ create_graph_in_tensorboard: false
65
+ use_wandb: false
66
+ wandb_project: null
67
+ wandb_id: null
68
+ wandb_entity: null
69
+ wandb_name: null
70
+ wandb_model_log_interval: -1
71
+ detect_anomaly: false
72
+ use_adapter: false
73
+ adapter: lora
74
+ save_strategy: all
75
+ adapter_conf: {}
76
+ pretrain_path: null
77
+ init_param: []
78
+ ignore_init_mismatch: false
79
+ freeze_param: []
80
+ num_iters_per_epoch: 5000
81
+ batch_size: 128
82
+ valid_batch_size: null
83
+ batch_bins: 1000000
84
+ valid_batch_bins: null
85
+ train_shape_file:
86
+ - exp/codec_stats_mls_english_raw/train/audio_shape
87
+ valid_shape_file:
88
+ - exp/codec_stats_mls_english_raw/valid/audio_shape
89
+ batch_type: unsorted
90
+ valid_batch_type: null
91
+ fold_length:
92
+ - 256000
93
+ sort_in_batch: descending
94
+ shuffle_within_batch: false
95
+ sort_batch: descending
96
+ multiple_iterator: false
97
+ chunk_length: 32000
98
+ chunk_shift_ratio: 0.5
99
+ num_cache_chunks: 128
100
+ chunk_excluded_key_prefixes: []
101
+ chunk_default_fs: null
102
+ chunk_max_abs_length: null
103
+ chunk_discard_short_samples: true
104
+ train_data_path_and_name_and_type:
105
+ - - dump/raw/mls_english/wav.scp
106
+ - audio
107
+ - kaldi_ark
108
+ valid_data_path_and_name_and_type:
109
+ - - dump/raw/dev-small/wav.scp
110
+ - audio
111
+ - kaldi_ark
112
+ multi_task_dataset: false
113
+ allow_variable_data_keys: false
114
+ max_cache_size: 0.0
115
+ max_cache_fd: 32
116
+ allow_multi_rates: false
117
+ valid_max_cache_size: null
118
+ exclude_weight_decay: false
119
+ exclude_weight_decay_conf: {}
120
+ optim: adamw
121
+ optim_conf:
122
+ lr: 0.0002
123
+ betas:
124
+ - 0.5
125
+ - 0.9
126
+ eps: 1.0e-09
127
+ weight_decay: 0.0
128
+ scheduler: exponentiallr
129
+ scheduler_conf:
130
+ gamma: 0.999875
131
+ optim2: adamw
132
+ optim2_conf:
133
+ lr: 0.0002
134
+ betas:
135
+ - 0.5
136
+ - 0.9
137
+ eps: 1.0e-09
138
+ weight_decay: 0.0
139
+ scheduler2: exponentiallr
140
+ scheduler2_conf:
141
+ gamma: 0.999875
142
+ generator_first: true
143
+ skip_discriminator_prob: 0.3
144
+ model_conf: {}
145
+ use_preprocessor: true
146
+ codec: encodec
147
+ codec_conf:
148
+ sampling_rate: 16000
149
+ generator_params:
150
+ hidden_dim: 512
151
+ encdec_channels: 1
152
+ encdec_n_filters: 32
153
+ encdec_n_residual_layers: 3
154
+ encdec_ratios:
155
+ - 8
156
+ - 5
157
+ - 4
158
+ - 2
159
+ encdec_activation: ELU
160
+ encdec_activation_params:
161
+ alpha: 1.0
162
+ encdec_norm: weight_norm
163
+ encdec_kernel_size: 7
164
+ encdec_residual_kernel_size: 7
165
+ encdec_last_kernel_size: 7
166
+ encdec_dilation_base: 2
167
+ encdec_causal: false
168
+ encdec_pad_mode: reflect
169
+ encdec_true_skip: false
170
+ encdec_compress: 2
171
+ encdec_lstm: 2
172
+ decoder_trim_right_ratio: 1.0
173
+ decoder_final_activation: null
174
+ decoder_final_activation_params: null
175
+ quantizer_n_q: 32
176
+ quantizer_bins: 1024
177
+ quantizer_decay: 0.99
178
+ quantizer_kmeans_init: true
179
+ quantizer_kmeans_iters: 50
180
+ quantizer_threshold_ema_dead_code: 2
181
+ quantizer_target_bandwidth:
182
+ - 2
183
+ - 4
184
+ - 8
185
+ - 16
186
+ - 32
187
+ sample_rate: 16000
188
+ discriminator_params:
189
+ msstft_discriminator_params:
190
+ filters: 32
191
+ in_channels: 1
192
+ out_channels: 1
193
+ norm: weight_norm
194
+ n_ffts:
195
+ - 1024
196
+ - 2048
197
+ - 512
198
+ - 256
199
+ - 128
200
+ hop_lengths:
201
+ - 256
202
+ - 512
203
+ - 128
204
+ - 64
205
+ - 32
206
+ win_lengths:
207
+ - 1024
208
+ - 2048
209
+ - 512
210
+ - 256
211
+ - 128
212
+ activation: LeakyReLU
213
+ activation_params:
214
+ negative_slope: 0.3
215
+ generator_adv_loss_params:
216
+ average_by_discriminators: false
217
+ loss_type: mse
218
+ discriminator_adv_loss_params:
219
+ average_by_discriminators: false
220
+ loss_type: mse
221
+ use_feat_match_loss: true
222
+ feat_match_loss_params:
223
+ average_by_discriminators: false
224
+ average_by_layers: false
225
+ include_final_outputs: true
226
+ use_mel_loss: true
227
+ mel_loss_params:
228
+ range_start: 6
229
+ range_end: 11
230
+ window: hann
231
+ n_mels: 80
232
+ fmin: 0
233
+ fmax: null
234
+ log_base: null
235
+ fs: 16000
236
+ lambda_quantization: 1.0
237
+ lambda_commit: 1.0
238
+ lambda_reconstruct: 1.0
239
+ lambda_adv: 1.0
240
+ lambda_mel: 45.0
241
+ lambda_feat_match: 2.0
242
+ cache_generator_outputs: true
243
+ use_loss_balancer: false
244
+ required:
245
+ - output_dir
246
+ version: '202402'
247
+ distributed: true
exp/codec_mls_english_encodec_large_v1.1/images/adv_loss.png ADDED
exp/codec_mls_english_encodec_large_v1.1/images/codec_commit_loss.png ADDED
exp/codec_mls_english_encodec_large_v1.1/images/codec_loss.png ADDED
exp/codec_mls_english_encodec_large_v1.1/images/codec_quantization_loss.png ADDED
exp/codec_mls_english_encodec_large_v1.1/images/discriminator_backward_time.png ADDED
exp/codec_mls_english_encodec_large_v1.1/images/discriminator_forward_time.png ADDED
exp/codec_mls_english_encodec_large_v1.1/images/discriminator_loss.png ADDED
exp/codec_mls_english_encodec_large_v1.1/images/discriminator_optim_step_time.png ADDED
exp/codec_mls_english_encodec_large_v1.1/images/discriminator_train_time.png ADDED
exp/codec_mls_english_encodec_large_v1.1/images/fake_loss.png ADDED
exp/codec_mls_english_encodec_large_v1.1/images/feat_match_loss.png ADDED
exp/codec_mls_english_encodec_large_v1.1/images/generator_backward_time.png ADDED
exp/codec_mls_english_encodec_large_v1.1/images/generator_forward_time.png ADDED
exp/codec_mls_english_encodec_large_v1.1/images/generator_optim_step_time.png ADDED
exp/codec_mls_english_encodec_large_v1.1/images/generator_train_time.png ADDED
exp/codec_mls_english_encodec_large_v1.1/images/gpu_max_cached_mem_GB.png ADDED
exp/codec_mls_english_encodec_large_v1.1/images/iter_time.png ADDED
exp/codec_mls_english_encodec_large_v1.1/images/loss.png ADDED
exp/codec_mls_english_encodec_large_v1.1/images/mel_loss.png ADDED
exp/codec_mls_english_encodec_large_v1.1/images/mel_loss_real.png ADDED
exp/codec_mls_english_encodec_large_v1.1/images/optim0_lr0.png ADDED
exp/codec_mls_english_encodec_large_v1.1/images/optim1_lr0.png ADDED
exp/codec_mls_english_encodec_large_v1.1/images/real_loss.png ADDED
exp/codec_mls_english_encodec_large_v1.1/images/reconstruct_loss.png ADDED
exp/codec_mls_english_encodec_large_v1.1/images/train_time.png ADDED
meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ espnet: '202402'
2
+ files:
3
+ model_file: exp/codec_mls_english_encodec_large_v1.1/186epoch.pth
4
+ python: 3.10.13 | packaged by conda-forge | (main, Dec 23 2023, 15:26:55) [GCC 12.3.0]
5
+ timestamp: 1725493804.823128
6
+ torch: 2.5.0.dev20240825+cu124
7
+ yaml_files:
8
+ train_config: exp/codec_mls_english_encodec_large_v1.1/config.yaml