Elesis committed on
Commit
fbdcf7a
1 Parent(s): 3e7e837

Upload 2 files

Browse files
Files changed (2) hide show
  1. 100epoch.pth +3 -0
  2. config.yaml +366 -0
100epoch.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9dd7d9ea126ecf48442581d9958543e3c46e0ed96db32a7d7e6ab98ce338f948
3
+ size 373248270
config.yaml ADDED
@@ -0,0 +1,366 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/finetune.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ drop_last_iter: false
5
+ dry_run: false
6
+ iterator_type: sequence
7
+ valid_iterator_type: null
8
+ output_dir: outputs/JR_auto_w_1/checkpoints
9
+ ngpu: 1
10
+ seed: 777
11
+ num_workers: 4
12
+ num_att_plot: 3
13
+ dist_backend: nccl
14
+ dist_init_method: env://
15
+ dist_world_size: null
16
+ dist_rank: null
17
+ local_rank: null
18
+ dist_master_addr: null
19
+ dist_master_port: null
20
+ dist_launcher: null
21
+ multiprocessing_distributed: false
22
+ unused_parameters: true
23
+ sharded_ddp: false
24
+ cudnn_enabled: true
25
+ cudnn_benchmark: false
26
+ cudnn_deterministic: false
27
+ collect_stats: false
28
+ write_collected_feats: false
29
+ max_epoch: 100
30
+ patience: null
31
+ val_scheduler_criterion:
32
+ - valid
33
+ - loss
34
+ early_stopping_criterion:
35
+ - valid
36
+ - loss
37
+ - min
38
+ best_model_criterion:
39
+ - - train
40
+ - total_count
41
+ - max
42
+ keep_nbest_models: 10
43
+ nbest_averaging_interval: 0
44
+ grad_clip: -1
45
+ grad_clip_type: 2.0
46
+ grad_noise: false
47
+ accum_grad: 1
48
+ no_forward_run: false
49
+ resume: true
50
+ train_dtype: float32
51
+ use_amp: false
52
+ log_interval: 50
53
+ use_matplotlib: true
54
+ use_tensorboard: true
55
+ create_graph_in_tensorboard: false
56
+ use_wandb: false
57
+ wandb_project: null
58
+ wandb_id: null
59
+ wandb_entity: null
60
+ wandb_name: null
61
+ wandb_model_log_interval: -1
62
+ detect_anomaly: false
63
+ pretrain_path: null
64
+ init_param:
65
+ - pretrained/pretrained.pth:tts:tts
66
+ ignore_init_mismatch: false
67
+ freeze_param: []
68
+ num_iters_per_epoch: 1000
69
+ batch_size: 20
70
+ valid_batch_size: null
71
+ batch_bins: 1000000
72
+ valid_batch_bins: null
73
+ train_shape_file:
74
+ - outputs/JR_auto_w_1/stats/train/text_shape.phn
75
+ - outputs/JR_auto_w_1/stats/train/speech_shape
76
+ valid_shape_file:
77
+ - outputs/JR_auto_w_1/stats/valid/text_shape.phn
78
+ - outputs/JR_auto_w_1/stats/valid/speech_shape
79
+ batch_type: numel
80
+ valid_batch_type: null
81
+ fold_length:
82
+ - 150
83
+ - 409600
84
+ sort_in_batch: descending
85
+ shuffle_within_batch: false
86
+ sort_batch: descending
87
+ multiple_iterator: false
88
+ chunk_length: 500
89
+ chunk_shift_ratio: 0.5
90
+ num_cache_chunks: 1024
91
+ chunk_excluded_key_prefixes: []
92
+ train_data_path_and_name_and_type:
93
+ - - outputs/JR_auto_w_1/dump/train/text
94
+ - text
95
+ - text
96
+ - - outputs/JR_auto_w_1/dump/train/wav.scp
97
+ - speech
98
+ - sound
99
+ valid_data_path_and_name_and_type:
100
+ - - outputs/JR_auto_w_1/dump/valid/text
101
+ - text
102
+ - text
103
+ - - outputs/JR_auto_w_1/dump/valid/wav.scp
104
+ - speech
105
+ - sound
106
+ allow_variable_data_keys: false
107
+ max_cache_size: 0.0
108
+ max_cache_fd: 32
109
+ valid_max_cache_size: null
110
+ exclude_weight_decay: false
111
+ exclude_weight_decay_conf: {}
112
+ optim: adamw
113
+ optim_conf:
114
+ lr: 0.0001
115
+ betas:
116
+ - 0.8
117
+ - 0.99
118
+ eps: 1.0e-09
119
+ weight_decay: 0.0
120
+ scheduler: exponentiallr
121
+ scheduler_conf:
122
+ gamma: 0.999875
123
+ optim2: adamw
124
+ optim2_conf:
125
+ lr: 0.0001
126
+ betas:
127
+ - 0.8
128
+ - 0.99
129
+ eps: 1.0e-09
130
+ weight_decay: 0.0
131
+ scheduler2: exponentiallr
132
+ scheduler2_conf:
133
+ gamma: 0.999875
134
+ generator_first: false
135
+ token_list:
136
+ - <blank>
137
+ - <unk>
138
+ - a
139
+ - o
140
+ - i
141
+ - '['
142
+ - '#'
143
+ - u
144
+ - ']'
145
+ - e
146
+ - k
147
+ - n
148
+ - t
149
+ - r
150
+ - s
151
+ - N
152
+ - m
153
+ - _
154
+ - sh
155
+ - d
156
+ - g
157
+ - ^
158
+ - $
159
+ - w
160
+ - cl
161
+ - h
162
+ - y
163
+ - b
164
+ - j
165
+ - ts
166
+ - ch
167
+ - z
168
+ - p
169
+ - f
170
+ - ky
171
+ - ry
172
+ - gy
173
+ - hy
174
+ - ny
175
+ - by
176
+ - my
177
+ - py
178
+ - v
179
+ - dy
180
+ - '?'
181
+ - ty
182
+ - <sos/eos>
183
+ odim: null
184
+ model_conf: {}
185
+ use_preprocessor: true
186
+ token_type: phn
187
+ bpemodel: null
188
+ non_linguistic_symbols: null
189
+ cleaner: jaconv
190
+ g2p: pyopenjtalk_prosody
191
+ feats_extract: linear_spectrogram
192
+ feats_extract_conf:
193
+ n_fft: 2048
194
+ hop_length: 512
195
+ win_length: null
196
+ normalize: null
197
+ normalize_conf: {}
198
+ tts: vits
199
+ tts_conf:
200
+ generator_type: vits_generator
201
+ generator_params:
202
+ hidden_channels: 192
203
+ spks: -1
204
+ global_channels: -1
205
+ segment_size: 32
206
+ text_encoder_attention_heads: 2
207
+ text_encoder_ffn_expand: 4
208
+ text_encoder_blocks: 6
209
+ text_encoder_positionwise_layer_type: conv1d
210
+ text_encoder_positionwise_conv_kernel_size: 3
211
+ text_encoder_positional_encoding_layer_type: rel_pos
212
+ text_encoder_self_attention_layer_type: rel_selfattn
213
+ text_encoder_activation_type: swish
214
+ text_encoder_normalize_before: true
215
+ text_encoder_dropout_rate: 0.1
216
+ text_encoder_positional_dropout_rate: 0.0
217
+ text_encoder_attention_dropout_rate: 0.1
218
+ use_macaron_style_in_text_encoder: true
219
+ use_conformer_conv_in_text_encoder: false
220
+ text_encoder_conformer_kernel_size: -1
221
+ decoder_kernel_size: 7
222
+ decoder_channels: 512
223
+ decoder_upsample_scales:
224
+ - 8
225
+ - 8
226
+ - 2
227
+ - 2
228
+ - 2
229
+ decoder_upsample_kernel_sizes:
230
+ - 16
231
+ - 16
232
+ - 4
233
+ - 4
234
+ - 4
235
+ decoder_resblock_kernel_sizes:
236
+ - 3
237
+ - 7
238
+ - 11
239
+ decoder_resblock_dilations:
240
+ - - 1
241
+ - 3
242
+ - 5
243
+ - - 1
244
+ - 3
245
+ - 5
246
+ - - 1
247
+ - 3
248
+ - 5
249
+ use_weight_norm_in_decoder: true
250
+ posterior_encoder_kernel_size: 5
251
+ posterior_encoder_layers: 16
252
+ posterior_encoder_stacks: 1
253
+ posterior_encoder_base_dilation: 1
254
+ posterior_encoder_dropout_rate: 0.0
255
+ use_weight_norm_in_posterior_encoder: true
256
+ flow_flows: 4
257
+ flow_kernel_size: 5
258
+ flow_base_dilation: 1
259
+ flow_layers: 4
260
+ flow_dropout_rate: 0.0
261
+ use_weight_norm_in_flow: true
262
+ use_only_mean_in_flow: true
263
+ stochastic_duration_predictor_kernel_size: 3
264
+ stochastic_duration_predictor_dropout_rate: 0.5
265
+ stochastic_duration_predictor_flows: 4
266
+ stochastic_duration_predictor_dds_conv_layers: 3
267
+ vocabs: 47
268
+ aux_channels: 1025
269
+ discriminator_type: hifigan_multi_scale_multi_period_discriminator
270
+ discriminator_params:
271
+ scales: 1
272
+ scale_downsample_pooling: AvgPool1d
273
+ scale_downsample_pooling_params:
274
+ kernel_size: 4
275
+ stride: 2
276
+ padding: 2
277
+ scale_discriminator_params:
278
+ in_channels: 1
279
+ out_channels: 1
280
+ kernel_sizes:
281
+ - 15
282
+ - 41
283
+ - 5
284
+ - 3
285
+ channels: 128
286
+ max_downsample_channels: 1024
287
+ max_groups: 16
288
+ bias: true
289
+ downsample_scales:
290
+ - 2
291
+ - 2
292
+ - 4
293
+ - 4
294
+ - 1
295
+ nonlinear_activation: LeakyReLU
296
+ nonlinear_activation_params:
297
+ negative_slope: 0.1
298
+ use_weight_norm: true
299
+ use_spectral_norm: false
300
+ follow_official_norm: false
301
+ periods:
302
+ - 2
303
+ - 3
304
+ - 5
305
+ - 7
306
+ - 11
307
+ period_discriminator_params:
308
+ in_channels: 1
309
+ out_channels: 1
310
+ kernel_sizes:
311
+ - 5
312
+ - 3
313
+ channels: 32
314
+ downsample_scales:
315
+ - 3
316
+ - 3
317
+ - 3
318
+ - 3
319
+ - 1
320
+ max_downsample_channels: 1024
321
+ bias: true
322
+ nonlinear_activation: LeakyReLU
323
+ nonlinear_activation_params:
324
+ negative_slope: 0.1
325
+ use_weight_norm: true
326
+ use_spectral_norm: false
327
+ generator_adv_loss_params:
328
+ average_by_discriminators: false
329
+ loss_type: mse
330
+ discriminator_adv_loss_params:
331
+ average_by_discriminators: false
332
+ loss_type: mse
333
+ feat_match_loss_params:
334
+ average_by_discriminators: false
335
+ average_by_layers: false
336
+ include_final_outputs: true
337
+ mel_loss_params:
338
+ fs: 44100
339
+ n_fft: 2048
340
+ hop_length: 512
341
+ win_length: null
342
+ window: hann
343
+ n_mels: 80
344
+ fmin: 0
345
+ fmax: null
346
+ log_base: null
347
+ lambda_adv: 1.0
348
+ lambda_mel: 45.0
349
+ lambda_feat_match: 2.0
350
+ lambda_dur: 1.0
351
+ lambda_kl: 1.0
352
+ sampling_rate: 44100
353
+ cache_generator_outputs: true
354
+ pitch_extract: null
355
+ pitch_extract_conf: {}
356
+ pitch_normalize: null
357
+ pitch_normalize_conf: {}
358
+ energy_extract: null
359
+ energy_extract_conf: {}
360
+ energy_normalize: null
361
+ energy_normalize_conf: {}
362
+ required:
363
+ - output_dir
364
+ - token_list
365
+ version: '202308'
366
+ distributed: false