lmxue committed on
Commit
add6d1c
1 Parent(s): 6aee3ae

Upload checkpoint

Browse files
args.json ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_path": {
3
+ "LJSpeech": "/home/datasets/LJSpeech-1.1",
4
+ },
5
+ "base_config": "config/base.json",
6
+ "dataset": [
7
+ "LJSpeech",
8
+ ],
9
+ "preprocess": {
10
+ "trim_silence": false,
11
+ "num_silent_frames": 8,
12
+ "trim_fft_size": 512,
13
+ "trim_hop_size": 128,
14
+ "trim_top_db": 30,
15
+ "extract_mel": true,
16
+ "extract_mcep": false,
17
+ "extract_pitch": true,
18
+ "extract_uv": true,
19
+ "pitch_norm": false,
20
+ "extract_audio": true,
21
+ "extract_label": false,
22
+ "pitch_extractor": "parselmouth",
23
+ "extract_energy": false,
24
+ "energy_norm": false,
25
+ "energy_extract_mode": "from_mel",
26
+ "extract_duration": false,
27
+ "mel_min_max_norm": false,
28
+ "mu_law_norm": false,
29
+ "extract_whisper_feature": false,
30
+ "extract_contentvec_feature": false,
31
+ "extract_mert_feature": false,
32
+ "extract_wenet_feature": false,
33
+ "n_mel": 80,
34
+ "win_size": 1024,
35
+ "hop_size": 256,
36
+ "sample_rate": 22050,
37
+ "n_fft": 1024,
38
+ "fmin": 0,
39
+ "fmax": 8000,
40
+ "min_level_db": -115,
41
+ "ref_level_db": 20,
42
+ "bits": 8,
43
+ "processed_dir": "processed_data",
44
+ "trimmed_wav_dir": "trimmed_wavs",
45
+ "wav_dir": "wavs",
46
+ "audio_dir": "audios",
47
+ "label_dir": "labels",
48
+ "mel_dir": "mels",
49
+ "mcep_dir": "mcep",
50
+ "dur_dir": "durs",
51
+ "lab_dir": "labs",
52
+ "wenet_dir": "wenet",
53
+ "contentvec_dir": "contentvec",
54
+ "pitch_dir": "pitches",
55
+ "energy_dir": "energys",
56
+ "uv_dir": "uvs",
57
+ "duration_dir": "duration",
58
+ "phone_seq_file": "phone_seq_file",
59
+ "file_lst": "file.lst",
60
+ "train_file": "train.json",
61
+ "valid_file": "test.json",
62
+ "spk2id": "spk2id.json",
63
+ "utt2spk": "utt2spk",
64
+ "emo2id": "emo2id.json",
65
+ "utt2emo": "utt2emo",
66
+ "use_phn_seq": false,
67
+ "use_lab": false,
68
+ "use_mel": true,
69
+ "use_wav": false,
70
+ "use_phone_pitch": false,
71
+ "use_log_scale_pitch": false,
72
+ "use_phone_energy": false,
73
+ "use_phone_duration": false,
74
+ "use_log_scale_energy": false,
75
+ "use_wenet": false,
76
+ "use_dur": false,
77
+ "use_spkid": false,
78
+ "use_emoid": false,
79
+ "use_frame_pitch": false,
80
+ "use_uv": true,
81
+ "use_frame_energy": false,
82
+ "use_frame_duration": false,
83
+ "use_audio": true,
84
+ "use_label": false,
85
+ "use_one_hot": false,
86
+ "data_augment": false,
87
+ "align_mel_duration": false,
88
+ "f0_min": 50,
89
+ "f0_max": 1100,
90
+ "pitch_bin": 256,
91
+ "pitch_max": 1100.0,
92
+ "pitch_min": 50.0,
93
+ "cut_mel_frame": 32,
94
+ "use_min_max_norm_mel": false,
95
+ },
96
+ "train": {
97
+ "ddp": false,
98
+ "random_seed": 970227,
99
+ "batch_size": 16,
100
+ "epochs": 50000,
101
+ "max_steps": 1000000,
102
+ "total_training_steps": 50000,
103
+ "save_summary_steps": 500,
104
+ "save_checkpoints_steps": 10000,
105
+ "valid_interval": 10000,
106
+ "keep_checkpoint_max": 15,
107
+ "multi_speaker_training": false,
108
+ "adamw": {
109
+ "lr": 0.0002,
110
+ "adam_b1": 0.8,
111
+ "adam_b2": 0.99,
112
+ },
113
+ "exponential_lr": {
114
+ "lr_decay": 0.999,
115
+ },
116
+ "criterions": [
117
+ "feature",
118
+ "discriminator",
119
+ "generator",
120
+ "mel",
121
+ "wav",
122
+ ],
123
+ },
124
+ "model_type": "GANVocoder",
125
+ "model": {
126
+ "generator": "hifigan",
127
+ "discriminators": [
128
+ "msd",
129
+ "mpd",
130
+ "msstftd",
131
+ "mscqtd",
132
+ ],
133
+ "hifigan": {
134
+ "resblock": "2",
135
+ "upsample_rates": [
136
+ 8,
137
+ 8,
138
+ 4,
139
+ ],
140
+ "upsample_kernel_sizes": [
141
+ 16,
142
+ 16,
143
+ 8,
144
+ ],
145
+ "upsample_initial_channel": 256,
146
+ "resblock_kernel_sizes": [
147
+ 3,
148
+ 5,
149
+ 7,
150
+ ],
151
+ "resblock_dilation_sizes": [
152
+ [
153
+ 1,
154
+ 2,
155
+ ],
156
+ [
157
+ 2,
158
+ 6,
159
+ ],
160
+ [
161
+ 3,
162
+ 12,
163
+ ],
164
+ ],
165
+ },
166
+ "mpd": {
167
+ "mpd_reshapes": [
168
+ 2,
169
+ 3,
170
+ 5,
171
+ 7,
172
+ 11,
173
+ ],
174
+ "use_spectral_norm": false,
175
+ "discriminator_channel_multi": 1,
176
+ },
177
+ "msstftd": {
178
+ "filters": 32,
179
+ },
180
+ "mscqtd": {
181
+ "hop_lengths": [
182
+ 512,
183
+ 256,
184
+ 256,
185
+ ],
186
+ "filters": 32,
187
+ "max_filters": 1024,
188
+ "filters_scale": 1,
189
+ "dilations": [
190
+ 1,
191
+ 2,
192
+ 4,
193
+ ],
194
+ "in_channels": 1,
195
+ "out_channels": 1,
196
+ "n_octaves": [
197
+ 9,
198
+ 9,
199
+ 9,
200
+ ],
201
+ "bins_per_octaves": [
202
+ 24,
203
+ 36,
204
+ 48,
205
+ ],
206
+ },
207
+ },
208
+ "exp_name": "hifigan",
209
+ }
checkpoint/epoch-0125_step-0097902_loss-1.074404/optimizer.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5d9b9b39ccf380e221a5a6d478a7ff25180d06de2bf950a8dae87acf0ed193a
3
+ size 276922053
checkpoint/epoch-0125_step-0097902_loss-1.074404/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e43d61b22f26605009fa3158fd5180e372478b2da60ef10ff01e5757f27507a
3
+ size 140532853
checkpoint/epoch-0125_step-0097902_loss-1.074404/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bd5cd54e288a2fd338b25270761631a6f1dc8a9341e57ae2ce4530f0f020b46
3
+ size 14663
checkpoint/epoch-0125_step-0097902_loss-1.074404/scheduler.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41d54eb84f01111fe50f4fa27b78a012dfbe425d9f708a28f54191fcab8bd569
3
+ size 627