justyoung committed on
Commit
b63cecd
1 Parent(s): ff9e390

Upload 20 files

Browse files
assets/Synthesizer_inputs.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c5ae8cd034b02bbc325939e9b9debbedb43ee9d71a654daaff8804815bd957d
3
+ size 122495
assets/hubert/.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ *
2
+ !.gitignore
3
+ !hubert_inputs.pth
assets/hubert/hubert_inputs.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbd4741d4be8a71333170c0df5320f605a9d210b96547b391555da078167861f
3
+ size 169434
assets/indices/.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *
2
+ !.gitignore
assets/pretrained/.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *
2
+ !.gitignore
assets/pretrained_v2/.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *
2
+ !.gitignore
assets/rmvpe/.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ *
2
+ !.gitignore
3
+ !rmvpe_inputs.pth
assets/rmvpe/rmvpe_inputs.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:339fcb7e1476b302e9aecef4a951e918c20852b2e871de5eea13b06e554e0a3a
3
+ size 33527
assets/uvr5_weights/.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *
2
+ !.gitignore
assets/weights/.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *
2
+ !.gitignore
configs/config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"pth_path": "assets/weights/kikiV1.pth", "index_path": "logs/kikiV1.index", "sg_hostapi": "MME", "sg_wasapi_exclusive": false, "sg_input_device": "VoiceMeeter Output (VB-Audio Vo", "sg_output_device": "VoiceMeeter Input (VB-Audio Voi", "sr_type": "sr_device", "threhold": -60.0, "pitch": 12.0, "formant": 0.0, "rms_mix_rate": 0.5, "index_rate": 0.0, "block_time": 0.15, "crossfade_length": 0.08, "extra_time": 2.0, "n_cpu": 4.0, "use_jit": false, "use_pv": false, "f0method": "fcpe"}
configs/config.py ADDED
@@ -0,0 +1,264 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import os
3
+ import sys
4
+ import json
5
+ import shutil
6
+ from multiprocessing import cpu_count
7
+
8
+ import torch
9
+
10
# Optional Intel XPU support: if intel_extension_for_pytorch is installed and
# an XPU device is present, initialize the project's IPEX shims.  Any failure
# (package missing, no device, shim import error) is deliberately swallowed so
# importing this module never breaks on non-Intel machines.
try:
    import intel_extension_for_pytorch as ipex  # pylint: disable=import-error, unused-import

    if torch.xpu.is_available():
        from infer.modules.ipex import ipex_init

        ipex_init()
except Exception:  # pylint: disable=broad-exception-caught
    pass
import logging

logger = logging.getLogger(__name__)


# Per-version training config files, resolved relative to configs/ and copied
# into configs/inuse/ on first use (see Config.load_config_json below).
version_config_list = [
    "v1/32k.json",
    "v1/40k.json",
    "v1/48k.json",
    "v2/48k.json",
    "v2/32k.json",
]
31
+
32
+
33
def singleton_variable(func):
    """Cache the first call's result and return it on every later call.

    Applied to a class (as with ``Config`` below) this makes the decorated
    name behave like a singleton factory: the first call constructs the
    instance, subsequent calls return the same object.

    Parameters:
        func: the callable (function or class) whose first result is cached.

    Returns:
        A wrapper callable exposing the cached object via ``wrapper.instance``.
    """

    def wrapper(*args, **kwargs):
        # Compare against None explicitly: the previous truthiness test
        # (`if not wrapper.instance`) re-invoked `func` whenever the cached
        # value was falsy (0, "", empty container), breaking the singleton
        # guarantee for such results.
        if wrapper.instance is None:
            wrapper.instance = func(*args, **kwargs)
        return wrapper.instance

    wrapper.instance = None
    return wrapper
41
+
42
+
43
@singleton_variable
class Config:
    """Process-wide runtime configuration (singleton via ``singleton_variable``).

    Aggregates CLI arguments, per-version training JSON configs, and the
    compute-device selection (CUDA / XPU / MPS / DirectML / CPU) together
    with the half-precision decision and inference chunking parameters.

    NOTE(review): because ``singleton_variable`` wraps the class in a
    function, the name ``Config`` is no longer a class object after
    decoration — ``isinstance``/subclassing against it will not work.
    """

    def __init__(self):
        # device: torch device string; may be replaced by a torch_directml
        # device object in device_config() when --dml is passed.
        self.device = "cuda:0"
        # is_half: whether fp16 inference is used; demoted to False for
        # unsupported GPUs / MPS / CPU in device_config().
        self.is_half = True
        self.use_jit = False
        # n_cpu: 0 means "autodetect" — filled with cpu_count() later.
        self.n_cpu = 0
        self.gpu_name = None
        self.json_config = self.load_config_json()
        # gpu_mem: total VRAM in whole GiB, or None when no CUDA device.
        self.gpu_mem = None
        (
            self.python_cmd,
            self.listen_port,
            self.iscolab,
            self.noparallel,
            self.noautoopen,
            self.dml,
            self.nocheck,
            self.update,
        ) = self.arg_parse()
        # instead: human-readable fallback device name used for logging only.
        self.instead = ""
        # preprocess_per: audio slice length (seconds) used by preprocessing;
        # lowered to 3.0 for fp32 / low-VRAM configurations.
        self.preprocess_per = 3.7
        self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()

    @staticmethod
    def load_config_json() -> dict:
        """Load every version config JSON, seeding configs/inuse/ on first run.

        Files are copied from configs/<name> into configs/inuse/<name> when
        missing, then parsed; returns a dict keyed by the relative file name
        (e.g. "v1/32k.json").
        """
        d = {}
        for config_file in version_config_list:
            p = f"configs/inuse/{config_file}"
            if not os.path.exists(p):
                shutil.copy(f"configs/{config_file}", p)
            with open(f"configs/inuse/{config_file}", "r") as f:
                d[config_file] = json.load(f)
        return d

    @staticmethod
    def arg_parse() -> tuple:
        """Parse CLI flags; returns (pycmd, port, colab, noparallel,
        noautoopen, dml, nocheck, update)."""
        exe = sys.executable or "python"
        parser = argparse.ArgumentParser()
        parser.add_argument("--port", type=int, default=7865, help="Listen port")
        parser.add_argument("--pycmd", type=str, default=exe, help="Python command")
        parser.add_argument("--colab", action="store_true", help="Launch in colab")
        parser.add_argument(
            "--noparallel", action="store_true", help="Disable parallel processing"
        )
        parser.add_argument(
            "--noautoopen",
            action="store_true",
            help="Do not open in browser automatically",
        )
        parser.add_argument(
            "--dml",
            action="store_true",
            help="torch_dml",
        )
        parser.add_argument(
            "--nocheck", action="store_true", help="Run without checking assets"
        )
        parser.add_argument(
            "--update", action="store_true", help="Update to latest assets"
        )
        cmd_opts = parser.parse_args()

        # Out-of-range ports silently fall back to the default.
        cmd_opts.port = cmd_opts.port if 0 <= cmd_opts.port <= 65535 else 7865

        return (
            cmd_opts.pycmd,
            cmd_opts.port,
            cmd_opts.colab,
            cmd_opts.noparallel,
            cmd_opts.noautoopen,
            cmd_opts.dml,
            cmd_opts.nocheck,
            cmd_opts.update,
        )

    # has_mps is only available in nightly pytorch (for now) and MasOS 12.3+.
    # check `getattr` and try it for compatibility
    @staticmethod
    def has_mps() -> bool:
        """Return True when an Apple MPS device is available AND usable
        (a tiny tensor transfer is attempted as a smoke test)."""
        if not torch.backends.mps.is_available():
            return False
        try:
            torch.zeros(1).to(torch.device("mps"))
            return True
        except Exception:
            return False

    @staticmethod
    def has_xpu() -> bool:
        """Return True when torch exposes an available Intel XPU backend."""
        if hasattr(torch, "xpu") and torch.xpu.is_available():
            return True
        else:
            return False

    def use_fp32_config(self):
        """Force fp32: flip fp16_run off in memory and rewrite the JSON files.

        NOTE(review): the on-disk rewrite replaces every "true" substring in
        the file with "false", not just fp16_run — relies on no other true
        booleans existing in those configs; verify before reusing.
        """
        for config_file in version_config_list:
            self.json_config[config_file]["train"]["fp16_run"] = False
            with open(f"configs/inuse/{config_file}", "r") as f:
                strr = f.read().replace("true", "false")
            with open(f"configs/inuse/{config_file}", "w") as f:
                f.write(strr)
            logger.info("overwrite " + config_file)
        self.preprocess_per = 3.0
        logger.info("overwrite preprocess_per to %d" % (self.preprocess_per))

    def device_config(self) -> tuple:
        """Probe hardware, pick device/precision, and size inference chunks.

        Returns (x_pad, x_query, x_center, x_max): padding and windowing
        lengths (seconds) scaled down for fp32 and low-VRAM setups.
        Side effects: may rewrite config JSONs (use_fp32_config) and, on
        Windows runtime bundles, rename onnxruntime package directories to
        swap the CUDA/DirectML builds.
        """
        if torch.cuda.is_available():
            if self.has_xpu():
                # torch.cuda may report available through the IPEX shims;
                # prefer the XPU device in that case.
                self.device = self.instead = "xpu:0"
                self.is_half = True
            i_device = int(self.device.split(":")[-1])
            self.gpu_name = torch.cuda.get_device_name(i_device)
            # Cards without usable fp16 throughput (10-series, P40/P10,
            # 16-series except V100 false matches) are forced to fp32.
            if (
                ("16" in self.gpu_name and "V100" not in self.gpu_name.upper())
                or "P40" in self.gpu_name.upper()
                or "P10" in self.gpu_name.upper()
                or "1060" in self.gpu_name
                or "1070" in self.gpu_name
                or "1080" in self.gpu_name
            ):
                logger.info("Found GPU %s, force to fp32", self.gpu_name)
                self.is_half = False
                self.use_fp32_config()
            else:
                logger.info("Found GPU %s", self.gpu_name)
            # Total VRAM rounded to whole GiB (+0.4 nudges e.g. 3.6 up to 4).
            self.gpu_mem = int(
                torch.cuda.get_device_properties(i_device).total_memory
                / 1024
                / 1024
                / 1024
                + 0.4
            )
            if self.gpu_mem <= 4:
                self.preprocess_per = 3.0
        elif self.has_mps():
            logger.info("No supported Nvidia GPU found")
            self.device = self.instead = "mps"
            self.is_half = False
            self.use_fp32_config()
        else:
            logger.info("No supported Nvidia GPU found")
            self.device = self.instead = "cpu"
            self.is_half = False
            self.use_fp32_config()

        if self.n_cpu == 0:
            self.n_cpu = cpu_count()

        if self.is_half:
            # 6 GB VRAM configuration
            x_pad = 3
            x_query = 10
            x_center = 60
            x_max = 65
        else:
            # 5 GB VRAM configuration
            x_pad = 1
            x_query = 6
            x_center = 38
            x_max = 41

        # Low-VRAM override regardless of precision.
        if self.gpu_mem is not None and self.gpu_mem <= 4:
            x_pad = 1
            x_query = 5
            x_center = 30
            x_max = 32
        if self.dml:
            logger.info("Use DirectML instead")
            # Windows portable runtime: ensure the DirectML build of
            # onnxruntime is the active package directory, parking the CUDA
            # build under an -cuda suffix. Renames are best-effort.
            if (
                os.path.exists(
                    "runtime\Lib\site-packages\onnxruntime\capi\DirectML.dll"
                )
                == False
            ):
                try:
                    os.rename(
                        "runtime\Lib\site-packages\onnxruntime",
                        "runtime\Lib\site-packages\onnxruntime-cuda",
                    )
                except:
                    pass
                try:
                    os.rename(
                        "runtime\Lib\site-packages\onnxruntime-dml",
                        "runtime\Lib\site-packages\onnxruntime",
                    )
                except:
                    pass
            # if self.device != "cpu":
            import torch_directml

            self.device = torch_directml.device(torch_directml.default_device())
            self.is_half = False
        else:
            if self.instead:
                logger.info(f"Use {self.instead} instead")
            # Mirror image of the --dml branch: make the CUDA build of
            # onnxruntime active, parking the DirectML build.
            if (
                os.path.exists(
                    "runtime\Lib\site-packages\onnxruntime\capi\onnxruntime_providers_cuda.dll"
                )
                == False
            ):
                try:
                    os.rename(
                        "runtime\Lib\site-packages\onnxruntime",
                        "runtime\Lib\site-packages\onnxruntime-dml",
                    )
                except:
                    pass
                try:
                    os.rename(
                        "runtime\Lib\site-packages\onnxruntime-cuda",
                        "runtime\Lib\site-packages\onnxruntime",
                    )
                except:
                    pass
        logger.info(
            "Half-precision floating-point: %s, device: %s"
            % (self.is_half, self.device)
        )
        return x_pad, x_query, x_center, x_max
configs/inuse/.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ *
2
+ !.gitignore
3
+ !v1
4
+ !v2
configs/inuse/v1/.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *
2
+ !.gitignore
configs/inuse/v2/.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *
2
+ !.gitignore
configs/v1/32k.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "seed": 1234,
5
+ "epochs": 20000,
6
+ "learning_rate": 1e-4,
7
+ "betas": [0.8, 0.99],
8
+ "eps": 1e-9,
9
+ "batch_size": 4,
10
+ "fp16_run": true,
11
+ "lr_decay": 0.999875,
12
+ "segment_size": 12800,
13
+ "init_lr_ratio": 1,
14
+ "warmup_epochs": 0,
15
+ "c_mel": 45,
16
+ "c_kl": 1.0
17
+ },
18
+ "data": {
19
+ "max_wav_value": 32768.0,
20
+ "sampling_rate": 32000,
21
+ "filter_length": 1024,
22
+ "hop_length": 320,
23
+ "win_length": 1024,
24
+ "n_mel_channels": 80,
25
+ "mel_fmin": 0.0,
26
+ "mel_fmax": null
27
+ },
28
+ "model": {
29
+ "inter_channels": 192,
30
+ "hidden_channels": 192,
31
+ "filter_channels": 768,
32
+ "n_heads": 2,
33
+ "n_layers": 6,
34
+ "kernel_size": 3,
35
+ "p_dropout": 0,
36
+ "resblock": "1",
37
+ "resblock_kernel_sizes": [3,7,11],
38
+ "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
39
+ "upsample_rates": [10,4,2,2,2],
40
+ "upsample_initial_channel": 512,
41
+ "upsample_kernel_sizes": [16,16,4,4,4],
42
+ "use_spectral_norm": false,
43
+ "gin_channels": 256,
44
+ "spk_embed_dim": 109
45
+ }
46
+ }
configs/v1/40k.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "seed": 1234,
5
+ "epochs": 20000,
6
+ "learning_rate": 1e-4,
7
+ "betas": [0.8, 0.99],
8
+ "eps": 1e-9,
9
+ "batch_size": 4,
10
+ "fp16_run": true,
11
+ "lr_decay": 0.999875,
12
+ "segment_size": 12800,
13
+ "init_lr_ratio": 1,
14
+ "warmup_epochs": 0,
15
+ "c_mel": 45,
16
+ "c_kl": 1.0
17
+ },
18
+ "data": {
19
+ "max_wav_value": 32768.0,
20
+ "sampling_rate": 40000,
21
+ "filter_length": 2048,
22
+ "hop_length": 400,
23
+ "win_length": 2048,
24
+ "n_mel_channels": 125,
25
+ "mel_fmin": 0.0,
26
+ "mel_fmax": null
27
+ },
28
+ "model": {
29
+ "inter_channels": 192,
30
+ "hidden_channels": 192,
31
+ "filter_channels": 768,
32
+ "n_heads": 2,
33
+ "n_layers": 6,
34
+ "kernel_size": 3,
35
+ "p_dropout": 0,
36
+ "resblock": "1",
37
+ "resblock_kernel_sizes": [3,7,11],
38
+ "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
39
+ "upsample_rates": [10,10,2,2],
40
+ "upsample_initial_channel": 512,
41
+ "upsample_kernel_sizes": [16,16,4,4],
42
+ "use_spectral_norm": false,
43
+ "gin_channels": 256,
44
+ "spk_embed_dim": 109
45
+ }
46
+ }
configs/v1/48k.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "seed": 1234,
5
+ "epochs": 20000,
6
+ "learning_rate": 1e-4,
7
+ "betas": [0.8, 0.99],
8
+ "eps": 1e-9,
9
+ "batch_size": 4,
10
+ "fp16_run": true,
11
+ "lr_decay": 0.999875,
12
+ "segment_size": 11520,
13
+ "init_lr_ratio": 1,
14
+ "warmup_epochs": 0,
15
+ "c_mel": 45,
16
+ "c_kl": 1.0
17
+ },
18
+ "data": {
19
+ "max_wav_value": 32768.0,
20
+ "sampling_rate": 48000,
21
+ "filter_length": 2048,
22
+ "hop_length": 480,
23
+ "win_length": 2048,
24
+ "n_mel_channels": 128,
25
+ "mel_fmin": 0.0,
26
+ "mel_fmax": null
27
+ },
28
+ "model": {
29
+ "inter_channels": 192,
30
+ "hidden_channels": 192,
31
+ "filter_channels": 768,
32
+ "n_heads": 2,
33
+ "n_layers": 6,
34
+ "kernel_size": 3,
35
+ "p_dropout": 0,
36
+ "resblock": "1",
37
+ "resblock_kernel_sizes": [3,7,11],
38
+ "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
39
+ "upsample_rates": [10,6,2,2,2],
40
+ "upsample_initial_channel": 512,
41
+ "upsample_kernel_sizes": [16,16,4,4,4],
42
+ "use_spectral_norm": false,
43
+ "gin_channels": 256,
44
+ "spk_embed_dim": 109
45
+ }
46
+ }
configs/v2/32k.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "seed": 1234,
5
+ "epochs": 20000,
6
+ "learning_rate": 1e-4,
7
+ "betas": [0.8, 0.99],
8
+ "eps": 1e-9,
9
+ "batch_size": 4,
10
+ "fp16_run": true,
11
+ "lr_decay": 0.999875,
12
+ "segment_size": 12800,
13
+ "init_lr_ratio": 1,
14
+ "warmup_epochs": 0,
15
+ "c_mel": 45,
16
+ "c_kl": 1.0
17
+ },
18
+ "data": {
19
+ "max_wav_value": 32768.0,
20
+ "sampling_rate": 32000,
21
+ "filter_length": 1024,
22
+ "hop_length": 320,
23
+ "win_length": 1024,
24
+ "n_mel_channels": 80,
25
+ "mel_fmin": 0.0,
26
+ "mel_fmax": null
27
+ },
28
+ "model": {
29
+ "inter_channels": 192,
30
+ "hidden_channels": 192,
31
+ "filter_channels": 768,
32
+ "n_heads": 2,
33
+ "n_layers": 6,
34
+ "kernel_size": 3,
35
+ "p_dropout": 0,
36
+ "resblock": "1",
37
+ "resblock_kernel_sizes": [3,7,11],
38
+ "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
39
+ "upsample_rates": [10,8,2,2],
40
+ "upsample_initial_channel": 512,
41
+ "upsample_kernel_sizes": [20,16,4,4],
42
+ "use_spectral_norm": false,
43
+ "gin_channels": 256,
44
+ "spk_embed_dim": 109
45
+ }
46
+ }
configs/v2/48k.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "seed": 1234,
5
+ "epochs": 20000,
6
+ "learning_rate": 1e-4,
7
+ "betas": [0.8, 0.99],
8
+ "eps": 1e-9,
9
+ "batch_size": 4,
10
+ "fp16_run": true,
11
+ "lr_decay": 0.999875,
12
+ "segment_size": 17280,
13
+ "init_lr_ratio": 1,
14
+ "warmup_epochs": 0,
15
+ "c_mel": 45,
16
+ "c_kl": 1.0
17
+ },
18
+ "data": {
19
+ "max_wav_value": 32768.0,
20
+ "sampling_rate": 48000,
21
+ "filter_length": 2048,
22
+ "hop_length": 480,
23
+ "win_length": 2048,
24
+ "n_mel_channels": 128,
25
+ "mel_fmin": 0.0,
26
+ "mel_fmax": null
27
+ },
28
+ "model": {
29
+ "inter_channels": 192,
30
+ "hidden_channels": 192,
31
+ "filter_channels": 768,
32
+ "n_heads": 2,
33
+ "n_layers": 6,
34
+ "kernel_size": 3,
35
+ "p_dropout": 0,
36
+ "resblock": "1",
37
+ "resblock_kernel_sizes": [3,7,11],
38
+ "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
39
+ "upsample_rates": [12,10,2,2],
40
+ "upsample_initial_channel": 512,
41
+ "upsample_kernel_sizes": [24,20,4,4],
42
+ "use_spectral_norm": false,
43
+ "gin_channels": 256,
44
+ "spk_embed_dim": 109
45
+ }
46
+ }