add soft-vits and more models
Files changed:
- README.md +1 -1
- app.py +59 -9
- export_model.py +13 -0
- models.py +370 -363
- saved_model/10/config.json +3 -0
- saved_model/10/cover.jpg +3 -0
- saved_model/10/model.pth +3 -0
- saved_model/11/config.json +3 -0
- saved_model/11/cover.jpg +3 -0
- saved_model/11/model.pth +3 -0
- saved_model/8/config.json +3 -0
- saved_model/8/cover.jpg +3 -0
- saved_model/8/model.pth +3 -0
- saved_model/9/config.json +3 -0
- saved_model/9/cover.jpg +3 -0
- saved_model/9/model.pth +3 -0
- saved_model/info.json +2 -2
README.md CHANGED
@@ -4,7 +4,7 @@
 colorFrom: red
 colorTo: pink
 sdk: gradio
-sdk_version: 3.
+sdk_version: 3.3
 app_file: app.py
 pinned: false
 license: mit
app.py CHANGED
@@ -84,6 +84,34 @@ def create_vc_fn(model, hps, speaker_ids):
     return vc_fn
 
 
+def create_soft_vc_fn(model, hps, speaker_ids):
+    def soft_vc_fn(target_speaker, input_audio):
+        if input_audio is None:
+            return "You need to upload an audio", None
+        sampling_rate, audio = input_audio
+        duration = audio.shape[0] / sampling_rate
+        if limitation and duration > 15:
+            return "Error: Audio is too long", None
+        target_speaker_id = speaker_ids[target_speaker]
+
+        audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
+        if len(audio.shape) > 1:
+            audio = librosa.to_mono(audio.transpose(1, 0))
+        if sampling_rate != 16000:
+            audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
+        with torch.inference_mode():
+            units = hubert.units(torch.FloatTensor(audio).unsqueeze(0).unsqueeze(0))
+        with no_grad():
+            unit_lengths = LongTensor([units.size(1)])
+            sid = LongTensor([target_speaker_id])
+            audio = model.infer(units, unit_lengths, sid=sid, noise_scale=.667,
+                                noise_scale_w=0.8)[0][0, 0].data.cpu().float().numpy()
+        del units, unit_lengths, sid
+        return "Success", (hps.data.sampling_rate, audio)
+
+    return soft_vc_fn
+
+
 def create_to_phoneme_fn(hps):
     def to_phoneme_fn(text):
         return _clean_text(text, hps.data.text_cleaners) if text != "" else ""
@@ -110,7 +138,9 @@ css = """
 """
 
 if __name__ == '__main__':
-    [original line not captured]
+    models_tts = []
+    models_vc = []
+    models_soft_vc = []
     with open("saved_model/info.json", "r", encoding="utf-8") as f:
         models_info = json.load(f)
     for i, info in models_info.items():
@@ -132,9 +162,16 @@ if __name__ == '__main__':
         speaker_ids = [sid for sid, name in enumerate(hps.speakers) if name != "None"]
         speakers = [name for sid, name in enumerate(hps.speakers) if name != "None"]
 
-        [3 original lines not captured]
+        t = info["type"]
+        if t == "vits":
+            models_tts.append((name, cover_path, speakers, lang, example,
+                               hps.symbols, create_tts_fn(model, hps, speaker_ids),
+                               create_to_phoneme_fn(hps)))
+            models_vc.append((name, cover_path, speakers, create_vc_fn(model, hps, speaker_ids)))
+        elif t == "soft-vits-vc":
+            models_soft_vc.append((name, cover_path, speakers, create_soft_vc_fn(model, hps, speaker_ids)))
+
+    hubert = torch.hub.load("bshall/hubert:main", "hubert_soft")
 
     app = gr.Blocks(css=css)
 
@@ -144,13 +181,14 @@ if __name__ == '__main__':
         "unofficial demo for \n\n"
         "- [https://github.com/CjangCjengh/MoeGoe](https://github.com/CjangCjengh/MoeGoe)\n"
         "- [https://github.com/Francis-Komizu/VITS](https://github.com/Francis-Komizu/VITS)\n"
-        "- [https://github.com/luoyily/MoeTTS](https://github.com/luoyily/MoeTTS)"
+        "- [https://github.com/luoyily/MoeTTS](https://github.com/luoyily/MoeTTS)\n"
+        "- [https://github.com/Francis-Komizu/Sovits](https://github.com/Francis-Komizu/Sovits)"
     )
     with gr.Tabs():
         with gr.TabItem("TTS"):
             with gr.Tabs():
-                for i, (name, lang, example, [rest of original line not captured]
-                        [original line not captured]
+                for i, (name, cover_path, speakers, lang, example, symbols, tts_fn,
+                        to_phoneme_fn) in enumerate(models_tts):
                     with gr.TabItem(f"model{i}"):
                         with gr.Column():
                             gr.Markdown(f"## {name}\n\n"
@@ -204,8 +242,7 @@ if __name__ == '__main__':
 
         with gr.TabItem("Voice Conversion"):
             with gr.Tabs():
-                for i, (name, [rest of original line not captured]
-                        symbols, tts_fn, vc_fn, to_phoneme_fn) in enumerate(models):
+                for i, (name, cover_path, speakers, vc_fn) in enumerate(models_vc):
                     with gr.TabItem(f"model{i}"):
                         gr.Markdown(f"## {name}\n\n"
                                     f"![cover](file/{cover_path})")
@@ -218,4 +255,17 @@ if __name__ == '__main__':
                     vc_output1 = gr.Textbox(label="Output Message")
                     vc_output2 = gr.Audio(label="Output Audio")
                     vc_submit.click(vc_fn, [vc_input1, vc_input2, vc_input3], [vc_output1, vc_output2])
+        with gr.TabItem("Soft Voice Conversion"):
+            with gr.Tabs():
+                for i, (name, cover_path, speakers, soft_vc_fn) in enumerate(models_soft_vc):
+                    with gr.TabItem(f"model{i}"):
+                        gr.Markdown(f"## {name}\n\n"
+                                    f"![cover](file/{cover_path})")
+                        vc_input1 = gr.Dropdown(label="Target Speaker", choices=speakers, type="index",
+                                                value=speakers[0])
+                        vc_input2 = gr.Audio(label="Input Audio (15s limitation)")
+                        vc_submit = gr.Button("Convert", variant="primary")
+                        vc_output1 = gr.Textbox(label="Output Message")
+                        vc_output2 = gr.Audio(label="Output Audio")
+                        vc_submit.click(soft_vc_fn, [vc_input1, vc_input2], [vc_output1, vc_output2])
     app.launch()
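For context on the new soft-vits path: soft_vc_fn turns the uploaded clip into HuBERT-soft speech units and feeds those to the VITS synthesizer in place of phoneme embeddings. Below is a minimal sketch of the preprocessing it applies before unit extraction; the function name and the standalone framing are illustrative, not part of this commit.

import numpy as np
import librosa
import torch

def prepare_for_hubert(audio, sampling_rate):
    # integer PCM from gr.Audio -> float32 in [-1, 1]
    if np.issubdtype(audio.dtype, np.integer):
        audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
    # stereo -> mono; librosa expects channels-first, gr.Audio yields (samples, channels)
    if audio.ndim > 1:
        audio = librosa.to_mono(audio.transpose(1, 0))
    # HuBERT-soft operates on 16 kHz audio
    if sampling_rate != 16000:
        audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
    # shape [batch=1, channels=1, samples], as hubert.units expects
    return torch.FloatTensor(audio).unsqueeze(0).unsqueeze(0)

# hubert = torch.hub.load("bshall/hubert:main", "hubert_soft")
# with torch.inference_mode():
#     units = hubert.units(prepare_for_hubert(audio, sr))

Note that the HuBERT-soft encoder is loaded once via torch.hub at startup and shared across all soft-vits-vc models, while each model keeps its own VITS decoder.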
export_model.py ADDED
@@ -0,0 +1,13 @@
+import torch
+
+if __name__ == '__main__':
+    model_path = "saved_model/11/model.pth"
+    output_path = "saved_model/11/model1.pth"
+    checkpoint_dict = torch.load(model_path, map_location='cpu')
+    checkpoint_dict_new = {}
+    for k, v in checkpoint_dict.items():
+        if k == "optimizer":
+            print("remove optimizer")
+            continue
+        checkpoint_dict_new[k] = v
+    torch.save(checkpoint_dict_new, output_path)
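export_model.py drops the optimizer state from a training checkpoint so the uploaded model.pth carries only what inference needs, which substantially shrinks the file. A hedged generalization of the same idea follows; the helper name and any drop keys beyond "optimizer" are assumptions, not part of this repo.

import torch

def strip_checkpoint(src, dst, drop=("optimizer",)):
    # load on CPU so no GPU is needed just to re-save the file
    ckpt = torch.load(src, map_location="cpu")
    kept = {k: v for k, v in ckpt.items() if k not in drop}
    torch.save(kept, dst)

# strip_checkpoint("saved_model/11/model.pth", "saved_model/11/model1.pth")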
models.py CHANGED
@@ -14,234 +14,239 @@ from commons import init_weights, get_padding
 
 
 class StochasticDurationPredictor(nn.Module):
-    [original lines not captured]
+    def __init__(self, in_channels, filter_channels, kernel_size, p_dropout, n_flows=4, gin_channels=0):
+        super().__init__()
+        filter_channels = in_channels  # it needs to be removed from future version.
+        self.in_channels = in_channels
+        self.filter_channels = filter_channels
+        self.kernel_size = kernel_size
+        self.p_dropout = p_dropout
+        self.n_flows = n_flows
+        self.gin_channels = gin_channels
+
+        self.log_flow = modules.Log()
+        self.flows = nn.ModuleList()
+        self.flows.append(modules.ElementwiseAffine(2))
+        for i in range(n_flows):
+            self.flows.append(modules.ConvFlow(2, filter_channels, kernel_size, n_layers=3))
+            self.flows.append(modules.Flip())
+
+        self.post_pre = nn.Conv1d(1, filter_channels, 1)
+        self.post_proj = nn.Conv1d(filter_channels, filter_channels, 1)
+        self.post_convs = modules.DDSConv(filter_channels, kernel_size, n_layers=3, p_dropout=p_dropout)
+        self.post_flows = nn.ModuleList()
+        self.post_flows.append(modules.ElementwiseAffine(2))
+        for i in range(4):
+            self.post_flows.append(modules.ConvFlow(2, filter_channels, kernel_size, n_layers=3))
+            self.post_flows.append(modules.Flip())
+
+        self.pre = nn.Conv1d(in_channels, filter_channels, 1)
+        self.proj = nn.Conv1d(filter_channels, filter_channels, 1)
+        self.convs = modules.DDSConv(filter_channels, kernel_size, n_layers=3, p_dropout=p_dropout)
+        if gin_channels != 0:
+            self.cond = nn.Conv1d(gin_channels, filter_channels, 1)
+
+    def forward(self, x, x_mask, w=None, g=None, reverse=False, noise_scale=1.0):
+        x = torch.detach(x)
+        x = self.pre(x)
+        if g is not None:
+            g = torch.detach(g)
+            x = x + self.cond(g)
+        x = self.convs(x, x_mask)
+        x = self.proj(x) * x_mask
+
+        if not reverse:
+            flows = self.flows
+            assert w is not None
+
+            logdet_tot_q = 0
+            h_w = self.post_pre(w)
+            h_w = self.post_convs(h_w, x_mask)
+            h_w = self.post_proj(h_w) * x_mask
+            e_q = torch.randn(w.size(0), 2, w.size(2)).to(device=x.device, dtype=x.dtype) * x_mask
+            z_q = e_q
+            for flow in self.post_flows:
+                z_q, logdet_q = flow(z_q, x_mask, g=(x + h_w))
+                logdet_tot_q += logdet_q
+            z_u, z1 = torch.split(z_q, [1, 1], 1)
+            u = torch.sigmoid(z_u) * x_mask
+            z0 = (w - u) * x_mask
+            logdet_tot_q += torch.sum((F.logsigmoid(z_u) + F.logsigmoid(-z_u)) * x_mask, [1, 2])
+            logq = torch.sum(-0.5 * (math.log(2 * math.pi) + (e_q ** 2)) * x_mask, [1, 2]) - logdet_tot_q
+
+            logdet_tot = 0
+            z0, logdet = self.log_flow(z0, x_mask)
+            logdet_tot += logdet
+            z = torch.cat([z0, z1], 1)
+            for flow in flows:
+                z, logdet = flow(z, x_mask, g=x, reverse=reverse)
+                logdet_tot = logdet_tot + logdet
+            nll = torch.sum(0.5 * (math.log(2 * math.pi) + (z ** 2)) * x_mask, [1, 2]) - logdet_tot
+            return nll + logq  # [b]
+        else:
+            flows = list(reversed(self.flows))
+            flows = flows[:-2] + [flows[-1]]  # remove a useless vflow
+            z = torch.randn(x.size(0), 2, x.size(2)).to(device=x.device, dtype=x.dtype) * noise_scale
+            for flow in flows:
+                z = flow(z, x_mask, g=x, reverse=reverse)
+            z0, z1 = torch.split(z, [1, 1], 1)
+            logw = z0
+            return logw
 
 
 class DurationPredictor(nn.Module):
-    [original lines not captured]
+    def __init__(self, in_channels, filter_channels, kernel_size, p_dropout, gin_channels=0):
+        super().__init__()
+
+        self.in_channels = in_channels
+        self.filter_channels = filter_channels
+        self.kernel_size = kernel_size
+        self.p_dropout = p_dropout
+        self.gin_channels = gin_channels
+
+        self.drop = nn.Dropout(p_dropout)
+        self.conv_1 = nn.Conv1d(in_channels, filter_channels, kernel_size, padding=kernel_size // 2)
+        self.norm_1 = modules.LayerNorm(filter_channels)
+        self.conv_2 = nn.Conv1d(filter_channels, filter_channels, kernel_size, padding=kernel_size // 2)
+        self.norm_2 = modules.LayerNorm(filter_channels)
+        self.proj = nn.Conv1d(filter_channels, 1, 1)
+
+        if gin_channels != 0:
+            self.cond = nn.Conv1d(gin_channels, in_channels, 1)
+
+    def forward(self, x, x_mask, g=None):
+        x = torch.detach(x)
+        if g is not None:
+            g = torch.detach(g)
+            x = x + self.cond(g)
+        x = self.conv_1(x * x_mask)
+        x = torch.relu(x)
+        x = self.norm_1(x)
+        x = self.drop(x)
+        x = self.conv_2(x * x_mask)
+        x = torch.relu(x)
+        x = self.norm_2(x)
+        x = self.drop(x)
+        x = self.proj(x * x_mask)
+        return x * x_mask
 
 
 class TextEncoder(nn.Module):
-    [original lines not captured]
+    def __init__(self,
+                 n_vocab,
+                 out_channels,
+                 hidden_channels,
+                 filter_channels,
+                 n_heads,
+                 n_layers,
+                 kernel_size,
+                 p_dropout):
+        super().__init__()
+        self.n_vocab = n_vocab
+        self.out_channels = out_channels
+        self.hidden_channels = hidden_channels
+        self.filter_channels = filter_channels
+        self.n_heads = n_heads
+        self.n_layers = n_layers
+        self.kernel_size = kernel_size
+        self.p_dropout = p_dropout
+
+        if self.n_vocab != 0:
+            self.emb = nn.Embedding(n_vocab, hidden_channels)
+            nn.init.normal_(self.emb.weight, 0.0, hidden_channels ** -0.5)
+
+        self.encoder = attentions.Encoder(
+            hidden_channels,
+            filter_channels,
+            n_heads,
+            n_layers,
+            kernel_size,
+            p_dropout)
+        self.proj = nn.Conv1d(hidden_channels, out_channels * 2, 1)
+
+    def forward(self, x, x_lengths):
+        if self.n_vocab != 0:
+            x = self.emb(x) * math.sqrt(self.hidden_channels)  # [b, t, h]
+        x = torch.transpose(x, 1, -1)  # [b, h, t]
+        x_mask = torch.unsqueeze(commons.sequence_mask(x_lengths, x.size(2)), 1).to(x.dtype)
+
+        x = self.encoder(x * x_mask, x_mask)
+        stats = self.proj(x) * x_mask
+
+        m, logs = torch.split(stats, self.out_channels, dim=1)
+        return x, m, logs, x_mask
 
 
 class ResidualCouplingBlock(nn.Module):
-    [original lines not captured]
+    def __init__(self,
+                 channels,
+                 hidden_channels,
+                 kernel_size,
+                 dilation_rate,
+                 n_layers,
+                 n_flows=4,
+                 gin_channels=0):
+        super().__init__()
+        self.channels = channels
+        self.hidden_channels = hidden_channels
+        self.kernel_size = kernel_size
+        self.dilation_rate = dilation_rate
+        self.n_layers = n_layers
+        self.n_flows = n_flows
+        self.gin_channels = gin_channels
+
+        self.flows = nn.ModuleList()
+        for i in range(n_flows):
+            self.flows.append(
+                modules.ResidualCouplingLayer(channels, hidden_channels, kernel_size, dilation_rate, n_layers,
+                                              gin_channels=gin_channels, mean_only=True))
+            self.flows.append(modules.Flip())
+
+    def forward(self, x, x_mask, g=None, reverse=False):
+        if not reverse:
+            for flow in self.flows:
+                x, _ = flow(x, x_mask, g=g, reverse=reverse)
+        else:
+            for flow in reversed(self.flows):
+                x = flow(x, x_mask, g=g, reverse=reverse)
+        return x
 
 
 class PosteriorEncoder(nn.Module):
-    [original lines not captured]
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 hidden_channels,
+                 kernel_size,
+                 dilation_rate,
+                 n_layers,
+                 gin_channels=0):
+        super().__init__()
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.hidden_channels = hidden_channels
+        self.kernel_size = kernel_size
+        self.dilation_rate = dilation_rate
+        self.n_layers = n_layers
+        self.gin_channels = gin_channels
+
+        self.pre = nn.Conv1d(in_channels, hidden_channels, 1)
+        self.enc = modules.WN(hidden_channels, kernel_size, dilation_rate, n_layers, gin_channels=gin_channels)
+        self.proj = nn.Conv1d(hidden_channels, out_channels * 2, 1)
+
+    def forward(self, x, x_lengths, g=None):
+        x_mask = torch.unsqueeze(commons.sequence_mask(x_lengths, x.size(2)), 1).to(x.dtype)
+        x = self.pre(x) * x_mask
+        x = self.enc(x, x_mask, g=g)
+        stats = self.proj(x) * x_mask
+        m, logs = torch.split(stats, self.out_channels, dim=1)
+        z = (m + torch.randn_like(m) * torch.exp(logs)) * x_mask
+        return z, m, logs, x_mask
 
 
 class Generator(torch.nn.Module):
-    def __init__(self, initial_channel, resblock, resblock_kernel_sizes, resblock_dilation_sizes, upsample_rates,
+    def __init__(self, initial_channel, resblock, resblock_kernel_sizes, resblock_dilation_sizes, upsample_rates,
+                 upsample_initial_channel, upsample_kernel_sizes, gin_channels=0):
         super(Generator, self).__init__()
         self.num_kernels = len(resblock_kernel_sizes)
         self.num_upsamples = len(upsample_rates)
@@ -251,12 +256,12 @@ class Generator(torch.nn.Module):
         self.ups = nn.ModuleList()
         for i, (u, k) in enumerate(zip(upsample_rates, upsample_kernel_sizes)):
             self.ups.append(weight_norm(
-                ConvTranspose1d(upsample_initial_channel//(2**i), upsample_initial_channel//(2**(i+1)),
-                                k, u, padding=(k-u)//2)))
+                ConvTranspose1d(upsample_initial_channel // (2 ** i), upsample_initial_channel // (2 ** (i + 1)),
+                                k, u, padding=(k - u) // 2)))
 
         self.resblocks = nn.ModuleList()
         for i in range(len(self.ups)):
-            ch = upsample_initial_channel//(2**(i+1))
+            ch = upsample_initial_channel // (2 ** (i + 1))
             for j, (k, d) in enumerate(zip(resblock_kernel_sizes, resblock_dilation_sizes)):
                 self.resblocks.append(resblock(ch, k, d))
 
@@ -269,7 +274,7 @@ class Generator(torch.nn.Module):
     def forward(self, x, g=None):
         x = self.conv_pre(x)
         if g is not None:
-            [original line not captured]
+            x = x + self.cond(g)
 
         for i in range(self.num_upsamples):
             x = F.leaky_relu(x, modules.LRELU_SLOPE)
@@ -277,9 +282,9 @@ class Generator(torch.nn.Module):
             xs = None
             for j in range(self.num_kernels):
                 if xs is None:
-                    xs = self.resblocks[i*self.num_kernels+j](x)
+                    xs = self.resblocks[i * self.num_kernels + j](x)
                 else:
-                    xs += self.resblocks[i*self.num_kernels+j](x)
+                    xs += self.resblocks[i * self.num_kernels + j](x)
             x = xs / self.num_kernels
             x = F.leaky_relu(x)
             x = self.conv_post(x)
@@ -315,7 +320,7 @@ class DiscriminatorP(torch.nn.Module):
 
         # 1d to 2d
         b, c, t = x.shape
-        if t % self.period != 0:
+        if t % self.period != 0:  # pad first
             n_pad = self.period - (t % self.period)
             x = F.pad(x, (0, n_pad), "reflect")
             t = t + n_pad
@@ -363,7 +368,7 @@ class DiscriminatorS(torch.nn.Module):
 class MultiPeriodDiscriminator(torch.nn.Module):
     def __init__(self, use_spectral_norm=False):
         super(MultiPeriodDiscriminator, self).__init__()
-        periods = [2,3,5,7,11]
+        periods = [2, 3, 5, 7, 11]
 
         discs = [DiscriminatorS(use_spectral_norm=use_spectral_norm)]
         discs = discs + [DiscriminatorP(i, use_spectral_norm=use_spectral_norm) for i in periods]
@@ -385,149 +390,151 @@ class MultiPeriodDiscriminator(torch.nn.Module):
         return y_d_rs, y_d_gs, fmap_rs, fmap_gs
 
 
-
 class SynthesizerTrn(nn.Module):
-    [original line not captured]
+    """
     Synthesizer for Training
     """
 
-    [original lines not captured]
+    def __init__(self,
+                 n_vocab,
+                 spec_channels,
+                 segment_size,
+                 inter_channels,
+                 hidden_channels,
+                 filter_channels,
+                 n_heads,
+                 n_layers,
+                 kernel_size,
+                 p_dropout,
+                 resblock,
+                 resblock_kernel_sizes,
+                 resblock_dilation_sizes,
+                 upsample_rates,
+                 upsample_initial_channel,
+                 upsample_kernel_sizes,
+                 n_speakers=0,
+                 gin_channels=0,
+                 use_sdp=True,
+                 **kwargs):
+
+        super().__init__()
+        self.n_vocab = n_vocab
+        self.spec_channels = spec_channels
+        self.inter_channels = inter_channels
+        self.hidden_channels = hidden_channels
+        self.filter_channels = filter_channels
+        self.n_heads = n_heads
+        self.n_layers = n_layers
+        self.kernel_size = kernel_size
+        self.p_dropout = p_dropout
+        self.resblock = resblock
+        self.resblock_kernel_sizes = resblock_kernel_sizes
+        self.resblock_dilation_sizes = resblock_dilation_sizes
+        self.upsample_rates = upsample_rates
+        self.upsample_initial_channel = upsample_initial_channel
+        self.upsample_kernel_sizes = upsample_kernel_sizes
+        self.segment_size = segment_size
+        self.n_speakers = n_speakers
+        self.gin_channels = gin_channels
+
+        self.use_sdp = use_sdp
+
+        self.enc_p = TextEncoder(n_vocab,
+                                 inter_channels,
+                                 hidden_channels,
+                                 filter_channels,
+                                 n_heads,
+                                 n_layers,
+                                 kernel_size,
+                                 p_dropout)
+        self.dec = Generator(inter_channels, resblock, resblock_kernel_sizes, resblock_dilation_sizes, upsample_rates,
+                             upsample_initial_channel, upsample_kernel_sizes, gin_channels=gin_channels)
+        self.enc_q = PosteriorEncoder(spec_channels, inter_channels, hidden_channels, 5, 1, 16,
+                                      gin_channels=gin_channels)
+        self.flow = ResidualCouplingBlock(inter_channels, hidden_channels, 5, 1, 4, gin_channels=gin_channels)
+
+        if use_sdp:
+            self.dp = StochasticDurationPredictor(hidden_channels, 192, 3, 0.5, 4, gin_channels=gin_channels)
+        else:
+            self.dp = DurationPredictor(hidden_channels, 256, 3, 0.5, gin_channels=gin_channels)
+
+        if n_speakers > 1:
+            self.emb_g = nn.Embedding(n_speakers, gin_channels)
+
+    def forward(self, x, x_lengths, y, y_lengths, sid=None):
+
+        x, m_p, logs_p, x_mask = self.enc_p(x, x_lengths)
+        if self.n_speakers > 1:
+            g = self.emb_g(sid).unsqueeze(-1)  # [b, h, 1]
+        else:
+            g = None
+
+        z, m_q, logs_q, y_mask = self.enc_q(y, y_lengths, g=g)
+        z_p = self.flow(z, y_mask, g=g)
+
+        with torch.no_grad():
+            # negative cross-entropy
+            s_p_sq_r = torch.exp(-2 * logs_p)  # [b, d, t]
+            neg_cent1 = torch.sum(-0.5 * math.log(2 * math.pi) - logs_p, [1], keepdim=True)  # [b, 1, t_s]
+            neg_cent2 = torch.matmul(-0.5 * (z_p ** 2).transpose(1, 2),
                                     s_p_sq_r)  # [b, t_t, d] x [b, d, t_s] = [b, t_t, t_s]
+            neg_cent3 = torch.matmul(z_p.transpose(1, 2), (m_p * s_p_sq_r))  # [b, t_t, d] x [b, d, t_s] = [b, t_t, t_s]
+            neg_cent4 = torch.sum(-0.5 * (m_p ** 2) * s_p_sq_r, [1], keepdim=True)  # [b, 1, t_s]
+            neg_cent = neg_cent1 + neg_cent2 + neg_cent3 + neg_cent4
+
+            attn_mask = torch.unsqueeze(x_mask, 2) * torch.unsqueeze(y_mask, -1)
+            attn = monotonic_align.maximum_path(neg_cent, attn_mask.squeeze(1)).unsqueeze(1).detach()
+
+        w = attn.sum(2)
+        if self.use_sdp:
+            l_length = self.dp(x, x_mask, w, g=g)
+            l_length = l_length / torch.sum(x_mask)
+        else:
+            logw_ = torch.log(w + 1e-6) * x_mask
+            logw = self.dp(x, x_mask, g=g)
+            l_length = torch.sum((logw - logw_) ** 2, [1, 2]) / torch.sum(x_mask)  # for averaging
+
+        # expand prior
+        m_p = torch.matmul(attn.squeeze(1), m_p.transpose(1, 2)).transpose(1, 2)
+        logs_p = torch.matmul(attn.squeeze(1), logs_p.transpose(1, 2)).transpose(1, 2)
+
+        z_slice, ids_slice = commons.rand_slice_segments(z, y_lengths, self.segment_size)
+        o = self.dec(z_slice, g=g)
+        return o, l_length, attn, ids_slice, x_mask, y_mask, (z, z_p, m_p, logs_p, m_q, logs_q)
+
+    def infer(self, x, x_lengths, sid=None, noise_scale=1, length_scale=1, noise_scale_w=1., max_len=None):
+        x, m_p, logs_p, x_mask = self.enc_p(x, x_lengths)
+        if self.n_speakers > 1:
+            g = self.emb_g(sid).unsqueeze(-1)  # [b, h, 1]
+        else:
+            g = None
+
+        if self.use_sdp:
+            logw = self.dp(x, x_mask, g=g, reverse=True, noise_scale=noise_scale_w)
+        else:
+            logw = self.dp(x, x_mask, g=g)
+        w = torch.exp(logw) * x_mask * length_scale
+        w_ceil = torch.ceil(w)
+        y_lengths = torch.clamp_min(torch.sum(w_ceil, [1, 2]), 1).long()
+        y_mask = torch.unsqueeze(commons.sequence_mask(y_lengths, None), 1).to(x_mask.dtype)
+        attn_mask = torch.unsqueeze(x_mask, 2) * torch.unsqueeze(y_mask, -1)
+        attn = commons.generate_path(w_ceil, attn_mask)
+
+        m_p = torch.matmul(attn.squeeze(1), m_p.transpose(1, 2)).transpose(1, 2)  # [b, t', t], [b, t, d] -> [b, d, t']
+        logs_p = torch.matmul(attn.squeeze(1), logs_p.transpose(1, 2)).transpose(1,
                                                                                  2)  # [b, t', t], [b, t, d] -> [b, d, t']
+
+        z_p = m_p + torch.randn_like(m_p) * torch.exp(logs_p) * noise_scale
+        z = self.flow(z_p, y_mask, g=g, reverse=True)
+        o = self.dec((z * y_mask)[:, :, :max_len], g=g)
+        return o, attn, y_mask, (z, z_p, m_p, logs_p)
+
+    def voice_conversion(self, y, y_lengths, sid_src, sid_tgt):
+        assert self.n_speakers > 1, "n_speakers have to be larger than 1."
+        g_src = self.emb_g(sid_src).unsqueeze(-1)
+        g_tgt = self.emb_g(sid_tgt).unsqueeze(-1)
+        z, m_q, logs_q, y_mask = self.enc_q(y, y_lengths, g=g_src)
+        z_p = self.flow(z, y_mask, g=g_src)
+        z_hat = self.flow(z_p, y_mask, g=g_tgt, reverse=True)
+        o_hat = self.dec(z_hat * y_mask, g=g_tgt)
+        return o_hat, y_mask, (z, z_p, z_hat)
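The rewritten models.py keeps the standard VITS architecture; the new n_vocab == 0 branch in TextEncoder is what lets soft-vc unit features bypass the phoneme embedding. A sketch of how SynthesizerTrn is typically driven at inference time follows, assuming the repository follows the reference VITS loading pattern (utils.get_hparams_from_file, utils.load_checkpoint) and a config carrying a symbols list; paths and ids are placeholders.

import torch
import utils                      # assumed VITS-style helper module in this repo
from models import SynthesizerTrn

hps = utils.get_hparams_from_file("saved_model/0/config.json")  # placeholder path
model = SynthesizerTrn(
    len(hps.symbols),
    hps.data.filter_length // 2 + 1,
    hps.train.segment_size // hps.data.hop_length,
    n_speakers=hps.data.n_speakers,
    **hps.model)
utils.load_checkpoint("saved_model/0/model.pth", model, None)
model.eval()

with torch.no_grad():
    x = torch.LongTensor([[0, 1, 2]])            # placeholder phoneme ids
    x_lengths = torch.LongTensor([x.size(1)])
    sid = torch.LongTensor([0])                  # target speaker id
    audio = model.infer(x, x_lengths, sid=sid, noise_scale=.667,
                        noise_scale_w=0.8, length_scale=1)[0][0, 0]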
saved_model/10/config.json ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:06b3c77565155ac550a3264e24f5c59627c6f8e4f9953a5f2423f6d375823e52
+size 1228

saved_model/10/cover.jpg ADDED
Git LFS Details

saved_model/10/model.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d7d3dc42ad38c3479b41c1060c442ba33018069be637e664fefafb4bb4ad764
+size 220972879

saved_model/11/config.json ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c2aa2128f54f61bf1b01951f7d2e0e2d5a835a9750a4a9ef8b4854ac25324823
+size 1187

saved_model/11/cover.jpg ADDED
Git LFS Details

saved_model/11/model.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:56d55e4672c5f335ebae30728529e5efb8a9c3975a9b63e6590454ef8769ae70
+size 203264375

saved_model/8/config.json ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4304293bb85d90daa3b5fa2dc3a35ce0842f0282f54298df68103932fee0e9f2
+size 1873

saved_model/8/cover.jpg ADDED
Git LFS Details

saved_model/8/model.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:70ba2e6a192f836d58dcb4e36b8d5dde2e2a06c88d03dda107c07b9aa35ee4db
+size 158902605

saved_model/9/config.json ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2040ad22b30868bb031f4d2e2af91fdcfe057753f68e8cb135be5459374cba73
+size 816

saved_model/9/cover.jpg ADDED
Git LFS Details

saved_model/9/model.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:20b38cc55191ec02c2809e80d758ff0d56bd44760841704feb9921aa58a4d9de
+size 203264375
saved_model/info.json CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:33b7a2042589990eb609c4e87044b7d5d6d80da206c88f54b70175ce0d2a535c
+size 1616
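All of the saved_model files above are Git LFS pointers: the repository stores only the version/oid/size triplet, and the actual object lives in LFS storage. A small sketch for checking that a downloaded object matches its pointer; the helper names and file paths are illustrative.

import hashlib

def lfs_pointer_fields(path):
    # pointer files are tiny "key value" lines, e.g. "oid sha256:..."
    with open(path, "r", encoding="utf-8") as f:
        return dict(line.strip().split(" ", 1) for line in f if " " in line)

def sha256_of(path):
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest()

# fields = lfs_pointer_fields("pointer.txt")                 # placeholder path
# assert fields["oid"] == "sha256:" + sha256_of("model.pth")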