Spaces:
Build error
Build error
update demo
Browse files
README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
---
|
2 |
title: Musika
|
3 |
-
emoji:
|
4 |
colorFrom: purple
|
5 |
colorTo: blue
|
6 |
sdk: gradio
|
|
|
1 |
---
|
2 |
title: Musika
|
3 |
+
emoji: 🎵
|
4 |
colorFrom: purple
|
5 |
colorTo: blue
|
6 |
sdk: gradio
|
utils.py
CHANGED
@@ -34,11 +34,7 @@ class Utils_functions:
|
|
34 |
)
|
35 |
mel_f = tf.convert_to_tensor(librosa.mel_frequencies(n_mels=args.mel_bins + 2, fmin=0.0, fmax=args.sr // 2))
|
36 |
enorm = tf.cast(
|
37 |
-
tf.expand_dims(
|
38 |
-
tf.constant(2.0 / (mel_f[2 : args.mel_bins + 2] - mel_f[: args.mel_bins])),
|
39 |
-
0,
|
40 |
-
),
|
41 |
-
tf.float32,
|
42 |
)
|
43 |
melmat = tf.multiply(melmat, enorm)
|
44 |
melmat = tf.divide(melmat, tf.reduce_sum(melmat, axis=0))
|
@@ -165,8 +161,9 @@ class Utils_functions:
|
|
165 |
outls.append(model(x[i * bs : i * bs + bs], training=False))
|
166 |
|
167 |
if dual_out:
|
168 |
-
return
|
169 |
-
[outls[k][
|
|
|
170 |
)
|
171 |
else:
|
172 |
return np.concatenate(outls, 0)
|
@@ -199,8 +196,9 @@ class Utils_functions:
|
|
199 |
inp = tf.concat(inpls, 0)
|
200 |
res = model(inp, training=False)
|
201 |
outls.append(res)
|
202 |
-
return
|
203 |
-
[outls[k][
|
|
|
204 |
)
|
205 |
|
206 |
def distribute_dec2(self, x, model, bs=64):
|
@@ -228,17 +226,7 @@ class Utils_functions:
|
|
228 |
return tf.image.random_crop(noisetot, [1, self.args.latlen, 64 + 64])
|
229 |
|
230 |
def generate_example_stereo(self, models_ls):
|
231 |
-
(
|
232 |
-
critic,
|
233 |
-
gen,
|
234 |
-
enc,
|
235 |
-
dec,
|
236 |
-
enc2,
|
237 |
-
dec2,
|
238 |
-
critic_rec,
|
239 |
-
gen_ema,
|
240 |
-
[opt_dec, opt_disc],
|
241 |
-
) = models_ls
|
242 |
abb = gen_ema(self.get_noise_interp(), training=False)
|
243 |
abbls = tf.split(abb, abb.shape[-2] // 16, -2)
|
244 |
abb = tf.concat(abbls, 0)
|
@@ -247,13 +235,7 @@ class Utils_functions:
|
|
247 |
for channel in range(2):
|
248 |
|
249 |
ab = self.distribute_dec2(
|
250 |
-
abb[
|
251 |
-
:,
|
252 |
-
:,
|
253 |
-
:,
|
254 |
-
channel * self.args.latdepth : channel * self.args.latdepth + self.args.latdepth,
|
255 |
-
],
|
256 |
-
dec2,
|
257 |
)
|
258 |
abls = tf.split(ab, ab.shape[-2] // self.args.shape, -2)
|
259 |
ab = tf.concat(abls, 0)
|
@@ -291,28 +273,14 @@ class Utils_functions:
|
|
291 |
|
292 |
fig, axs = plt.subplots(nrows=4, ncols=1, figsize=(20, 20))
|
293 |
axs[0].imshow(
|
294 |
-
np.flip(
|
295 |
-
np.array(
|
296 |
-
tf.transpose(
|
297 |
-
self.wv2spec_hop((abwv[:, 0] + abwv[:, 1]) / 2.0, 80.0, 256),
|
298 |
-
[1, 0],
|
299 |
-
)
|
300 |
-
),
|
301 |
-
-2,
|
302 |
-
),
|
303 |
cmap=None,
|
304 |
)
|
305 |
axs[0].axis("off")
|
306 |
axs[0].set_title("Generated1")
|
307 |
axs[1].imshow(
|
308 |
np.flip(
|
309 |
-
np.array(
|
310 |
-
tf.transpose(
|
311 |
-
self.wv2spec_hop((abwv2[:, 0] + abwv2[:, 1]) / 2.0, 80.0, 256),
|
312 |
-
[1, 0],
|
313 |
-
)
|
314 |
-
),
|
315 |
-
-2,
|
316 |
),
|
317 |
cmap=None,
|
318 |
)
|
@@ -320,13 +288,7 @@ class Utils_functions:
|
|
320 |
axs[1].set_title("Generated2")
|
321 |
axs[2].imshow(
|
322 |
np.flip(
|
323 |
-
np.array(
|
324 |
-
tf.transpose(
|
325 |
-
self.wv2spec_hop((abwv3[:, 0] + abwv3[:, 1]) / 2.0, 80.0, 256),
|
326 |
-
[1, 0],
|
327 |
-
)
|
328 |
-
),
|
329 |
-
-2,
|
330 |
),
|
331 |
cmap=None,
|
332 |
)
|
@@ -334,13 +296,7 @@ class Utils_functions:
|
|
334 |
axs[2].set_title("Generated3")
|
335 |
axs[3].imshow(
|
336 |
np.flip(
|
337 |
-
np.array(
|
338 |
-
tf.transpose(
|
339 |
-
self.wv2spec_hop((abwv4[:, 0] + abwv4[:, 1]) / 2.0, 80.0, 256),
|
340 |
-
[1, 0],
|
341 |
-
)
|
342 |
-
),
|
343 |
-
-2,
|
344 |
),
|
345 |
cmap=None,
|
346 |
)
|
@@ -351,26 +307,9 @@ class Utils_functions:
|
|
351 |
|
352 |
# Save in training loop
|
353 |
def save_end(
|
354 |
-
self,
|
355 |
-
epoch,
|
356 |
-
gloss,
|
357 |
-
closs,
|
358 |
-
mloss,
|
359 |
-
models_ls=None,
|
360 |
-
n_save=3,
|
361 |
-
save_path="checkpoints",
|
362 |
):
|
363 |
-
(
|
364 |
-
critic,
|
365 |
-
gen,
|
366 |
-
enc,
|
367 |
-
dec,
|
368 |
-
enc2,
|
369 |
-
dec2,
|
370 |
-
critic_rec,
|
371 |
-
gen_ema,
|
372 |
-
[opt_dec, opt_disc],
|
373 |
-
) = models_ls
|
374 |
if epoch % n_save == 0:
|
375 |
print("Saving...")
|
376 |
path = f"{save_path}/MUSIKA!_-{str(gloss)[:9]}-{str(closs)[:9]}-{str(mloss)[:9]}"
|
@@ -502,7 +441,7 @@ class Utils_functions:
|
|
502 |
)
|
503 |
|
504 |
def render_gradio(self, models_ls_techno, models_ls_classical, train=True):
|
505 |
-
article_text = "Original work by Marco Pasini ([Twitter](https://twitter.com/marco_ppasini)) at Johannes Kepler Universität Linz.
|
506 |
|
507 |
def gradio_func(x, y, z):
|
508 |
return self.stfunc(x, y, z, models_ls_techno, models_ls_classical)
|
@@ -514,13 +453,10 @@ class Utils_functions:
|
|
514 |
choices=["Techno/Experimental", "Classical"],
|
515 |
type="index",
|
516 |
default="Classical",
|
517 |
-
label="Music Genre to Generate
|
518 |
),
|
519 |
gr.inputs.Radio(
|
520 |
-
choices=["
|
521 |
-
type="index",
|
522 |
-
default="115 s",
|
523 |
-
label="Generated Music Length",
|
524 |
),
|
525 |
gr.inputs.Slider(
|
526 |
minimum=0,
|
@@ -536,7 +472,7 @@ class Utils_functions:
|
|
536 |
],
|
537 |
allow_screenshot=False,
|
538 |
title="musika!",
|
539 |
-
description="Blazingly Fast Stereo Waveform Music Generation of Arbitrary Length",
|
540 |
article=article_text,
|
541 |
layout="vertical",
|
542 |
theme="huggingface",
|
@@ -551,7 +487,7 @@ class Utils_functions:
|
|
551 |
if train:
|
552 |
iface.launch(prevent_thread_lock=True)
|
553 |
else:
|
554 |
-
iface.launch()
|
555 |
# iface.launch(share=True, enable_queue=True)
|
556 |
print("--------------------------------")
|
557 |
print("--------------------------------")
|
|
|
34 |
)
|
35 |
mel_f = tf.convert_to_tensor(librosa.mel_frequencies(n_mels=args.mel_bins + 2, fmin=0.0, fmax=args.sr // 2))
|
36 |
enorm = tf.cast(
|
37 |
+
tf.expand_dims(tf.constant(2.0 / (mel_f[2 : args.mel_bins + 2] - mel_f[: args.mel_bins])), 0,), tf.float32,
|
|
|
|
|
|
|
|
|
38 |
)
|
39 |
melmat = tf.multiply(melmat, enorm)
|
40 |
melmat = tf.divide(melmat, tf.reduce_sum(melmat, axis=0))
|
|
|
161 |
outls.append(model(x[i * bs : i * bs + bs], training=False))
|
162 |
|
163 |
if dual_out:
|
164 |
+
return (
|
165 |
+
np.concatenate([outls[k][0] for k in range(len(outls))], 0),
|
166 |
+
np.concatenate([outls[k][1] for k in range(len(outls))], 0),
|
167 |
)
|
168 |
else:
|
169 |
return np.concatenate(outls, 0)
|
|
|
196 |
inp = tf.concat(inpls, 0)
|
197 |
res = model(inp, training=False)
|
198 |
outls.append(res)
|
199 |
+
return (
|
200 |
+
np.concatenate([outls[k][0] for k in range(len(outls))], 0),
|
201 |
+
np.concatenate([outls[k][1] for k in range(len(outls))], 0),
|
202 |
)
|
203 |
|
204 |
def distribute_dec2(self, x, model, bs=64):
|
|
|
226 |
return tf.image.random_crop(noisetot, [1, self.args.latlen, 64 + 64])
|
227 |
|
228 |
def generate_example_stereo(self, models_ls):
|
229 |
+
(critic, gen, enc, dec, enc2, dec2, critic_rec, gen_ema, [opt_dec, opt_disc],) = models_ls
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
230 |
abb = gen_ema(self.get_noise_interp(), training=False)
|
231 |
abbls = tf.split(abb, abb.shape[-2] // 16, -2)
|
232 |
abb = tf.concat(abbls, 0)
|
|
|
235 |
for channel in range(2):
|
236 |
|
237 |
ab = self.distribute_dec2(
|
238 |
+
abb[:, :, :, channel * self.args.latdepth : channel * self.args.latdepth + self.args.latdepth,], dec2,
|
|
|
|
|
|
|
|
|
|
|
|
|
239 |
)
|
240 |
abls = tf.split(ab, ab.shape[-2] // self.args.shape, -2)
|
241 |
ab = tf.concat(abls, 0)
|
|
|
273 |
|
274 |
fig, axs = plt.subplots(nrows=4, ncols=1, figsize=(20, 20))
|
275 |
axs[0].imshow(
|
276 |
+
np.flip(np.array(tf.transpose(self.wv2spec_hop((abwv[:, 0] + abwv[:, 1]) / 2.0, 80.0, 256), [1, 0],)), -2,),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
277 |
cmap=None,
|
278 |
)
|
279 |
axs[0].axis("off")
|
280 |
axs[0].set_title("Generated1")
|
281 |
axs[1].imshow(
|
282 |
np.flip(
|
283 |
+
np.array(tf.transpose(self.wv2spec_hop((abwv2[:, 0] + abwv2[:, 1]) / 2.0, 80.0, 256), [1, 0],)), -2,
|
|
|
|
|
|
|
|
|
|
|
|
|
284 |
),
|
285 |
cmap=None,
|
286 |
)
|
|
|
288 |
axs[1].set_title("Generated2")
|
289 |
axs[2].imshow(
|
290 |
np.flip(
|
291 |
+
np.array(tf.transpose(self.wv2spec_hop((abwv3[:, 0] + abwv3[:, 1]) / 2.0, 80.0, 256), [1, 0],)), -2,
|
|
|
|
|
|
|
|
|
|
|
|
|
292 |
),
|
293 |
cmap=None,
|
294 |
)
|
|
|
296 |
axs[2].set_title("Generated3")
|
297 |
axs[3].imshow(
|
298 |
np.flip(
|
299 |
+
np.array(tf.transpose(self.wv2spec_hop((abwv4[:, 0] + abwv4[:, 1]) / 2.0, 80.0, 256), [1, 0],)), -2,
|
|
|
|
|
|
|
|
|
|
|
|
|
300 |
),
|
301 |
cmap=None,
|
302 |
)
|
|
|
307 |
|
308 |
# Save in training loop
|
309 |
def save_end(
|
310 |
+
self, epoch, gloss, closs, mloss, models_ls=None, n_save=3, save_path="checkpoints",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
311 |
):
|
312 |
+
(critic, gen, enc, dec, enc2, dec2, critic_rec, gen_ema, [opt_dec, opt_disc],) = models_ls
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
313 |
if epoch % n_save == 0:
|
314 |
print("Saving...")
|
315 |
path = f"{save_path}/MUSIKA!_-{str(gloss)[:9]}-{str(closs)[:9]}-{str(mloss)[:9]}"
|
|
|
441 |
)
|
442 |
|
443 |
def render_gradio(self, models_ls_techno, models_ls_classical, train=True):
|
444 |
+
article_text = "Original work by Marco Pasini ([Twitter](https://twitter.com/marco_ppasini)) and Jan Schlüter at Johannes Kepler Universität Linz."
|
445 |
|
446 |
def gradio_func(x, y, z):
|
447 |
return self.stfunc(x, y, z, models_ls_techno, models_ls_classical)
|
|
|
453 |
choices=["Techno/Experimental", "Classical"],
|
454 |
type="index",
|
455 |
default="Classical",
|
456 |
+
label="Music Genre to Generate",
|
457 |
),
|
458 |
gr.inputs.Radio(
|
459 |
+
choices=["23s", "1m 58s", "3m 57s"], type="index", default="1m 58s", label="Generated Music Length",
|
|
|
|
|
|
|
460 |
),
|
461 |
gr.inputs.Slider(
|
462 |
minimum=0,
|
|
|
472 |
],
|
473 |
allow_screenshot=False,
|
474 |
title="musika!",
|
475 |
+
description="Blazingly Fast Stereo Waveform Music Generation of Arbitrary Length. Be patient and enjoy the weirdness!",
|
476 |
article=article_text,
|
477 |
layout="vertical",
|
478 |
theme="huggingface",
|
|
|
487 |
if train:
|
488 |
iface.launch(prevent_thread_lock=True)
|
489 |
else:
|
490 |
+
iface.launch(enable_queue=True)
|
491 |
# iface.launch(share=True, enable_queue=True)
|
492 |
print("--------------------------------")
|
493 |
print("--------------------------------")
|