fffiloni committed
Commit b2107ed
1 Parent(s): 97fe401

Update app.py

Files changed (1): app.py (+20 −6)
app.py CHANGED
@@ -9,6 +9,8 @@ LICENSE file in the root directory of this source tree.
 from tempfile import NamedTemporaryFile
 import torch
 import gradio as gr
+from scipy.io.wavfile import write
+
 from audiocraft.models import MusicGen
 
 from audiocraft.data.audio import audio_write
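The new import matters because `scipy.io.wavfile.write` takes `(filename, rate, data)`, the same order as the `(sample_rate, data)` tuple Gradio produces for `type="numpy"` audio, which is why `split_process` below can forward `audio[0]` and `audio[1]` positionally. A minimal sketch of that signature (file name and tone are arbitrary):

```python
import numpy as np
from scipy.io.wavfile import write

# One second of a quiet 440 Hz sine at 44.1 kHz, saved the same way
# split_process saves the recording: write(filename, rate, data).
sr = 44100
tone = (0.1 * np.sin(2 * np.pi * 440 * np.arange(sr) / sr)).astype(np.float32)
write("tone.wav", sr, tone)
```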
@@ -16,16 +18,20 @@ from audiocraft.data.audio import audio_write
 
 MODEL = None
 
-img_to_text = gr.load(name="spaces/fffiloni/CLIP-Interrogator-2")
-
-
+def split_process(audio):
+    os.makedirs("out", exist_ok=True)
+    write('test.wav', audio[0], audio[1])
+    os.system("python3 -m demucs.separate -n mdx_extra_q -d cpu test.wav -o out")
+    return "./out/mdx_extra_q/test/vocals.wav","./out/mdx_extra_q/test/bass.wav",\
+        "./out/mdx_extra_q/test/drums.wav","./out/mdx_extra_q/test/other.wav"
+
 def load_model(version):
     print("Loading model", version)
     return MusicGen.get_pretrained(version)
 
 
 def predict(uploaded_image, melody, duration):
-    text = img_to_text(uploaded_image, 'best', 4, fn_index=1)[0]
+    text = remplacer
     global MODEL
     topk = int(250)
     if MODEL is None or MODEL.name != "melody":
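The added `split_process` writes the recording to `test.wav` and shells out to Demucs (the `mdx_extra_q` model, on CPU) for four-stem separation; Demucs places stems under `out/<model>/<track>/<stem>.wav`, which the hard-coded return paths rely on. Two caveats: the hunk adds no `import os`, so the function assumes `os` is already imported elsewhere in app.py, and the new `text = remplacer` line references an undefined name ("remplacer" is French for "to replace"), so `predict` will raise a `NameError` until a real prompt source is wired in. A minimal standalone sketch of the separation step under those assumptions, using `subprocess.run` so a failed Demucs run raises instead of passing silently:

```python
import os
import subprocess

from scipy.io.wavfile import write


def split_process(audio):
    """Split a Gradio (sample_rate, data) tuple into four stem paths."""
    os.makedirs("out", exist_ok=True)
    write("test.wav", audio[0], audio[1])
    # Demucs writes stems to out/<model_name>/<track_name>/<stem>.wav.
    subprocess.run(
        ["python3", "-m", "demucs.separate", "-n", "mdx_extra_q",
         "-d", "cpu", "test.wav", "-o", "out"],
        check=True,  # raise if separation fails, unlike os.system
    )
    stem_dir = os.path.join("out", "mdx_extra_q", "test")
    return tuple(os.path.join(stem_dir, f"{stem}.wav")
                 for stem in ("vocals", "bass", "drums", "other"))
```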
@@ -67,9 +73,9 @@ def predict(uploaded_image, melody, duration):
 with gr.Blocks() as demo:
     gr.Markdown(
         """
-        # Image to MusicGen
+        # Split to MusicGen
 
-        This is the demo by @fffiloni for Image to [MusicGen](https://github.com/facebookresearch/audiocraft), a simple and controllable model for music generation
+        This is the demo by @fffiloni for Split to [MusicGen](https://github.com/facebookresearch/audiocraft), a simple and controllable model for music generation
         presented at: ["Simple and Controllable Music Generation"](https://huggingface.co/papers/2306.05284), using Clip Interrogator to get an image description as init text.
         <br/>
         <a href="https://huggingface.co/spaces/musicgen/MusicGen?duplicate=true" style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank">
@@ -79,6 +85,13 @@ with gr.Blocks() as demo:
     )
     with gr.Row():
         with gr.Column():
+            with gr.Row():
+                uploaded_sound = gr.Audio(type="numpy", label="Input", source="microphone")
+                load_sound_btn = gr.Button('Load sound')
+                split_vocals = gr.Audio(type="filepath", label="Vocals")
+                #split_bass = gr.Audio(type="filepath", label="Bass")
+                #split_drums = gr.Audio(type="filepath", label="Drums")
+                #split_others = gr.Audio(type="filepath", label="Other")
             with gr.Row():
                 uploaded_image = gr.Image(label="Input Image", interactive=True, source="upload", type="filepath")
                 melody = gr.Audio(source="upload", type="numpy", label="Melody Condition (optional)", interactive=True)
@@ -95,6 +108,7 @@ with gr.Blocks() as demo:
                 # cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.0, interactive=True)
         with gr.Column():
             output = gr.Audio(label="Generated Music")
+    load_sound_btn.click(split_process, inputs=[uploaded_sound], outputs=[split_vocals])
     submit.click(predict, inputs=[uploaded_image, melody, duration], outputs=[output])
 
     gr.Markdown(
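Note that `split_process` returns four paths while the click handler lists only `split_vocals` as an output (the other stem components are commented out). If the extra return values cause an output mismatch in your Gradio version, a thin wrapper (hypothetical name `split_vocals_only`) keeps the arity aligned with the single wired output:

```python
def split_vocals_only(audio):
    # Keep only the vocals path so the return arity matches the single
    # output component wired to the button.
    vocals, _bass, _drums, _other = split_process(audio)
    return vocals

# load_sound_btn.click(split_vocals_only, inputs=[uploaded_sound], outputs=[split_vocals])
```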
 