fffiloni committed
Commit b2107ed
1 Parent(s): 97fe401

Update app.py

Files changed (1): app.py (+20 −6)
app.py CHANGED
@@ -9,6 +9,8 @@ LICENSE file in the root directory of this source tree.
 from tempfile import NamedTemporaryFile
 import torch
 import gradio as gr
+from scipy.io.wavfile import write
+
 from audiocraft.models import MusicGen
 
 from audiocraft.data.audio import audio_write
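The new import matters because `scipy.io.wavfile.write` takes `(filename, rate, data)`, the same order as the `(sample_rate, data)` tuple Gradio produces for `type="numpy"` audio, which is why `split_process` below can forward `audio[0]` and `audio[1]` positionally. A minimal sketch of that signature (file name and tone are arbitrary):

```python
import numpy as np
from scipy.io.wavfile import write

# One second of a quiet 440 Hz sine at 44.1 kHz, saved the same way
# split_process saves the recording: write(filename, rate, data).
sr = 44100
tone = (0.1 * np.sin(2 * np.pi * 440 * np.arange(sr) / sr)).astype(np.float32)
write("tone.wav", sr, tone)
```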
@@ -16,16 +18,20 @@ from audiocraft.data.audio import audio_write
 
 MODEL = None
 
-img_to_text = gr.load(name="spaces/fffiloni/CLIP-Interrogator-2")
-
-
+def split_process(audio):
+    os.makedirs("out", exist_ok=True)
+    write('test.wav', audio[0], audio[1])
+    os.system("python3 -m demucs.separate -n mdx_extra_q -d cpu test.wav -o out")
+    return "./out/mdx_extra_q/test/vocals.wav","./out/mdx_extra_q/test/bass.wav",\
+        "./out/mdx_extra_q/test/drums.wav","./out/mdx_extra_q/test/other.wav"
+
 def load_model(version):
     print("Loading model", version)
     return MusicGen.get_pretrained(version)
 
 
 def predict(uploaded_image, melody, duration):
-    text = img_to_text(uploaded_image, 'best', 4, fn_index=1)[0]
+    text = remplacer
     global MODEL
     topk = int(250)
     if MODEL is None or MODEL.name != "melody":
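The added `split_process` writes the recording to `test.wav` and shells out to Demucs (the `mdx_extra_q` model, on CPU) for four-stem separation; Demucs places stems under `out/<model>/<track>/<stem>.wav`, which the hard-coded return paths rely on. Two caveats: the hunk adds no `import os`, so the function assumes `os` is already imported elsewhere in app.py, and the new `text = remplacer` line references an undefined name ("remplacer" is French for "to replace"), so `predict` will raise a `NameError` until a real prompt source is wired in. A minimal standalone sketch of the separation step under those assumptions, using `subprocess.run` so a failed Demucs run raises instead of passing silently:

```python
import os
import subprocess

from scipy.io.wavfile import write


def split_process(audio):
    """Split a Gradio (sample_rate, data) tuple into four stem paths."""
    os.makedirs("out", exist_ok=True)
    write("test.wav", audio[0], audio[1])
    # Demucs writes stems to out/<model_name>/<track_name>/<stem>.wav.
    subprocess.run(
        ["python3", "-m", "demucs.separate", "-n", "mdx_extra_q",
         "-d", "cpu", "test.wav", "-o", "out"],
        check=True,  # raise if separation fails, unlike os.system
    )
    stem_dir = os.path.join("out", "mdx_extra_q", "test")
    return tuple(os.path.join(stem_dir, f"{stem}.wav")
                 for stem in ("vocals", "bass", "drums", "other"))
```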
@@ -67,9 +73,9 @@ def predict(uploaded_image, melody, duration):
 with gr.Blocks() as demo:
     gr.Markdown(
         """
-        # Image to MusicGen
+        # Split to MusicGen
 
-        This is the demo by @fffiloni for Image to [MusicGen](https://github.com/facebookresearch/audiocraft), a simple and controllable model for music generation
+        This is the demo by @fffiloni for Split to [MusicGen](https://github.com/facebookresearch/audiocraft), a simple and controllable model for music generation
         presented at: ["Simple and Controllable Music Generation"](https://huggingface.co/papers/2306.05284), using Clip Interrogator to get an image description as init text.
         <br/>
         <a href="https://huggingface.co/spaces/musicgen/MusicGen?duplicate=true" style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank">
@@ -79,6 +85,13 @@ with gr.Blocks() as demo:
     )
     with gr.Row():
         with gr.Column():
+            with gr.Row():
+                uploaded_sound = gr.Audio(type="numpy", label="Input", source="microphone")
+                load_sound_btn = gr.Button('Load sound')
+                split_vocals = gr.Audio(type="filepath", label="Vocals")
+                #split_bass = gr.Audio(type="filepath", label="Bass")
+                #split_drums = gr.Audio(type="filepath", label="Drums")
+                #split_others = gr.Audio(type="filepath", label="Other")
             with gr.Row():
                 uploaded_image = gr.Image(label="Input Image", interactive=True, source="upload", type="filepath")
                 melody = gr.Audio(source="upload", type="numpy", label="Melody Condition (optional)", interactive=True)
@@ -95,6 +108,7 @@ with gr.Blocks() as demo:
                 # cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.0, interactive=True)
         with gr.Column():
             output = gr.Audio(label="Generated Music")
+    load_sound_btn.click(split_process, inputs=[uploaded_sound], outputs=[split_vocals])
     submit.click(predict, inputs=[uploaded_image, melody, duration], outputs=[output])
 
     gr.Markdown(
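Note that `split_process` returns four paths while the click handler lists only `split_vocals` as an output (the other stem components are commented out). If the extra return values cause an output mismatch in your Gradio version, a thin wrapper (hypothetical name `split_vocals_only`) keeps the arity aligned with the single wired output:

```python
def split_vocals_only(audio):
    # Keep only the vocals path so the return arity matches the single
    # output component wired to the button.
    vocals, _bass, _drums, _other = split_process(audio)
    return vocals

# load_sound_btn.click(split_vocals_only, inputs=[uploaded_sound], outputs=[split_vocals])
```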
 