fffiloni committed on
Commit 3e7eefe • 1 Parent(s): a169bce

added video visualizer export

Files changed (1): app.py (+63, -19)
app.py CHANGED
@@ -8,6 +8,7 @@ from gradio_client import Client
 
 client = Client("https://fffiloni-test-llama-api.hf.space/", hf_token=hf_token)
 lyrics_client = Client("https://fffiloni-music-to-lyrics.hf.space/")
+visualizer_client = Client("https://fffiloni-animated-audio-visualizer.hf.space/")
 
 from share_btn import community_icon_html, loading_icon_html, share_js
 
@@ -60,23 +61,31 @@ def get_text_after_colon(input_text):
 
 
 def solo_xd(prompt):
-    # ———
-    print("""———
-    Calling SD-XL for another image...
-    """)
-    prompt = prompt
-    conditioning, pooled = compel(prompt)
-    images = pipe(prompt_embeds=conditioning, pooled_prompt_embeds=pooled).images[0]
-
-    print("Finished")
+    images = pipe(prompt=prompt).images[0]
     return images
 
+def get_visualizer_video(audio_in, image_in, song_title):
+
+    title = f"""{song_title.upper()}\nMusic-to-Image demo by @fffiloni | HuggingFace
+    """
+
+    visualizer_video = visualizer_client.predict(
+        title,     # str in 'title' Textbox component
+        audio_in,  # str (filepath or URL to file) in 'audio_in' Audio component
+        image_in,  # str (filepath or URL to image) in 'image_in' Image component
+        api_name="/predict"
+    )
+
+    return visualizer_video[0]
+
 def infer(audio_file, has_lyrics):
     print("NEW INFERENCE ...")
-
+    gr.Info('Truncating your audio to the first 30 seconds')
     truncated_audio = cut_audio(audio_file, "trunc_audio.mp3")
+    processed_audio = truncated_audio
 
     print("Calling LP Music Caps...")
+    gr.Info('Calling LP Music Caps...')
     cap_result = lpmc_client(
         truncated_audio,  # str (filepath or URL to file) in 'audio_path' Audio component
         api_name="predict"
@@ -87,6 +96,7 @@ def infer(audio_file, has_lyrics):
     print("""———
     Getting Lyrics ...
     """)
+    gr.Info("Getting Lyrics ...")
     lyrics_result = lyrics_client.predict(
         audio_file,  # str (filepath or URL to file) in 'Song input' Audio component
         fn_index=0
@@ -123,6 +133,7 @@ def infer(audio_file, has_lyrics):
     print("""———
     Calling Llama2 ...
     """)
+    gr.Info("Calling Llama2 ...")
     result = client.predict(
         llama_q,  # str in 'Message' Textbox component
         api_name="/predict"
@@ -132,18 +143,32 @@ def infer(audio_file, has_lyrics):
 
     print(f"Llama2 result: {result}")
 
+    gr.Info("Prompt Optimization ...")
+    get_shorter_prompt = f"""
+    From this image description, please provide a short but efficient summary for a good Stable Diffusion prompt:
+    '{result}'
+    """
+
+    shorten = client.predict(
+        get_shorter_prompt,  # str in 'Message' Textbox component
+        api_name="/predict"
+    )
+
+    print(f'SHORTEN PROMPT: {shorten}')
+
     # ———
     print("""———
     Calling SD-XL ...
     """)
+    gr.Info('Calling SD-XL ...')
-    prompt = result
+    prompt = shorten
     conditioning, pooled = compel(prompt)
     images = pipe(prompt_embeds=conditioning, pooled_prompt_embeds=pooled).images[0]
 
     print("Finished")
 
     #return cap_result, result, images
-    return images, result, gr.update(visible=True), gr.Group.update(visible=True)
+    return processed_audio, images, result, gr.update(visible=True), gr.Group.update(visible=True)
 
 css = """
 #col-container {max-width: 780px; margin-left: auto; margin-right: auto;}
@@ -245,25 +270,43 @@ with gr.Blocks(css=css) as demo:
         </div>""")
 
         audio_input = gr.Audio(label="Music input", type="filepath", source="upload")
+
         with gr.Row():
             has_lyrics = gr.Radio(label="Does your audio has lyrics ?", choices=["Yes", "No"], value="No", info="If yes, the image should reflect the lyrics, but be aware that because we add a step (getting lyrics), inference will take more time.")
-            song_title = gr.Textbox(label="Song Title", value="Title: ", interactive=True, info="If you want to share your result, please provide the title of your audio sample :)", elem_id="song-title")
+            song_title = gr.Textbox(label="Song Title", placeholder="Title: ", interactive=True, info="If you want to share your result, please provide the title of your audio sample :)", elem_id="song-title")
+
         infer_btn = gr.Button("Generate Image from Music")
         #lpmc_cap = gr.Textbox(label="Lp Music Caps caption")
+
+        with gr.Group():
+
+            with gr.Row():
+
+                llama_trans_cap = gr.Textbox(label="Llama Image Suggestion", placeholder="Llama2 image prompt suggestion will be displayed here ;)", visible=True, lines=12, max_lines=18, elem_id="llama-prompt")
+
+                with gr.Tab("Image Result"):
+                    img_result = gr.Image(label="Image Result", elem_id="image-out", interactive=False, type="filepath")
+
+                with gr.Tab("Video visualizer"):
+
+                    with gr.Column():
+                        processed_audio = gr.Audio(type="filepath", visible=False)
+                        visualizer_video = gr.Video(label="Video visualizer output")
+                        get_visualizer_vid = gr.Button("Export as video !")
+
         with gr.Row():
-            llama_trans_cap = gr.Textbox(label="Llama Image Suggestion", placeholder="Llama2 image prompt suggestion will be displayed here ;)", visible=True, lines=12, elem_id="llama-prompt")
-            img_result = gr.Image(label="Image Result", elem_id="image-out")
-        with gr.Row():
+
             tryagain_btn = gr.Button("Try another image ?", visible=False)
+
             with gr.Group(elem_id="share-btn-container", visible=False) as share_group:
                 community_icon = gr.HTML(community_icon_html)
                 loading_icon = gr.HTML(loading_icon_html)
                 share_button = gr.Button("Share to community", elem_id="share-btn")
-
+
         gr.Examples(examples=[["./examples/electronic.mp3", "No"],["./examples/folk.wav", "No"], ["./examples/orchestra.wav", "No"]],
                     fn=infer,
                     inputs=[audio_input, has_lyrics],
-                    outputs=[img_result, llama_trans_cap, tryagain_btn, share_group],
+                    outputs=[processed_audio, img_result, llama_trans_cap, tryagain_btn, share_group],
                     cache_examples=True
                     )
 
@@ -286,8 +329,9 @@ with gr.Blocks(css=css) as demo:
     """)
 
     #infer_btn.click(fn=infer, inputs=[audio_input], outputs=[lpmc_cap, llama_trans_cap, img_result])
-    infer_btn.click(fn=infer, inputs=[audio_input, has_lyrics], outputs=[img_result, llama_trans_cap, tryagain_btn, share_group])
+    infer_btn.click(fn=infer, inputs=[audio_input, has_lyrics], outputs=[processed_audio, img_result, llama_trans_cap, tryagain_btn, share_group])
     share_button.click(None, [], [], _js=share_js)
    tryagain_btn.click(fn=solo_xd, inputs=[llama_trans_cap], outputs=[img_result])
+    get_visualizer_vid.click(fn=get_visualizer_video, inputs=[processed_audio, img_result, song_title], outputs=[visualizer_video], queue=False)
 
 demo.queue(max_size=20).launch()
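
The export added by this commit boils down to one extra gradio_client call: the truncated audio, the generated image, and the song title are forwarded to the animated-audio-visualizer Space, which renders them into a video. Below is a minimal standalone sketch of that call, assuming the Space keeps the (title, audio, image) → video signature used by get_visualizer_video above; the file paths and the export_visualizer_video helper name are hypothetical placeholders, not part of app.py.

# Minimal sketch of the new export path, outside the Gradio UI.
# Assumes the animated-audio-visualizer Space exposes /predict with the
# (title, audio_in, image_in) signature shown in the diff above.
from gradio_client import Client

visualizer_client = Client("https://fffiloni-animated-audio-visualizer.hf.space/")

def export_visualizer_video(audio_path, image_path, song_title):
    # Same overlay title layout as get_visualizer_video() in app.py
    title = f"{song_title.upper()}\nMusic-to-Image demo by @fffiloni | HuggingFace"
    result = visualizer_client.predict(
        title,       # str: title text drawn on the video
        audio_path,  # str: filepath to the (truncated) audio
        image_path,  # str: filepath to the generated image
        api_name="/predict"
    )
    # The Space returns a tuple; its first element is the video filepath
    return result[0]

if __name__ == "__main__":
    # Hypothetical local files, standing in for infer()'s outputs
    video = export_visualizer_video("trunc_audio.mp3", "generated_image.png", "My Song")
    print(f"Visualizer video saved at: {video}")

Note that app.py wires this through get_visualizer_vid.click(..., queue=False), so the export call bypasses the demo's request queue instead of waiting behind queued inference jobs.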