added video visualizer export
app.py
CHANGED
@@ -8,6 +8,7 @@ from gradio_client import Client
 
 client = Client("https://fffiloni-test-llama-api.hf.space/", hf_token=hf_token)
 lyrics_client = Client("https://fffiloni-music-to-lyrics.hf.space/")
+visualizer_client = Client("https://fffiloni-animated-audio-visualizer.hf.space/")
 
 from share_btn import community_icon_html, loading_icon_html, share_js
 
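Note: the new visualizer_client follows the same pattern as the existing client and lyrics_client: gradio_client.Client wraps a remote Space and exposes its endpoints through predict(). A minimal sketch of a call against the newly added Space (argument roles taken from the inline comments later in this diff; the example values are placeholders):

    from gradio_client import Client

    # Connect once at startup; predict() then performs a blocking call
    # against the endpoint registered under api_name in the remote app.
    visualizer_client = Client("https://fffiloni-animated-audio-visualizer.hf.space/")

    result = visualizer_client.predict(
        "MY SONG TITLE",  # title text rendered on the video
        "song.mp3",       # filepath or URL for the audio input
        "cover.png",      # filepath or URL for the image input
        api_name="/predict"
    )
    print(result)  # filepath(s) of the generated video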
@@ -60,23 +61,31 @@ def get_text_after_colon(input_text):
 
 
 def solo_xd(prompt):
-
-    print("""———
-    Calling SD-XL for another image...
-    """)
-    prompt = prompt
-    conditioning, pooled = compel(prompt)
-    images = pipe(prompt_embeds=conditioning, pooled_prompt_embeds=pooled).images[0]
-
-    print("Finished")
+    images = pipe(prompt=prompt).images[0]
     return images
 
+def get_visualizer_video(audio_in, image_in, song_title):
+
+    title = f"""{song_title.upper()}\nMusic-to-Image demo by @fffiloni | HuggingFace
+    """
+
+    visualizer_video = visualizer_client.predict(
+        title,  # str in 'title' Textbox component
+        audio_in,  # str (filepath or URL to file) in 'audio_in' Audio component
+        image_in,  # str (filepath or URL to image) in 'image_in' Image component
+        api_name="/predict"
+    )
+
+    return visualizer_video[0]
+
 def infer(audio_file, has_lyrics):
     print("NEW INFERENCE ...")
-
+    gr.Info('Truncating your audio to the first 30 seconds')
     truncated_audio = cut_audio(audio_file, "trunc_audio.mp3")
+    processed_audio = truncated_audio
 
     print("Calling LP Music Caps...")
+    gr.Info('Calling LP Music Caps...')
     cap_result = lpmc_client(
         truncated_audio,  # str (filepath or URL to file) in 'audio_path' Audio component
         api_name="predict"
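Note: cut_audio is defined earlier in app.py and is not part of this diff; per the gr.Info message above it keeps roughly the first 30 seconds of the upload. A hypothetical stand-in using pydub, only to illustrate the expected contract (the real helper may be implemented differently):

    from pydub import AudioSegment  # assumption: any audio library with slicing would do

    def cut_audio(input_path: str, output_path: str, max_seconds: int = 30) -> str:
        # Write the first max_seconds of input_path to output_path as mp3.
        audio = AudioSegment.from_file(input_path)
        audio[:max_seconds * 1000].export(output_path, format="mp3")  # pydub slices in milliseconds
        return output_path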
@@ -87,6 +96,7 @@ def infer(audio_file, has_lyrics):
         print("""———
         Getting Lyrics ...
         """)
+        gr.Info("Getting Lyrics ...")
         lyrics_result = lyrics_client.predict(
             audio_file,  # str (filepath or URL to file) in 'Song input' Audio component
             fn_index=0
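Note: the gr.Info(...) calls added throughout this commit surface progress as toast notifications in the browser instead of only printing to the server log. They require a reasonably recent Gradio release and only display while an event handler is running; a minimal self-contained example:

    import gradio as gr

    def handler(text):
        gr.Info("Working on it ...")  # toast shown in the UI while the handler runs
        return text.upper()

    with gr.Blocks() as demo:
        t_in = gr.Textbox()
        t_out = gr.Textbox()
        gr.Button("Go").click(fn=handler, inputs=[t_in], outputs=[t_out])

    demo.queue().launch()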
@@ -123,6 +133,7 @@ def infer(audio_file, has_lyrics):
     print("""———
     Calling Llama2 ...
     """)
+    gr.Info("Calling Llama2 ...")
     result = client.predict(
         llama_q,  # str in 'Message' Textbox component
         api_name="/predict"
@@ -132,18 +143,32 @@ def infer(audio_file, has_lyrics):
 
     print(f"Llama2 result: {result}")
 
+    gr.Info("Prompt Optimization ...")
+    get_shorter_prompt = f"""
+    From this image description, please provide a short but efficient summary for a good Stable Diffusion prompt:
+    '{result}'
+    """
+
+    shorten = client.predict(
+        get_shorter_prompt,  # str in 'Message' Textbox component
+        api_name="/predict"
+    )
+
+    print(f'SHORTEN PROMPT: {shorten}')
+
     # ———
     print("""———
     Calling SD-XL ...
     """)
-
+    gr.Info('Calling SD-XL ...')
+    prompt = shorten
     conditioning, pooled = compel(prompt)
     images = pipe(prompt_embeds=conditioning, pooled_prompt_embeds=pooled).images[0]
 
     print("Finished")
 
     #return cap_result, result, images
-    return images, result, gr.update(visible=True), gr.Group.update(visible=True)
+    return processed_audio, images, result, gr.update(visible=True), gr.Group.update(visible=True)
 
 css = """
 #col-container {max-width: 780px; margin-left: auto; margin-right: auto;}
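Note: compel and pipe are created earlier in app.py, outside this diff. For context, the documented way to pair Compel with an SD-XL pipeline so that compel(prompt) yields both tensors consumed by pipe(prompt_embeds=..., pooled_prompt_embeds=...) looks like this; treat it as a sketch of the likely setup, not the file's exact code:

    import torch
    from diffusers import StableDiffusionXLPipeline
    from compel import Compel, ReturnedEmbeddingsType

    pipe = StableDiffusionXLPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0",
        torch_dtype=torch.float16,
        variant="fp16",
    ).to("cuda")

    # SD-XL has two text encoders; only the second one returns pooled embeddings.
    compel = Compel(
        tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
        text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
        returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
        requires_pooled=[False, True],
    )

    conditioning, pooled = compel("a vivid album cover, dramatic lighting")
    image = pipe(prompt_embeds=conditioning, pooled_prompt_embeds=pooled).images[0]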
@@ -245,25 +270,43 @@ with gr.Blocks(css=css) as demo:
         </div>""")
 
     audio_input = gr.Audio(label="Music input", type="filepath", source="upload")
+
     with gr.Row():
         has_lyrics = gr.Radio(label="Does your audio has lyrics ?", choices=["Yes", "No"], value="No", info="If yes, the image should reflect the lyrics, but be aware that because we add a step (getting lyrics), inference will take more time.")
-        song_title = gr.Textbox(label="Song Title",
+        song_title = gr.Textbox(label="Song Title", placeholder="Title: ", interactive=True, info="If you want to share your result, please provide the title of your audio sample :)", elem_id="song-title")
+
     infer_btn = gr.Button("Generate Image from Music")
     #lpmc_cap = gr.Textbox(label="Lp Music Caps caption")
+
+    with gr.Group():
+
+        with gr.Row():
+
+            llama_trans_cap = gr.Textbox(label="Llama Image Suggestion", placeholder="Llama2 image prompt suggestion will be displayed here ;)", visible=True, lines=12, max_lines=18, elem_id="llama-prompt")
+
+            with gr.Tab("Image Result"):
+                img_result = gr.Image(label="Image Result", elem_id="image-out", interactive=False, type="filepath")
+
+            with gr.Tab("Video visualizer"):
+
+                with gr.Column():
+                    processed_audio = gr.Audio(type="filepath", visible=False)
+                    visualizer_video = gr.Video(label="Video visualizer output")
+                    get_visualizer_vid = gr.Button("Export as video !")
+
     with gr.Row():
-
-        img_result = gr.Image(label="Image Result", elem_id="image-out")
-    with gr.Row():
+
         tryagain_btn = gr.Button("Try another image ?", visible=False)
+
     with gr.Group(elem_id="share-btn-container", visible=False) as share_group:
         community_icon = gr.HTML(community_icon_html)
         loading_icon = gr.HTML(loading_icon_html)
         share_button = gr.Button("Share to community", elem_id="share-btn")
-
+
     gr.Examples(examples=[["./examples/electronic.mp3", "No"],["./examples/folk.wav", "No"], ["./examples/orchestra.wav", "No"]],
                 fn=infer,
                 inputs=[audio_input, has_lyrics],
-                outputs=[img_result, llama_trans_cap, tryagain_btn, share_group],
+                outputs=[processed_audio, img_result, llama_trans_cap, tryagain_btn, share_group],
                 cache_examples=True
     )
 
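Note: infer ends with gr.update(visible=True), gr.Group.update(visible=True) so a successful run reveals tryagain_btn and share_group. Returning update objects instead of plain values patches component properties after an event; a minimal sketch of the pattern (Gradio 3.x spelling, matching this commit):

    import gradio as gr

    with gr.Blocks() as demo:
        generate = gr.Button("Generate")
        retry = gr.Button("Try another image ?", visible=False)
        with gr.Group(visible=False) as share_group:
            gr.HTML("<p>share widgets go here</p>")

        def reveal():
            # one update object per output component
            return gr.update(visible=True), gr.Group.update(visible=True)

        generate.click(fn=reveal, inputs=[], outputs=[retry, share_group])

    demo.launch()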
@@ -286,8 +329,9 @@ with gr.Blocks(css=css) as demo:
     """)
 
     #infer_btn.click(fn=infer, inputs=[audio_input], outputs=[lpmc_cap, llama_trans_cap, img_result])
-    infer_btn.click(fn=infer, inputs=[audio_input, has_lyrics], outputs=[img_result, llama_trans_cap, tryagain_btn, share_group])
+    infer_btn.click(fn=infer, inputs=[audio_input, has_lyrics], outputs=[processed_audio, img_result, llama_trans_cap, tryagain_btn, share_group])
     share_button.click(None, [], [], _js=share_js)
     tryagain_btn.click(fn=solo_xd, inputs=[llama_trans_cap], outputs=[img_result])
+    get_visualizer_vid.click(fn=get_visualizer_video, inputs=[processed_audio, img_result, song_title], outputs=[visualizer_video], queue=False)
 
 demo.queue(max_size=20).launch()
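Note: the new export button is wired with queue=False, so the visualizer call bypasses the request queue that demo.queue(max_size=20) sets up for the heavier handlers. A small illustration of the difference:

    import gradio as gr
    import time

    def slow_job(x):
        time.sleep(5)  # long-running work goes through the queue
        return x

    with gr.Blocks() as demo:
        inp = gr.Textbox()
        out = gr.Textbox()
        gr.Button("Run").click(fn=slow_job, inputs=[inp], outputs=[out])  # queued by default once queue() is enabled
        gr.Button("Echo").click(fn=lambda x: x, inputs=[inp], outputs=[out], queue=False)  # runs immediately

    demo.queue(max_size=20).launch()  # at most 20 pending requests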