Spaces:
Running
on
Zero
Running
on
Zero
thecollabagepatch
committed on
did i break it
Browse files
app.py
CHANGED
@@ -12,6 +12,7 @@ from pydub import AudioSegment
|
|
12 |
import spaces
|
13 |
import tempfile
|
14 |
from pydub import AudioSegment
|
|
|
15 |
|
16 |
# Check if CUDA is available
|
17 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
@@ -188,17 +189,7 @@ def continue_music(input_audio_path, prompt_duration, musicgen_model, num_iterat
|
|
188 |
# Load the audio from the given file path
|
189 |
song, sr = torchaudio.load(input_audio_path)
|
190 |
song = song.to(device)
|
191 |
-
|
192 |
-
# Calculate the slice from the end of the song based on prompt_duration
|
193 |
-
num_samples = int(prompt_duration * sr)
|
194 |
-
if song.shape[-1] < num_samples:
|
195 |
-
raise ValueError("The prompt_duration is longer than the audio length.")
|
196 |
-
start_idx = song.shape[-1] - num_samples
|
197 |
-
prompt_waveform = song[..., start_idx:]
|
198 |
-
|
199 |
-
# Prepare the audio slice for generation
|
200 |
-
prompt_waveform = preprocess_audio(prompt_waveform)
|
201 |
-
|
202 |
# Load the model and set generation parameters
|
203 |
model_continue = MusicGen.get_pretrained(musicgen_model.split(" ")[0])
|
204 |
model_continue.set_generation_params(
|
@@ -209,14 +200,32 @@ def continue_music(input_audio_path, prompt_duration, musicgen_model, num_iterat
|
|
209 |
duration=calculate_duration(bpm),
|
210 |
cfg_coef=3
|
211 |
)
|
212 |
-
|
213 |
original_audio = AudioSegment.from_mp3(input_audio_path)
|
214 |
-
|
|
|
215 |
file_paths_for_cleanup = [] # List to track generated file paths for cleanup
|
216 |
|
217 |
for i in range(num_iterations):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
218 |
output = model_continue.generate_continuation(prompt_waveform, prompt_sample_rate=sr, progress=True)
|
219 |
output = output.cpu() # Move the output tensor back to CPU
|
|
|
220 |
if len(output.size()) > 2:
|
221 |
output = output.squeeze()
|
222 |
|
@@ -225,15 +234,15 @@ def continue_music(input_audio_path, prompt_duration, musicgen_model, num_iterat
|
|
225 |
correct_filename_extension = f'{filename_without_extension}.wav.wav' # Apply the workaround for audio_write
|
226 |
|
227 |
audio_write(filename_with_extension, output, model_continue.sample_rate, strategy="loudness", loudness_compressor=True)
|
228 |
-
|
229 |
-
all_audio_files.append(new_audio_segment)
|
230 |
-
file_paths_for_cleanup.append(correct_filename_extension) # Add to cleanup list
|
231 |
|
232 |
-
|
233 |
-
|
|
|
|
|
234 |
|
235 |
combined_audio_filename = f"combined_audio_{random.randint(1, 10000)}.mp3"
|
236 |
-
|
237 |
|
238 |
# Clean up temporary files using the list of file paths
|
239 |
for file_path in file_paths_for_cleanup:
|
@@ -266,8 +275,6 @@ the fine-tunes hosted on the huggingface hub are provided collectively by the mu
|
|
266 |
[<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab" style="vertical-align:middle"> fine-tuning colab notebook by lyra](https://colab.research.google.com/drive/13tbcC3A42KlaUZ21qvUXd25SFLu8WIvb)
|
267 |
"""
|
268 |
|
269 |
-
|
270 |
-
|
271 |
# Define the fine-tunes blurb for each model
|
272 |
fine_tunes_info = """
|
273 |
## thepatch/vanya_ai_dnb_0.1
|
@@ -286,21 +293,19 @@ thepatch/hoenn_lofi is a large fine-tune by hoenn. [![Twitter](https://huggingfa
|
|
286 |
thepatch/PhonkV2 was trained by MJ BERSABEph. there are multiple versions in the discord.
|
287 |
"""
|
288 |
|
289 |
-
|
290 |
-
|
291 |
# Create the Gradio interface
|
292 |
with gr.Blocks() as iface:
|
293 |
gr.Markdown("# the-slot-machine")
|
294 |
gr.Markdown("two ai's jamming. warning: outputs will be very strange, likely stupid, and possibly rad.")
|
295 |
-
gr.Markdown("this is a musical slot machine. using musiclang, we get a midi output. then, we let a musicgen model
|
296 |
|
297 |
with gr.Accordion("more info", open=False):
|
298 |
gr.Markdown(musiclang_blurb)
|
299 |
gr.Markdown(musicgen_blurb)
|
300 |
gr.Markdown(finetunes_blurb)
|
301 |
|
302 |
-
with gr.Accordion("fine-tunes info", open=False):
|
303 |
-
gr.Markdown(
|
304 |
|
305 |
with gr.Row():
|
306 |
with gr.Column():
|
@@ -320,7 +325,7 @@ with gr.Accordion("fine-tunes info", open=False):
|
|
320 |
"thepatch/bleeps-medium (medium)",
|
321 |
"thepatch/hoenn_lofi (large)"
|
322 |
], value="thepatch/vanya_ai_dnb_0.1 (small)")
|
323 |
-
|
324 |
generate_music_button = gr.Button("Generate Music")
|
325 |
output_audio = gr.Audio(label="Generated Music", type="filepath")
|
326 |
continue_button = gr.Button("Continue Generating Music")
|
|
|
12 |
import spaces
|
13 |
import tempfile
|
14 |
from pydub import AudioSegment
|
15 |
+
import io
|
16 |
|
17 |
# Check if CUDA is available
|
18 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
|
189 |
# Load the audio from the given file path
|
190 |
song, sr = torchaudio.load(input_audio_path)
|
191 |
song = song.to(device)
|
192 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
193 |
# Load the model and set generation parameters
|
194 |
model_continue = MusicGen.get_pretrained(musicgen_model.split(" ")[0])
|
195 |
model_continue.set_generation_params(
|
|
|
200 |
duration=calculate_duration(bpm),
|
201 |
cfg_coef=3
|
202 |
)
|
203 |
+
|
204 |
original_audio = AudioSegment.from_mp3(input_audio_path)
|
205 |
+
current_audio = original_audio
|
206 |
+
|
207 |
file_paths_for_cleanup = [] # List to track generated file paths for cleanup
|
208 |
|
209 |
for i in range(num_iterations):
|
210 |
+
# Calculate the slice from the end of the current audio based on prompt_duration
|
211 |
+
num_samples = int(prompt_duration * sr)
|
212 |
+
if current_audio.duration_seconds * 1000 < prompt_duration * 1000:
|
213 |
+
raise ValueError("The prompt_duration is longer than the current audio length.")
|
214 |
+
|
215 |
+
start_time = current_audio.duration_seconds * 1000 - prompt_duration * 1000
|
216 |
+
prompt_audio = current_audio[start_time:]
|
217 |
+
|
218 |
+
# Convert the prompt audio to a PyTorch tensor
|
219 |
+
prompt_bytes = prompt_audio.export(format="wav").read()
|
220 |
+
prompt_waveform, _ = torchaudio.load(io.BytesIO(prompt_bytes))
|
221 |
+
prompt_waveform = prompt_waveform.to(device)
|
222 |
+
|
223 |
+
# Prepare the audio slice for generation
|
224 |
+
prompt_waveform = preprocess_audio(prompt_waveform)
|
225 |
+
|
226 |
output = model_continue.generate_continuation(prompt_waveform, prompt_sample_rate=sr, progress=True)
|
227 |
output = output.cpu() # Move the output tensor back to CPU
|
228 |
+
|
229 |
if len(output.size()) > 2:
|
230 |
output = output.squeeze()
|
231 |
|
|
|
234 |
correct_filename_extension = f'{filename_without_extension}.wav.wav' # Apply the workaround for audio_write
|
235 |
|
236 |
audio_write(filename_with_extension, output, model_continue.sample_rate, strategy="loudness", loudness_compressor=True)
|
237 |
+
generated_audio_segment = AudioSegment.from_wav(correct_filename_extension)
|
|
|
|
|
238 |
|
239 |
+
# Replace the prompt portion with the generated audio
|
240 |
+
current_audio = current_audio[:start_time] + generated_audio_segment
|
241 |
+
|
242 |
+
file_paths_for_cleanup.append(correct_filename_extension) # Add to cleanup list
|
243 |
|
244 |
combined_audio_filename = f"combined_audio_{random.randint(1, 10000)}.mp3"
|
245 |
+
current_audio.export(combined_audio_filename, format="mp3")
|
246 |
|
247 |
# Clean up temporary files using the list of file paths
|
248 |
for file_path in file_paths_for_cleanup:
|
|
|
275 |
[<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab" style="vertical-align:middle"> fine-tuning colab notebook by lyra](https://colab.research.google.com/drive/13tbcC3A42KlaUZ21qvUXd25SFLu8WIvb)
|
276 |
"""
|
277 |
|
|
|
|
|
278 |
# Define the fine-tunes blurb for each model
|
279 |
fine_tunes_info = """
|
280 |
## thepatch/vanya_ai_dnb_0.1
|
|
|
293 |
thepatch/PhonkV2 was trained by MJ BERSABEph. there are multiple versions in the discord.
|
294 |
"""
|
295 |
|
|
|
|
|
296 |
# Create the Gradio interface
|
297 |
with gr.Blocks() as iface:
|
298 |
gr.Markdown("# the-slot-machine")
|
299 |
gr.Markdown("two ai's jamming. warning: outputs will be very strange, likely stupid, and possibly rad.")
|
300 |
+
gr.Markdown("this is a musical slot machine. using musiclang, we get a midi output. then, we let a musicgen model continue, semi-randomly, from different sections of the midi track. the slot machine combines em all at the end into something very bizarre. pick a number for the seed between 1 and 10k, or leave it blank to unlock the full rnjesus powers. if you wanna be lame, you can control the chord progression, prompt duration, musicgen model, number of iterations, and BPM.")
|
301 |
|
302 |
with gr.Accordion("more info", open=False):
|
303 |
gr.Markdown(musiclang_blurb)
|
304 |
gr.Markdown(musicgen_blurb)
|
305 |
gr.Markdown(finetunes_blurb)
|
306 |
|
307 |
+
with gr.Accordion("fine-tunes info", open=False):
|
308 |
+
gr.Markdown(fine_tunes_info)
|
309 |
|
310 |
with gr.Row():
|
311 |
with gr.Column():
|
|
|
325 |
"thepatch/bleeps-medium (medium)",
|
326 |
"thepatch/hoenn_lofi (large)"
|
327 |
], value="thepatch/vanya_ai_dnb_0.1 (small)")
|
328 |
+
num_iterations = gr.Slider(label="this does nothing rn", minimum=1, maximum=1, step=1, value=1)
|
329 |
generate_music_button = gr.Button("Generate Music")
|
330 |
output_audio = gr.Audio(label="Generated Music", type="filepath")
|
331 |
continue_button = gr.Button("Continue Generating Music")
|