thecollabagepatch committed on
Commit
ed7d0fe
2 Parent(s): 46b3885 9a31575

did i break it

Browse files
Files changed (1) hide show
  1. app.py +32 -27
app.py CHANGED
@@ -12,6 +12,7 @@ from pydub import AudioSegment
12
  import spaces
13
  import tempfile
14
  from pydub import AudioSegment
 
15
 
16
  # Check if CUDA is available
17
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -188,17 +189,7 @@ def continue_music(input_audio_path, prompt_duration, musicgen_model, num_iterat
188
  # Load the audio from the given file path
189
  song, sr = torchaudio.load(input_audio_path)
190
  song = song.to(device)
191
-
192
- # Calculate the slice from the end of the song based on prompt_duration
193
- num_samples = int(prompt_duration * sr)
194
- if song.shape[-1] < num_samples:
195
- raise ValueError("The prompt_duration is longer than the audio length.")
196
- start_idx = song.shape[-1] - num_samples
197
- prompt_waveform = song[..., start_idx:]
198
-
199
- # Prepare the audio slice for generation
200
- prompt_waveform = preprocess_audio(prompt_waveform)
201
-
202
  # Load the model and set generation parameters
203
  model_continue = MusicGen.get_pretrained(musicgen_model.split(" ")[0])
204
  model_continue.set_generation_params(
@@ -209,14 +200,32 @@ def continue_music(input_audio_path, prompt_duration, musicgen_model, num_iterat
209
  duration=calculate_duration(bpm),
210
  cfg_coef=3
211
  )
212
-
213
  original_audio = AudioSegment.from_mp3(input_audio_path)
214
- all_audio_files = [original_audio] # Start with the original audio
 
215
  file_paths_for_cleanup = [] # List to track generated file paths for cleanup
216
 
217
  for i in range(num_iterations):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
  output = model_continue.generate_continuation(prompt_waveform, prompt_sample_rate=sr, progress=True)
219
  output = output.cpu() # Move the output tensor back to CPU
 
220
  if len(output.size()) > 2:
221
  output = output.squeeze()
222
 
@@ -225,15 +234,15 @@ def continue_music(input_audio_path, prompt_duration, musicgen_model, num_iterat
225
  correct_filename_extension = f'{filename_without_extension}.wav.wav' # Apply the workaround for audio_write
226
 
227
  audio_write(filename_with_extension, output, model_continue.sample_rate, strategy="loudness", loudness_compressor=True)
228
- new_audio_segment = AudioSegment.from_wav(correct_filename_extension)
229
- all_audio_files.append(new_audio_segment)
230
- file_paths_for_cleanup.append(correct_filename_extension) # Add to cleanup list
231
 
232
- # Combine all audio files into one continuous segment
233
- combined_audio = sum(all_audio_files)
 
 
234
 
235
  combined_audio_filename = f"combined_audio_{random.randint(1, 10000)}.mp3"
236
- combined_audio.export(combined_audio_filename, format="mp3")
237
 
238
  # Clean up temporary files using the list of file paths
239
  for file_path in file_paths_for_cleanup:
@@ -266,8 +275,6 @@ the fine-tunes hosted on the huggingface hub are provided collectively by the mu
266
  [<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab" style="vertical-align:middle"> fine-tuning colab notebook by lyra](https://colab.research.google.com/drive/13tbcC3A42KlaUZ21qvUXd25SFLu8WIvb)
267
  """
268
 
269
-
270
-
271
  # Define the fine-tunes blurb for each model
272
  fine_tunes_info = """
273
  ## thepatch/vanya_ai_dnb_0.1
@@ -286,21 +293,19 @@ thepatch/hoenn_lofi is a large fine-tune by hoenn. [![Twitter](https://huggingfa
286
  thepatch/PhonkV2 was trained by MJ BERSABEph. there are multiple versions in the discord.
287
  """
288
 
289
-
290
-
291
  # Create the Gradio interface
292
  with gr.Blocks() as iface:
293
  gr.Markdown("# the-slot-machine")
294
  gr.Markdown("two ai's jamming. warning: outputs will be very strange, likely stupid, and possibly rad.")
295
- gr.Markdown("this is a musical slot machine. using musiclang, we get a midi output. then, we let a musicgen model to continue the from the beginning of the midi model's generation. then, musicgen can continue from the end of its own output. re-upload, trim and repeat with a different fine-tune and prompt duration for the coolest outputs.")
296
 
297
  with gr.Accordion("more info", open=False):
298
  gr.Markdown(musiclang_blurb)
299
  gr.Markdown(musicgen_blurb)
300
  gr.Markdown(finetunes_blurb)
301
 
302
- with gr.Accordion("fine-tunes info", open=False):
303
- gr.Markdown(fine_tunes_blurb)
304
 
305
  with gr.Row():
306
  with gr.Column():
@@ -320,7 +325,7 @@ with gr.Accordion("fine-tunes info", open=False):
320
  "thepatch/bleeps-medium (medium)",
321
  "thepatch/hoenn_lofi (large)"
322
  ], value="thepatch/vanya_ai_dnb_0.1 (small)")
323
-
324
  generate_music_button = gr.Button("Generate Music")
325
  output_audio = gr.Audio(label="Generated Music", type="filepath")
326
  continue_button = gr.Button("Continue Generating Music")
 
12
  import spaces
13
  import tempfile
14
  from pydub import AudioSegment
15
+ import io
16
 
17
  # Check if CUDA is available
18
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
189
  # Load the audio from the given file path
190
  song, sr = torchaudio.load(input_audio_path)
191
  song = song.to(device)
192
+
 
 
 
 
 
 
 
 
 
 
193
  # Load the model and set generation parameters
194
  model_continue = MusicGen.get_pretrained(musicgen_model.split(" ")[0])
195
  model_continue.set_generation_params(
 
200
  duration=calculate_duration(bpm),
201
  cfg_coef=3
202
  )
203
+
204
  original_audio = AudioSegment.from_mp3(input_audio_path)
205
+ current_audio = original_audio
206
+
207
  file_paths_for_cleanup = [] # List to track generated file paths for cleanup
208
 
209
  for i in range(num_iterations):
210
+ # Calculate the slice from the end of the current audio based on prompt_duration
211
+ num_samples = int(prompt_duration * sr)
212
+ if current_audio.duration_seconds * 1000 < prompt_duration * 1000:
213
+ raise ValueError("The prompt_duration is longer than the current audio length.")
214
+
215
+ start_time = current_audio.duration_seconds * 1000 - prompt_duration * 1000
216
+ prompt_audio = current_audio[start_time:]
217
+
218
+ # Convert the prompt audio to a PyTorch tensor
219
+ prompt_bytes = prompt_audio.export(format="wav").read()
220
+ prompt_waveform, _ = torchaudio.load(io.BytesIO(prompt_bytes))
221
+ prompt_waveform = prompt_waveform.to(device)
222
+
223
+ # Prepare the audio slice for generation
224
+ prompt_waveform = preprocess_audio(prompt_waveform)
225
+
226
  output = model_continue.generate_continuation(prompt_waveform, prompt_sample_rate=sr, progress=True)
227
  output = output.cpu() # Move the output tensor back to CPU
228
+
229
  if len(output.size()) > 2:
230
  output = output.squeeze()
231
 
 
234
  correct_filename_extension = f'{filename_without_extension}.wav.wav' # Apply the workaround for audio_write
235
 
236
  audio_write(filename_with_extension, output, model_continue.sample_rate, strategy="loudness", loudness_compressor=True)
237
+ generated_audio_segment = AudioSegment.from_wav(correct_filename_extension)
 
 
238
 
239
+ # Replace the prompt portion with the generated audio
240
+ current_audio = current_audio[:start_time] + generated_audio_segment
241
+
242
+ file_paths_for_cleanup.append(correct_filename_extension) # Add to cleanup list
243
 
244
  combined_audio_filename = f"combined_audio_{random.randint(1, 10000)}.mp3"
245
+ current_audio.export(combined_audio_filename, format="mp3")
246
 
247
  # Clean up temporary files using the list of file paths
248
  for file_path in file_paths_for_cleanup:
 
275
  [<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab" style="vertical-align:middle"> fine-tuning colab notebook by lyra](https://colab.research.google.com/drive/13tbcC3A42KlaUZ21qvUXd25SFLu8WIvb)
276
  """
277
 
 
 
278
  # Define the fine-tunes blurb for each model
279
  fine_tunes_info = """
280
  ## thepatch/vanya_ai_dnb_0.1
 
293
  thepatch/PhonkV2 was trained by MJ BERSABEph. there are multiple versions in the discord.
294
  """
295
 
 
 
296
  # Create the Gradio interface
297
  with gr.Blocks() as iface:
298
  gr.Markdown("# the-slot-machine")
299
  gr.Markdown("two ai's jamming. warning: outputs will be very strange, likely stupid, and possibly rad.")
300
+ gr.Markdown("this is a musical slot machine. using musiclang, we get a midi output. then, we let a musicgen model continue, semi-randomly, from different sections of the midi track. the slot machine combines em all at the end into something very bizarre. pick a number for the seed between 1 and 10k, or leave it blank to unlock the full rnjesus powers. if you wanna be lame, you can control the chord progression, prompt duration, musicgen model, number of iterations, and BPM.")
301
 
302
  with gr.Accordion("more info", open=False):
303
  gr.Markdown(musiclang_blurb)
304
  gr.Markdown(musicgen_blurb)
305
  gr.Markdown(finetunes_blurb)
306
 
307
+ with gr.Accordion("fine-tunes info", open=False):
308
+ gr.Markdown(fine_tunes_info)
309
 
310
  with gr.Row():
311
  with gr.Column():
 
325
  "thepatch/bleeps-medium (medium)",
326
  "thepatch/hoenn_lofi (large)"
327
  ], value="thepatch/vanya_ai_dnb_0.1 (small)")
328
+ num_iterations = gr.Slider(label="this does nothing rn", minimum=1, maximum=1, step=1, value=1)
329
  generate_music_button = gr.Button("Generate Music")
330
  output_audio = gr.Audio(label="Generated Music", type="filepath")
331
  continue_button = gr.Button("Continue Generating Music")