Spaces:

thepatch
/

zero-gpu-slot-machine

Running on Zero

App Files Files Community

thecollabagepatch committed on Apr 21

Commit

ed7d0fe

•

2 Parent(s): 46b3885 9a31575

did i break it

Browse files

Files changed (1) hide show

app.py +32 -27

app.py CHANGED Viewed

@@ -12,6 +12,7 @@ from pydub import AudioSegment
 import spaces
 import tempfile
 from pydub import AudioSegment
 # Check if CUDA is available
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -188,17 +189,7 @@ def continue_music(input_audio_path, prompt_duration, musicgen_model, num_iterat
     # Load the audio from the given file path
     song, sr = torchaudio.load(input_audio_path)
     song = song.to(device)
-    # Calculate the slice from the end of the song based on prompt_duration
-    num_samples = int(prompt_duration * sr)
-    if song.shape[-1] < num_samples:
-        raise ValueError("The prompt_duration is longer than the audio length.")
-    start_idx = song.shape[-1] - num_samples
-    prompt_waveform = song[..., start_idx:]
-    # Prepare the audio slice for generation
-    prompt_waveform = preprocess_audio(prompt_waveform)
     # Load the model and set generation parameters
     model_continue = MusicGen.get_pretrained(musicgen_model.split(" ")[0])
     model_continue.set_generation_params(
@@ -209,14 +200,32 @@ def continue_music(input_audio_path, prompt_duration, musicgen_model, num_iterat
         duration=calculate_duration(bpm),
         cfg_coef=3
     )
     original_audio = AudioSegment.from_mp3(input_audio_path)
-    all_audio_files = [original_audio]  # Start with the original audio
     file_paths_for_cleanup = []  # List to track generated file paths for cleanup
     for i in range(num_iterations):
         output = model_continue.generate_continuation(prompt_waveform, prompt_sample_rate=sr, progress=True)
         output = output.cpu()  # Move the output tensor back to CPU
         if len(output.size()) > 2:
             output = output.squeeze()
@@ -225,15 +234,15 @@ def continue_music(input_audio_path, prompt_duration, musicgen_model, num_iterat
         correct_filename_extension = f'{filename_without_extension}.wav.wav'  # Apply the workaround for audio_write
         audio_write(filename_with_extension, output, model_continue.sample_rate, strategy="loudness", loudness_compressor=True)
-        new_audio_segment = AudioSegment.from_wav(correct_filename_extension)
-        all_audio_files.append(new_audio_segment)
-        file_paths_for_cleanup.append(correct_filename_extension)  # Add to cleanup list
-    # Combine all audio files into one continuous segment
-    combined_audio = sum(all_audio_files)
     combined_audio_filename = f"combined_audio_{random.randint(1, 10000)}.mp3"
-    combined_audio.export(combined_audio_filename, format="mp3")
     # Clean up temporary files using the list of file paths
     for file_path in file_paths_for_cleanup:
@@ -266,8 +275,6 @@ the fine-tunes hosted on the huggingface hub are provided collectively by the mu
 [<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab" style="vertical-align:middle"> fine-tuning colab notebook by lyra](https://colab.research.google.com/drive/13tbcC3A42KlaUZ21qvUXd25SFLu8WIvb)
 """
 # Define the fine-tunes blurb for each model
 fine_tunes_info = """
 ## thepatch/vanya_ai_dnb_0.1
@@ -286,21 +293,19 @@ thepatch/hoenn_lofi is a large fine-tune by hoenn. [![Twitter](https://huggingfa
 thepatch/PhonkV2 was trained by MJ BERSABEph. there are multiple versions in the discord.
 """
 # Create the Gradio interface
 with gr.Blocks() as iface:
     gr.Markdown("# the-slot-machine")
     gr.Markdown("two ai's jamming. warning: outputs will be very strange, likely stupid, and possibly rad.")
-    gr.Markdown("this is a musical slot machine. using musiclang, we get a midi output. then, we let a musicgen model to continue the from the beginning of the midi model's generation. then, musicgen can continue from the end of its own output. re-upload, trim and repeat with a different fine-tune and prompt duration for the coolest outputs.")
     with gr.Accordion("more info", open=False):
         gr.Markdown(musiclang_blurb)
         gr.Markdown(musicgen_blurb)
         gr.Markdown(finetunes_blurb)
-with gr.Accordion("fine-tunes info", open=False):
-        gr.Markdown(fine_tunes_blurb)
     with gr.Row():
         with gr.Column():
@@ -320,7 +325,7 @@ with gr.Accordion("fine-tunes info", open=False):
                 "thepatch/bleeps-medium (medium)",
                 "thepatch/hoenn_lofi (large)"
             ], value="thepatch/vanya_ai_dnb_0.1 (small)")
             generate_music_button = gr.Button("Generate Music")
             output_audio = gr.Audio(label="Generated Music", type="filepath")
             continue_button = gr.Button("Continue Generating Music")

 import spaces
 import tempfile
 from pydub import AudioSegment
+import io
 # Check if CUDA is available
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     # Load the audio from the given file path
     song, sr = torchaudio.load(input_audio_path)
     song = song.to(device)
     # Load the model and set generation parameters
     model_continue = MusicGen.get_pretrained(musicgen_model.split(" ")[0])
     model_continue.set_generation_params(
         duration=calculate_duration(bpm),
         cfg_coef=3
     )
     original_audio = AudioSegment.from_mp3(input_audio_path)
+    current_audio = original_audio
     file_paths_for_cleanup = []  # List to track generated file paths for cleanup
     for i in range(num_iterations):
+        # Calculate the slice from the end of the current audio based on prompt_duration
+        num_samples = int(prompt_duration * sr)
+        if current_audio.duration_seconds * 1000 < prompt_duration * 1000:
+            raise ValueError("The prompt_duration is longer than the current audio length.")
+        start_time = current_audio.duration_seconds * 1000 - prompt_duration * 1000
+        prompt_audio = current_audio[start_time:]
+        # Convert the prompt audio to a PyTorch tensor
+        prompt_bytes = prompt_audio.export(format="wav").read()
+        prompt_waveform, _ = torchaudio.load(io.BytesIO(prompt_bytes))
+        prompt_waveform = prompt_waveform.to(device)
+        # Prepare the audio slice for generation
+        prompt_waveform = preprocess_audio(prompt_waveform)
         output = model_continue.generate_continuation(prompt_waveform, prompt_sample_rate=sr, progress=True)
         output = output.cpu()  # Move the output tensor back to CPU
         if len(output.size()) > 2:
             output = output.squeeze()
         correct_filename_extension = f'{filename_without_extension}.wav.wav'  # Apply the workaround for audio_write
         audio_write(filename_with_extension, output, model_continue.sample_rate, strategy="loudness", loudness_compressor=True)
+        generated_audio_segment = AudioSegment.from_wav(correct_filename_extension)
+        # Replace the prompt portion with the generated audio
+        current_audio = current_audio[:start_time] + generated_audio_segment
+        file_paths_for_cleanup.append(correct_filename_extension)  # Add to cleanup list
     combined_audio_filename = f"combined_audio_{random.randint(1, 10000)}.mp3"
+    current_audio.export(combined_audio_filename, format="mp3")
     # Clean up temporary files using the list of file paths
     for file_path in file_paths_for_cleanup:
 [<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab" style="vertical-align:middle"> fine-tuning colab notebook by lyra](https://colab.research.google.com/drive/13tbcC3A42KlaUZ21qvUXd25SFLu8WIvb)
 """
 # Define the fine-tunes blurb for each model
 fine_tunes_info = """
 ## thepatch/vanya_ai_dnb_0.1
 thepatch/PhonkV2 was trained by MJ BERSABEph. there are multiple versions in the discord.
 """
 # Create the Gradio interface
 with gr.Blocks() as iface:
     gr.Markdown("# the-slot-machine")
     gr.Markdown("two ai's jamming. warning: outputs will be very strange, likely stupid, and possibly rad.")
+    gr.Markdown("this is a musical slot machine. using musiclang, we get a midi output. then, we let a musicgen model continue, semi-randomly, from different sections of the midi track. the slot machine combines em all at the end into something very bizarre. pick a number for the seed between 1 and 10k, or leave it blank to unlock the full rnjesus powers. if you wanna be lame, you can control the chord progression, prompt duration, musicgen model, number of iterations, and BPM.")
     with gr.Accordion("more info", open=False):
         gr.Markdown(musiclang_blurb)
         gr.Markdown(musicgen_blurb)
         gr.Markdown(finetunes_blurb)
+    with gr.Accordion("fine-tunes info", open=False):
+        gr.Markdown(fine_tunes_info)
     with gr.Row():
         with gr.Column():
                 "thepatch/bleeps-medium (medium)",
                 "thepatch/hoenn_lofi (large)"
             ], value="thepatch/vanya_ai_dnb_0.1 (small)")
+            num_iterations = gr.Slider(label="this does nothing rn", minimum=1, maximum=1, step=1, value=1)
             generate_music_button = gr.Button("Generate Music")
             output_audio = gr.Audio(label="Generated Music", type="filepath")
             continue_button = gr.Button("Continue Generating Music")