thecollabagepatch committed on
Commit
b6ff5af
1 Parent(s): 821005a

attempt 1 runtime error

Browse files
Files changed (1) hide show
  1. app.py +241 -0
app.py CHANGED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import random
import subprocess

import gradio as gr
import numpy as np
import spaces  # provides the @spaces.GPU decorator applied to generate_music
import torch
import torchaudio
from audiocraft.data.audio import audio_write
from audiocraft.models import MusicGen
from musiclang_predict import MusicLangPredictor
from pydub import AudioSegment
12
+
13
+
14
+ # Utility Functions
15
def peak_normalize(y, target_peak=0.97):
    """Scale ``y`` so that its largest absolute sample equals ``target_peak``.

    Args:
        y: NumPy array (or array-like) of audio samples.
        target_peak: Desired maximum absolute amplitude after scaling.

    Returns:
        The rescaled array. Empty or all-zero input is returned unchanged:
        there is no peak to scale, and dividing by a zero peak would turn
        the whole signal into NaN.
    """
    y = np.asarray(y)
    if y.size == 0:
        return y

    peak = np.max(np.abs(y))
    if peak == 0:
        return y

    return target_peak * (y / peak)
17
+
18
def rms_normalize(y, target_rms=0.05):
    """Scale ``y`` so that its root-mean-square level equals ``target_rms``.

    Args:
        y: NumPy array (or array-like) of audio samples.
        target_rms: Desired RMS level after scaling.

    Returns:
        The rescaled array. Empty or all-zero input is returned unchanged,
        because its RMS is zero (or undefined) and the gain would be NaN/inf.
    """
    y = np.asarray(y)
    if y.size == 0:
        return y

    rms = np.sqrt(np.mean(y**2))
    if rms == 0:
        return y

    return y * (target_rms / rms)
20
+
21
def preprocess_audio(waveform):
    """Round-trip a prompt tensor through NumPy and return it on ``device``.

    The tensor is copied to the CPU, stripped of all size-1 dimensions,
    converted to a NumPy array, converted back to a tensor, given a new
    leading dimension of size 1, and moved to the module-level ``device``.

    Args:
        waveform: Torch tensor holding the audio prompt.

    Returns:
        Torch tensor on ``device`` with one extra leading dimension.
    """
    # NumPy conversion requires the data to live in host memory.
    host_tensor = waveform.cpu()
    samples = host_tensor.squeeze().numpy()

    # Loudness normalization (rms_normalize(peak_normalize(samples))) is
    # currently disabled; the samples are passed through untouched.
    batched = torch.from_numpy(samples).unsqueeze(0)
    return batched.to(device)
25
+
26
def create_slices(song, sr, slice_duration, bpm, num_slices=5):
    """Cut fixed-length excerpts out of ``song``.

    The first excerpt always starts at sample 0. Every further excerpt starts
    at a randomly chosen offset that lies ``slice_duration`` seconds plus a
    whole number of 4-beat bars into the song. An excerpt that would run past
    the end of the song wraps around and continues from the beginning.

    Args:
        song: Torch tensor whose last dimension indexes samples (any number
            of leading dimensions, e.g. channels).
        sr: Sample rate in Hz.
        slice_duration: Length of each excerpt in seconds.
        bpm: Tempo used to derive the bar length (4 beats per bar).
        num_slices: Number of excerpts to produce. The first excerpt is
            always returned, even when this is less than 1.

    Returns:
        List of tensors. When the song is too short to offer any start offset
        after the first excerpt, the first excerpt is repeated instead.
    """
    total_samples = song.shape[-1]
    slice_samples = int(slice_duration * sr)
    # Guard against a zero step, which range() rejects (extremely high bpm
    # relative to the sample rate).
    bar_samples = max(int(4 * 60 / bpm * sr), 1)

    first_slice = song[..., :slice_samples]
    slices = [first_slice]

    # The candidate start offsets do not depend on the loop variable, so
    # build them once. A range is indexable, which is all random.choice needs.
    start_candidates = range(slice_samples, total_samples, bar_samples)

    for _ in range(1, num_slices):
        if not start_candidates:
            # No valid start offset: duplicate the first excerpt.
            slices.append(first_slice)
            continue

        start = random.choice(start_candidates)
        end = start + slice_samples

        if end > total_samples:
            # Wrap around to the beginning of the song.
            overflow = end - total_samples
            excerpt = torch.cat([song[..., start:], song[..., :overflow]], dim=-1)
        else:
            excerpt = song[..., start:end]

        # Measure the length along the sample axis. len(tensor.squeeze())
        # would return the channel count for multi-channel audio and cause
        # almost a whole extra slice to be appended.
        missing = slice_samples - excerpt.shape[-1]
        if missing > 0:
            excerpt = torch.cat([excerpt, song[..., :missing]], dim=-1)

        slices.append(excerpt)

    return slices
58
+
59
def calculate_duration(bpm, min_duration=29, max_duration=30):
    """Return a length in seconds that is a whole number of 4-beat bars.

    The bar count is first raised until the length reaches ``min_duration``
    and then lowered while the length exceeds ``max_duration``. The upper
    bound therefore wins when no whole number of bars fits between the two
    limits (the result may be below ``min_duration``). A single bar is never
    dropped, so a bar longer than ``max_duration`` is returned as-is.

    Args:
        bpm: Tempo in beats per minute; must be positive.
        min_duration: Preferred minimum length in seconds.
        max_duration: Maximum length in seconds.

    Returns:
        The duration in seconds as a float.

    Raises:
        ValueError: If ``bpm`` is zero or negative. Zero would divide by
            zero and a negative tempo would make the first loop run forever.
    """
    if bpm <= 0:
        raise ValueError(f"bpm must be positive, got {bpm!r}")

    single_bar_duration = 4 * 60 / bpm
    bars = max(min_duration // single_bar_duration, 1)

    # Grow until the minimum is reached.
    while single_bar_duration * bars < min_duration:
        bars += 1

    # Shrink back under the maximum, but keep at least one bar.
    while single_bar_duration * bars > max_duration and bars > 1:
        bars -= 1

    return single_bar_duration * bars
73
+
74
# Upper bound on how many seeds are tried before an IndexError is reported.
_MAX_GENERATION_ATTEMPTS = 5


def _remove_if_present(path):
    """Delete ``path``; a file that was never created is not an error."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def _render_track(ml, seed, use_chords, chord_progression, prompt_duration, musicgen_model, num_iterations, bpm):
    """Run one MIDI -> WAV -> MusicGen continuation pass and return the MP3 path.

    Intermediate files (MIDI, rendered WAV, per-iteration continuations) are
    removed whether the pass succeeds or fails; only the combined MP3 is kept.
    """
    nb_tokens = 1024
    temperature = 0.9
    top_p = 1.0

    if use_chords and (chord_progression or "").strip():
        score = ml.predict_chords(
            chord_progression,
            time_signature=(4, 4),
            temperature=temperature,
            topp=top_p,
            rng_seed=seed,
        )
    else:
        score = ml.predict(
            nb_tokens=nb_tokens,
            temperature=temperature,
            topp=top_p,
            rng_seed=seed,
        )

    midi_filename = f"output_{seed}.mid"
    wav_filename = midi_filename.replace(".mid", ".wav")
    continuation_files = []

    try:
        score.to_midi(midi_filename, tempo=bpm, time_signature=(4, 4))

        # check=True surfaces a failed render here instead of as a confusing
        # load error on a missing or truncated WAV file below.
        subprocess.run(
            ["fluidsynth", "-ni", "font.sf2", midi_filename, "-F", wav_filename, "-r", "44100"],
            check=True,
        )

        # Load the generated audio
        song, sr = torchaudio.load(wav_filename)
        song = song.to(device)

        # Use the user-provided BPM value for duration calculation
        duration = calculate_duration(bpm)

        # Create slices from the song using the user-provided BPM value
        slices = create_slices(song, sr, 35, bpm, num_slices=5)

        # The dropdown label is "<repo id> (<size>)"; only the repo id is needed.
        model_name = musicgen_model.split(" ")[0]
        model_continue = MusicGen.get_pretrained(model_name)

        model_continue.set_generation_params(
            use_sampling=True,
            top_k=250,
            top_p=0.0,
            temperature=1.0,
            duration=duration,
            cfg_coef=3,
        )

        # int() because a UI slider may deliver the count as a float.
        for i in range(int(num_iterations)):
            slice_idx = i % len(slices)

            print(f"Running iteration {i + 1} using slice {slice_idx}...")

            prompt_waveform = slices[slice_idx][..., :int(prompt_duration * sr)]
            prompt_waveform = preprocess_audio(prompt_waveform)

            output = model_continue.generate_continuation(prompt_waveform, prompt_sample_rate=sr, progress=True)
            output = output.cpu()  # Move the output tensor back to CPU

            # Make sure the output tensor has at most 2 dimensions
            if output.dim() > 2:
                output = output.squeeze()

            # audio_write appends the ".wav" suffix itself, so pass the bare
            # stem rather than a name that already ends in ".wav".
            stem = f'continue_{i}'
            audio_write(stem, output, model_continue.sample_rate, strategy="loudness", loudness_compressor=True)
            continuation_files.append(f'{stem}.wav')

        # Combine all audio files
        combined_audio = AudioSegment.empty()
        for filename in continuation_files:
            combined_audio += AudioSegment.from_wav(filename)

        combined_audio_filename = f"combined_audio_{seed}.mp3"
        combined_audio.export(combined_audio_filename, format="mp3")
        return combined_audio_filename
    finally:
        # Clean up temporary files, including after a failed attempt.
        _remove_if_present(midi_filename)
        _remove_if_present(wav_filename)
        for filename in continuation_files:
            _remove_if_present(filename)


@spaces.GPU
def generate_music(seed, use_chords, chord_progression, prompt_duration, musicgen_model, num_iterations, bpm):
    """Generate a MIDI sketch, render it, and let MusicGen continue from it.

    Args:
        seed: Seed for the MIDI model. Anything that cannot be converted to
            an int (including an empty string or None) selects a random seed
            between 1 and 10000.
        use_chords: When true and ``chord_progression`` is non-blank, the
            MIDI model is conditioned on that progression.
        chord_progression: Chord progression text passed to the MIDI model.
        prompt_duration: Seconds of each slice handed to MusicGen as prompt.
        musicgen_model: Dropdown label whose first space-separated token is
            the pretrained model name.
        num_iterations: Number of continuations to generate and concatenate.
        bpm: Tempo used for the MIDI export, slice offsets and output length.

    Returns:
        Path of the combined MP3 file.

    Raises:
        IndexError: If every attempt (``_MAX_GENERATION_ATTEMPTS`` seeds)
            fails with an IndexError. Earlier failures trigger a retry with
            a fresh random seed.
        subprocess.CalledProcessError: If fluidsynth exits with an error.
    """
    try:
        seed = int(seed)
    except (TypeError, ValueError):
        seed = random.randint(1, 10000)

    # Built once and reused across retries.
    ml = MusicLangPredictor('musiclang/musiclang-v2')

    for attempt in range(1, _MAX_GENERATION_ATTEMPTS + 1):
        try:
            return _render_track(
                ml, seed, use_chords, chord_progression,
                prompt_duration, musicgen_model, num_iterations, bpm,
            )
        except IndexError:
            # Retry with a new random seed, but give up eventually so a
            # failure that no seed can fix does not spin forever.
            if attempt == _MAX_GENERATION_ATTEMPTS:
                raise
            print(f"IndexError with seed {seed} (attempt {attempt}); retrying with a new seed...")
            seed = random.randint(1, 10000)
180
+
181
# Use the GPU when PyTorch reports CUDA as available; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Markdown snippets rendered inside the "more info" accordion of the UI.

# About the MIDI model.
musiclang_blurb = """
## musiclang
musiclang is a controllable ai midi model. it can generate midi sequences based on user-provided parameters, or unconditionally.
[<img src="https://github.githubassets.com/images/modules/logos_page/GitHub-Mark.png" alt="GitHub" width="20" style="vertical-align:middle"> musiclang github](https://github.com/MusicLang/musiclang_predict)
[<img src="https://huggingface.co/front/assets/huggingface_logo-noborder.svg" alt="Hugging Face" width="20" style="vertical-align:middle"> musiclang huggingface space](https://huggingface.co/spaces/musiclang/musiclang-predict)
"""

# About the audio model and its continuation feature.
musicgen_blurb = """
## musicgen
musicgen is a transformer-based music model that generates audio. It can also do something called a continuation, which was initially meant to extend musicgen outputs beyond 30 seconds. it can be used with any input audio to produce surprising results.
[<img src="https://github.githubassets.com/images/modules/logos_page/GitHub-Mark.png" alt="GitHub" width="20" style="vertical-align:middle"> audiocraft github](https://github.com/facebookresearch/audiocraft)
visit https://thecollabagepatch.com/infinitepolo.mp3 or https://thecollabagepatch.com/audiocraft.mp3 to hear continuations in action.
see also https://youtube.com/@thecollabagepatch
"""

# Credits and links for the community fine-tunes offered in the model dropdown.
finetunes_blurb = """
## fine-tuned models
the fine-tunes hosted on the huggingface hub are provided collectively by the musicgen discord community. thanks to vanya, mj, hoenn, septicDNB and of course, lyra.
[<img src="https://cdn.iconscout.com/icon/free/png-256/discord-3691244-3073764.png" alt="Discord" width="20" style="vertical-align:middle"> musicgen discord](https://discord.gg/93kX8rGZ)
[<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab" style="vertical-align:middle"> fine-tuning colab notebook by lyra](https://colab.research.google.com/drive/13tbcC3A42KlaUZ21qvUXd25SFLu8WIvb)
"""
206
+
207
# Dropdown entries are "<hub repo id> (<model size>)"; generate_music keeps
# only the part before the first space.
musicgen_models = [
    "thepatch/vanya_ai_dnb_0.1 (small)",
    "thepatch/budots_remix (small)",
    "thepatch/PhonkV2 (small)",
    "thepatch/bleeps-medium (medium)",
    "thepatch/hoenn_lofi (large)",
]

# Build the Gradio UI: intro text, collapsible background info, then a row
# with the controls on the left and the resulting audio on the right.
with gr.Blocks() as iface:
    gr.Markdown("# the-slot-machine")
    gr.Markdown("two ai's jamming. warning: outputs will be very strange, likely stupid, and possibly rad.")
    gr.Markdown("this is a musical slot machine. using musiclang, we get a midi output. then, we let a musicgen model continue, semi-randomly, from different sections of the midi track. the slot machine combines em all at the end into something very bizarre. pick a number for the seed between 1 and 10k, or leave it blank to unlock the full rnjesus powers. if you wanna be lame, you can control the chord progression, prompt duration, musicgen model, number of iterations, and BPM.")

    with gr.Accordion("more info", open=False):
        gr.Markdown(musiclang_blurb)
        gr.Markdown(musicgen_blurb)
        gr.Markdown(finetunes_blurb)

    with gr.Row():
        # Left column: generation controls.
        with gr.Column():
            seed = gr.Textbox(label="seed (leave blank for random)", value="")
            use_chords = gr.Checkbox(label="control chord progression", value=False)
            chord_progression = gr.Textbox(label="chord progression (e.g., Am CM Dm E7 Am)", visible=True)
            prompt_duration = gr.Dropdown(
                label="prompt duration (seconds)",
                choices=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
                value=7,
            )
            musicgen_model = gr.Dropdown(
                label="musicGen model",
                choices=musicgen_models,
                value=musicgen_models[0],
            )
            num_iterations = gr.Slider(label="number of iterations", minimum=1, maximum=10, step=1, value=3)
            bpm = gr.Slider(label="BPM", minimum=60, maximum=200, step=1, value=140)
            generate_button = gr.Button("generate music")

        # Right column: player for the generated track.
        with gr.Column():
            output_audio = gr.Audio(label="your track")

    # The input order must match the parameter order of generate_music.
    generate_button.click(
        fn=generate_music,
        inputs=[
            seed,
            use_chords,
            chord_progression,
            prompt_duration,
            musicgen_model,
            num_iterations,
            bpm,
        ],
        outputs=output_audio,
    )

iface.launch()