thecollabagepatch committed
Commit 8bdf8d9
Parent: 32f56a6

breaking up functions

Files changed (1): app.py (+119 -114)

app.py CHANGED
@@ -11,6 +11,8 @@ from audiocraft.data.audio import audio_write
 from pydub import AudioSegment
 import spaces
 
+# Check if CUDA is available
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 # Utility Functions
 def peak_normalize(y, target_peak=0.97):
@@ -72,115 +74,115 @@ def calculate_duration(bpm, min_duration=29, max_duration=30):
 
     return duration
 
+@spaces.GPU(duration=60)
+def generate_midi(seed, use_chords, chord_progression, bpm):
+    if seed == "":
+        seed = random.randint(1, 10000)
+
+    ml = MusicLangPredictor('musiclang/musiclang-v2')
+
+    try:
+        seed = int(seed)
+    except ValueError:
+        seed = random.randint(1, 10000)
+
+    nb_tokens = 1024
+    temperature = 0.9
+    top_p = 1.0
+
+    if use_chords and chord_progression.strip():
+        score = ml.predict_chords(
+            chord_progression,
+            time_signature=(4, 4),
+            temperature=temperature,
+            topp=top_p,
+            rng_seed=seed
+        )
+    else:
+        score = ml.predict(
+            nb_tokens=nb_tokens,
+            temperature=temperature,
+            topp=top_p,
+            rng_seed=seed
+        )
+
+    midi_filename = f"output_{seed}.mid"
+    wav_filename = midi_filename.replace(".mid", ".wav")
+
+    score.to_midi(midi_filename, tempo=bpm, time_signature=(4, 4))
+
+    subprocess.run(["fluidsynth", "-ni", "font.sf2", midi_filename, "-F", wav_filename, "-r", "44100"])
+
+    # Clean up temporary MIDI file
+    os.remove(midi_filename)
+
+    return wav_filename
+
 @spaces.GPU(duration=120)
-def generate_music(seed, use_chords, chord_progression, prompt_duration, musicgen_model, num_iterations, bpm):
-    while True:
-        try:
-            if seed == "":
-                seed = random.randint(1, 10000)
-
-            ml = MusicLangPredictor('musiclang/musiclang-v2')
-
-            try:
-                seed = int(seed)
-            except ValueError:
-                seed = random.randint(1, 10000)
-
-            nb_tokens = 1024
-            temperature = 0.9
-            top_p = 1.0
-
-            if use_chords and chord_progression.strip():
-                score = ml.predict_chords(
-                    chord_progression,
-                    time_signature=(4, 4),
-                    temperature=temperature,
-                    topp=top_p,
-                    rng_seed=seed
-                )
-            else:
-                score = ml.predict(
-                    nb_tokens=nb_tokens,
-                    temperature=temperature,
-                    topp=top_p,
-                    rng_seed=seed
-                )
-
-            midi_filename = f"output_{seed}.mid"
-            wav_filename = midi_filename.replace(".mid", ".wav")
-
-            score.to_midi(midi_filename, tempo=bpm, time_signature=(4, 4))
-
-            subprocess.run(["fluidsynth", "-ni", "font.sf2", midi_filename, "-F", wav_filename, "-r", "44100"])
-
-            # Load the generated audio
-            song, sr = torchaudio.load(wav_filename)
-            song = song.to(device)
-
-            # Use the user-provided BPM value for duration calculation
-            duration = calculate_duration(bpm)
-
-            # Create slices from the song using the user-provided BPM value
-            slices = create_slices(song, sr, 35, bpm, num_slices=5)
-
-            # Load the model
-            model_name = musicgen_model.split(" ")[0]
-            model_continue = MusicGen.get_pretrained(model_name)
-
-            # Setting generation parameters
-            model_continue.set_generation_params(
-                use_sampling=True,
-                top_k=250,
-                top_p=0.0,
-                temperature=1.0,
-                duration=duration,
-                cfg_coef=3
-            )
-
-            all_audio_files = []
-
-            for i in range(num_iterations):
-                slice_idx = i % len(slices)
-
-                print(f"Running iteration {i + 1} using slice {slice_idx}...")
-
-                prompt_waveform = slices[slice_idx][..., :int(prompt_duration * sr)]
-                prompt_waveform = preprocess_audio(prompt_waveform)
-
-                output = model_continue.generate_continuation(prompt_waveform, prompt_sample_rate=sr, progress=True)
-                output = output.cpu()  # Move the output tensor back to CPU
-
-                # Make sure the output tensor has at most 2 dimensions
-                if len(output.size()) > 2:
-                    output = output.squeeze()
-
-                filename_without_extension = f'continue_{i}'
-                filename_with_extension = f'{filename_without_extension}.wav'
-
-                audio_write(filename_with_extension, output, model_continue.sample_rate, strategy="loudness", loudness_compressor=True)
-                all_audio_files.append(f'{filename_without_extension}.wav.wav')  # Assuming the library appends an extra .wav
-
-            # Combine all audio files
-            combined_audio = AudioSegment.empty()
-            for filename in all_audio_files:
-                combined_audio += AudioSegment.from_wav(filename)
-
-            combined_audio_filename = f"combined_audio_{seed}.mp3"
-            combined_audio.export(combined_audio_filename, format="mp3")
-
-            # Clean up temporary files
-            os.remove(midi_filename)
-            os.remove(wav_filename)
-            for filename in all_audio_files:
-                os.remove(filename)
-
-            return combined_audio_filename
-        except IndexError:
-            # Retry with a new random seed if an IndexError is raised
-            seed = random.randint(1, 10000)
+def generate_music(wav_filename, prompt_duration, musicgen_model, num_iterations, bpm):
+    # Load the generated audio
+    song, sr = torchaudio.load(wav_filename)
+    song = song.to(device)
+
+    # Use the user-provided BPM value for duration calculation
+    duration = calculate_duration(bpm)
+
+    # Create slices from the song using the user-provided BPM value
+    slices = create_slices(song, sr, 35, bpm, num_slices=5)
+
+    # Load the model
+    model_name = musicgen_model.split(" ")[0]
+    model_continue = MusicGen.get_pretrained(model_name)
+
+    # Setting generation parameters
+    model_continue.set_generation_params(
+        use_sampling=True,
+        top_k=250,
+        top_p=0.0,
+        temperature=1.0,
+        duration=duration,
+        cfg_coef=3
+    )
+
+    all_audio_files = []
+
+    for i in range(num_iterations):
+        slice_idx = i % len(slices)
+
+        print(f"Running iteration {i + 1} using slice {slice_idx}...")
+
+        prompt_waveform = slices[slice_idx][..., :int(prompt_duration * sr)]
+        prompt_waveform = preprocess_audio(prompt_waveform)
+
+        output = model_continue.generate_continuation(prompt_waveform, prompt_sample_rate=sr, progress=True)
+        output = output.cpu()  # Move the output tensor back to CPU
+
+        # Make sure the output tensor has at most 2 dimensions
+        if len(output.size()) > 2:
+            output = output.squeeze()
+
+        filename_without_extension = f'continue_{i}'
+        filename_with_extension = f'{filename_without_extension}.wav'
+
+        audio_write(filename_with_extension, output, model_continue.sample_rate, strategy="loudness", loudness_compressor=True)
+        all_audio_files.append(f'{filename_without_extension}.wav.wav')  # Assuming the library appends an extra .wav
+
+    # Combine all audio files
+    combined_audio = AudioSegment.empty()
+    for filename in all_audio_files:
+        combined_audio += AudioSegment.from_wav(filename)
+
+    combined_audio_filename = f"combined_audio_{random.randint(1, 10000)}.mp3"
+    combined_audio.export(combined_audio_filename, format="mp3")
+
+    # Clean up temporary files
+    os.remove(wav_filename)
+    for filename in all_audio_files:
+        os.remove(filename)
+
+    return combined_audio_filename
+
 
-# Check if CUDA is available
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 # Define the expandable sections
 musiclang_blurb = """
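Splitting the pipeline also lets each stage request its own ZeroGPU budget, which is why the decorators above differ (60 s for the MIDI render, 120 s for the MusicGen continuation). A minimal sketch of that pattern, with placeholder stage names rather than the app's real functions:

```python
import spaces

# `duration` tells ZeroGPU roughly how long each call expects to hold the GPU.
@spaces.GPU(duration=60)    # short budget: render MIDI to WAV
def short_stage(*args):
    ...

@spaces.GPU(duration=120)   # longer budget: MusicGen continuation
def long_stage(*args):
    ...
```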
@@ -221,6 +223,11 @@ with gr.Blocks() as iface:
             seed = gr.Textbox(label="seed (leave blank for random)", value="")
             use_chords = gr.Checkbox(label="control chord progression", value=False)
             chord_progression = gr.Textbox(label="chord progression (e.g., Am CM Dm E7 Am)", visible=True)
+            bpm = gr.Slider(label="BPM", minimum=60, maximum=200, step=1, value=110)
+            generate_midi_button = gr.Button("Generate MIDI")
+            midi_audio = gr.Audio(label="Generated MIDI Audio")
+
+        with gr.Column():
             prompt_duration = gr.Dropdown(label="prompt duration (seconds)", choices=list(range(1, 11)), value=7)
             musicgen_models = [
                 "thepatch/vanya_ai_dnb_0.1 (small)",
@@ -229,14 +236,12 @@ with gr.Blocks() as iface:
                 "thepatch/bleeps-medium (medium)",
                 "thepatch/hoenn_lofi (large)"
             ]
-
             musicgen_model = gr.Dropdown(label="musicGen model", choices=musicgen_models, value=musicgen_models[0])
-            num_iterations = gr.Slider(label="number of iterations", minimum=1, maximum=10, step=1, value=3)
-            bpm = gr.Slider(label="BPM", minimum=60, maximum=200, step=1, value=140)
-            generate_button = gr.Button("generate music")
-        with gr.Column():
-            output_audio = gr.Audio(label="your track")
+            num_iterations = gr.Slider(label="number of iterations", minimum=1, maximum=3, step=1, value=3)
+            generate_music_button = gr.Button("Generate Music")
+            output_audio = gr.Audio(label="Generated Music")
 
-    generate_button.click(generate_music, inputs=[seed, use_chords, chord_progression, prompt_duration, musicgen_model, num_iterations, bpm], outputs=output_audio)
+    generate_midi_button.click(generate_midi, inputs=[seed, use_chords, chord_progression, bpm], outputs=midi_audio)
+    generate_music_button.click(generate_music, inputs=[midi_audio, prompt_duration, musicgen_model, num_iterations, bpm], outputs=output_audio)
 
     iface.launch()
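The new wiring chains the two buttons through the shared `midi_audio` component: the first click writes its result there, and the second click reads it back as an input. A minimal sketch of the same pattern with hypothetical stand-in functions:

```python
import gradio as gr

def step_one(seed):
    # stand-in for generate_midi: write an intermediate WAV, return its path
    return "intermediate.wav"

def step_two(wav_path):
    # stand-in for generate_music: consume the intermediate file, return the final one
    return "final.mp3"

with gr.Blocks() as demo:
    seed = gr.Textbox(label="seed")
    # type="filepath" makes the component hand a path string to step_two
    intermediate = gr.Audio(label="intermediate", type="filepath")
    final = gr.Audio(label="final")
    gr.Button("step one").click(step_one, inputs=seed, outputs=intermediate)
    gr.Button("step two").click(step_two, inputs=intermediate, outputs=final)

demo.launch()
```

One caveat: `generate_music` treats its first argument as a filepath, so `midi_audio` needs to deliver one (e.g., `type="filepath"`); by default a `gr.Audio` input may arrive as a `(sample_rate, numpy_array)` tuple instead.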
 
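A last note on the `fluidsynth` call both versions share: it renders the MIDI offline through the bundled `font.sf2` SoundFont, and its exit status is currently ignored. A purely illustrative sketch of the same invocation with a failure check (filenames are examples):

```python
import subprocess

# -n: ignore MIDI input, -i: no interactive shell,
# -F: render to the given file instead of playing, -r: sample rate in Hz
result = subprocess.run(
    ["fluidsynth", "-ni", "font.sf2", "output_1234.mid",
     "-F", "output_1234.wav", "-r", "44100"],
    capture_output=True, text=True,
)
result.check_returncode()  # raise instead of silently producing no WAV
```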