Spaces:
Running
Running
modify app
Browse files
app.py
CHANGED
@@ -7,6 +7,10 @@ from inference import MasteringStyleTransfer
|
|
7 |
from utils import download_youtube_audio
|
8 |
from config import args
|
9 |
import pyloudnorm as pyln
|
|
|
|
|
|
|
|
|
10 |
|
11 |
mastering_transfer = MasteringStyleTransfer(args)
|
12 |
|
@@ -87,35 +91,46 @@ def perform_ito(input_audio, reference_audio, ito_reference_audio, num_steps, op
|
|
87 |
initial_reference_feature = mastering_transfer.get_reference_embedding(reference_tensor)
|
88 |
|
89 |
ito_log = ""
|
90 |
-
|
|
|
91 |
input_tensor, ito_reference_tensor, ito_config, initial_reference_feature
|
92 |
):
|
93 |
ito_log += log_entry
|
94 |
ito_param_output = mastering_transfer.get_param_output_string(current_params)
|
|
|
95 |
|
96 |
# Convert current_output to numpy array if it's a tensor
|
97 |
if isinstance(current_output, torch.Tensor):
|
98 |
-
current_output = current_output.
|
99 |
|
100 |
-
#
|
101 |
-
|
102 |
|
103 |
# Denormalize the audio to int16
|
104 |
current_output = denormalize_audio(current_output, dtype=np.int16)
|
105 |
|
|
|
106 |
if current_output.ndim == 1:
|
107 |
current_output = current_output.reshape(-1, 1)
|
108 |
elif current_output.ndim > 2:
|
109 |
current_output = current_output.squeeze()
|
|
|
|
|
110 |
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
|
|
|
|
|
|
|
|
|
|
118 |
|
|
|
119 |
with gr.Blocks() as demo:
|
120 |
gr.Markdown("# Mastering Style Transfer Demo")
|
121 |
|
@@ -151,28 +166,12 @@ with gr.Blocks() as demo:
|
|
151 |
with gr.Row():
|
152 |
with gr.Column():
|
153 |
ito_output_audio = gr.Audio(label="ITO Output Audio")
|
154 |
-
ito_param_output = gr.Textbox(label="ITO Predicted Parameters", lines=
|
155 |
with gr.Column():
|
156 |
ito_steps_taken = gr.Number(label="ITO Steps Taken")
|
|
|
157 |
ito_log = gr.Textbox(label="ITO Log", lines=10)
|
158 |
|
159 |
-
# with gr.Row():
|
160 |
-
# with gr.Column(scale=2):
|
161 |
-
# ito_reference_audio = gr.Audio(label="ITO Reference Audio (optional)")
|
162 |
-
# num_steps = gr.Slider(minimum=1, maximum=100, value=10, step=1, label="Number of Steps")
|
163 |
-
# optimizer = gr.Dropdown(["Adam", "RAdam", "SGD"], value="RAdam", label="Optimizer")
|
164 |
-
# learning_rate = gr.Slider(minimum=0.0001, maximum=0.1, value=0.001, step=0.0001, label="Learning Rate")
|
165 |
-
# af_weights = gr.Textbox(label="AudioFeatureLoss Weights (comma-separated)", value="0.1,0.001,1.0,1.0,0.1")
|
166 |
-
|
167 |
-
# ito_button = gr.Button("Perform ITO")
|
168 |
-
|
169 |
-
# ito_output_audio = gr.Audio(label="ITO Output Audio")
|
170 |
-
# ito_param_output = gr.Textbox(label="ITO Predicted Parameters", lines=10)
|
171 |
-
# ito_steps_taken = gr.Number(label="ITO Steps Taken")
|
172 |
-
|
173 |
-
# with gr.Column(scale=1):
|
174 |
-
# ito_log = gr.Textbox(label="ITO Log", lines=30)
|
175 |
-
|
176 |
def run_ito(input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights):
|
177 |
af_weights = [float(w.strip()) for w in af_weights.split(',')]
|
178 |
ito_generator = perform_ito(
|
@@ -186,134 +185,22 @@ with gr.Blocks() as demo:
|
|
186 |
final_log = ""
|
187 |
|
188 |
# Iterate through the generator to get the final results
|
189 |
-
for audio, params, steps, log in ito_generator:
|
190 |
final_audio = audio
|
191 |
final_params = params
|
192 |
final_steps = steps
|
193 |
final_log = log
|
|
|
|
|
|
|
194 |
|
195 |
-
return final_audio, final_params, final_steps, final_log
|
196 |
|
197 |
ito_button.click(
|
198 |
run_ito,
|
199 |
inputs=[input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights],
|
200 |
-
outputs=[ito_output_audio, ito_param_output, ito_steps_taken, ito_log]
|
201 |
)
|
202 |
|
203 |
demo.launch()
|
204 |
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
# import gradio as gr
|
209 |
-
# import torch
|
210 |
-
# import soundfile as sf
|
211 |
-
# import numpy as np
|
212 |
-
# import yaml
|
213 |
-
# from inference import MasteringStyleTransfer
|
214 |
-
# from utils import download_youtube_audio
|
215 |
-
# from config import args
|
216 |
-
|
217 |
-
# mastering_transfer = MasteringStyleTransfer(args)
|
218 |
-
|
219 |
-
# def process_audio(input_audio, reference_audio, perform_ito, ito_reference_audio=None):
|
220 |
-
# # Process the audio files
|
221 |
-
# output_audio, predicted_params, ito_output_audio, ito_predicted_params, ito_log, sr = mastering_transfer.process_audio(
|
222 |
-
# input_audio, reference_audio, ito_reference_audio if ito_reference_audio else reference_audio, {}, perform_ito
|
223 |
-
# )
|
224 |
-
|
225 |
-
# # Generate parameter output strings
|
226 |
-
# param_output = mastering_transfer.get_param_output_string(predicted_params)
|
227 |
-
# ito_param_output = mastering_transfer.get_param_output_string(ito_predicted_params) if ito_predicted_params is not None else "ITO not performed"
|
228 |
-
|
229 |
-
# # Generate top 10 differences if ITO was performed
|
230 |
-
# top_10_diff = mastering_transfer.get_top_10_diff_string(predicted_params, ito_predicted_params) if ito_predicted_params is not None else "ITO not performed"
|
231 |
-
|
232 |
-
# return "output_mastered.wav", "ito_output_mastered.wav" if ito_output_audio is not None else None, param_output, ito_param_output, top_10_diff, ito_log
|
233 |
-
|
234 |
-
# def process_with_ito(input_audio, reference_audio, perform_ito, use_same_reference, ito_reference_audio):
|
235 |
-
# ito_ref = reference_audio if use_same_reference else ito_reference_audio
|
236 |
-
# return process_audio(input_audio, reference_audio, perform_ito, ito_ref)
|
237 |
-
|
238 |
-
# def process_youtube_with_ito(input_url, reference_url, perform_ito, use_same_reference, ito_reference_url):
|
239 |
-
# input_audio = download_youtube_audio(input_url)
|
240 |
-
# reference_audio = download_youtube_audio(reference_url)
|
241 |
-
# ito_ref = reference_audio if use_same_reference else download_youtube_audio(ito_reference_url)
|
242 |
-
|
243 |
-
# output_audio, predicted_params, ito_output_audio, ito_predicted_params, ito_log, sr = mastering_transfer.process_audio(
|
244 |
-
# input_audio, reference_audio, ito_ref, {}, perform_ito, log_ito=True
|
245 |
-
# )
|
246 |
-
|
247 |
-
# param_output = mastering_transfer.get_param_output_string(predicted_params)
|
248 |
-
# ito_param_output = mastering_transfer.get_param_output_string(ito_predicted_params) if ito_predicted_params is not None else "ITO not performed"
|
249 |
-
# top_10_diff = mastering_transfer.get_top_10_diff_string(predicted_params, ito_predicted_params) if ito_predicted_params is not None else "ITO not performed"
|
250 |
-
|
251 |
-
# return "output_mastered_yt.wav", "ito_output_mastered_yt.wav" if ito_output_audio is not None else None, param_output, ito_param_output, top_10_diff, ito_log
|
252 |
-
|
253 |
-
|
254 |
-
# with gr.Blocks() as demo:
|
255 |
-
# gr.Markdown("# Mastering Style Transfer Demo")
|
256 |
-
|
257 |
-
# with gr.Tab("Upload Audio"):
|
258 |
-
# input_audio = gr.Audio(label="Input Audio")
|
259 |
-
# reference_audio = gr.Audio(label="Reference Audio")
|
260 |
-
# perform_ito = gr.Checkbox(label="Perform ITO")
|
261 |
-
# with gr.Column(visible=False) as ito_options:
|
262 |
-
# use_same_reference = gr.Checkbox(label="Use same reference audio for ITO", value=True)
|
263 |
-
# ito_reference_audio = gr.Audio(label="ITO Reference Audio", visible=False)
|
264 |
-
|
265 |
-
# def update_ito_options(perform_ito):
|
266 |
-
# return gr.Column.update(visible=perform_ito)
|
267 |
-
|
268 |
-
# def update_ito_reference(use_same):
|
269 |
-
# return gr.Audio.update(visible=not use_same)
|
270 |
-
|
271 |
-
# perform_ito.change(fn=update_ito_options, inputs=perform_ito, outputs=ito_options)
|
272 |
-
# use_same_reference.change(fn=update_ito_reference, inputs=use_same_reference, outputs=ito_reference_audio)
|
273 |
-
|
274 |
-
# submit_button = gr.Button("Process")
|
275 |
-
# output_audio = gr.Audio(label="Output Audio")
|
276 |
-
# ito_output_audio = gr.Audio(label="ITO Output Audio")
|
277 |
-
# param_output = gr.Textbox(label="Predicted Parameters", lines=10)
|
278 |
-
# ito_param_output = gr.Textbox(label="ITO Predicted Parameters", lines=10)
|
279 |
-
# top_10_diff = gr.Textbox(label="Top 10 Parameter Differences", lines=10)
|
280 |
-
# ito_log = gr.Textbox(label="ITO Log", lines=20)
|
281 |
-
|
282 |
-
# submit_button.click(
|
283 |
-
# process_with_ito,
|
284 |
-
# inputs=[input_audio, reference_audio, perform_ito, use_same_reference, ito_reference_audio],
|
285 |
-
# outputs=[output_audio, ito_output_audio, param_output, ito_param_output, top_10_diff, ito_log]
|
286 |
-
# )
|
287 |
-
|
288 |
-
# with gr.Tab("YouTube URLs"):
|
289 |
-
# input_url = gr.Textbox(label="Input YouTube URL")
|
290 |
-
# reference_url = gr.Textbox(label="Reference YouTube URL")
|
291 |
-
# perform_ito_yt = gr.Checkbox(label="Perform ITO")
|
292 |
-
# with gr.Column(visible=False) as ito_options_yt:
|
293 |
-
# use_same_reference_yt = gr.Checkbox(label="Use same reference audio for ITO", value=True)
|
294 |
-
# ito_reference_url = gr.Textbox(label="ITO Reference YouTube URL", visible=False)
|
295 |
-
|
296 |
-
# def update_ito_options_yt(perform_ito):
|
297 |
-
# return gr.Column.update(visible=perform_ito)
|
298 |
-
|
299 |
-
# def update_ito_reference_yt(use_same):
|
300 |
-
# return gr.Textbox.update(visible=not use_same)
|
301 |
-
|
302 |
-
# perform_ito_yt.change(fn=update_ito_options_yt, inputs=perform_ito_yt, outputs=ito_options_yt)
|
303 |
-
# use_same_reference_yt.change(fn=update_ito_reference_yt, inputs=use_same_reference_yt, outputs=ito_reference_url)
|
304 |
-
|
305 |
-
# submit_button_yt = gr.Button("Process")
|
306 |
-
# output_audio_yt = gr.Audio(label="Output Audio")
|
307 |
-
# ito_output_audio_yt = gr.Audio(label="ITO Output Audio")
|
308 |
-
# param_output_yt = gr.Textbox(label="Predicted Parameters", lines=10)
|
309 |
-
# ito_param_output_yt = gr.Textbox(label="ITO Predicted Parameters", lines=10)
|
310 |
-
# top_10_diff_yt = gr.Textbox(label="Top 10 Parameter Differences", lines=10)
|
311 |
-
# ito_log_yt = gr.Textbox(label="ITO Log", lines=20)
|
312 |
-
|
313 |
-
# submit_button_yt.click(
|
314 |
-
# process_youtube_with_ito,
|
315 |
-
# inputs=[input_url, reference_url, perform_ito_yt, use_same_reference_yt, ito_reference_url],
|
316 |
-
# outputs=[output_audio_yt, ito_output_audio_yt, param_output_yt, ito_param_output_yt, top_10_diff_yt, ito_log_yt]
|
317 |
-
# )
|
318 |
-
|
319 |
-
# demo.launch()
|
|
|
7 |
from utils import download_youtube_audio
|
8 |
from config import args
|
9 |
import pyloudnorm as pyln
|
10 |
+
import tempfile
|
11 |
+
import os
|
12 |
+
import matplotlib.pyplot as plt
|
13 |
+
import io
|
14 |
|
15 |
mastering_transfer = MasteringStyleTransfer(args)
|
16 |
|
|
|
91 |
initial_reference_feature = mastering_transfer.get_reference_embedding(reference_tensor)
|
92 |
|
93 |
ito_log = ""
|
94 |
+
loss_values = []
|
95 |
+
for log_entry, current_output, current_params, step, loss in mastering_transfer.inference_time_optimization(
|
96 |
input_tensor, ito_reference_tensor, ito_config, initial_reference_feature
|
97 |
):
|
98 |
ito_log += log_entry
|
99 |
ito_param_output = mastering_transfer.get_param_output_string(current_params)
|
100 |
+
loss_values.append(loss)
|
101 |
|
102 |
# Convert current_output to numpy array if it's a tensor
|
103 |
if isinstance(current_output, torch.Tensor):
|
104 |
+
current_output = current_output.cpu().numpy()
|
105 |
|
106 |
+
# Normalize output audio
|
107 |
+
current_output = loudness_normalize(current_output, args.sample_rate)
|
108 |
|
109 |
# Denormalize the audio to int16
|
110 |
current_output = denormalize_audio(current_output, dtype=np.int16)
|
111 |
|
112 |
+
# Ensure the audio is in the correct shape (samples, channels)
|
113 |
if current_output.ndim == 1:
|
114 |
current_output = current_output.reshape(-1, 1)
|
115 |
elif current_output.ndim > 2:
|
116 |
current_output = current_output.squeeze()
|
117 |
+
|
118 |
+
yield (args.sample_rate, current_output), ito_param_output, step, ito_log, loss_values
|
119 |
|
120 |
+
def plot_loss_curve(loss_values):
    """Render the ITO loss history as a PNG image held in memory.

    Args:
        loss_values: Sequence of loss values; they are plotted in order
            against their index, which the x-axis labels as "Step".

    Returns:
        io.BytesIO: Buffer containing the PNG-encoded plot, rewound to
        position 0 so it can be read immediately.
    """
    # Draw on an explicit Figure/Axes pair instead of pyplot's implicit
    # "current figure", so this function does not depend on global state.
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        ax.plot(loss_values)
        ax.set_title('ITO Loss Curve')
        ax.set_xlabel('Step')
        ax.set_ylabel('Loss')
        ax.grid(True)

        buf = io.BytesIO()
        fig.savefig(buf, format='png')
    finally:
        # pyplot keeps every figure it creates registered until it is
        # closed; without this each call would leave another figure alive.
        plt.close(fig)

    buf.seek(0)
    # NOTE(review): the caller hands this buffer to a gr.Image output --
    # confirm that component accepts a file-like object as its value.
    return buf
|
132 |
|
133 |
+
""" APP display """
|
134 |
with gr.Blocks() as demo:
|
135 |
gr.Markdown("# Mastering Style Transfer Demo")
|
136 |
|
|
|
166 |
with gr.Row():
|
167 |
with gr.Column():
|
168 |
ito_output_audio = gr.Audio(label="ITO Output Audio")
|
169 |
+
ito_param_output = gr.Textbox(label="ITO Predicted Parameters", lines=15)
|
170 |
with gr.Column():
|
171 |
ito_steps_taken = gr.Number(label="ITO Steps Taken")
|
172 |
+
ito_loss_plot = gr.Image(label="ITO Loss Curve")
|
173 |
ito_log = gr.Textbox(label="ITO Log", lines=10)
|
174 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
175 |
def run_ito(input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights):
|
176 |
af_weights = [float(w.strip()) for w in af_weights.split(',')]
|
177 |
ito_generator = perform_ito(
|
|
|
185 |
final_log = ""
|
186 |
|
187 |
# Iterate through the generator to get the final results
|
188 |
+
for audio, params, steps, log, losses in ito_generator:
|
189 |
final_audio = audio
|
190 |
final_params = params
|
191 |
final_steps = steps
|
192 |
final_log = log
|
193 |
+
loss_values = losses
|
194 |
+
|
195 |
+
loss_plot = plot_loss_curve(loss_values)
|
196 |
|
197 |
+
return final_audio, final_params, final_steps, final_log, loss_plot
|
198 |
|
199 |
ito_button.click(
|
200 |
run_ito,
|
201 |
inputs=[input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights],
|
202 |
+
outputs=[ito_output_audio, ito_param_output, ito_steps_taken, ito_log, ito_loss_plot]
|
203 |
)
|
204 |
|
205 |
demo.launch()
|
206 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|