jhtonyKoo committed on
Commit
76df10e
1 Parent(s): e3c9443

modify app

Browse files
Files changed (1) hide show
  1. app.py +34 -147
app.py CHANGED
@@ -7,6 +7,10 @@ from inference import MasteringStyleTransfer
7
  from utils import download_youtube_audio
8
  from config import args
9
  import pyloudnorm as pyln
 
 
 
 
10
 
11
  mastering_transfer = MasteringStyleTransfer(args)
12
 
@@ -87,35 +91,46 @@ def perform_ito(input_audio, reference_audio, ito_reference_audio, num_steps, op
87
  initial_reference_feature = mastering_transfer.get_reference_embedding(reference_tensor)
88
 
89
  ito_log = ""
90
- for log_entry, current_output, current_params, step in mastering_transfer.inference_time_optimization(
 
91
  input_tensor, ito_reference_tensor, ito_config, initial_reference_feature
92
  ):
93
  ito_log += log_entry
94
  ito_param_output = mastering_transfer.get_param_output_string(current_params)
 
95
 
96
  # Convert current_output to numpy array if it's a tensor
97
  if isinstance(current_output, torch.Tensor):
98
- current_output = current_output.detach().cpu().numpy()
99
 
100
- # # Normalize output audio
101
- # current_output = loudness_normalize(current_output, args.sample_rate)
102
 
103
  # Denormalize the audio to int16
104
  current_output = denormalize_audio(current_output, dtype=np.int16)
105
 
 
106
  if current_output.ndim == 1:
107
  current_output = current_output.reshape(-1, 1)
108
  elif current_output.ndim > 2:
109
  current_output = current_output.squeeze()
 
 
110
 
111
- # Ensure the audio is in the correct shape (samples, channels)
112
- if current_output.shape[1] > current_output.shape[0]:
113
- current_output = current_output.transpose(1,0)
114
-
115
- yield (args.sample_rate, current_output), ito_param_output, step, ito_log
116
-
117
-
 
 
 
 
 
118
 
 
119
  with gr.Blocks() as demo:
120
  gr.Markdown("# Mastering Style Transfer Demo")
121
 
@@ -151,28 +166,12 @@ with gr.Blocks() as demo:
151
  with gr.Row():
152
  with gr.Column():
153
  ito_output_audio = gr.Audio(label="ITO Output Audio")
154
- ito_param_output = gr.Textbox(label="ITO Predicted Parameters", lines=5)
155
  with gr.Column():
156
  ito_steps_taken = gr.Number(label="ITO Steps Taken")
 
157
  ito_log = gr.Textbox(label="ITO Log", lines=10)
158
 
159
- # with gr.Row():
160
- # with gr.Column(scale=2):
161
- # ito_reference_audio = gr.Audio(label="ITO Reference Audio (optional)")
162
- # num_steps = gr.Slider(minimum=1, maximum=100, value=10, step=1, label="Number of Steps")
163
- # optimizer = gr.Dropdown(["Adam", "RAdam", "SGD"], value="RAdam", label="Optimizer")
164
- # learning_rate = gr.Slider(minimum=0.0001, maximum=0.1, value=0.001, step=0.0001, label="Learning Rate")
165
- # af_weights = gr.Textbox(label="AudioFeatureLoss Weights (comma-separated)", value="0.1,0.001,1.0,1.0,0.1")
166
-
167
- # ito_button = gr.Button("Perform ITO")
168
-
169
- # ito_output_audio = gr.Audio(label="ITO Output Audio")
170
- # ito_param_output = gr.Textbox(label="ITO Predicted Parameters", lines=10)
171
- # ito_steps_taken = gr.Number(label="ITO Steps Taken")
172
-
173
- # with gr.Column(scale=1):
174
- # ito_log = gr.Textbox(label="ITO Log", lines=30)
175
-
176
  def run_ito(input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights):
177
  af_weights = [float(w.strip()) for w in af_weights.split(',')]
178
  ito_generator = perform_ito(
@@ -186,134 +185,22 @@ with gr.Blocks() as demo:
186
  final_log = ""
187
 
188
  # Iterate through the generator to get the final results
189
- for audio, params, steps, log in ito_generator:
190
  final_audio = audio
191
  final_params = params
192
  final_steps = steps
193
  final_log = log
 
 
 
194
 
195
- return final_audio, final_params, final_steps, final_log
196
 
197
  ito_button.click(
198
  run_ito,
199
  inputs=[input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights],
200
- outputs=[ito_output_audio, ito_param_output, ito_steps_taken, ito_log]
201
  )
202
 
203
  demo.launch()
204
 
205
-
206
-
207
-
208
- # import gradio as gr
209
- # import torch
210
- # import soundfile as sf
211
- # import numpy as np
212
- # import yaml
213
- # from inference import MasteringStyleTransfer
214
- # from utils import download_youtube_audio
215
- # from config import args
216
-
217
- # mastering_transfer = MasteringStyleTransfer(args)
218
-
219
- # def process_audio(input_audio, reference_audio, perform_ito, ito_reference_audio=None):
220
- # # Process the audio files
221
- # output_audio, predicted_params, ito_output_audio, ito_predicted_params, ito_log, sr = mastering_transfer.process_audio(
222
- # input_audio, reference_audio, ito_reference_audio if ito_reference_audio else reference_audio, {}, perform_ito
223
- # )
224
-
225
- # # Generate parameter output strings
226
- # param_output = mastering_transfer.get_param_output_string(predicted_params)
227
- # ito_param_output = mastering_transfer.get_param_output_string(ito_predicted_params) if ito_predicted_params is not None else "ITO not performed"
228
-
229
- # # Generate top 10 differences if ITO was performed
230
- # top_10_diff = mastering_transfer.get_top_10_diff_string(predicted_params, ito_predicted_params) if ito_predicted_params is not None else "ITO not performed"
231
-
232
- # return "output_mastered.wav", "ito_output_mastered.wav" if ito_output_audio is not None else None, param_output, ito_param_output, top_10_diff, ito_log
233
-
234
- # def process_with_ito(input_audio, reference_audio, perform_ito, use_same_reference, ito_reference_audio):
235
- # ito_ref = reference_audio if use_same_reference else ito_reference_audio
236
- # return process_audio(input_audio, reference_audio, perform_ito, ito_ref)
237
-
238
- # def process_youtube_with_ito(input_url, reference_url, perform_ito, use_same_reference, ito_reference_url):
239
- # input_audio = download_youtube_audio(input_url)
240
- # reference_audio = download_youtube_audio(reference_url)
241
- # ito_ref = reference_audio if use_same_reference else download_youtube_audio(ito_reference_url)
242
-
243
- # output_audio, predicted_params, ito_output_audio, ito_predicted_params, ito_log, sr = mastering_transfer.process_audio(
244
- # input_audio, reference_audio, ito_ref, {}, perform_ito, log_ito=True
245
- # )
246
-
247
- # param_output = mastering_transfer.get_param_output_string(predicted_params)
248
- # ito_param_output = mastering_transfer.get_param_output_string(ito_predicted_params) if ito_predicted_params is not None else "ITO not performed"
249
- # top_10_diff = mastering_transfer.get_top_10_diff_string(predicted_params, ito_predicted_params) if ito_predicted_params is not None else "ITO not performed"
250
-
251
- # return "output_mastered_yt.wav", "ito_output_mastered_yt.wav" if ito_output_audio is not None else None, param_output, ito_param_output, top_10_diff, ito_log
252
-
253
-
254
- # with gr.Blocks() as demo:
255
- # gr.Markdown("# Mastering Style Transfer Demo")
256
-
257
- # with gr.Tab("Upload Audio"):
258
- # input_audio = gr.Audio(label="Input Audio")
259
- # reference_audio = gr.Audio(label="Reference Audio")
260
- # perform_ito = gr.Checkbox(label="Perform ITO")
261
- # with gr.Column(visible=False) as ito_options:
262
- # use_same_reference = gr.Checkbox(label="Use same reference audio for ITO", value=True)
263
- # ito_reference_audio = gr.Audio(label="ITO Reference Audio", visible=False)
264
-
265
- # def update_ito_options(perform_ito):
266
- # return gr.Column.update(visible=perform_ito)
267
-
268
- # def update_ito_reference(use_same):
269
- # return gr.Audio.update(visible=not use_same)
270
-
271
- # perform_ito.change(fn=update_ito_options, inputs=perform_ito, outputs=ito_options)
272
- # use_same_reference.change(fn=update_ito_reference, inputs=use_same_reference, outputs=ito_reference_audio)
273
-
274
- # submit_button = gr.Button("Process")
275
- # output_audio = gr.Audio(label="Output Audio")
276
- # ito_output_audio = gr.Audio(label="ITO Output Audio")
277
- # param_output = gr.Textbox(label="Predicted Parameters", lines=10)
278
- # ito_param_output = gr.Textbox(label="ITO Predicted Parameters", lines=10)
279
- # top_10_diff = gr.Textbox(label="Top 10 Parameter Differences", lines=10)
280
- # ito_log = gr.Textbox(label="ITO Log", lines=20)
281
-
282
- # submit_button.click(
283
- # process_with_ito,
284
- # inputs=[input_audio, reference_audio, perform_ito, use_same_reference, ito_reference_audio],
285
- # outputs=[output_audio, ito_output_audio, param_output, ito_param_output, top_10_diff, ito_log]
286
- # )
287
-
288
- # with gr.Tab("YouTube URLs"):
289
- # input_url = gr.Textbox(label="Input YouTube URL")
290
- # reference_url = gr.Textbox(label="Reference YouTube URL")
291
- # perform_ito_yt = gr.Checkbox(label="Perform ITO")
292
- # with gr.Column(visible=False) as ito_options_yt:
293
- # use_same_reference_yt = gr.Checkbox(label="Use same reference audio for ITO", value=True)
294
- # ito_reference_url = gr.Textbox(label="ITO Reference YouTube URL", visible=False)
295
-
296
- # def update_ito_options_yt(perform_ito):
297
- # return gr.Column.update(visible=perform_ito)
298
-
299
- # def update_ito_reference_yt(use_same):
300
- # return gr.Textbox.update(visible=not use_same)
301
-
302
- # perform_ito_yt.change(fn=update_ito_options_yt, inputs=perform_ito_yt, outputs=ito_options_yt)
303
- # use_same_reference_yt.change(fn=update_ito_reference_yt, inputs=use_same_reference_yt, outputs=ito_reference_url)
304
-
305
- # submit_button_yt = gr.Button("Process")
306
- # output_audio_yt = gr.Audio(label="Output Audio")
307
- # ito_output_audio_yt = gr.Audio(label="ITO Output Audio")
308
- # param_output_yt = gr.Textbox(label="Predicted Parameters", lines=10)
309
- # ito_param_output_yt = gr.Textbox(label="ITO Predicted Parameters", lines=10)
310
- # top_10_diff_yt = gr.Textbox(label="Top 10 Parameter Differences", lines=10)
311
- # ito_log_yt = gr.Textbox(label="ITO Log", lines=20)
312
-
313
- # submit_button_yt.click(
314
- # process_youtube_with_ito,
315
- # inputs=[input_url, reference_url, perform_ito_yt, use_same_reference_yt, ito_reference_url],
316
- # outputs=[output_audio_yt, ito_output_audio_yt, param_output_yt, ito_param_output_yt, top_10_diff_yt, ito_log_yt]
317
- # )
318
-
319
- # demo.launch()
 
7
  from utils import download_youtube_audio
8
  from config import args
9
  import pyloudnorm as pyln
10
+ import tempfile
11
+ import os
12
+ import matplotlib.pyplot as plt
13
+ import io
14
 
15
  mastering_transfer = MasteringStyleTransfer(args)
16
 
 
91
  initial_reference_feature = mastering_transfer.get_reference_embedding(reference_tensor)
92
 
93
  ito_log = ""
94
+ loss_values = []
95
+ for log_entry, current_output, current_params, step, loss in mastering_transfer.inference_time_optimization(
96
  input_tensor, ito_reference_tensor, ito_config, initial_reference_feature
97
  ):
98
  ito_log += log_entry
99
  ito_param_output = mastering_transfer.get_param_output_string(current_params)
100
+ loss_values.append(loss)
101
 
102
  # Convert current_output to numpy array if it's a tensor
103
  if isinstance(current_output, torch.Tensor):
104
+ current_output = current_output.cpu().numpy()
105
 
106
+ # Normalize output audio
107
+ current_output = loudness_normalize(current_output, args.sample_rate)
108
 
109
  # Denormalize the audio to int16
110
  current_output = denormalize_audio(current_output, dtype=np.int16)
111
 
112
+ # Ensure the audio is in the correct shape (samples, channels)
113
  if current_output.ndim == 1:
114
  current_output = current_output.reshape(-1, 1)
115
  elif current_output.ndim > 2:
116
  current_output = current_output.squeeze()
117
+
118
+ yield (args.sample_rate, current_output), ito_param_output, step, ito_log, loss_values
119
 
120
def plot_loss_curve(loss_values):
    """Render the ITO loss history as a PNG image held in memory.

    Args:
        loss_values: Sequence of loss values, plotted in the order given
            (the x-axis is the position in the sequence).

    Returns:
        io.BytesIO: Buffer containing the PNG data, rewound to position 0.
    """
    # Use an explicit figure so it can be closed afterwards; pyplot keeps
    # every figure alive until it is closed, so not closing it would leak
    # one figure per call in a long-running app.
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        ax.plot(loss_values)
        ax.set_title('ITO Loss Curve')
        ax.set_xlabel('Step')
        ax.set_ylabel('Loss')
        ax.grid(True)

        buf = io.BytesIO()
        fig.savefig(buf, format='png')
    finally:
        plt.close(fig)

    buf.seek(0)
    # NOTE(review): the caller feeds this buffer to a gr.Image output;
    # confirm that component accepts a BytesIO rather than requiring a
    # PIL image, ndarray, or file path.
    return buf
132
 
133
+ """ APP display """
134
  with gr.Blocks() as demo:
135
  gr.Markdown("# Mastering Style Transfer Demo")
136
 
 
166
  with gr.Row():
167
  with gr.Column():
168
  ito_output_audio = gr.Audio(label="ITO Output Audio")
169
+ ito_param_output = gr.Textbox(label="ITO Predicted Parameters", lines=15)
170
  with gr.Column():
171
  ito_steps_taken = gr.Number(label="ITO Steps Taken")
172
+ ito_loss_plot = gr.Image(label="ITO Loss Curve")
173
  ito_log = gr.Textbox(label="ITO Log", lines=10)
174
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  def run_ito(input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights):
176
  af_weights = [float(w.strip()) for w in af_weights.split(',')]
177
  ito_generator = perform_ito(
 
185
  final_log = ""
186
 
187
  # Iterate through the generator to get the final results
188
+ for audio, params, steps, log, losses in ito_generator:
189
  final_audio = audio
190
  final_params = params
191
  final_steps = steps
192
  final_log = log
193
+ loss_values = losses
194
+
195
+ loss_plot = plot_loss_curve(loss_values)
196
 
197
+ return final_audio, final_params, final_steps, final_log, loss_plot
198
 
199
  ito_button.click(
200
  run_ito,
201
  inputs=[input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights],
202
+ outputs=[ito_output_audio, ito_param_output, ito_steps_taken, ito_log, ito_loss_plot]
203
  )
204
 
205
  demo.launch()
206