jhtonyKoo committed on
Commit
043c2d7
1 Parent(s): b901b91

modify app

Browse files
Files changed (1) hide show
  1. app.py +33 -42
app.py CHANGED
@@ -9,8 +9,7 @@ from config import args
9
  import pyloudnorm as pyln
10
  import tempfile
11
  import os
12
- import matplotlib.pyplot as plt
13
- import io
14
 
15
  mastering_transfer = MasteringStyleTransfer(args)
16
 
@@ -52,14 +51,6 @@ def process_audio(input_audio, reference_audio):
52
  if isinstance(output_audio, torch.Tensor):
53
  output_audio = output_audio.cpu().numpy()
54
 
55
- # # Normalize output audio
56
- # output_audio = loudness_normalize(output_audio, sr)
57
- print(output_audio.shape)
58
- print(f"sr: {sr}")
59
-
60
- # Denormalize the audio to int16
61
- output_audio = denormalize_audio(output_audio, dtype=np.int16)
62
-
63
  if output_audio.ndim == 1:
64
  output_audio = output_audio.reshape(-1, 1)
65
  elif output_audio.ndim > 2:
@@ -68,7 +59,15 @@ def process_audio(input_audio, reference_audio):
68
  # Ensure the audio is in the correct shape (samples, channels)
69
  if output_audio.shape[1] > output_audio.shape[0]:
70
  output_audio = output_audio.transpose(1,0)
71
-
 
 
 
 
 
 
 
 
72
  return (sr, output_audio), param_output
73
 
74
  def perform_ito(input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights):
@@ -96,41 +95,27 @@ def perform_ito(input_audio, reference_audio, ito_reference_audio, num_steps, op
96
  ):
97
  ito_log += log_entry
98
  ito_param_output = mastering_transfer.get_param_output_string(current_params)
99
- loss_values.append(loss)
100
 
101
  # Convert current_output to numpy array if it's a tensor
102
  if isinstance(current_output, torch.Tensor):
103
  current_output = current_output.cpu().numpy()
104
-
105
- # Normalize output audio
106
- # current_output = loudness_normalize(current_output, args.sample_rate)
107
-
108
- # Denormalize the audio to int16
109
- current_output = denormalize_audio(current_output, dtype=np.int16)
110
-
111
  if current_output.ndim == 1:
112
  current_output = current_output.reshape(-1, 1)
113
  elif current_output.ndim > 2:
114
  current_output = current_output.squeeze()
115
-
116
  # Ensure the audio is in the correct shape (samples, channels)
117
  if current_output.shape[1] > current_output.shape[0]:
118
  current_output = current_output.transpose(1,0)
119
-
120
- yield (args.sample_rate, current_output), ito_param_output, step, ito_log, loss_values
121
-
122
- def plot_loss_curve(loss_values):
123
- plt.figure(figsize=(10, 6))
124
- plt.plot(loss_values)
125
- plt.title('ITO Loss Curve')
126
- plt.xlabel('Step')
127
- plt.ylabel('Loss')
128
- plt.grid(True)
129
-
130
- buf = io.BytesIO()
131
- plt.savefig(buf, format='png')
132
- buf.seek(0)
133
- return buf
134
 
135
  """ APP display """
136
  with gr.Blocks() as demo:
@@ -171,8 +156,16 @@ with gr.Blocks() as demo:
171
  ito_param_output = gr.Textbox(label="ITO Predicted Parameters", lines=15)
172
  with gr.Column():
173
  ito_steps_taken = gr.Number(label="ITO Steps Taken")
174
- ito_loss_plot = gr.Image(label="ITO Loss Curve")
175
  ito_log = gr.Textbox(label="ITO Log", lines=10)
 
 
 
 
 
 
 
 
 
176
 
177
  def run_ito(input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights):
178
  af_weights = [float(w.strip()) for w in af_weights.split(',')]
@@ -185,19 +178,17 @@ with gr.Blocks() as demo:
185
  final_params = None
186
  final_steps = 0
187
  final_log = ""
 
188
 
189
  # Iterate through the generator to get the final results
190
- for audio, params, steps, log, losses in ito_generator:
191
  final_audio = audio
192
  final_params = params
193
  final_steps = steps
194
  final_log = log
195
- loss_values = losses
196
 
197
- # loss_plot = plot_loss_curve(loss_values)
198
- print(loss_values)
199
-
200
- return final_audio, final_params, final_steps, final_log, loss_values
201
 
202
  ito_button.click(
203
  run_ito,
 
9
  import pyloudnorm as pyln
10
  import tempfile
11
  import os
12
+ import pandas as pd
 
13
 
14
  mastering_transfer = MasteringStyleTransfer(args)
15
 
 
51
  if isinstance(output_audio, torch.Tensor):
52
  output_audio = output_audio.cpu().numpy()
53
 
 
 
 
 
 
 
 
 
54
  if output_audio.ndim == 1:
55
  output_audio = output_audio.reshape(-1, 1)
56
  elif output_audio.ndim > 2:
 
59
  # Ensure the audio is in the correct shape (samples, channels)
60
  if output_audio.shape[1] > output_audio.shape[0]:
61
  output_audio = output_audio.transpose(1,0)
62
+
63
+ print(output_audio.shape)
64
+ print(f"sr: {sr}")
65
+
66
+ # Normalize output audio
67
+ output_audio = loudness_normalize(output_audio, sr)
68
+ # Denormalize the audio to int16
69
+ output_audio = denormalize_audio(output_audio, dtype=np.int16)
70
+
71
  return (sr, output_audio), param_output
72
 
73
  def perform_ito(input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights):
 
95
  ):
96
  ito_log += log_entry
97
  ito_param_output = mastering_transfer.get_param_output_string(current_params)
98
+ loss_values.append({"step": step, "loss": loss})
99
 
100
  # Convert current_output to numpy array if it's a tensor
101
  if isinstance(current_output, torch.Tensor):
102
  current_output = current_output.cpu().numpy()
103
+
 
 
 
 
 
 
104
  if current_output.ndim == 1:
105
  current_output = current_output.reshape(-1, 1)
106
  elif current_output.ndim > 2:
107
  current_output = current_output.squeeze()
 
108
  # Ensure the audio is in the correct shape (samples, channels)
109
  if current_output.shape[1] > current_output.shape[0]:
110
  current_output = current_output.transpose(1,0)
111
+
112
+ # Loudness normalize output audio
113
+ current_output = loudness_normalize(current_output, args.sample_rate)
114
+ # Denormalize the audio to int16
115
+ current_output = denormalize_audio(current_output, dtype=np.int16)
116
+
117
+ yield (args.sample_rate, current_output), ito_param_output, step, ito_log, pd.DataFrame(loss_values)
118
+
 
 
 
 
 
 
 
119
 
120
  """ APP display """
121
  with gr.Blocks() as demo:
 
156
  ito_param_output = gr.Textbox(label="ITO Predicted Parameters", lines=15)
157
  with gr.Column():
158
  ito_steps_taken = gr.Number(label="ITO Steps Taken")
 
159
  ito_log = gr.Textbox(label="ITO Log", lines=10)
160
+ ito_loss_plot = gr.LinePlot(
161
+ x="step",
162
+ y="loss",
163
+ title="ITO Loss Curve",
164
+ x_title="Step",
165
+ y_title="Loss",
166
+ height=400,
167
+ width=600,
168
+ )
169
 
170
  def run_ito(input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights):
171
  af_weights = [float(w.strip()) for w in af_weights.split(',')]
 
178
  final_params = None
179
  final_steps = 0
180
  final_log = ""
181
+ loss_df = None
182
 
183
  # Iterate through the generator to get the final results
184
+ for audio, params, steps, log, loss_data in ito_generator:
185
  final_audio = audio
186
  final_params = params
187
  final_steps = steps
188
  final_log = log
189
+ loss_df = loss_data
190
 
191
+ return final_audio, final_params, final_steps, final_log, loss_df
 
 
 
192
 
193
  ito_button.click(
194
  run_ito,