jhtonyKoo committed on
Commit
158585c
1 Parent(s): 341951b

modify app

Browse files
Files changed (1) hide show
  1. app.py +43 -33
app.py CHANGED
@@ -83,9 +83,6 @@ def process_audio(input_audio, reference_audio):
83
  if output_audio.shape[1] > output_audio.shape[0]:
84
  output_audio = output_audio.transpose(1,0)
85
 
86
- print(output_audio.shape)
87
- print(f"sr: {sr}")
88
-
89
  # Normalize output audio
90
  output_audio = loudness_normalize(output_audio, sr)
91
  # Denormalize the audio to int16
@@ -122,34 +119,53 @@ def perform_ito(input_audio, reference_audio, ito_reference_audio, num_steps, op
122
  ito_log += result['log']
123
  loss_values.append({"step": result['step'], "loss": result['loss']})
124
 
125
- current_output = result['audio']
126
- ito_param_output = mastering_transfer.get_param_output_string(result['params'])
127
-
128
- # Convert current_output to numpy array if it's a tensor
129
- if isinstance(current_output, torch.Tensor):
130
- current_output = current_output.cpu().numpy()
131
-
132
- if current_output.ndim == 1:
133
- current_output = current_output.reshape(-1, 1)
134
- elif current_output.ndim > 2:
135
- current_output = current_output.squeeze()
136
- # Ensure the audio is in the correct shape (samples, channels)
137
- if current_output.shape[1] > current_output.shape[0]:
138
- current_output = current_output.transpose(1,0)
 
 
139
 
140
- # Loudness normalize output audio
141
- current_output = loudness_normalize(current_output, args.sample_rate)
142
- # Denormalize the audio to int16
143
- current_output = denormalize_audio(current_output, dtype=np.int16)
144
 
145
- yield (args.sample_rate, current_output), ito_param_output, result['step'], ito_log, pd.DataFrame(loss_values), all_results
146
 
147
  def update_ito_output(all_results, selected_step):
148
- print(all_results[selected_step - 1])
149
- print(selected_step)
150
  selected_result = all_results[selected_step - 1]
151
- print(selected_result['audio'].shape)
152
- return (args.sample_rate, selected_result['audio']), selected_result['params'], selected_result['log']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
 
154
 
155
  """ APP display """
@@ -233,13 +249,7 @@ with gr.Blocks() as demo:
233
  ito_log = gr.Textbox(label="ITO Log", lines=10)
234
 
235
  all_results = gr.State([])
236
- min_loss_step = gr.State(0)
237
-
238
- def on_ito_complete(results, min_step, loss_df):
239
- all_results.value = results
240
- min_loss_step.value = min_step
241
- return loss_df, gr.update(maximum=len(results), value=min_step+1)
242
-
243
  ito_button.click(
244
  perform_ito,
245
  inputs=[input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights],
 
83
  if output_audio.shape[1] > output_audio.shape[0]:
84
  output_audio = output_audio.transpose(1,0)
85
 
 
 
 
86
  # Normalize output audio
87
  output_audio = loudness_normalize(output_audio, sr)
88
  # Denormalize the audio to int16
 
119
  ito_log += result['log']
120
  loss_values.append({"step": result['step'], "loss": result['loss']})
121
 
122
+ # Return the results of the last step
123
+ last_result = all_results[-1]
124
+ current_output = last_result['audio']
125
+ ito_param_output = mastering_transfer.get_param_output_string(last_result['params'])
126
+
127
+ # Convert current_output to numpy array if it's a tensor
128
+ if isinstance(current_output, torch.Tensor):
129
+ current_output = current_output.cpu().numpy()
130
+
131
+ if current_output.ndim == 1:
132
+ current_output = current_output.reshape(-1, 1)
133
+ elif current_output.ndim > 2:
134
+ current_output = current_output.squeeze()
135
+ # Ensure the audio is in the correct shape (samples, channels)
136
+ if current_output.shape[1] > current_output.shape[0]:
137
+ current_output = current_output.transpose(1,0)
138
 
139
+ # Loudness normalize output audio
140
+ current_output = loudness_normalize(current_output, args.sample_rate)
141
+ # Denormalize the audio to int16
142
+ current_output = denormalize_audio(current_output, dtype=np.int16)
143
 
144
+ return (args.sample_rate, current_output), ito_param_output, num_steps, ito_log, pd.DataFrame(loss_values), all_results
145
 
146
  def update_ito_output(all_results, selected_step):
 
 
147
  selected_result = all_results[selected_step - 1]
148
+ current_output = selected_result['audio']
149
+ ito_param_output = mastering_transfer.get_param_output_string(selected_result['params'])
150
+
151
+ # Convert current_output to numpy array if it's a tensor
152
+ if isinstance(current_output, torch.Tensor):
153
+ current_output = current_output.cpu().numpy()
154
+
155
+ if current_output.ndim == 1:
156
+ current_output = current_output.reshape(-1, 1)
157
+ elif current_output.ndim > 2:
158
+ current_output = current_output.squeeze()
159
+ # Ensure the audio is in the correct shape (samples, channels)
160
+ if current_output.shape[1] > current_output.shape[0]:
161
+ current_output = current_output.transpose(1,0)
162
+
163
+ # Loudness normalize output audio
164
+ current_output = loudness_normalize(current_output, args.sample_rate)
165
+ # Denormalize the audio to int16
166
+ current_output = denormalize_audio(current_output, dtype=np.int16)
167
+
168
+ return (args.sample_rate, current_output), ito_param_output, selected_result['log']
169
 
170
 
171
  """ APP display """
 
249
  ito_log = gr.Textbox(label="ITO Log", lines=10)
250
 
251
  all_results = gr.State([])
252
+
 
 
 
 
 
 
253
  ito_button.click(
254
  perform_ito,
255
  inputs=[input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights],