Spaces:

jhtonyKoo
/

ITO-Master

Running

App Files Community

jhtonyKoo committed 23 days ago

Commit

158585c

•

1 Parent(s): 341951b

modify app

Browse files

Files changed (1) hide show

app.py +43 -33

app.py CHANGED Viewed

@@ -83,9 +83,6 @@ def process_audio(input_audio, reference_audio):
     if output_audio.shape[1] > output_audio.shape[0]:
         output_audio = output_audio.transpose(1,0)
-    print(output_audio.shape)
-    print(f"sr: {sr}")
     # Normalize output audio
     output_audio = loudness_normalize(output_audio, sr)
     # Denormalize the audio to int16
@@ -122,34 +119,53 @@ def perform_ito(input_audio, reference_audio, ito_reference_audio, num_steps, op
         ito_log += result['log']
         loss_values.append({"step": result['step'], "loss": result['loss']})
-        current_output = result['audio']
-        ito_param_output = mastering_transfer.get_param_output_string(result['params'])
-        # Convert current_output to numpy array if it's a tensor
-        if isinstance(current_output, torch.Tensor):
-            current_output = current_output.cpu().numpy()
-        if current_output.ndim == 1:
-            current_output = current_output.reshape(-1, 1)
-        elif current_output.ndim > 2:
-            current_output = current_output.squeeze()
-        # Ensure the audio is in the correct shape (samples, channels)
-        if current_output.shape[1] > current_output.shape[0]:
-            current_output = current_output.transpose(1,0)
-        # Loudness normalize output audio
-        current_output = loudness_normalize(current_output, args.sample_rate)
-        # Denormalize the audio to int16
-        current_output = denormalize_audio(current_output, dtype=np.int16)
-        yield (args.sample_rate, current_output), ito_param_output, result['step'], ito_log, pd.DataFrame(loss_values), all_results
 def update_ito_output(all_results, selected_step):
-    print(all_results[selected_step - 1])
-    print(selected_step)
     selected_result = all_results[selected_step - 1]
-    print(selected_result['audio'].shape)
-    return (args.sample_rate, selected_result['audio']), selected_result['params'], selected_result['log']
 """ APP display """
@@ -233,13 +249,7 @@ with gr.Blocks() as demo:
             ito_log = gr.Textbox(label="ITO Log", lines=10)
     all_results = gr.State([])
-    min_loss_step = gr.State(0)
-    def on_ito_complete(results, min_step, loss_df):
-        all_results.value = results
-        min_loss_step.value = min_step
-        return loss_df, gr.update(maximum=len(results), value=min_step+1)
     ito_button.click(
         perform_ito,
         inputs=[input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights],

     if output_audio.shape[1] > output_audio.shape[0]:
         output_audio = output_audio.transpose(1,0)
     # Normalize output audio
     output_audio = loudness_normalize(output_audio, sr)
     # Denormalize the audio to int16
         ito_log += result['log']
         loss_values.append({"step": result['step'], "loss": result['loss']})
+    # Return the results of the last step
+    last_result = all_results[-1]
+    current_output = last_result['audio']
+    ito_param_output = mastering_transfer.get_param_output_string(last_result['params'])
+    # Convert current_output to numpy array if it's a tensor
+    if isinstance(current_output, torch.Tensor):
+        current_output = current_output.cpu().numpy()
+    if current_output.ndim == 1:
+        current_output = current_output.reshape(-1, 1)
+    elif current_output.ndim > 2:
+        current_output = current_output.squeeze()
+    # Ensure the audio is in the correct shape (samples, channels)
+    if current_output.shape[1] > current_output.shape[0]:
+        current_output = current_output.transpose(1,0)
+    # Loudness normalize output audio
+    current_output = loudness_normalize(current_output, args.sample_rate)
+    # Denormalize the audio to int16
+    current_output = denormalize_audio(current_output, dtype=np.int16)
+    return (args.sample_rate, current_output), ito_param_output, num_steps, ito_log, pd.DataFrame(loss_values), all_results
 def update_ito_output(all_results, selected_step):
     selected_result = all_results[selected_step - 1]
+    current_output = selected_result['audio']
+    ito_param_output = mastering_transfer.get_param_output_string(selected_result['params'])
+    # Convert current_output to numpy array if it's a tensor
+    if isinstance(current_output, torch.Tensor):
+        current_output = current_output.cpu().numpy()
+    if current_output.ndim == 1:
+        current_output = current_output.reshape(-1, 1)
+    elif current_output.ndim > 2:
+        current_output = current_output.squeeze()
+    # Ensure the audio is in the correct shape (samples, channels)
+    if current_output.shape[1] > current_output.shape[0]:
+        current_output = current_output.transpose(1,0)
+    # Loudness normalize output audio
+    current_output = loudness_normalize(current_output, args.sample_rate)
+    # Denormalize the audio to int16
+    current_output = denormalize_audio(current_output, dtype=np.int16)
+    return (args.sample_rate, current_output), ito_param_output, selected_result['log']
 """ APP display """
             ito_log = gr.Textbox(label="ITO Log", lines=10)
     all_results = gr.State([])
     ito_button.click(
         perform_ito,
         inputs=[input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights],