Spaces:

jhtonyKoo
/

ITO-Master

Running

App Files Files Community

jhtonyKoo committed on 27 days ago

Commit

043c2d7

•

1 Parent(s): b901b91

modify app

Browse files

Files changed (1) hide show

app.py +33 -42

app.py CHANGED Viewed

@@ -9,8 +9,7 @@ from config import args
 import pyloudnorm as pyln
 import tempfile
 import os
-import matplotlib.pyplot as plt
-import io
 mastering_transfer = MasteringStyleTransfer(args)
@@ -52,14 +51,6 @@ def process_audio(input_audio, reference_audio):
     if isinstance(output_audio, torch.Tensor):
         output_audio = output_audio.cpu().numpy()
-    # # Normalize output audio
-    # output_audio = loudness_normalize(output_audio, sr)
-    print(output_audio.shape)
-    print(f"sr: {sr}")
-    # Denormalize the audio to int16
-    output_audio = denormalize_audio(output_audio, dtype=np.int16)
     if output_audio.ndim == 1:
         output_audio = output_audio.reshape(-1, 1)
     elif output_audio.ndim > 2:
@@ -68,7 +59,15 @@ def process_audio(input_audio, reference_audio):
     # Ensure the audio is in the correct shape (samples, channels)
     if output_audio.shape[1] > output_audio.shape[0]:
         output_audio = output_audio.transpose(1,0)
     return (sr, output_audio), param_output
 def perform_ito(input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights):
@@ -96,41 +95,27 @@ def perform_ito(input_audio, reference_audio, ito_reference_audio, num_steps, op
     ):
         ito_log += log_entry
         ito_param_output = mastering_transfer.get_param_output_string(current_params)
-        loss_values.append(loss)
         # Convert current_output to numpy array if it's a tensor
         if isinstance(current_output, torch.Tensor):
             current_output = current_output.cpu().numpy()
-        # Normalize output audio
-        # current_output = loudness_normalize(current_output, args.sample_rate)
-        # Denormalize the audio to int16
-        current_output = denormalize_audio(current_output, dtype=np.int16)
         if current_output.ndim == 1:
             current_output = current_output.reshape(-1, 1)
         elif current_output.ndim > 2:
             current_output = current_output.squeeze()
         # Ensure the audio is in the correct shape (samples, channels)
         if current_output.shape[1] > current_output.shape[0]:
             current_output = current_output.transpose(1,0)
-        yield (args.sample_rate, current_output), ito_param_output, step, ito_log, loss_values
-def plot_loss_curve(loss_values):
-    plt.figure(figsize=(10, 6))
-    plt.plot(loss_values)
-    plt.title('ITO Loss Curve')
-    plt.xlabel('Step')
-    plt.ylabel('Loss')
-    plt.grid(True)
-    buf = io.BytesIO()
-    plt.savefig(buf, format='png')
-    buf.seek(0)
-    return buf
 """ APP display """
 with gr.Blocks() as demo:
@@ -171,8 +156,16 @@ with gr.Blocks() as demo:
             ito_param_output = gr.Textbox(label="ITO Predicted Parameters", lines=15)
         with gr.Column():
             ito_steps_taken = gr.Number(label="ITO Steps Taken")
-            ito_loss_plot = gr.Image(label="ITO Loss Curve")
             ito_log = gr.Textbox(label="ITO Log", lines=10)
     def run_ito(input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights):
         af_weights = [float(w.strip()) for w in af_weights.split(',')]
@@ -185,19 +178,17 @@ with gr.Blocks() as demo:
         final_params = None
         final_steps = 0
         final_log = ""
         # Iterate through the generator to get the final results
-        for audio, params, steps, log, losses in ito_generator:
             final_audio = audio
             final_params = params
             final_steps = steps
             final_log = log
-            loss_values = losses
-        # loss_plot = plot_loss_curve(loss_values)
-        print(loss_values)
-        return final_audio, final_params, final_steps, final_log, loss_values
     ito_button.click(
         run_ito,

 import pyloudnorm as pyln
 import tempfile
 import os
+import pandas as pd
 mastering_transfer = MasteringStyleTransfer(args)
     if isinstance(output_audio, torch.Tensor):
         output_audio = output_audio.cpu().numpy()
     if output_audio.ndim == 1:
         output_audio = output_audio.reshape(-1, 1)
     elif output_audio.ndim > 2:
     # Ensure the audio is in the correct shape (samples, channels)
     if output_audio.shape[1] > output_audio.shape[0]:
         output_audio = output_audio.transpose(1,0)
+    print(output_audio.shape)
+    print(f"sr: {sr}")
+    # Normalize output audio
+    output_audio = loudness_normalize(output_audio, sr)
+    # Denormalize the audio to int16
+    output_audio = denormalize_audio(output_audio, dtype=np.int16)
     return (sr, output_audio), param_output
 def perform_ito(input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights):
     ):
         ito_log += log_entry
         ito_param_output = mastering_transfer.get_param_output_string(current_params)
+        loss_values.append({"step": step, "loss": loss})
         # Convert current_output to numpy array if it's a tensor
         if isinstance(current_output, torch.Tensor):
             current_output = current_output.cpu().numpy()
         if current_output.ndim == 1:
             current_output = current_output.reshape(-1, 1)
         elif current_output.ndim > 2:
             current_output = current_output.squeeze()
         # Ensure the audio is in the correct shape (samples, channels)
         if current_output.shape[1] > current_output.shape[0]:
             current_output = current_output.transpose(1,0)
+        # Loudness normalize output audio
+        current_output = loudness_normalize(current_output, args.sample_rate)
+        # Denormalize the audio to int16
+        current_output = denormalize_audio(current_output, dtype=np.int16)
+        yield (args.sample_rate, current_output), ito_param_output, step, ito_log, pd.DataFrame(loss_values)
 """ APP display """
 with gr.Blocks() as demo:
             ito_param_output = gr.Textbox(label="ITO Predicted Parameters", lines=15)
         with gr.Column():
             ito_steps_taken = gr.Number(label="ITO Steps Taken")
             ito_log = gr.Textbox(label="ITO Log", lines=10)
+            ito_loss_plot = gr.LinePlot(
+                x="step",
+                y="loss",
+                title="ITO Loss Curve",
+                x_title="Step",
+                y_title="Loss",
+                height=400,
+                width=600,
+            )
     def run_ito(input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights):
         af_weights = [float(w.strip()) for w in af_weights.split(',')]
         final_params = None
         final_steps = 0
         final_log = ""
+        loss_df = None
         # Iterate through the generator to get the final results
+        for audio, params, steps, log, loss_data in ito_generator:
             final_audio = audio
             final_params = params
             final_steps = steps
             final_log = log
+            loss_df = loss_data
+        return final_audio, final_params, final_steps, final_log, loss_df
     ito_button.click(
         run_ito,