Spaces:

jhtonyKoo
/

ITO-Master

Running

App Files Files Community

jhtonyKoo committed 27 days ago

Commit

76df10e

•

1 Parent(s): e3c9443

modify app

Browse files

Files changed (1) hide show

app.py +34 -147

app.py CHANGED Viewed

@@ -7,6 +7,10 @@ from inference import MasteringStyleTransfer
 from utils import download_youtube_audio
 from config import args
 import pyloudnorm as pyln
 mastering_transfer = MasteringStyleTransfer(args)
@@ -87,35 +91,46 @@ def perform_ito(input_audio, reference_audio, ito_reference_audio, num_steps, op
     initial_reference_feature = mastering_transfer.get_reference_embedding(reference_tensor)
     ito_log = ""
-    for log_entry, current_output, current_params, step in mastering_transfer.inference_time_optimization(
         input_tensor, ito_reference_tensor, ito_config, initial_reference_feature
     ):
         ito_log += log_entry
         ito_param_output = mastering_transfer.get_param_output_string(current_params)
         # Convert current_output to numpy array if it's a tensor
         if isinstance(current_output, torch.Tensor):
-            current_output = current_output.detach().cpu().numpy()
-        # # Normalize output audio
-        # current_output = loudness_normalize(current_output, args.sample_rate)
         # Denormalize the audio to int16
         current_output = denormalize_audio(current_output, dtype=np.int16)
         if current_output.ndim == 1:
             current_output = current_output.reshape(-1, 1)
         elif current_output.ndim > 2:
             current_output = current_output.squeeze()
-        # Ensure the audio is in the correct shape (samples, channels)
-        if current_output.shape[1] > current_output.shape[0]:
-            current_output = current_output.transpose(1,0)
-        yield (args.sample_rate, current_output), ito_param_output, step, ito_log
 with gr.Blocks() as demo:
     gr.Markdown("# Mastering Style Transfer Demo")
@@ -151,28 +166,12 @@ with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column():
             ito_output_audio = gr.Audio(label="ITO Output Audio")
-            ito_param_output = gr.Textbox(label="ITO Predicted Parameters", lines=5)
         with gr.Column():
             ito_steps_taken = gr.Number(label="ITO Steps Taken")
             ito_log = gr.Textbox(label="ITO Log", lines=10)
-    # with gr.Row():
-    #     with gr.Column(scale=2):
-    #         ito_reference_audio = gr.Audio(label="ITO Reference Audio (optional)")
-    #         num_steps = gr.Slider(minimum=1, maximum=100, value=10, step=1, label="Number of Steps")
-    #         optimizer = gr.Dropdown(["Adam", "RAdam", "SGD"], value="RAdam", label="Optimizer")
-    #         learning_rate = gr.Slider(minimum=0.0001, maximum=0.1, value=0.001, step=0.0001, label="Learning Rate")
-    #         af_weights = gr.Textbox(label="AudioFeatureLoss Weights (comma-separated)", value="0.1,0.001,1.0,1.0,0.1")
-    #         ito_button = gr.Button("Perform ITO")
-    #         ito_output_audio = gr.Audio(label="ITO Output Audio")
-    #         ito_param_output = gr.Textbox(label="ITO Predicted Parameters", lines=10)
-    #         ito_steps_taken = gr.Number(label="ITO Steps Taken")
-    #     with gr.Column(scale=1):
-    #         ito_log = gr.Textbox(label="ITO Log", lines=30)
     def run_ito(input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights):
         af_weights = [float(w.strip()) for w in af_weights.split(',')]
         ito_generator = perform_ito(
@@ -186,134 +185,22 @@ with gr.Blocks() as demo:
         final_log = ""
         # Iterate through the generator to get the final results
-        for audio, params, steps, log in ito_generator:
             final_audio = audio
             final_params = params
             final_steps = steps
             final_log = log
-        return final_audio, final_params, final_steps, final_log
     ito_button.click(
         run_ito,
         inputs=[input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights],
-        outputs=[ito_output_audio, ito_param_output, ito_steps_taken, ito_log]
     )
 demo.launch()
-# import gradio as gr
-# import torch
-# import soundfile as sf
-# import numpy as np
-# import yaml
-# from inference import MasteringStyleTransfer
-# from utils import download_youtube_audio
-# from config import args
-# mastering_transfer = MasteringStyleTransfer(args)
-# def process_audio(input_audio, reference_audio, perform_ito, ito_reference_audio=None):
-#     # Process the audio files
-#     output_audio, predicted_params, ito_output_audio, ito_predicted_params, ito_log, sr = mastering_transfer.process_audio(
-#         input_audio, reference_audio, ito_reference_audio if ito_reference_audio else reference_audio, {}, perform_ito
-#     )
-#     # Generate parameter output strings
-#     param_output = mastering_transfer.get_param_output_string(predicted_params)
-#     ito_param_output = mastering_transfer.get_param_output_string(ito_predicted_params) if ito_predicted_params is not None else "ITO not performed"
-#     # Generate top 10 differences if ITO was performed
-#     top_10_diff = mastering_transfer.get_top_10_diff_string(predicted_params, ito_predicted_params) if ito_predicted_params is not None else "ITO not performed"
-#     return "output_mastered.wav", "ito_output_mastered.wav" if ito_output_audio is not None else None, param_output, ito_param_output, top_10_diff, ito_log
-# def process_with_ito(input_audio, reference_audio, perform_ito, use_same_reference, ito_reference_audio):
-#     ito_ref = reference_audio if use_same_reference else ito_reference_audio
-#     return process_audio(input_audio, reference_audio, perform_ito, ito_ref)
-# def process_youtube_with_ito(input_url, reference_url, perform_ito, use_same_reference, ito_reference_url):
-#     input_audio = download_youtube_audio(input_url)
-#     reference_audio = download_youtube_audio(reference_url)
-#     ito_ref = reference_audio if use_same_reference else download_youtube_audio(ito_reference_url)
-#     output_audio, predicted_params, ito_output_audio, ito_predicted_params, ito_log, sr = mastering_transfer.process_audio(
-#         input_audio, reference_audio, ito_ref, {}, perform_ito, log_ito=True
-#     )
-#     param_output = mastering_transfer.get_param_output_string(predicted_params)
-#     ito_param_output = mastering_transfer.get_param_output_string(ito_predicted_params) if ito_predicted_params is not None else "ITO not performed"
-#     top_10_diff = mastering_transfer.get_top_10_diff_string(predicted_params, ito_predicted_params) if ito_predicted_params is not None else "ITO not performed"
-#     return "output_mastered_yt.wav", "ito_output_mastered_yt.wav" if ito_output_audio is not None else None, param_output, ito_param_output, top_10_diff, ito_log
-# with gr.Blocks() as demo:
-#     gr.Markdown("# Mastering Style Transfer Demo")
-#     with gr.Tab("Upload Audio"):
-#         input_audio = gr.Audio(label="Input Audio")
-#         reference_audio = gr.Audio(label="Reference Audio")
-#         perform_ito = gr.Checkbox(label="Perform ITO")
-#         with gr.Column(visible=False) as ito_options:
-#             use_same_reference = gr.Checkbox(label="Use same reference audio for ITO", value=True)
-#             ito_reference_audio = gr.Audio(label="ITO Reference Audio", visible=False)
-#         def update_ito_options(perform_ito):
-#             return gr.Column.update(visible=perform_ito)
-#         def update_ito_reference(use_same):
-#             return gr.Audio.update(visible=not use_same)
-#         perform_ito.change(fn=update_ito_options, inputs=perform_ito, outputs=ito_options)
-#         use_same_reference.change(fn=update_ito_reference, inputs=use_same_reference, outputs=ito_reference_audio)
-#         submit_button = gr.Button("Process")
-#         output_audio = gr.Audio(label="Output Audio")
-#         ito_output_audio = gr.Audio(label="ITO Output Audio")
-#         param_output = gr.Textbox(label="Predicted Parameters", lines=10)
-#         ito_param_output = gr.Textbox(label="ITO Predicted Parameters", lines=10)
-#         top_10_diff = gr.Textbox(label="Top 10 Parameter Differences", lines=10)
-#         ito_log = gr.Textbox(label="ITO Log", lines=20)
-#         submit_button.click(
-#             process_with_ito,
-#             inputs=[input_audio, reference_audio, perform_ito, use_same_reference, ito_reference_audio],
-#             outputs=[output_audio, ito_output_audio, param_output, ito_param_output, top_10_diff, ito_log]
-#         )
-#     with gr.Tab("YouTube URLs"):
-#         input_url = gr.Textbox(label="Input YouTube URL")
-#         reference_url = gr.Textbox(label="Reference YouTube URL")
-#         perform_ito_yt = gr.Checkbox(label="Perform ITO")
-#         with gr.Column(visible=False) as ito_options_yt:
-#             use_same_reference_yt = gr.Checkbox(label="Use same reference audio for ITO", value=True)
-#             ito_reference_url = gr.Textbox(label="ITO Reference YouTube URL", visible=False)
-#         def update_ito_options_yt(perform_ito):
-#             return gr.Column.update(visible=perform_ito)
-#         def update_ito_reference_yt(use_same):
-#             return gr.Textbox.update(visible=not use_same)
-#         perform_ito_yt.change(fn=update_ito_options_yt, inputs=perform_ito_yt, outputs=ito_options_yt)
-#         use_same_reference_yt.change(fn=update_ito_reference_yt, inputs=use_same_reference_yt, outputs=ito_reference_url)
-#         submit_button_yt = gr.Button("Process")
-#         output_audio_yt = gr.Audio(label="Output Audio")
-#         ito_output_audio_yt = gr.Audio(label="ITO Output Audio")
-#         param_output_yt = gr.Textbox(label="Predicted Parameters", lines=10)
-#         ito_param_output_yt = gr.Textbox(label="ITO Predicted Parameters", lines=10)
-#         top_10_diff_yt = gr.Textbox(label="Top 10 Parameter Differences", lines=10)
-#         ito_log_yt = gr.Textbox(label="ITO Log", lines=20)
-#         submit_button_yt.click(
-#             process_youtube_with_ito,
-#             inputs=[input_url, reference_url, perform_ito_yt, use_same_reference_yt, ito_reference_url],
-#             outputs=[output_audio_yt, ito_output_audio_yt, param_output_yt, ito_param_output_yt, top_10_diff_yt, ito_log_yt]
-#         )
-# demo.launch()

 from utils import download_youtube_audio
 from config import args
 import pyloudnorm as pyln
+import tempfile
+import os
+import matplotlib.pyplot as plt
+import io
 mastering_transfer = MasteringStyleTransfer(args)
     initial_reference_feature = mastering_transfer.get_reference_embedding(reference_tensor)
     ito_log = ""
+    loss_values = []
+    for log_entry, current_output, current_params, step, loss in mastering_transfer.inference_time_optimization(
         input_tensor, ito_reference_tensor, ito_config, initial_reference_feature
     ):
         ito_log += log_entry
         ito_param_output = mastering_transfer.get_param_output_string(current_params)
+        loss_values.append(loss)
         # Convert current_output to numpy array if it's a tensor
         if isinstance(current_output, torch.Tensor):
+            current_output = current_output.cpu().numpy()
+        # Normalize output audio
+        current_output = loudness_normalize(current_output, args.sample_rate)
         # Denormalize the audio to int16
         current_output = denormalize_audio(current_output, dtype=np.int16)
+        # Ensure the audio is in the correct shape (samples, channels)
         if current_output.ndim == 1:
             current_output = current_output.reshape(-1, 1)
         elif current_output.ndim > 2:
             current_output = current_output.squeeze()
+        yield (args.sample_rate, current_output), ito_param_output, step, ito_log, loss_values
+def plot_loss_curve(loss_values):
+    """Render the ITO loss history as a PNG image held in memory.
+
+    Args:
+        loss_values: Sequence of per-step loss values; plotted in order
+            against their position in the sequence.
+
+    Returns:
+        io.BytesIO: Buffer containing the PNG data, rewound to position 0.
+    """
+    # Draw on an explicit figure so it can be released afterwards; pyplot
+    # keeps every figure it creates registered until it is closed.
+    fig, ax = plt.subplots(figsize=(10, 6))
+    try:
+        ax.plot(loss_values)
+        ax.set_title('ITO Loss Curve')
+        ax.set_xlabel('Step')
+        ax.set_ylabel('Loss')
+        ax.grid(True)
+        buf = io.BytesIO()
+        fig.savefig(buf, format='png')
+    finally:
+        # Close even if plotting/saving fails, so repeated calls do not
+        # accumulate open figures.
+        plt.close(fig)
+    buf.seek(0)
+    return buf
+""" APP display """
 with gr.Blocks() as demo:
     gr.Markdown("# Mastering Style Transfer Demo")
     with gr.Row():
         with gr.Column():
             ito_output_audio = gr.Audio(label="ITO Output Audio")
+            ito_param_output = gr.Textbox(label="ITO Predicted Parameters", lines=15)
         with gr.Column():
             ito_steps_taken = gr.Number(label="ITO Steps Taken")
+            ito_loss_plot = gr.Image(label="ITO Loss Curve")
             ito_log = gr.Textbox(label="ITO Log", lines=10)
     def run_ito(input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights):
         af_weights = [float(w.strip()) for w in af_weights.split(',')]
         ito_generator = perform_ito(
         final_log = ""
         # Iterate through the generator to get the final results
+        for audio, params, steps, log, losses in ito_generator:
             final_audio = audio
             final_params = params
             final_steps = steps
             final_log = log
+            loss_values = losses
+        loss_plot = plot_loss_curve(loss_values)
+        return final_audio, final_params, final_steps, final_log, loss_plot
     ito_button.click(
         run_ito,
         inputs=[input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights],
+        outputs=[ito_output_audio, ito_param_output, ito_steps_taken, ito_log, ito_loss_plot]
     )
 demo.launch()