Spaces:
Running
Running
modify app
Browse files
app.py
CHANGED
@@ -9,8 +9,7 @@ from config import args
|
|
9 |
import pyloudnorm as pyln
|
10 |
import tempfile
|
11 |
import os
|
12 |
-
import matplotlib.pyplot as plt
|
13 |
-
import io
|
14 |
|
15 |
mastering_transfer = MasteringStyleTransfer(args)
|
16 |
|
@@ -52,14 +51,6 @@ def process_audio(input_audio, reference_audio):
|
|
52 |
if isinstance(output_audio, torch.Tensor):
|
53 |
output_audio = output_audio.cpu().numpy()
|
54 |
|
55 |
-
# # Normalize output audio
|
56 |
-
# output_audio = loudness_normalize(output_audio, sr)
|
57 |
-
print(output_audio.shape)
|
58 |
-
print(f"sr: {sr}")
|
59 |
-
|
60 |
-
# Denormalize the audio to int16
|
61 |
-
output_audio = denormalize_audio(output_audio, dtype=np.int16)
|
62 |
-
|
63 |
if output_audio.ndim == 1:
|
64 |
output_audio = output_audio.reshape(-1, 1)
|
65 |
elif output_audio.ndim > 2:
|
@@ -68,7 +59,15 @@ def process_audio(input_audio, reference_audio):
|
|
68 |
# Ensure the audio is in the correct shape (samples, channels)
|
69 |
if output_audio.shape[1] > output_audio.shape[0]:
|
70 |
output_audio = output_audio.transpose(1,0)
|
71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
return (sr, output_audio), param_output
|
73 |
|
74 |
def perform_ito(input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights):
|
@@ -96,41 +95,27 @@ def perform_ito(input_audio, reference_audio, ito_reference_audio, num_steps, op
|
|
96 |
):
|
97 |
ito_log += log_entry
|
98 |
ito_param_output = mastering_transfer.get_param_output_string(current_params)
|
99 |
-
loss_values.append(loss)
|
100 |
|
101 |
# Convert current_output to numpy array if it's a tensor
|
102 |
if isinstance(current_output, torch.Tensor):
|
103 |
current_output = current_output.cpu().numpy()
|
104 |
-
|
105 |
-
# Normalize output audio
|
106 |
-
# current_output = loudness_normalize(current_output, args.sample_rate)
|
107 |
-
|
108 |
-
# Denormalize the audio to int16
|
109 |
-
current_output = denormalize_audio(current_output, dtype=np.int16)
|
110 |
-
|
111 |
if current_output.ndim == 1:
|
112 |
current_output = current_output.reshape(-1, 1)
|
113 |
elif current_output.ndim > 2:
|
114 |
current_output = current_output.squeeze()
|
115 |
-
|
116 |
# Ensure the audio is in the correct shape (samples, channels)
|
117 |
if current_output.shape[1] > current_output.shape[0]:
|
118 |
current_output = current_output.transpose(1,0)
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
plt.ylabel('Loss')
|
128 |
-
plt.grid(True)
|
129 |
-
|
130 |
-
buf = io.BytesIO()
|
131 |
-
plt.savefig(buf, format='png')
|
132 |
-
buf.seek(0)
|
133 |
-
return buf
|
134 |
|
135 |
""" APP display """
|
136 |
with gr.Blocks() as demo:
|
@@ -171,8 +156,16 @@ with gr.Blocks() as demo:
|
|
171 |
ito_param_output = gr.Textbox(label="ITO Predicted Parameters", lines=15)
|
172 |
with gr.Column():
|
173 |
ito_steps_taken = gr.Number(label="ITO Steps Taken")
|
174 |
-
ito_loss_plot = gr.Image(label="ITO Loss Curve")
|
175 |
ito_log = gr.Textbox(label="ITO Log", lines=10)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
|
177 |
def run_ito(input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights):
|
178 |
af_weights = [float(w.strip()) for w in af_weights.split(',')]
|
@@ -185,19 +178,17 @@ with gr.Blocks() as demo:
|
|
185 |
final_params = None
|
186 |
final_steps = 0
|
187 |
final_log = ""
|
|
|
188 |
|
189 |
# Iterate through the generator to get the final results
|
190 |
-
for audio, params, steps, log, loss_values in ito_generator:
|
191 |
final_audio = audio
|
192 |
final_params = params
|
193 |
final_steps = steps
|
194 |
final_log = log
|
195 |
-
|
196 |
|
197 |
-
|
198 |
-
print(loss_values)
|
199 |
-
|
200 |
-
return final_audio, final_params, final_steps, final_log, loss_values
|
201 |
|
202 |
ito_button.click(
|
203 |
run_ito,
|
|
|
9 |
import pyloudnorm as pyln
|
10 |
import tempfile
|
11 |
import os
|
12 |
+
import pandas as pd
|
|
|
13 |
|
14 |
mastering_transfer = MasteringStyleTransfer(args)
|
15 |
|
|
|
51 |
if isinstance(output_audio, torch.Tensor):
|
52 |
output_audio = output_audio.cpu().numpy()
|
53 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
if output_audio.ndim == 1:
|
55 |
output_audio = output_audio.reshape(-1, 1)
|
56 |
elif output_audio.ndim > 2:
|
|
|
59 |
# Ensure the audio is in the correct shape (samples, channels)
|
60 |
if output_audio.shape[1] > output_audio.shape[0]:
|
61 |
output_audio = output_audio.transpose(1,0)
|
62 |
+
|
63 |
+
print(output_audio.shape)
|
64 |
+
print(f"sr: {sr}")
|
65 |
+
|
66 |
+
# Normalize output audio
|
67 |
+
output_audio = loudness_normalize(output_audio, sr)
|
68 |
+
# Denormalize the audio to int16
|
69 |
+
output_audio = denormalize_audio(output_audio, dtype=np.int16)
|
70 |
+
|
71 |
return (sr, output_audio), param_output
|
72 |
|
73 |
def perform_ito(input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights):
|
|
|
95 |
):
|
96 |
ito_log += log_entry
|
97 |
ito_param_output = mastering_transfer.get_param_output_string(current_params)
|
98 |
+
loss_values.append({"step": step, "loss": loss})
|
99 |
|
100 |
# Convert current_output to numpy array if it's a tensor
|
101 |
if isinstance(current_output, torch.Tensor):
|
102 |
current_output = current_output.cpu().numpy()
|
103 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
104 |
if current_output.ndim == 1:
|
105 |
current_output = current_output.reshape(-1, 1)
|
106 |
elif current_output.ndim > 2:
|
107 |
current_output = current_output.squeeze()
|
|
|
108 |
# Ensure the audio is in the correct shape (samples, channels)
|
109 |
if current_output.shape[1] > current_output.shape[0]:
|
110 |
current_output = current_output.transpose(1,0)
|
111 |
+
|
112 |
+
# Loudness normalize output audio
|
113 |
+
current_output = loudness_normalize(current_output, args.sample_rate)
|
114 |
+
# Denormalize the audio to int16
|
115 |
+
current_output = denormalize_audio(current_output, dtype=np.int16)
|
116 |
+
|
117 |
+
yield (args.sample_rate, current_output), ito_param_output, step, ito_log, pd.DataFrame(loss_values)
|
118 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
119 |
|
120 |
""" APP display """
|
121 |
with gr.Blocks() as demo:
|
|
|
156 |
ito_param_output = gr.Textbox(label="ITO Predicted Parameters", lines=15)
|
157 |
with gr.Column():
|
158 |
ito_steps_taken = gr.Number(label="ITO Steps Taken")
|
|
|
159 |
ito_log = gr.Textbox(label="ITO Log", lines=10)
|
160 |
+
ito_loss_plot = gr.LinePlot(
|
161 |
+
x="step",
|
162 |
+
y="loss",
|
163 |
+
title="ITO Loss Curve",
|
164 |
+
x_title="Step",
|
165 |
+
y_title="Loss",
|
166 |
+
height=400,
|
167 |
+
width=600,
|
168 |
+
)
|
169 |
|
170 |
def run_ito(input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights):
|
171 |
af_weights = [float(w.strip()) for w in af_weights.split(',')]
|
|
|
178 |
final_params = None
|
179 |
final_steps = 0
|
180 |
final_log = ""
|
181 |
+
loss_df = None
|
182 |
|
183 |
# Iterate through the generator to get the final results
|
184 |
+
for audio, params, steps, log, loss_data in ito_generator:
|
185 |
final_audio = audio
|
186 |
final_params = params
|
187 |
final_steps = steps
|
188 |
final_log = log
|
189 |
+
loss_df = loss_data
|
190 |
|
191 |
+
return final_audio, final_params, final_steps, final_log, loss_df
|
|
|
|
|
|
|
192 |
|
193 |
ito_button.click(
|
194 |
run_ito,
|