Spaces:
Running
Running
modify app
Browse files
app.py
CHANGED
@@ -21,6 +21,21 @@ def denormalize_audio(audio, dtype=np.int16):
|
|
21 |
else:
|
22 |
raise ValueError("Unsupported dtype. Use np.int16 or np.float32.")
|
23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
def process_audio(input_audio, reference_audio):
|
25 |
output_audio, predicted_params, _, _, _, sr = mastering_transfer.process_audio(
|
26 |
input_audio, reference_audio, reference_audio, {}, False
|
@@ -31,6 +46,9 @@ def process_audio(input_audio, reference_audio):
|
|
31 |
# Convert output_audio to numpy array if it's a tensor
|
32 |
if isinstance(output_audio, torch.Tensor):
|
33 |
output_audio = output_audio.cpu().numpy()
|
|
|
|
|
|
|
34 |
|
35 |
# Denormalize the audio to int16
|
36 |
output_audio = denormalize_audio(output_audio, dtype=np.int16)
|
@@ -78,10 +96,13 @@ def perform_ito(input_audio, reference_audio, ito_reference_audio, num_steps, op
|
|
78 |
if isinstance(current_output, torch.Tensor):
|
79 |
current_output = current_output.detach().cpu().numpy()
|
80 |
|
|
|
|
|
|
|
81 |
# Denormalize the audio to int16
|
82 |
current_output = denormalize_audio(current_output, dtype=np.int16)
|
83 |
|
84 |
-
if
|
85 |
current_output = current_output.reshape(-1, 1)
|
86 |
elif current_output.ndim > 2:
|
87 |
current_output = current_output.squeeze()
|
@@ -163,6 +184,8 @@ with gr.Blocks() as demo:
|
|
163 |
demo.launch()
|
164 |
|
165 |
|
|
|
|
|
166 |
# import gradio as gr
|
167 |
# import torch
|
168 |
# import soundfile as sf
|
|
|
21 |
else:
|
22 |
raise ValueError("Unsupported dtype. Use np.int16 or np.float32.")
|
23 |
|
24 |
+
def loudness_normalize(audio, sample_rate, target_loudness=-12.0):
    """Scale ``audio`` so its integrated loudness matches ``target_loudness``.

    Args:
        audio: NumPy array of audio samples. Converted to float32 when it has
            another dtype; a 1-D array is reshaped to ``(samples, 1)``.
        sample_rate: Sampling rate handed to the loudness meter.
        target_loudness: Desired integrated loudness, passed straight to
            ``pyln.normalize.loudness`` (default ``-12.0``).

    Returns:
        The loudness-normalized audio. When the measured loudness is not
        finite, the float32 (and possibly reshaped) input is returned without
        any gain applied.
    """
    # Ensure audio is float32
    if audio.dtype != np.float32:
        audio = audio.astype(np.float32)

    # If audio is mono, reshape to (samples, 1)
    if audio.ndim == 1:
        audio = audio.reshape(-1, 1)

    # NOTE(review): inputs with more than two dimensions are passed through
    # as-is; presumably the meter expects (samples, channels) -- confirm
    # against the callers that feed converted tensors in here.
    meter = pyln.Meter(sample_rate)  # create BS.1770 meter
    loudness = meter.integrated_loudness(audio)

    # A non-finite measurement (e.g. -inf for digital silence) would turn into
    # an infinite gain and fill the output with NaN/inf, which the later int16
    # conversion cannot represent. Leave such audio untouched instead.
    if not np.isfinite(loudness):
        return audio

    return pyln.normalize.loudness(audio, loudness, target_loudness)
|
38 |
+
|
39 |
def process_audio(input_audio, reference_audio):
|
40 |
output_audio, predicted_params, _, _, _, sr = mastering_transfer.process_audio(
|
41 |
input_audio, reference_audio, reference_audio, {}, False
|
|
|
46 |
# Convert output_audio to numpy array if it's a tensor
|
47 |
if isinstance(output_audio, torch.Tensor):
|
48 |
output_audio = output_audio.cpu().numpy()
|
49 |
+
|
50 |
+
# Normalize output audio
|
51 |
+
output_audio = loudness_normalize(output_audio, sr)
|
52 |
|
53 |
# Denormalize the audio to int16
|
54 |
output_audio = denormalize_audio(output_audio, dtype=np.int16)
|
|
|
96 |
if isinstance(current_output, torch.Tensor):
|
97 |
current_output = current_output.detach().cpu().numpy()
|
98 |
|
99 |
+
# Normalize output audio
|
100 |
+
current_output = loudness_normalize(current_output, args.sample_rate)
|
101 |
+
|
102 |
# Denormalize the audio to int16
|
103 |
current_output = denormalize_audio(current_output, dtype=np.int16)
|
104 |
|
105 |
+
if current_output.ndim == 1:
|
106 |
current_output = current_output.reshape(-1, 1)
|
107 |
elif current_output.ndim > 2:
|
108 |
current_output = current_output.squeeze()
|
|
|
184 |
demo.launch()
|
185 |
|
186 |
|
187 |
+
|
188 |
+
|
189 |
# import gradio as gr
|
190 |
# import torch
|
191 |
# import soundfile as sf
|