jhtonyKoo committed on
Commit
d48a45a
1 Parent(s): 7d7bb34

modify app

Browse files
Files changed (1) hide show
  1. app.py +24 -1
app.py CHANGED
@@ -21,6 +21,21 @@ def denormalize_audio(audio, dtype=np.int16):
21
  else:
22
  raise ValueError("Unsupported dtype. Use np.int16 or np.float32.")
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  def process_audio(input_audio, reference_audio):
25
  output_audio, predicted_params, _, _, _, sr = mastering_transfer.process_audio(
26
  input_audio, reference_audio, reference_audio, {}, False
@@ -31,6 +46,9 @@ def process_audio(input_audio, reference_audio):
31
  # Convert output_audio to numpy array if it's a tensor
32
  if isinstance(output_audio, torch.Tensor):
33
  output_audio = output_audio.cpu().numpy()
 
 
 
34
 
35
  # Denormalize the audio to int16
36
  output_audio = denormalize_audio(output_audio, dtype=np.int16)
@@ -78,10 +96,13 @@ def perform_ito(input_audio, reference_audio, ito_reference_audio, num_steps, op
78
  if isinstance(current_output, torch.Tensor):
79
  current_output = current_output.detach().cpu().numpy()
80
 
 
 
 
81
  # Denormalize the audio to int16
82
  current_output = denormalize_audio(current_output, dtype=np.int16)
83
 
84
- if output_audio.ndim == 1:
85
  current_output = current_output.reshape(-1, 1)
86
  elif current_output.ndim > 2:
87
  current_output = current_output.squeeze()
@@ -163,6 +184,8 @@ with gr.Blocks() as demo:
163
  demo.launch()
164
 
165
 
 
 
166
  # import gradio as gr
167
  # import torch
168
  # import soundfile as sf
 
21
  else:
22
  raise ValueError("Unsupported dtype. Use np.int16 or np.float32.")
23
 
24
+ def loudness_normalize(audio, sample_rate, target_loudness=-12.0):
25
+ # Ensure audio is float32
26
+ if audio.dtype != np.float32:
27
+ audio = audio.astype(np.float32)
28
+
29
+ # If audio is mono, reshape to (samples, 1)
30
+ if audio.ndim == 1:
31
+ audio = audio.reshape(-1, 1)
32
+
33
+ meter = pyln.Meter(sample_rate) # create BS.1770 meter
34
+ loudness = meter.integrated_loudness(audio)
35
+
36
+ loudness_normalized_audio = pyln.normalize.loudness(audio, loudness, target_loudness)
37
+ return loudness_normalized_audio
38
+
39
  def process_audio(input_audio, reference_audio):
40
  output_audio, predicted_params, _, _, _, sr = mastering_transfer.process_audio(
41
  input_audio, reference_audio, reference_audio, {}, False
 
46
  # Convert output_audio to numpy array if it's a tensor
47
  if isinstance(output_audio, torch.Tensor):
48
  output_audio = output_audio.cpu().numpy()
49
+
50
+ # Normalize output audio
51
+ output_audio = loudness_normalize(output_audio, sr)
52
 
53
  # Denormalize the audio to int16
54
  output_audio = denormalize_audio(output_audio, dtype=np.int16)
 
96
  if isinstance(current_output, torch.Tensor):
97
  current_output = current_output.detach().cpu().numpy()
98
 
99
+ # Normalize output audio
100
+ current_output = loudness_normalize(current_output, args.sample_rate)
101
+
102
  # Denormalize the audio to int16
103
  current_output = denormalize_audio(current_output, dtype=np.int16)
104
 
105
+ if current_output.ndim == 1:
106
  current_output = current_output.reshape(-1, 1)
107
  elif current_output.ndim > 2:
108
  current_output = current_output.squeeze()
 
184
  demo.launch()
185
 
186
 
187
+
188
+
189
  # import gradio as gr
190
  # import torch
191
  # import soundfile as sf