jhtonyKoo committed on
Commit
6d70884
1 Parent(s): a8cb9ce

modify app

Browse files
Files changed (2) hide show
  1. app.py +17 -3
  2. inference.py +0 -6
app.py CHANGED
@@ -9,12 +9,21 @@ from config import args
9
 
10
  mastering_transfer = MasteringStyleTransfer(args)
11
 
 
 
 
 
 
 
 
 
 
 
 
12
  def process_audio(input_audio, reference_audio):
13
- print("before style transfer")
14
  output_audio, predicted_params, _, _, _, sr = mastering_transfer.process_audio(
15
  input_audio, reference_audio, reference_audio, {}, False
16
  )
17
- print("style transfer completed")
18
 
19
  param_output = mastering_transfer.get_param_output_string(predicted_params)
20
 
@@ -22,13 +31,15 @@ def process_audio(input_audio, reference_audio):
22
  if isinstance(output_audio, torch.Tensor):
23
  output_audio = output_audio.cpu().numpy()
24
 
 
 
 
25
  # Ensure the audio is in the correct shape (samples, channels)
26
  if output_audio.ndim == 1:
27
  output_audio = output_audio.reshape(-1, 1)
28
  elif output_audio.ndim > 2:
29
  output_audio = output_audio.squeeze()
30
 
31
- print("returning result")
32
  return (sr, output_audio), param_output
33
 
34
  def perform_ito(input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights):
@@ -60,6 +71,9 @@ def perform_ito(input_audio, reference_audio, ito_reference_audio, num_steps, op
60
  if isinstance(current_output, torch.Tensor):
61
  current_output = current_output.cpu().numpy()
62
 
 
 
 
63
  # Ensure the audio is in the correct shape (samples, channels)
64
  if current_output.ndim == 1:
65
  current_output = current_output.reshape(-1, 1)
 
9
 
10
  mastering_transfer = MasteringStyleTransfer(args)
11
 
12
def denormalize_audio(audio, dtype=np.int16):
    """
    Convert audio normalized to [-1, 1] into the sample format given by ``dtype``.

    Args:
        audio: Array-like of audio samples, nominally in the range [-1, 1].
        dtype: Target sample format. ``np.int16`` (default) scales the samples
            by 32767; ``np.float32`` only casts them. Any spelling accepted by
            ``np.dtype`` (e.g. ``"int16"``) is allowed.

    Returns:
        A NumPy array with the same shape as ``audio`` and the requested dtype.

    Raises:
        ValueError: If ``dtype`` is neither int16 nor float32.
    """
    try:
        target = np.dtype(dtype)
    except TypeError as err:
        raise ValueError("Unsupported dtype. Use np.int16 or np.float32.") from err

    # Accept lists/tuples too; `list * 32767` would repeat the list instead of scaling.
    samples = np.asarray(audio)

    if target == np.int16:
        # Clip first: a sample outside [-1, 1] would otherwise overflow int16 and
        # wrap around to the opposite sign, which is audible as a loud click.
        return (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)
    if target == np.float32:
        return samples.astype(np.float32)
    raise ValueError("Unsupported dtype. Use np.int16 or np.float32.")
22
+
23
  def process_audio(input_audio, reference_audio):
 
24
  output_audio, predicted_params, _, _, _, sr = mastering_transfer.process_audio(
25
  input_audio, reference_audio, reference_audio, {}, False
26
  )
 
27
 
28
  param_output = mastering_transfer.get_param_output_string(predicted_params)
29
 
 
31
  if isinstance(output_audio, torch.Tensor):
32
  output_audio = output_audio.cpu().numpy()
33
 
34
+ # Denormalize the audio to int16
35
+ output_audio = denormalize_audio(output_audio, dtype=np.int16)
36
+
37
  # Ensure the audio is in the correct shape (samples, channels)
38
  if output_audio.ndim == 1:
39
  output_audio = output_audio.reshape(-1, 1)
40
  elif output_audio.ndim > 2:
41
  output_audio = output_audio.squeeze()
42
 
 
43
  return (sr, output_audio), param_output
44
 
45
  def perform_ito(input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights):
 
71
  if isinstance(current_output, torch.Tensor):
72
  current_output = current_output.cpu().numpy()
73
 
74
+ # Denormalize the audio to int16
75
+ current_output = denormalize_audio(current_output, dtype=np.int16)
76
+
77
  # Ensure the audio is in the correct shape (samples, channels)
78
  if current_output.ndim == 1:
79
  current_output = current_output.reshape(-1, 1)
inference.py CHANGED
@@ -155,20 +155,14 @@ class MasteringStyleTransfer:
155
  return data_tensor.to(self.device)
156
 
157
  def process_audio(self, input_audio, reference_audio, ito_reference_audio, params, perform_ito, log_ito=False):
158
- print('run preprocess')
159
  input_tensor = self.preprocess_audio(input_audio, self.args.sample_rate)
160
  reference_tensor = self.preprocess_audio(reference_audio, self.args.sample_rate)
161
  ito_reference_tensor = self.preprocess_audio(ito_reference_audio, self.args.sample_rate)
162
- print('preprocess done')
163
 
164
  reference_feature = self.get_reference_embedding(reference_tensor)
165
 
166
- print('reference extracted')
167
-
168
  output_audio, predicted_params = self.mastering_style_transfer(input_tensor, reference_feature)
169
 
170
- print('style transfer forward done')
171
-
172
  if perform_ito:
173
  ito_log = []
174
  for i in range(self.args.max_iter_ito):
 
155
  return data_tensor.to(self.device)
156
 
157
  def process_audio(self, input_audio, reference_audio, ito_reference_audio, params, perform_ito, log_ito=False):
 
158
  input_tensor = self.preprocess_audio(input_audio, self.args.sample_rate)
159
  reference_tensor = self.preprocess_audio(reference_audio, self.args.sample_rate)
160
  ito_reference_tensor = self.preprocess_audio(ito_reference_audio, self.args.sample_rate)
 
161
 
162
  reference_feature = self.get_reference_embedding(reference_tensor)
163
 
 
 
164
  output_audio, predicted_params = self.mastering_style_transfer(input_tensor, reference_feature)
165
 
 
 
166
  if perform_ito:
167
  ito_log = []
168
  for i in range(self.args.max_iter_ito):