modify app
Browse files
- inference.py +19 -2
- requirements.txt +2 -1
inference.py
CHANGED
@@ -4,6 +4,7 @@ import numpy as np
 import argparse
 import os
 import yaml
+import julius
 
 import sys
 currentdir = os.path.dirname(os.path.realpath(__file__))
@@ -11,6 +12,15 @@ sys.path.append(os.path.dirname(currentdir))
 from networks import Dasp_Mastering_Style_Transfer, Effects_Encoder
 from modules.loss import AudioFeatureLoss, Loss
 
+
+def convert_audio(wav: torch.Tensor, from_rate: float,
+                  to_rate: float, to_channels: int) -> torch.Tensor:
+    """Convert audio to new sample rate and number of audio channels.
+    """
+    wav = julius.resample_frac(wav, int(from_rate), int(to_rate))
+    wav = convert_audio_channels(wav, to_channels)
+    return wav
+
 class MasteringStyleTransfer:
     def __init__(self, args):
         self.args = args
@@ -105,8 +115,7 @@ class MasteringStyleTransfer:
         return min_loss_output, min_loss_params, min_loss_embedding, min_loss_step + 1
 
     def process_audio(self, input_audio, reference_audio, ito_reference_audio, params, perform_ito, log_ito=False):
-
-        input_audio, reference_audio, ito_reference_audio = [
+        input_audio[1], reference_audio[1], ito_reference_audio[1] = [
             np.stack([audio, audio]) if audio.ndim == 1 else audio.transpose(1,0)
             for audio in [input_audio, reference_audio, ito_reference_audio]
         ]
@@ -115,6 +124,14 @@ class MasteringStyleTransfer:
         reference_tensor = torch.FloatTensor(reference_audio).unsqueeze(0).to(self.device)
         ito_reference_tensor = torch.FloatTensor(ito_reference_audio).unsqueeze(0).to(self.device)
 
+        #resample to 44.1kHz if necessary
+        if input_audio[0] != self.args.sample_rate:
+            input_tensor = convert_audio(input_tensor, input_audio[0], self.args.sample_rate, 2)
+        if reference_audio[0] != self.args.sample_rate:
+            reference_tensor = convert_audio(reference_tensor, reference_audio[0], self.args.sample_rate, 2)
+        if ito_reference_audio[0] != self.args.sample_rate:
+            ito_reference_tensor = convert_audio(ito_reference_tensor, ito_reference_audio[0], self.args.sample_rate, 2)
+
         reference_feature = self.get_reference_embedding(reference_tensor)
 
         output_audio, predicted_params = self.mastering_style_transfer(input_tensor, reference_feature)
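Two notes on the hunks above, for readers following along. First, the new indexing into input_audio[0] (sample rate) and input_audio[1] (samples) suggests the audio arguments now arrive as (sample_rate, samples) pairs, the format Gradio's gr.Audio component returns with type="numpy"; the calling code is not shown here, so treat that as an assumption. Second, the new convert_audio helper calls convert_audio_channels, which is not defined in these hunks. A minimal sketch of what a helper by that name conventionally does (modeled on the utility of the same name in encodec/audiocraft; the implementation actually used by this Space may differ):

import torch

def convert_audio_channels(wav: torch.Tensor, channels: int) -> torch.Tensor:
    """Convert audio of shape [..., channels, length] to the target channel count."""
    src = wav.shape[-2]
    if src == channels:
        return wav
    if channels == 1:
        # Downmix: average the source channels into mono.
        return wav.mean(dim=-2, keepdim=True)
    if src == 1:
        # Upmix: duplicate the mono channel across the target channels.
        return wav.expand(*wav.shape[:-2], channels, wav.shape[-1])
    raise ValueError(f"Cannot convert from {src} to {channels} channels.")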
requirements.txt
CHANGED
@@ -9,4 +9,5 @@ numba==0.58.1
 auraloss==0.4.0
 dasp-pytorch==0.0.1
 torchcomp==0.1.3
-pytorch-lightning==2.4.0
+pytorch-lightning==2.4.0
+julius==0.2.7
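The newly pinned julius==0.2.7 provides the fractional resampler used in the inference.py changes. julius.resample_frac resamples along the last dimension and takes integer source/target rates, which is why convert_audio casts with int(...). A quick usage sketch:

import torch
import julius

wav = torch.randn(2, 48000)                        # stereo, 1 s at 48 kHz
wav_44k = julius.resample_frac(wav, 48000, 44100)  # -> shape [2, 44100]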