Spaces: Running on Zero
fixed gpu access problem
app.py CHANGED
@@ -53,23 +53,28 @@ def debug_profile(func):
         return pp.profile(sort_by='cumulative', out_lines=10)(func)
     return func
 
-
+@spaces.GPU(duration=10)
 def extract_pitch(audio, unvoice=True, sr=16000, frame_shift_ms=10, log=True):
     if not isinstance(audio, torch.Tensor):
         audio = torch.Tensor(audio).to(device)
     if len(audio.shape) == 1:
         audio = audio.unsqueeze(0)
     hop_length = int(sr * frame_shift_ms / 1000)
-    f0 = torchcrepe.predict(audio,
+    f0, periodicity = torchcrepe.predict(audio,
                             sr,
                             hop_length=hop_length,
                             model='tiny',
                             device=device,
                             fmin=80,
-                            fmax=
+                            fmax=600,
+                            decoder=torchcrepe.decode.viterbi,
+                            return_periodicity=True
                             )
 
-
+    periodicity = torchcrepe.threshold.Silence(-80)(periodicity=periodicity, audio=audio, sample_rate=sr, hop_length=hop_length)
+    f0 = torchcrepe.threshold.At(0.4)(f0, periodicity=periodicity)
+
+    return f0.squeeze(0).cpu() # temporary hack to allow processing on cpu
 
 def generate_pitch_reinterp(pitch, pitch_model, invert_pitch_fn, num_samples, num_steps, noise_std=0.4, t0=0.5):
     '''Generate pitch values for the melodic reinterpretation task'''
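For context on the new call: with return_periodicity=True, torchcrepe returns a frame-wise pitch track plus a periodicity (confidence) signal; silencing the confidence in quiet frames and then thresholding it marks unvoiced frames as NaN instead of leaving spurious pitch values. Below is a minimal, self-contained sketch of that pattern, separate from the app itself; the random tensor is only a stand-in for real 16 kHz mono audio, and the constants mirror the values in the diff.

import torch
import torchcrepe

device = 'cuda' if torch.cuda.is_available() else 'cpu'
sr = 16000
hop_length = int(sr * 10 / 1000)   # 10 ms frame shift -> 160 samples

audio = torch.randn(1, 2 * sr)     # stand-in for 2 s of real audio, shape (1, samples)

f0, periodicity = torchcrepe.predict(audio,
                                     sr,
                                     hop_length=hop_length,
                                     fmin=80,
                                     fmax=600,       # plausible range for a singing voice
                                     model='tiny',   # smallest CREPE variant
                                     decoder=torchcrepe.decode.viterbi,  # smooths the frame-wise track
                                     return_periodicity=True,
                                     device=device)

# Zero the confidence of frames below -80 dB A-weighted loudness, then mark
# frames with confidence under 0.4 as unvoiced (NaN) in the pitch track.
periodicity = torchcrepe.threshold.Silence(-80.)(periodicity, audio, sr, hop_length)
f0 = torchcrepe.threshold.At(.4)(f0, periodicity)

print(f0.shape)  # (1, num_frames)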
@@ -100,7 +105,7 @@ def generate_audio(audio_model, f0s, invert_audio_fn, singers=[3], num_steps=100
 
     return audio
 
-@spaces.GPU(duration=
+@spaces.GPU(duration=10)
 def generate(pitch, num_samples=1, num_steps=100, singers=[3], outfolder='temp', audio_seq_len=750, pitch_qt=None, type='response', invert_pitch_fn=None, t0=0.5, model_type='diffusion'):
     global pitch_model, audio_model
     # move the models to device
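The decorator is the ZeroGPU contract: on a "Running on Zero" Space, no CUDA device is attached at import time, and the GPU is only leased to the process while a @spaces.GPU-decorated call runs, with duration as the requested time budget per call. Touching CUDA outside such a call is the kind of GPU-access error this commit addresses. A minimal sketch of the pattern, with a toy model and interface rather than the app's:

import spaces
import torch
import gradio as gr

model = torch.nn.Linear(16, 1)     # toy stand-in for the real pitch/audio models

@spaces.GPU(duration=10)           # lease the GPU for roughly 10 s per call
def run(n: float) -> float:
    model.to('cuda')               # safe here: CUDA exists inside the decorated call
    x = torch.full((1, 16), n, device='cuda')
    return model(x).item()

gr.Interface(run, gr.Number(value=1.0), gr.Number()).launch()

Outside a ZeroGPU Space the decorator is a no-op, so the same code also runs on ordinary GPU or CPU hardware.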
@@ -193,8 +198,6 @@ def container_generate(model_selection, task_selection, audio, singer_id, t0):
     audio = audio[-12*16000:] # consider only last 12 s
     f0 = extract_pitch(audio)
     # move f0 to cpu
-    if f0.device != 'cpu': #TODO:
-        f0 = f0.cpu()
     mic_f0 = f0.clone() # save the user input pitch values
     logging.log(logging.INFO, 'Pitch extracted')
     f0 = pitch_task_fn(**{
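The deleted guard is dead code now that extract_pitch ends with return f0.squeeze(0).cpu(), so f0 always arrives on the CPU. It also compared a torch.device against a plain string, which is easy to get wrong (a device with an explicit index compares unequal to the bare 'cuda' string), likely the reason for the #TODO. If a defensive move were still wanted, comparing device types is unambiguous; a sketch:

import torch

def ensure_cpu(t: torch.Tensor) -> torch.Tensor:
    # device.type is always a plain string: 'cpu', 'cuda', 'mps', ...
    return t if t.device.type == 'cpu' else t.cpu()

Calling t.cpu() unconditionally would also work, since it is a no-op for a tensor already on the CPU.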