snnithya committed on
Commit
41c633a
1 Parent(s): 4fc6f5b

fixed gpu access problem

Browse files
Files changed (1) hide show
  1. app.py +10 -7
app.py CHANGED
@@ -53,23 +53,28 @@ def debug_profile(func):
53
  return pp.profile(sort_by='cumulative', out_lines=10)(func)
54
  return func
55
 
56
-
57
  def extract_pitch(audio, unvoice=True, sr=16000, frame_shift_ms=10, log=True):
58
  if not isinstance(audio, torch.Tensor):
59
  audio = torch.Tensor(audio).to(device)
60
  if len(audio.shape) == 1:
61
  audio = audio.unsqueeze(0)
62
  hop_length = int(sr * frame_shift_ms / 1000)
63
- f0 = torchcrepe.predict(audio,
64
  sr,
65
  hop_length=hop_length,
66
  model='tiny',
67
  device=device,
68
  fmin=80,
69
- fmax=800
 
 
70
  )
71
 
72
- return f0.squeeze(0)
 
 
 
73
 
74
  def generate_pitch_reinterp(pitch, pitch_model, invert_pitch_fn, num_samples, num_steps, noise_std=0.4, t0=0.5):
75
  '''Generate pitch values for the melodic reinterpretation task'''
@@ -100,7 +105,7 @@ def generate_audio(audio_model, f0s, invert_audio_fn, singers=[3], num_steps=100
100
 
101
  return audio
102
 
103
- @spaces.GPU(duration=30)
104
  def generate(pitch, num_samples=1, num_steps=100, singers=[3], outfolder='temp', audio_seq_len=750, pitch_qt=None, type='response', invert_pitch_fn=None, t0=0.5, model_type='diffusion'):
105
  global pitch_model, audio_model
106
  # move the models to device
@@ -193,8 +198,6 @@ def container_generate(model_selection, task_selection, audio, singer_id, t0):
193
  audio = audio[-12*16000:] # consider only last 12 s
194
  f0 = extract_pitch(audio)
195
  # move f0 to cpu
196
- if f0.device != 'cpu': #TODO:
197
- f0 = f0.cpu()
198
  mic_f0 = f0.clone() # save the user input pitch values
199
  logging.log(logging.INFO, 'Pitch extracted')
200
  f0 = pitch_task_fn(**{
 
53
  return pp.profile(sort_by='cumulative', out_lines=10)(func)
54
  return func
55
 
56
+ @spaces.GPU(duration=10)
57
  def extract_pitch(audio, unvoice=True, sr=16000, frame_shift_ms=10, log=True):
58
  if not isinstance(audio, torch.Tensor):
59
  audio = torch.Tensor(audio).to(device)
60
  if len(audio.shape) == 1:
61
  audio = audio.unsqueeze(0)
62
  hop_length = int(sr * frame_shift_ms / 1000)
63
+ f0, periodicity = torchcrepe.predict(audio,
64
  sr,
65
  hop_length=hop_length,
66
  model='tiny',
67
  device=device,
68
  fmin=80,
69
+ fmax=600,
70
+ decoder=torchcrepe.decode.viterbi,
71
+ return_periodicity=True
72
  )
73
 
74
+ periodicity = torchcrepe.threshold.Silence(-80)(periodicity=periodicity, audio=audio, sample_rate=sr, hop_length=hop_length)
75
+ f0 = torchcrepe.threshold.At(0.4)(f0, periodicity=periodicity)
76
+
77
+ return f0.squeeze(0).cpu() # temporary hack to allow processing on cpu
78
 
79
  def generate_pitch_reinterp(pitch, pitch_model, invert_pitch_fn, num_samples, num_steps, noise_std=0.4, t0=0.5):
80
  '''Generate pitch values for the melodic reinterpretation task'''
 
105
 
106
  return audio
107
 
108
+ @spaces.GPU(duration=10)
109
  def generate(pitch, num_samples=1, num_steps=100, singers=[3], outfolder='temp', audio_seq_len=750, pitch_qt=None, type='response', invert_pitch_fn=None, t0=0.5, model_type='diffusion'):
110
  global pitch_model, audio_model
111
  # move the models to device
 
198
  audio = audio[-12*16000:] # consider only last 12 s
199
  f0 = extract_pitch(audio)
200
  # move f0 to cpu
 
 
201
  mic_f0 = f0.clone() # save the user input pitch values
202
  logging.log(logging.INFO, 'Pitch extracted')
203
  f0 = pitch_task_fn(**{