Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
fix vc bugs
Browse files
app.py
CHANGED
@@ -2,6 +2,7 @@ import os
|
|
2 |
|
3 |
os.system('cd monotonic_align && python setup.py build_ext --inplace && cd ..')
|
4 |
|
|
|
5 |
import numpy as np
|
6 |
import torch
|
7 |
from torch import no_grad, LongTensor
|
@@ -34,9 +35,13 @@ def tts_fn(text, speaker_id):
|
|
34 |
|
35 |
def vc_fn(original_speaker_id, target_speaker_id, input_audio):
|
36 |
sampling_rate, audio = input_audio
|
37 |
-
|
|
|
|
|
|
|
|
|
|
|
38 |
y = y.unsqueeze(0)
|
39 |
-
|
40 |
spec = spectrogram_torch(y, hps.data.filter_length,
|
41 |
hps.data.sampling_rate, hps.data.hop_length, hps.data.win_length,
|
42 |
center=False)
|
|
|
2 |
|
3 |
os.system('cd monotonic_align && python setup.py build_ext --inplace && cd ..')
|
4 |
|
5 |
+
import librosa
|
6 |
import numpy as np
|
7 |
import torch
|
8 |
from torch import no_grad, LongTensor
|
|
|
35 |
|
36 |
def vc_fn(original_speaker_id, target_speaker_id, input_audio):
|
37 |
sampling_rate, audio = input_audio
|
38 |
+
audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
|
39 |
+
if len(audio.shape) > 1:
|
40 |
+
audio = librosa.to_mono(audio.transpose(1, 0))
|
41 |
+
if sampling_rate != hps.data.sampling_rate:
|
42 |
+
audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=hps.data.sampling_rate)
|
43 |
+
y = torch.FloatTensor(audio)
|
44 |
y = y.unsqueeze(0)
|
|
|
45 |
spec = spectrogram_torch(y, hps.data.filter_length,
|
46 |
hps.data.sampling_rate, hps.data.hop_length, hps.data.win_length,
|
47 |
center=False)
|