ZeroRVC / zerorvc /f0 /extractor.py
github-actions[bot]
Sync to HuggingFace Spaces
f80c5ec
raw
history blame
No virus
1.76 kB
import logging
import numpy as np
import librosa
from .rmvpe import RMVPE
from ..constants import SR_16K
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
class F0Extractor:
    """Extract an F0 (pitch) contour with an RMVPE model and quantize it.

    The raw contour returned by the model is converted to the mel scale and
    mapped linearly onto integer bins in ``[1, f0_bin - 1]``.
    """

    def __init__(
        self,
        rmvpe: RMVPE = None,
        sr=SR_16K,
        f0_bin=256,
        f0_max=1100.0,
        f0_min=50.0,
    ):
        """Store the quantization settings and optionally attach a model.

        Args:
            rmvpe: Model to attach immediately; when ``None`` no model is
                attached and :meth:`load` must be called before extraction.
            sr: Sample rate passed to ``librosa.load`` in :meth:`extract_f0`.
            f0_bin: Number of quantization bins; produced indices lie in
                ``[1, f0_bin - 1]``.
            f0_max: Frequency mapped to the top of the bin range
                (presumably Hz, given the mel formula used -- confirm).
            f0_min: Frequency mapped to the bottom of the bin range
                (presumably Hz -- confirm).
        """
        self.sr = sr
        self.f0_bin = f0_bin
        self.f0_max = f0_max
        self.f0_min = f0_min
        # Pre-compute the mel-scale bounds used when quantizing.
        self.f0_mel_min = self._to_mel(f0_min)
        self.f0_mel_max = self._to_mel(f0_max)
        if rmvpe is not None:
            self.load(rmvpe)

    @staticmethod
    def _to_mel(freq):
        """Convert a frequency (scalar or array) to mel: 1127 * ln(1 + f / 700)."""
        return 1127 * np.log(1 + freq / 700)

    def load(self, rmvpe: RMVPE):
        """Attach ``rmvpe`` and record the device of its first parameter."""
        self.rmvpe = rmvpe
        self.device = next(rmvpe.parameters()).device
        logger.info(f"RMVPE model is on {self.device}")

    def is_loaded(self) -> bool:
        """Return ``True`` once :meth:`load` has set the ``rmvpe`` attribute."""
        return hasattr(self, "rmvpe")

    def calculate_f0_from_f0nsf(self, f0nsf: np.ndarray) -> np.ndarray:
        """Quantize a raw F0 contour into integer mel-scale bins.

        Args:
            f0nsf: Array of F0 values; it is not modified.

        Returns:
            Integer array of the same shape with values in
            ``[1, f0_bin - 1]``. Entries whose mel value is not positive
            (e.g. an F0 of 0) end up in bin 1. An empty input yields an
            empty array.
        """
        # The arithmetic below creates a fresh array, so the in-place edits
        # that follow never touch the caller's data.
        f0_mel = self._to_mel(f0nsf)
        top_bin = self.f0_bin - 1

        # Linearly map positive mel values from [mel_min, mel_max] onto
        # [1, f0_bin - 1]; non-positive entries are left for the clamp below.
        positive = f0_mel > 0
        f0_mel[positive] = (f0_mel[positive] - self.f0_mel_min) * (
            self.f0_bin - 2
        ) / (self.f0_mel_max - self.f0_mel_min) + 1

        # Clamp to the valid bin range; everything at or below 1 becomes 1.
        f0_mel[f0_mel <= 1] = 1
        f0_mel[f0_mel > top_bin] = top_bin
        f0 = np.rint(f0_mel).astype(int)

        # Sanity-check the result against the configured bin count (not a
        # fixed 255). Skipped for empty contours, where max()/min() would
        # raise ValueError.
        if f0.size:
            assert f0.max() <= top_bin and f0.min() >= 1, (
                f0.max(),
                f0.min(),
            )
        return f0

    def extract_f0_from(self, y: np.ndarray, modification=0.0):
        """Run the model on audio samples and return raw and quantized F0.

        Args:
            y: Audio samples handed to ``rmvpe.infer_from_audio``
                (presumably mono at ``self.sr`` -- confirm against RMVPE).
            modification: Pitch shift in semitones; the raw contour is
                scaled by ``2 ** (modification / 12)``.

        Returns:
            Tuple ``(f0nsf, f0)``: the (shifted) raw contour and its
            quantized bins from :meth:`calculate_f0_from_f0nsf`.

        Raises:
            AttributeError: If no model has been attached via :meth:`load`.
        """
        # NOTE(review): ``thred`` is presumably the model's voicing
        # threshold -- confirm against the RMVPE implementation.
        f0nsf = self.rmvpe.infer_from_audio(y, thred=0.03)
        # Scaled in place on the array returned by the model.
        f0nsf *= pow(2, modification / 12)
        f0 = self.calculate_f0_from_f0nsf(f0nsf)
        return f0nsf, f0

    def extract_f0(self, wav_file: str):
        """Load ``wav_file`` at ``self.sr`` and return ``(f0nsf, f0)`` for it."""
        y, _ = librosa.load(wav_file, sr=self.sr)
        return self.extract_f0_from(y)