|
|
|
|
|
|
|
|
|
|
|
import torch |
|
import librosa |
|
|
|
from utils.util import JsonHParams |
|
from utils.f0 import get_f0_features_using_parselmouth, get_pitch_sub_median |
|
from utils.mel import extract_mel_features |
|
|
|
|
|
def extract_spr( |
|
audio, |
|
fs=None, |
|
hop_length=256, |
|
win_length=1024, |
|
n_fft=1024, |
|
n_mels=128, |
|
f0_min=37, |
|
f0_max=1000, |
|
pitch_bin=256, |
|
pitch_max=1100.0, |
|
pitch_min=50.0, |
|
): |
|
"""Compute Singing Power Ratio (SPR) from a given audio. |
|
audio: path to the audio. |
|
fs: sampling rate. |
|
hop_length: hop length. |
|
win_length: window length. |
|
n_mels: number of mel filters. |
|
f0_min: lower limit for f0. |
|
f0_max: upper limit for f0. |
|
pitch_bin: number of bins for f0 quantization. |
|
pitch_max: upper limit for f0 quantization. |
|
pitch_min: lower limit for f0 quantization. |
|
""" |
|
|
|
if fs != None: |
|
audio, _ = librosa.load(audio, sr=fs) |
|
else: |
|
audio, fs = librosa.load(audio) |
|
audio = torch.from_numpy(audio) |
|
|
|
|
|
cfg = JsonHParams() |
|
cfg.sample_rate = fs |
|
cfg.hop_size = hop_length |
|
cfg.win_size = win_length |
|
cfg.n_fft = n_fft |
|
cfg.n_mel = n_mels |
|
cfg.f0_min = f0_min |
|
cfg.f0_max = f0_max |
|
cfg.pitch_bin = pitch_bin |
|
cfg.pitch_max = pitch_max |
|
cfg.pitch_min = pitch_min |
|
|
|
|
|
|
|
cfg.fmin = 2000 |
|
cfg.fmax = 4000 |
|
|
|
mel1 = extract_mel_features( |
|
y=audio.unsqueeze(0), |
|
cfg=cfg, |
|
).squeeze(0) |
|
|
|
cfg.fmin = 0 |
|
cfg.fmax = 2000 |
|
|
|
mel2 = extract_mel_features( |
|
y=audio.unsqueeze(0), |
|
cfg=cfg, |
|
).squeeze(0) |
|
|
|
f0 = get_f0_features_using_parselmouth( |
|
audio, |
|
cfg, |
|
) |
|
|
|
|
|
length = min(len(f0), mel1.shape[-1]) |
|
f0 = f0[:length] |
|
mel1 = mel1[:, :length] |
|
mel2 = mel2[:, :length] |
|
|
|
|
|
res = [] |
|
|
|
for i in range(mel1.shape[-1]): |
|
if f0[i] <= 1: |
|
continue |
|
|
|
chunk1 = mel1[:, i] |
|
chunk2 = mel2[:, i] |
|
|
|
max1 = max(chunk1.numpy()) |
|
max2 = max(chunk2.numpy()) |
|
|
|
tmp_res = max2 - max1 |
|
|
|
res.append(tmp_res) |
|
|
|
if len(res) == 0: |
|
return False |
|
else: |
|
return sum(res) / len(res) |
|
|