|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import torchaudio |
|
import pyworld as pw |
|
import numpy as np |
|
import torch |
|
import diffsptk |
|
import os |
|
from tqdm import tqdm |
|
import pickle |
|
import torchaudio |
|
|
|
|
|
def get_mcep_params(fs): |
|
"""Hyperparameters of transformation between SP and MCEP |
|
|
|
Reference: |
|
https://github.com/CSTR-Edinburgh/merlin/blob/master/misc/scripts/vocoder/world_v2/copy_synthesis.sh |
|
|
|
""" |
|
if fs in [44100, 48000]: |
|
fft_size = 2048 |
|
alpha = 0.77 |
|
if fs in [16000]: |
|
fft_size = 1024 |
|
alpha = 0.58 |
|
return fft_size, alpha |
|
|
|
|
|
def extract_world_features(waveform, frameshift=10): |
|
|
|
|
|
x = np.array(waveform, dtype=np.double) |
|
|
|
_f0, t = pw.dio(x, fs, frame_period=frameshift) |
|
f0 = pw.stonemask(x, _f0, t, fs) |
|
sp = pw.cheaptrick(x, f0, t, fs) |
|
ap = pw.d4c(x, f0, t, fs) |
|
|
|
return f0, sp, ap, fs |
|
|
|
|
|
def sp2mcep(x, mcsize, fs): |
|
fft_size, alpha = get_mcep_params(fs) |
|
x = torch.as_tensor(x, dtype=torch.float) |
|
|
|
tmp = diffsptk.ScalarOperation("SquareRoot")(x) |
|
tmp = diffsptk.ScalarOperation("Multiplication", 32768.0)(tmp) |
|
mgc = diffsptk.MelCepstralAnalysis( |
|
cep_order=mcsize - 1, fft_length=fft_size, alpha=alpha, n_iter=1 |
|
)(tmp) |
|
return mgc.numpy() |
|
|
|
|
|
def mcep2sp(x, mcsize, fs): |
|
fft_size, alpha = get_mcep_params(fs) |
|
x = torch.as_tensor(x, dtype=torch.float) |
|
|
|
tmp = diffsptk.MelGeneralizedCepstrumToSpectrum( |
|
alpha=alpha, |
|
cep_order=mcsize - 1, |
|
fft_length=fft_size, |
|
)(x) |
|
tmp = diffsptk.ScalarOperation("Division", 32768.0)(tmp) |
|
sp = diffsptk.ScalarOperation("Power", 2)(tmp) |
|
return sp.double().numpy() |
|
|
|
|
|
def f0_statistics(f0_features, path): |
|
print("\nF0 statistics...") |
|
|
|
total_f0 = [] |
|
for f0 in tqdm(f0_features): |
|
total_f0 += [f for f in f0 if f != 0] |
|
|
|
mean = sum(total_f0) / len(total_f0) |
|
print("Min = {}, Max = {}, Mean = {}".format(min(total_f0), max(total_f0), mean)) |
|
|
|
with open(path, "wb") as f: |
|
pickle.dump([mean, total_f0], f) |
|
|
|
|
|
def world_synthesis(f0, sp, ap, fs, frameshift): |
|
y = pw.synthesize( |
|
f0, sp, ap, fs, frame_period=frameshift |
|
) |
|
return y |
|
|