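// Hosting-page metadata captured together with this file (not part of the program):
// author: MashiroSA | commit: "feat: init" (b87af40) | viewer links: raw, history, blame | size: 4.16 kB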
#include "F0Preprocess.hpp"
/// Estimates the F0 contour of `audio` and stores it in the member buffer `rf0`.
///
/// Runs `Dio` for a coarse estimate and `StoneMask` for the refined one
/// (presumably the WORLD vocoder API -- confirm against the project headers).
/// On return `f0Len` holds the frame count reported by `GetSamplesForDIO` and
/// `rf0` points to a freshly allocated array of `f0Len` doubles owned by this object.
///
/// @param audio  Input samples; passed straight through to `Dio` / `StoneMask`.
/// @param len    Number of samples in `audio`; narrowed to `int` because the
///               estimator calls take `int` lengths.
///
/// NOTE(review): a buffer previously held in `rf0` is overwritten without being
/// released. The visible callers always arrive here with `rf0` already freed;
/// confirm no other caller invokes this twice in a row.
void F0PreProcess::compute_f0(const double* audio, int64_t len)
{
    DioOption Doption;
    InitializeDioOption(&Doption);
    Doption.f0_ceil = 800;
    // One analysis frame per hop; assumes hop is in samples and fs in Hz, giving milliseconds.
    Doption.frame_period = 1000.0 * hop / fs;

    const int sampleCount = static_cast<int>(len);
    f0Len = GetSamplesForDIO(fs, sampleCount, Doption.frame_period);

    // Scratch buffers are vectors so they are released even if a later allocation throws.
    std::vector<double> timeAxis(static_cast<size_t>(f0Len));
    std::vector<double> coarseF0(static_cast<size_t>(f0Len));
    rf0 = new double[f0Len];

    Dio(audio, sampleCount, fs, &Doption, timeAxis.data(), coarseF0.data());
    StoneMask(audio, sampleCount, fs, timeAxis.data(), coarseF0.data(), static_cast<int>(f0Len), rf0);
}
/// Builds the sequence start, start + step, start + 2*step, ... for every value
/// strictly below `end`, storing each value divided by `div`.
///
/// @param start  First value of the progression (before division).
/// @param end    Exclusive upper bound of the progression (before division).
/// @param step   Increment between consecutive values. A zero or negative step
///               yields an empty result instead of looping forever.
/// @param div    Divisor applied to every emitted value.
/// @return The scaled values, in increasing order of the underlying progression.
std::vector<double> arange(double start,double end,double step = 1.0,double div = 1.0)
{
    std::vector<double> output;

    // A non-positive step can never reach `end`; the unguarded loop would grow
    // the vector until memory ran out.
    if (step <= 0.0)
        return output;

    // Pre-size for ordinary ranges. Comparisons are false for NaN/inf estimates,
    // so degenerate inputs simply skip the reservation.
    const double estimate = (end - start) / step;
    if (estimate > 0.0 && estimate < 1.0e8)
        output.reserve(static_cast<size_t>(estimate) + 1);

    while (start < end)
    {
        output.push_back(start / div);
        const double next = start + step;
        // Stop if the step is too small to change `start` at this magnitude
        // (or produced NaN); otherwise the loop would never terminate.
        if (!(next > start))
            break;
        start = next;
    }
    return output;
}
/// Resamples the stored F0 contour (`rf0`, `f0Len` frames) through `interp1`
/// and replaces `rf0` / `f0Len` with the resampled contour.
///
/// The query positions are k * f0Len / len for k = 0, 1, ... while
/// k * f0Len < f0Len * len; the source grid is 0, 1, ..., f0Len - 1.
/// Any NaN written by `interp1` is replaced by 0.0.
///
/// @param len  Divisor of the query grid; for positive `f0Len` it is also the
///             number of output frames.
void F0PreProcess::InterPf0(int64_t len)
{
    const auto queryPos = arange(0.0, (double)f0Len * (double)len, (double)f0Len, (double)len);
    const size_t outCount = queryPos.size();

    // The output buffer keeps one spare slot, as in the original allocation.
    double* const resampled = new double[outCount + 1];

    std::vector<double> sourcePos = arange(0, (double)f0Len);
    interp1(sourcePos.data(), rf0, static_cast<int>(f0Len), queryPos.data(), (int)outCount, resampled);

    // Scrub NaNs so downstream code sees 0.0 for those frames.
    for (double* cursor = resampled; cursor != resampled + outCount; ++cursor)
    {
        if (isnan(*cursor))
            *cursor = 0.0;
    }

    // Hand ownership of the new contour to the object.
    delete[] rf0;
    rf0 = resampled;
    f0Len = (int64_t)outCount;
}
long long* F0PreProcess::f0Log()
{
const auto tmp = new long long[f0Len];
const auto f0_mel = new double[f0Len];
for (long long i = 0; i < f0Len; i++)
{
f0_mel[i] = 1127 * log(1.0 + rf0[i] / 700.0);
if (f0_mel[i] > 0.0)
f0_mel[i] = (f0_mel[i] - f0_mel_min) * (f0_bin - 2.0) / (f0_mel_max - f0_mel_min) + 1.0;
if (f0_mel[i] < 1.0)
f0_mel[i] = 1;
if (f0_mel[i] > f0_bin - 1)
f0_mel[i] = f0_bin - 1;
tmp[i] = (long long)round(f0_mel[i]);
}
delete[] f0_mel;
delete[] rf0;
rf0 = nullptr;
return tmp;
}
/// Full pipeline from audio to quantised F0 bins.
///
/// Steps: estimate F0 (`compute_f0`), scale it by 2^(tran / 12), mark frames
/// below 0.001 as NaN, resample with `InterPf0(hubLen)`, and quantise with
/// `f0Log`.
///
/// @param audio     Input samples, forwarded to `compute_f0`.
/// @param audioLen  Number of samples in `audio`.
/// @param hubLen    Resampling length forwarded to `InterPf0`.
/// @param tran      Pitch shift; applied as a factor of 2^(tran / 12).
/// @return The bin indices produced by `f0Log`, one per resampled frame.
std::vector<long long> F0PreProcess::GetF0AndOtherInput(const double* audio, int64_t audioLen, int64_t hubLen, int64_t tran)
{
    compute_f0(audio, audioLen);

    // The shift factor does not depend on the frame, so compute it once.
    const double pitchRatio = pow(2.0, static_cast<double>(tran) / 12.0);
    for (int64_t i = 0; i < f0Len; ++i)
    {
        rf0[i] = rf0[i] * pitchRatio;
        // Near-zero frames become NaN; InterPf0 turns NaN results back into 0.0.
        if (rf0[i] < 0.001)
            rf0[i] = NAN;
    }

    InterPf0(hubLen);

    long long* const bins = f0Log();
    std::vector<long long> Of0;
    try
    {
        Of0.assign(bins, bins + f0Len);
    }
    catch (...)
    {
        // Do not leak the raw buffer if the copy cannot allocate.
        delete[] bins;
        throw;
    }
    delete[] bins;
    return Of0;
}
/// Assigns each of `specLen + 1` frame slots a 1-based unit index.
///
/// Unit `u` (0-based) covers frames up to round((u + 1) * specLen / hubertLen),
/// starting right after the previous unit's last frame. Slots never reached
/// keep the value 0, and a unit whose range is empty assigns nothing.
///
/// @param specLen    Number of frames; the result has `specLen + 1` entries.
/// @param hubertLen  Number of units to spread across the frames.
/// @return For each frame slot, the 1-based index of the unit covering it.
std::vector<long long> getAligments(size_t specLen, size_t hubertLen)
{
    std::vector<long long> frameToUnit(specLen + 1, 0);
    const double framesPerUnit = static_cast<double>(specLen) / static_cast<double>(hubertLen);

    size_t cursor = 0;
    size_t unit = 0;
    while (unit < hubertLen)
    {
        const double boundary = round(static_cast<double>(unit) * framesPerUnit + framesPerUnit);
        const size_t lastFrame = static_cast<size_t>(boundary);
        const long long label = static_cast<long long>(unit) + 1;

        // Label every frame from the cursor through lastFrame inclusive.
        while (cursor < lastFrame + 1)
        {
            frameToUnit[cursor] = label;
            ++cursor;
        }
        cursor = lastFrame + 1;
        ++unit;
    }
    return frameToUnit;
}
std::vector<float> F0PreProcess::GetF0AndOtherInputF0(const double* audio, int64_t audioLen, int64_t tran)
{
compute_f0(audio, audioLen);
for (int64_t i = 0; i < f0Len; ++i)
{
rf0[i] = log2(rf0[i] * pow(2.0, static_cast<double>(tran) / 12.0));
if (rf0[i] < 0.001)
rf0[i] = NAN;
}
const int64_t specLen = audioLen / hop;
InterPf0(specLen);
std::vector<float> Of0(specLen, 0.0);
double last_value = 0.0;
for (int64_t i = 0; i < specLen; ++i)
{
if (rf0[i] <= 0.0)
{
int64_t j = i + 1;
for (; j < specLen; ++j)
{
if (rf0[j] > 0.0)
break;
}
if (j < specLen - 1)
{
if (last_value > 0.0)
{
const auto step = (rf0[j] - rf0[i - 1]) / double(j - i);
for (int64_t k = i; k < j; ++k)
Of0[k] = float(rf0[i - 1] + step * double(k - i + 1));
}
else
for (int64_t k = i; k < j; ++k)
Of0[k] = float(rf0[j]);
i = j;
}
else
{
for (int64_t k = i; k < specLen; ++k)
Of0[k] = float(last_value);
i = specLen;
}
}
else
{
Of0[i] = float(rf0[i - 1]);
last_value = rf0[i];
}
}
delete[] rf0;
rf0 = nullptr;
return Of0;
}