File size: 4,162 Bytes
c82bb46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#include "F0Preprocess.hpp"


/**
 * Estimates the F0 contour of `audio` and stores it in the members `rf0` / `f0Len`.
 *
 * Runs Dio followed by StoneMask (both declared outside this file; presumably the
 * WORLD vocoder's coarse estimator and its refinement pass - TODO confirm).
 * The frame period handed to Dio is `1000.0 * hop / fs`, and the F0 ceiling is 800.
 *
 * @param audio  Input samples.
 * @param len    Number of samples in `audio` (narrowed to int for the callee API).
 *
 * NOTE(review): `rf0` is overwritten with a freshly allocated buffer; whatever it
 * pointed to before is not released here. The other members in this file release
 * `rf0` once they have consumed it.
 */
void F0PreProcess::compute_f0(const double* audio, int64_t len)
{
	DioOption Doption;
	InitializeDioOption(&Doption);
	Doption.f0_ceil = 800;
	Doption.frame_period = 1000.0 * hop / fs;
	f0Len = GetSamplesForDIO(fs, static_cast<int>(len), Doption.frame_period);

	// Scratch buffers are owned by vectors so they are released on every exit path.
	std::vector<double> temporalPositions(static_cast<size_t>(f0Len));
	std::vector<double> coarseF0(static_cast<size_t>(f0Len));

	// The refined contour outlives this call, so it stays in the member buffer.
	rf0 = new double[f0Len];
	Dio(audio, static_cast<int>(len), fs, &Doption, temporalPositions.data(), coarseF0.data());
	StoneMask(audio, static_cast<int>(len), fs, temporalPositions.data(), coarseF0.data(), static_cast<int>(f0Len), rf0);
}

/**
 * Builds the sequence start/div, (start+step)/div, (start+2*step)/div, ...
 * for as long as the running (undivided) value stays strictly below `end`.
 *
 * @param start  First undivided value.
 * @param end    Exclusive upper bound for the undivided value.
 * @param step   Increment added after each element; must be positive to make progress.
 * @param div    Divisor applied to every emitted value.
 * @return The generated values; empty when `start >= end` or when `step <= 0`.
 */
std::vector<double> arange(double start,double end,double step = 1.0,double div = 1.0)
{
	std::vector<double> output;

	// A zero or negative step can never reach `end`; bail out instead of looping
	// forever while growing the vector.
	if (step <= 0.0)
		return output;

	// Pre-size for the expected element count. The bound keeps the cast well defined
	// and avoids an absurd up-front allocation for huge or non-finite ranges.
	const double estimate = (end - start) / step;
	if (estimate > 0.0 && estimate < 1.0e8)
		output.reserve(static_cast<size_t>(estimate) + 1);

	while(start<end)
	{
		output.push_back(start / div);
		start += step;
	}
	return output;
}

/**
 * Resamples the contour held in `rf0` and replaces it with the resampled version.
 *
 * The contour is sampled at positions 0, f0Len/len, 2*f0Len/len, ... on the original
 * frame axis 0, 1, ..., f0Len-1 via `interp1` (declared outside this file; assumed to be
 * MATLAB-style 1-D interpolation - TODO confirm). NaN results are replaced by 0.0.
 * On return `rf0` points at the new buffer and `f0Len` is the number of sample positions.
 *
 * @param len  Controls the number of output positions and their spacing.
 */
void F0PreProcess::InterPf0(int64_t len)
{
	// Where to sample, expressed on the original frame axis.
	const std::vector<double> queryPos = arange(0.0, (double)f0Len * (double)len, (double)f0Len, (double)len);
	// The original frame axis itself: 0, 1, ..., f0Len - 1.
	const std::vector<double> framePos = arange(0, (double)f0Len);

	const size_t outCount = queryPos.size();
	double* const resampled = new double[outCount + 1];
	interp1(framePos.data(), rf0, static_cast<int>(f0Len), queryPos.data(), (int)outCount, resampled);

	// NaN entries are flattened to zero.
	for (double* p = resampled; p != resampled + outCount; ++p)
	{
		if (isnan(*p))
			*p = 0.0;
	}

	// Swap the old contour for the resampled one.
	delete[] rf0;
	rf0 = resampled;
	f0Len = (int64_t)outCount;
}

long long* F0PreProcess::f0Log()
{
	const auto tmp = new long long[f0Len];
	const auto f0_mel = new double[f0Len];
	for (long long i = 0; i < f0Len; i++)
	{
		f0_mel[i] = 1127 * log(1.0 + rf0[i] / 700.0);
		if (f0_mel[i] > 0.0)
			f0_mel[i] = (f0_mel[i] - f0_mel_min) * (f0_bin - 2.0) / (f0_mel_max - f0_mel_min) + 1.0;
		if (f0_mel[i] < 1.0)
			f0_mel[i] = 1;
		if (f0_mel[i] > f0_bin - 1)
			f0_mel[i] = f0_bin - 1;
		tmp[i] = (long long)round(f0_mel[i]);
	}
	delete[] f0_mel;
	delete[] rf0;
    rf0 = nullptr;
	return tmp;
}

/**
 * Produces the quantised F0 bins for `audio`.
 *
 * Steps: estimate F0 (compute_f0), scale every frame by 2^(tran / 12), mark frames
 * below 0.001 as NaN, resample with InterPf0(hubLen), then quantise with f0Log().
 *
 * @param audio     Input samples.
 * @param audioLen  Number of samples in `audio`.
 * @param hubLen    Length argument forwarded to InterPf0.
 * @param tran      Transposition; the scale factor is 2^(tran / 12).
 * @return The bins returned by f0Log(), copied into a vector of `f0Len` entries.
 */
std::vector<long long> F0PreProcess::GetF0AndOtherInput(const double* audio, int64_t audioLen, int64_t hubLen, int64_t tran)
{
	compute_f0(audio, audioLen);

	// The transposition ratio is the same for every frame, so compute it once.
	const double pitchRatio = pow(2.0, static_cast<double>(tran) / 12.0);
	for (int64_t i = 0; i < f0Len; ++i)
	{
		rf0[i] = rf0[i] * pitchRatio;
		if (rf0[i] < 0.001)
			rf0[i] = NAN;
	}
	InterPf0(hubLen);

	// f0Log() hands back an owning raw array; make sure it is freed even if the copy throws.
	long long* const bins = f0Log();
	std::vector<long long> Of0;
	try
	{
		Of0.assign(bins, bins + f0Len);
	}
	catch (...)
	{
		delete[] bins;
		throw;
	}
	delete[] bins;
	return Of0;
}

/**
 * Spreads `hubertLen` units evenly over the spectrogram frames and labels every
 * frame with the 1-based index of the unit covering it.
 *
 * @param specLen    Number of spectrogram frames.
 * @param hubertLen  Number of units to distribute.
 * @return A vector of `specLen + 1` labels; entries no unit reaches stay 0.
 */
std::vector<long long> getAligments(size_t specLen, size_t hubertLen)
{
	// One slot beyond specLen, since each unit's rounded end frame is written inclusively.
	std::vector<long long> alignment(specLen + 1, 0);

	const double framesPerUnit = static_cast<double>(specLen) / static_cast<double>(hubertLen);
	size_t cursor = 0;
	for (size_t unit = 0; unit < hubertLen; ++unit)
	{
		const auto lastFrame = static_cast<size_t>(round(static_cast<double>(unit) * framesPerUnit + framesPerUnit));
		const size_t stop = lastFrame + 1;
		const long long label = static_cast<long long>(unit) + 1;

		// Label every frame from the cursor through this unit's last frame.
		for (; cursor < stop; ++cursor)
			alignment[cursor] = label;

		// The next unit begins right after this unit's last frame.
		cursor = stop;
	}

	return alignment;
}

/**
 * Produces a continuous log2-F0 contour with `audioLen / hop` entries.
 *
 * Steps: estimate F0 (compute_f0), take log2 of each frame scaled by 2^(tran / 12),
 * mark frames below 0.001 as NaN, resample with InterPf0, then fill every run of
 * non-positive frames:
 *   - a gap between two voiced frames is filled with a linear ramp towards the next voiced value;
 *   - a leading gap (no voiced frame seen yet) is filled with the next voiced value;
 *   - a gap that reaches the last frame is filled with the last voiced value (0 if none).
 * `rf0` is released before returning.
 *
 * @param audio     Input samples.
 * @param audioLen  Number of samples in `audio`.
 * @param tran      Transposition; the scale factor is 2^(tran / 12).
 * @return The gap-filled log2-F0 values as floats.
 */
std::vector<float> F0PreProcess::GetF0AndOtherInputF0(const double* audio, int64_t audioLen, int64_t tran)
{
	compute_f0(audio, audioLen);

	// The transposition ratio is the same for every frame, so compute it once.
	const double pitchRatio = pow(2.0, static_cast<double>(tran) / 12.0);
	for (int64_t i = 0; i < f0Len; ++i)
	{
		rf0[i] = log2(rf0[i] * pitchRatio);
		if (rf0[i] < 0.001)
			rf0[i] = NAN;
	}
	const int64_t specLen = audioLen / hop;
	InterPf0(specLen);

	std::vector<float> Of0(specLen, 0.0);

	double last_value = 0.0;
	for (int64_t i = 0; i < specLen; ++i)
	{
		if (rf0[i] > 0.0)
		{
			// Voiced frame: copy it from its own index. Reading rf0[i - 1] here would be
			// out of bounds at i == 0 and would shift the whole contour by one frame.
			Of0[i] = static_cast<float>(rf0[i]);
			last_value = rf0[i];
			continue;
		}

		// Unvoiced frame: find the first voiced frame after the gap.
		int64_t j = i + 1;
		while (j < specLen && !(rf0[j] > 0.0))
			++j;

		if (j < specLen - 1)
		{
			if (last_value > 0.0)
			{
				// A gap can only start right after a voiced frame, so last_value is rf0[i - 1].
				const double step = (rf0[j] - last_value) / static_cast<double>(j - i);
				for (int64_t k = i; k < j; ++k)
					Of0[k] = static_cast<float>(last_value + step * static_cast<double>(k - i + 1));
			}
			else
			{
				for (int64_t k = i; k < j; ++k)
					Of0[k] = static_cast<float>(rf0[j]);
			}
			// Resume at frame j itself (the loop increment adds one) so the first
			// voiced frame after the gap is copied instead of being skipped.
			i = j - 1;
		}
		else
		{
			// The gap reaches the last frame: hold the last voiced value to the end.
			for (int64_t k = i; k < specLen; ++k)
				Of0[k] = static_cast<float>(last_value);
			break;
		}
	}
	delete[] rf0;
	rf0 = nullptr;
	return Of0;
}