TIMBOVILL committed on
Commit
b4c28a0
·
verified ·
1 Parent(s): ece6134

Upload 5 files

Browse files
rvc/lib/infer_pack/modules/F0Predictor/DioF0Predictor.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from infer_pack.modules.F0Predictor.F0Predictor import F0Predictor
2
+ import pyworld
3
+ import numpy as np
4
+
5
+
6
class DioF0Predictor(F0Predictor):
    """F0 predictor that runs ``pyworld.dio`` followed by ``pyworld.stonemask``.

    The raw contour is rounded to one decimal, resampled to the requested
    number of frames and its unvoiced (non-positive) frames are filled in.
    """

    def __init__(self, hop_length=512, f0_min=50, f0_max=1100, sampling_rate=44100):
        """Store the analysis settings.

        Args:
            hop_length: Samples per output frame; also used to derive the
                default number of frames and the analysis frame period.
            f0_min: Passed to pyworld as ``f0_floor``.
            f0_max: Passed to pyworld as ``f0_ceil``.
            sampling_rate: Sample rate of the audio handed to the predictor.
        """
        self.hop_length = hop_length
        self.f0_min = f0_min
        self.f0_max = f0_max
        self.sampling_rate = sampling_rate

    def interpolate_f0(self, f0):
        """Fill unvoiced frames of an F0 contour and build a voicing mask.

        Frames whose value is ``<= 0`` count as unvoiced. A leading unvoiced
        run takes the value of the first voiced frame, a trailing run takes
        the value of the last voiced frame, and an inner run is ramped between
        its voiced neighbours. If no frame is voiced the contour is returned
        unchanged (all fills use 0).

        Args:
            f0: NumPy array of F0 values; it is not modified.

        Returns:
            Tuple ``(filled_f0, vuv)``: the filled contour as a 1-D array and
            a 1-D ``float32`` array holding 1.0 for originally voiced frames
            and 0.0 otherwise.
        """
        data = np.reshape(f0, (f0.size, 1))

        vuv_vector = np.zeros((data.size, 1), dtype=np.float32)
        vuv_vector[data > 0.0] = 1.0

        # Write into a copy so the caller's array is never mutated.
        ip_data = data.copy()

        frame_number = data.size
        last_value = 0.0
        i = 0
        while i < frame_number:
            if not data[i, 0] <= 0.0:
                # Voiced frame: remember it as the left anchor of the next gap.
                last_value = data[i, 0]
                i += 1
                continue

            # Locate the first voiced frame after the gap that starts at i.
            j = i + 1
            while j < frame_number and not data[j, 0] > 0.0:
                j += 1

            if j < frame_number:
                # A voiced frame exists at j (possibly the very last frame).
                if last_value > 0.0:
                    # NOTE(review): the slope divides by (j - i), so the ramp
                    # already reaches data[j] on frame j - 1. This is kept
                    # from the original implementation on purpose.
                    step = (data[j, 0] - last_value) / float(j - i)
                    ip_data[i:j, 0] = last_value + step * np.arange(1, j - i + 1)
                else:
                    # Leading gap: no left anchor, hold the right one.
                    ip_data[i:j, 0] = data[j, 0]
            else:
                # Trailing gap: hold the last voiced value (0.0 if none).
                ip_data[i:, 0] = last_value
            i = j

        return ip_data[:, 0], vuv_vector[:, 0]

    def resize_f0(self, x, target_len):
        """Resample an F0 contour to ``target_len`` frames.

        Values below 0.001 are replaced by NaN before ``np.interp`` so they do
        not blend into neighbouring frames; NaNs in the result are turned back
        into zeros by ``np.nan_to_num``.

        Args:
            x: Array-like F0 contour.
            target_len: Number of frames in the result.

        Returns:
            Array of length ``target_len``.
        """
        source = np.array(x)
        source[source < 0.001] = np.nan
        target = np.interp(
            np.arange(0, len(source) * target_len, len(source)) / target_len,
            np.arange(0, len(source)),
            source,
        )
        return np.nan_to_num(target)

    def _extract_f0_uv(self, wav, p_len):
        """Run DIO + StoneMask on ``wav`` and return ``(f0, vuv)``."""
        if p_len is None:
            p_len = wav.shape[0] // self.hop_length
        # pyworld is fed double precision; convert once and reuse.
        signal = wav.astype(np.double)
        f0, t = pyworld.dio(
            signal,
            fs=self.sampling_rate,
            f0_floor=self.f0_min,
            f0_ceil=self.f0_max,
            frame_period=1000 * self.hop_length / self.sampling_rate,
        )
        f0 = pyworld.stonemask(signal, f0, t, self.sampling_rate)
        f0 = np.round(f0, 1)
        return self.interpolate_f0(self.resize_f0(f0, p_len))

    def compute_f0(self, wav, p_len=None):
        """Return the filled F0 contour of ``wav``.

        Args:
            wav: 1-D NumPy array of audio samples.
            p_len: Number of output frames; defaults to
                ``wav.shape[0] // hop_length``.

        Returns:
            1-D array of ``p_len`` F0 values.
        """
        return self._extract_f0_uv(wav, p_len)[0]

    def compute_f0_uv(self, wav, p_len=None):
        """Return the filled F0 contour of ``wav`` together with its voicing mask.

        Args:
            wav: 1-D NumPy array of audio samples.
            p_len: Number of output frames; defaults to
                ``wav.shape[0] // hop_length``.

        Returns:
            Tuple ``(f0, vuv)`` as produced by ``interpolate_f0``.
        """
        return self._extract_f0_uv(wav, p_len)
rvc/lib/infer_pack/modules/F0Predictor/F0Predictor.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
class F0Predictor(object):
    """Common interface of the F0 predictors.

    Both methods are placeholders that return ``None``; concrete predictors
    are expected to override them.
    """

    def compute_f0(self, wav, p_len=None):
        """Return the F0 contour of ``wav``.

        Args:
            wav: Audio samples to analyse.
            p_len: Optional number of output frames. It defaults to ``None``
                so the signature matches the concrete predictors.

        Returns:
            ``None`` in this base implementation.
        """
        return None

    def compute_f0_uv(self, wav, p_len=None):
        """Return the F0 contour of ``wav`` together with a voicing mask.

        Args:
            wav: Audio samples to analyse.
            p_len: Optional number of output frames. It defaults to ``None``
                so the signature matches the concrete predictors.

        Returns:
            ``None`` in this base implementation.
        """
        return None
rvc/lib/infer_pack/modules/F0Predictor/HarvestF0Predictor.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from infer_pack.modules.F0Predictor.F0Predictor import F0Predictor
2
+ import pyworld
3
+ import numpy as np
4
+
5
+
6
class HarvestF0Predictor(F0Predictor):
    """F0 predictor that runs ``pyworld.harvest`` followed by ``pyworld.stonemask``.

    The raw contour is resampled to the requested number of frames and its
    unvoiced (non-positive) frames are filled in.
    """

    def __init__(self, hop_length=512, f0_min=50, f0_max=1100, sampling_rate=44100):
        """Store the analysis settings.

        Args:
            hop_length: Samples per output frame; also used to derive the
                default number of frames and the analysis frame period.
            f0_min: Passed to pyworld as ``f0_floor``.
            f0_max: Passed to pyworld as ``f0_ceil``.
            sampling_rate: Sample rate of the audio handed to the predictor.
        """
        self.hop_length = hop_length
        self.f0_min = f0_min
        self.f0_max = f0_max
        self.sampling_rate = sampling_rate

    def interpolate_f0(self, f0):
        """Fill unvoiced frames of an F0 contour and build a voicing mask.

        Frames whose value is ``<= 0`` count as unvoiced. A leading unvoiced
        run takes the value of the first voiced frame, a trailing run takes
        the value of the last voiced frame, and an inner run is ramped between
        its voiced neighbours. If no frame is voiced the contour is returned
        unchanged (all fills use 0).

        Args:
            f0: NumPy array of F0 values; it is not modified.

        Returns:
            Tuple ``(filled_f0, vuv)``: the filled contour as a 1-D array and
            a 1-D ``float32`` array holding 1.0 for originally voiced frames
            and 0.0 otherwise.
        """
        data = np.reshape(f0, (f0.size, 1))

        vuv_vector = np.zeros((data.size, 1), dtype=np.float32)
        vuv_vector[data > 0.0] = 1.0

        # Write into a copy so the caller's array is never mutated.
        ip_data = data.copy()

        frame_number = data.size
        last_value = 0.0
        i = 0
        while i < frame_number:
            if not data[i, 0] <= 0.0:
                # Voiced frame: remember it as the left anchor of the next gap.
                last_value = data[i, 0]
                i += 1
                continue

            # Locate the first voiced frame after the gap that starts at i.
            j = i + 1
            while j < frame_number and not data[j, 0] > 0.0:
                j += 1

            if j < frame_number:
                # A voiced frame exists at j (possibly the very last frame).
                if last_value > 0.0:
                    # NOTE(review): the slope divides by (j - i), so the ramp
                    # already reaches data[j] on frame j - 1. This is kept
                    # from the original implementation on purpose.
                    step = (data[j, 0] - last_value) / float(j - i)
                    ip_data[i:j, 0] = last_value + step * np.arange(1, j - i + 1)
                else:
                    # Leading gap: no left anchor, hold the right one.
                    ip_data[i:j, 0] = data[j, 0]
            else:
                # Trailing gap: hold the last voiced value (0.0 if none).
                ip_data[i:, 0] = last_value
            i = j

        return ip_data[:, 0], vuv_vector[:, 0]

    def resize_f0(self, x, target_len):
        """Resample an F0 contour to ``target_len`` frames.

        Values below 0.001 are replaced by NaN before ``np.interp`` so they do
        not blend into neighbouring frames; NaNs in the result are turned back
        into zeros by ``np.nan_to_num``.

        Args:
            x: Array-like F0 contour.
            target_len: Number of frames in the result.

        Returns:
            Array of length ``target_len``.
        """
        source = np.array(x)
        source[source < 0.001] = np.nan
        target = np.interp(
            np.arange(0, len(source) * target_len, len(source)) / target_len,
            np.arange(0, len(source)),
            source,
        )
        return np.nan_to_num(target)

    def _extract_f0_uv(self, wav, p_len):
        """Run Harvest + StoneMask on ``wav`` and return ``(f0, vuv)``."""
        if p_len is None:
            p_len = wav.shape[0] // self.hop_length
        # pyworld is fed double precision; convert once and reuse.
        signal = wav.astype(np.double)
        f0, t = pyworld.harvest(
            signal,
            fs=self.sampling_rate,
            f0_floor=self.f0_min,
            f0_ceil=self.f0_max,
            frame_period=1000 * self.hop_length / self.sampling_rate,
        )
        # The sample rate lives in ``self.sampling_rate``; the instance has no
        # ``fs`` attribute.
        f0 = pyworld.stonemask(signal, f0, t, self.sampling_rate)
        return self.interpolate_f0(self.resize_f0(f0, p_len))

    def compute_f0(self, wav, p_len=None):
        """Return the filled F0 contour of ``wav``.

        Args:
            wav: 1-D NumPy array of audio samples.
            p_len: Number of output frames; defaults to
                ``wav.shape[0] // hop_length``.

        Returns:
            1-D array of ``p_len`` F0 values.
        """
        return self._extract_f0_uv(wav, p_len)[0]

    def compute_f0_uv(self, wav, p_len=None):
        """Return the filled F0 contour of ``wav`` together with its voicing mask.

        Args:
            wav: 1-D NumPy array of audio samples.
            p_len: Number of output frames; defaults to
                ``wav.shape[0] // hop_length``.

        Returns:
            Tuple ``(f0, vuv)`` as produced by ``interpolate_f0``.
        """
        return self._extract_f0_uv(wav, p_len)
rvc/lib/infer_pack/modules/F0Predictor/PMF0Predictor.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from infer_pack.modules.F0Predictor.F0Predictor import F0Predictor
2
+ import parselmouth
3
+ import numpy as np
4
+
5
+
6
class PMF0Predictor(F0Predictor):
    """F0 predictor that uses ``parselmouth.Sound.to_pitch_ac``.

    The raw contour is zero-padded to the requested number of frames and its
    unvoiced (non-positive) frames are filled in.
    """

    def __init__(self, hop_length=512, f0_min=50, f0_max=1100, sampling_rate=44100):
        """Store the analysis settings.

        Args:
            hop_length: Samples per output frame; also used to derive the
                default number of frames and the analysis time step.
            f0_min: Passed to parselmouth as ``pitch_floor``.
            f0_max: Passed to parselmouth as ``pitch_ceiling``.
            sampling_rate: Sample rate of the audio handed to the predictor.
        """
        self.hop_length = hop_length
        self.f0_min = f0_min
        self.f0_max = f0_max
        self.sampling_rate = sampling_rate

    def interpolate_f0(self, f0):
        """Fill unvoiced frames of an F0 contour and build a voicing mask.

        Frames whose value is ``<= 0`` count as unvoiced. A leading unvoiced
        run takes the value of the first voiced frame, a trailing run takes
        the value of the last voiced frame, and an inner run is ramped between
        its voiced neighbours. If no frame is voiced the contour is returned
        unchanged (all fills use 0).

        Args:
            f0: NumPy array of F0 values; it is not modified.

        Returns:
            Tuple ``(filled_f0, vuv)``: the filled contour as a 1-D array and
            a 1-D ``float32`` array holding 1.0 for originally voiced frames
            and 0.0 otherwise.
        """
        data = np.reshape(f0, (f0.size, 1))

        vuv_vector = np.zeros((data.size, 1), dtype=np.float32)
        vuv_vector[data > 0.0] = 1.0

        # Write into a copy so the caller's array is never mutated.
        ip_data = data.copy()

        frame_number = data.size
        last_value = 0.0
        i = 0
        while i < frame_number:
            if not data[i, 0] <= 0.0:
                # Voiced frame: remember it as the left anchor of the next gap.
                last_value = data[i, 0]
                i += 1
                continue

            # Locate the first voiced frame after the gap that starts at i.
            j = i + 1
            while j < frame_number and not data[j, 0] > 0.0:
                j += 1

            if j < frame_number:
                # A voiced frame exists at j (possibly the very last frame).
                if last_value > 0.0:
                    # NOTE(review): the slope divides by (j - i), so the ramp
                    # already reaches data[j] on frame j - 1. This is kept
                    # from the original implementation on purpose.
                    step = (data[j, 0] - last_value) / float(j - i)
                    ip_data[i:j, 0] = last_value + step * np.arange(1, j - i + 1)
                else:
                    # Leading gap: no left anchor, hold the right one.
                    ip_data[i:j, 0] = data[j, 0]
            else:
                # Trailing gap: hold the last voiced value (0.0 if none).
                ip_data[i:, 0] = last_value
            i = j

        return ip_data[:, 0], vuv_vector[:, 0]

    def _extract_f0_uv(self, wav, p_len):
        """Run the parselmouth pitch tracker on ``wav`` and return ``(f0, vuv)``."""
        x = wav
        if p_len is None:
            p_len = x.shape[0] // self.hop_length
        else:
            assert abs(p_len - x.shape[0] // self.hop_length) < 4, "pad length error"

        # Hop duration in milliseconds; converted to seconds at the call site.
        time_step = self.hop_length / self.sampling_rate * 1000
        f0 = (
            parselmouth.Sound(x, self.sampling_rate)
            .to_pitch_ac(
                time_step=time_step / 1000,
                voicing_threshold=0.6,
                pitch_floor=self.f0_min,
                pitch_ceiling=self.f0_max,
            )
            .selected_array["frequency"]
        )

        # Zero-pad both ends when the tracker produced fewer than p_len
        # frames; the extra frame of an odd shortfall goes in front.
        missing = p_len - len(f0)
        pad_front = (missing + 1) // 2
        pad_back = missing - pad_front
        if pad_front > 0 or pad_back > 0:
            f0 = np.pad(f0, [[pad_front, pad_back]], mode="constant")
        return self.interpolate_f0(f0)

    def compute_f0(self, wav, p_len=None):
        """Return the filled F0 contour of ``wav``.

        Args:
            wav: 1-D NumPy array of audio samples.
            p_len: Number of output frames; defaults to
                ``wav.shape[0] // hop_length``. When given it must be within
                3 frames of that default.

        Returns:
            1-D array of F0 values.

        Raises:
            AssertionError: If ``p_len`` is too far from the default length.
        """
        return self._extract_f0_uv(wav, p_len)[0]

    def compute_f0_uv(self, wav, p_len=None):
        """Return the filled F0 contour of ``wav`` together with its voicing mask.

        Args:
            wav: 1-D NumPy array of audio samples.
            p_len: Number of output frames; defaults to
                ``wav.shape[0] // hop_length``. When given it must be within
                3 frames of that default.

        Returns:
            Tuple ``(f0, vuv)`` as produced by ``interpolate_f0``.

        Raises:
            AssertionError: If ``p_len`` is too far from the default length.
        """
        return self._extract_f0_uv(wav, p_len)
rvc/lib/infer_pack/modules/F0Predictor/__init__.py ADDED
File without changes