Kremon96 committed on
Commit
df438fc
·
verified ·
1 Parent(s): a00d7bf

Delete utils

Browse files
utils/__init__.py DELETED
File without changes
utils/argutils.py DELETED
@@ -1,40 +0,0 @@
1
- from pathlib import Path
2
- import numpy as np
3
- import argparse
4
-
5
# Types used to rank argument values for display. A value's rank is the index of its type in
# this list, so types listed earlier are ranked first.
_type_priorities = [    # In decreasing order
    Path,
    str,
    int,
    float,
    bool,
]
12
-
13
def _priority(o):
    """
    Ranks an object by the position of its type in _type_priorities.

    An exact type match is looked for first, so that a bool is ranked as a bool rather than as
    the int it also is an instance of. Only then are subclass matches considered. Objects that
    match no listed type are ranked after all listed types.

    :param o: the object to rank
    :return: the rank as an int, lower meaning higher priority
    """
    # First pass: the object's type is exactly one of the listed types
    for rank, candidate in enumerate(_type_priorities):
        if type(o) is candidate:
            return rank

    # Second pass: the object is an instance of a subclass of a listed type
    for rank, candidate in enumerate(_type_priorities):
        if isinstance(o, candidate):
            return rank

    return len(_type_priorities)
21
-
22
def print_args(args: argparse.Namespace, parser=None):
    """
    Prints the parsed arguments as an aligned "name: value" listing.

    :param args: the namespace holding the parsed arguments. It may be empty.
    :param parser: optional, the parser the arguments were declared on. When given, arguments
    are listed in the order they were declared on the parser, with undeclared ones last.
    Otherwise they are grouped by the type of their value, as ranked by _priority(). In both
    cases ties are broken alphabetically by argument name.
    """
    args = vars(args)
    if parser is None:
        priorities = {name: _priority(value) for name, value in args.items()}
    else:
        all_params = [a.dest for g in parser._action_groups for a in g._group_actions]
        # Position of the first declaration of each destination, built once instead of
        # calling list.index() for every argument
        first_index = {}
        for i, dest in enumerate(all_params):
            first_index.setdefault(dest, i)
        priorities = {name: first_index.get(name, len(all_params)) for name in args}

    # Sort by priority first and by name second
    ordered = sorted(args.items(), key=lambda item: (priorities[item[0]], item[0]))

    # default=0 keeps an empty namespace from raising in max()
    pad = max(map(len, args), default=0) + 3

    print("Arguments:")
    for param, value in ordered:
        print(" {0}:{1}{2}".format(param, ' ' * (pad - len(param)), value))
    print("")
40
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/default_models.py DELETED
@@ -1,56 +0,0 @@
1
- import urllib.request
2
- from pathlib import Path
3
- from threading import Thread
4
- from urllib.error import HTTPError
5
-
6
- from tqdm import tqdm
7
-
8
-
9
# Maps the name of each default model to a pair (download URL, expected file size in bytes).
# The expected size is compared against the size of the file on disk to decide whether a model
# must be (re)downloaded and whether a download succeeded.
default_models = {
    "encoder": ("https://drive.google.com/uc?export=download&id=1q8mEGwCkFy23KZsinbuvdKAQLqNKbYf1", 17090379),
    "synthesizer": ("https://drive.google.com/u/0/uc?id=1EqFMIbvxffxtjiVrtykroF6_mUh-5Z3s&export=download&confirm=t", 370554559),
    "vocoder": ("https://drive.google.com/uc?export=download&id=1cf2NO6FtI0jDuy8AV3Xgn6leO6dHjIgu", 53845290),
}
14
-
15
-
16
class DownloadProgressBar(tqdm):
    """A tqdm progress bar that can be driven by the reporthook of urlretrieve."""

    def update_to(self, b=1, bsize=1, tsize=None):
        """
        Moves the bar to the absolute position b * bsize.

        :param b: the number of blocks transferred so far
        :param bsize: the size of each block
        :param tsize: the total size. When given, it becomes the total of the bar.
        """
        if tsize is not None:
            self.total = tsize

        # tqdm.update() expects an increment, so subtract the progress already displayed
        transferred = b * bsize
        self.update(transferred - self.n)
21
-
22
-
23
def download(url: str, target: Path, bar_pos=0):
    """
    Downloads a file to disk while displaying a progress bar.

    Download errors are reported on the console rather than raised: the caller is expected to
    check the target file afterwards to find out whether the download succeeded.

    :param url: the address to download the file from
    :param target: the path to save the file to. Missing parent directories are created.
    :param bar_pos: the position passed to the progress bar, so that several downloads running
    at once each display their own bar
    """
    from urllib.error import URLError

    # Ensure the directory exists
    target.parent.mkdir(exist_ok=True, parents=True)

    desc = f"Downloading {target.name}"
    with DownloadProgressBar(unit="B", unit_scale=True, miniters=1, desc=desc, position=bar_pos, leave=False) as t:
        try:
            urllib.request.urlretrieve(url, filename=target, reporthook=t.update_to)
        except URLError as e:
            # URLError is the base class of HTTPError and ContentTooShortError, so this also
            # covers failures to reach the server and truncated downloads. tqdm.write() prints
            # without breaking the progress bars that are being displayed.
            tqdm.write(f"Failed to download {target.name}: {e}")
33
-
34
-
35
def ensure_default_models(models_dir: Path):
    """
    Makes sure every default model is present on disk with its expected size, downloading in
    parallel those that are missing or of the wrong size.

    :param models_dir: the directory holding the models. The default models are stored in its
    "default" subdirectory as <model name>.pt
    :raises AssertionError: if, once all downloads have finished, a model file is still missing
    or not of the expected size
    """
    # Define download tasks
    jobs = []
    for model_name, (url, size) in default_models.items():
        target_path = models_dir / "default" / f"{model_name}.pt"
        if target_path.exists():
            if target_path.stat().st_size == size:
                continue
            print(f"File {target_path} is not of expected size, redownloading...")

        # One thread per download; the number of jobs so far serves as the progress bar position
        thread = Thread(target=download, args=(url, target_path, len(jobs)))
        thread.start()
        jobs.append((thread, target_path, size))

    # Wait for every download to finish before validating any of them, so that a failed
    # download does not interrupt the caller while other downloads are still running
    for thread, _, _ in jobs:
        thread.join()

    for _, target_path, size in jobs:
        if not (target_path.exists() and target_path.stat().st_size == size):
            # Raised explicitly rather than through an assert statement, which would be
            # stripped when running python with -O
            raise AssertionError(
                f"Download for {target_path.name} failed. You may download models manually instead.\n"
                f"https://drive.google.com/drive/folders/1fU6umc5uQAVR2udZdHX-lDgXYzTyqG_j"
            )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/logmmse.py DELETED
@@ -1,247 +0,0 @@
1
- # The MIT License (MIT)
2
- #
3
- # Copyright (c) 2015 braindead
4
- #
5
- # Permission is hereby granted, free of charge, to any person obtaining a copy
6
- # of this software and associated documentation files (the "Software"), to deal
7
- # in the Software without restriction, including without limitation the rights
8
- # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- # copies of the Software, and to permit persons to whom the Software is
10
- # furnished to do so, subject to the following conditions:
11
- #
12
- # The above copyright notice and this permission notice shall be included in all
13
- # copies or substantial portions of the Software.
14
- #
15
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- # SOFTWARE.
22
- #
23
- #
24
- # This code was extracted from the logmmse package (https://pypi.org/project/logmmse/) and I
25
- # simply modified the interface to meet my needs.
26
-
27
-
28
- import numpy as np
29
- import math
30
- from scipy.special import expn
31
- from collections import namedtuple
32
-
33
# Bundles the values computed by profile_noise() that denoise() needs to process a waveform.
NoiseProfile = namedtuple("NoiseProfile", "sampling_rate window_size len1 len2 win n_fft noise_mu2")
34
-
35
-
36
def profile_noise(noise, sampling_rate, window_size=0):
    """
    Creates a profile of the noise in a given waveform.

    :param noise: a waveform containing noise ONLY, as a numpy array of floats or ints. It is
    not modified.
    :param sampling_rate: the sampling rate of the audio
    :param window_size: the size of the window the logmmse algorithm operates on. A default value
    will be picked if left as 0.
    :return: a NoiseProfile object
    :raises ValueError: if the waveform is shorter than one window, or if its dtype is not
    supported
    """
    noise, _ = to_float(noise)
    # Not done in place: to_float() returns float64 input as is, so "+=" would modify the
    # array of the caller
    noise = noise + np.finfo(np.float64).eps

    if window_size == 0:
        window_size = int(math.floor(0.02 * sampling_rate))

    # Force an even window size
    if window_size % 2 == 1:
        window_size = window_size + 1

    # Split the window in two parts, the first one being perc% of the window
    perc = 50
    len1 = int(math.floor(window_size * perc / 100))
    len2 = int(window_size - len1)

    # Hann window, scaled to sum to len2
    win = np.hanning(window_size)
    win = win * len2 / np.sum(win)
    n_fft = 2 * window_size

    n_frames = len(noise) // window_size
    if n_frames == 0:
        # Averaging over zero frames would divide by zero and yield a profile made of NaNs
        raise ValueError("The noise waveform must be at least %d samples long, got %d" %
                         (window_size, len(noise)))

    # Average the magnitude spectrum over the non-overlapping windowed frames
    noise_mean = np.zeros(n_fft)
    for j in range(0, window_size * n_frames, window_size):
        noise_mean += np.absolute(np.fft.fft(win * noise[j:j + window_size], n_fft, axis=0))
    noise_mu2 = (noise_mean / n_frames) ** 2

    return NoiseProfile(sampling_rate, window_size, len1, len2, win, n_fft, noise_mu2)
70
-
71
-
72
def denoise(wav, noise_profile: NoiseProfile, eta=0.15):
    """
    Cleans the noise from a speech waveform given a noise profile. The waveform must have the
    same sampling rate as the one used to create the noise profile.

    :param wav: a speech waveform as a numpy array of floats or ints. It is not modified.
    :param noise_profile: a NoiseProfile object that was created from a similar (or a segment of
    the same) waveform.
    :param eta: voice threshold for noise update. While the voice activation detection value is
    below this threshold, the noise profile will be continuously updated throughout the audio.
    Set to 0 to disable updating the noise profile.
    :return: the clean wav as a numpy array of floats or ints of the same length.
    """
    wav, dtype = to_float(wav)
    # Not done in place: to_float() returns float64 input as is, so "+=" would modify the
    # array of the caller
    wav = wav + np.finfo(np.float64).eps
    p = noise_profile

    nframes = int(math.floor(len(wav) / p.len2) - math.floor(p.window_size / p.len2))
    x_final = np.zeros(nframes * p.len2)

    aa = 0.98   # Weight of the previous frame's estimate when computing ksi
    mu = 0.98   # Weight of the current noise estimate when it is updated
    ksi_min = 10 ** (-25 / 10)  # Lower bound applied to ksi

    x_old = np.zeros(p.len1)
    xk_prev = np.zeros(p.len1)
    noise_mu2 = p.noise_mu2
    for k in range(0, nframes * p.len2, p.len2):
        # Frames are window_size long and start every len2 samples
        insign = p.win * wav[k:k + p.window_size]

        spec = np.fft.fft(insign, p.n_fft, axis=0)
        sig = np.absolute(spec)
        sig2 = sig ** 2

        # Ratio of the frame's power to the noise power, capped at 40
        gammak = np.minimum(sig2 / noise_mu2, 40)

        # NOTE(review): this condition holds as soon as ANY element of xk_prev is zero, not
        # only when all of them are. It presumably is meant to detect the first frame --
        # confirm before changing it, as doing so would alter the output.
        if xk_prev.all() == 0:
            ksi = aa + (1 - aa) * np.maximum(gammak - 1, 0)
        else:
            ksi = aa * xk_prev / noise_mu2 + (1 - aa) * np.maximum(gammak - 1, 0)
            ksi = np.maximum(ksi_min, ksi)

        # Voice activity decision: the noise estimate is only updated on frames scoring
        # below eta
        log_sigma_k = gammak * ksi/(1 + ksi) - np.log(1 + ksi)
        vad_decision = np.sum(log_sigma_k) / p.window_size
        if vad_decision < eta:
            noise_mu2 = mu * noise_mu2 + (1 - mu) * sig2

        # Gain applied to each frequency bin. vk is floored at 1e-8 before being passed to expn.
        a = ksi / (1 + ksi)
        vk = a * gammak
        ei_vk = 0.5 * expn(1, np.maximum(vk, 1e-8))
        hw = a * np.exp(ei_vk)
        sig = sig * hw
        xk_prev = sig ** 2
        xi_w = np.fft.ifft(hw * spec, p.n_fft, axis=0)
        xi_w = np.real(xi_w)

        # The start of this frame is summed with the end of the previous frame, and the end
        # of this frame is kept for the next iteration
        x_final[k:k + p.len2] = x_old + xi_w[0:p.len1]
        x_old = xi_w[p.len1:p.window_size]

    output = from_float(x_final, dtype)
    # The trailing samples that were not processed are filled with zeros
    output = np.pad(output, (0, len(wav) - len(output)), mode="constant")
    return output
134
-
135
-
136
- ## Alternative VAD algorithm to webrtcvad. It has the advantage of not requiring that darn
137
- ## package to be installed, and it also works for any sampling rate. Maybe I'll eventually use
138
- ## it instead of webrtcvad.
139
- # def vad(wav, sampling_rate, eta=0.15, window_size=0):
140
- # """
141
- # TODO: fix doc
142
- # Creates a profile of the noise in a given waveform.
143
- #
144
- # :param wav: a waveform containing noise ONLY, as a numpy array of floats or ints.
145
- # :param sampling_rate: the sampling rate of the audio
146
- # :param window_size: the size of the window the logmmse algorithm operates on. A default value
147
- # will be picked if left as 0.
148
- # :param eta: voice threshold for noise update. While the voice activation detection value is
149
- # below this threshold, the noise profile will be continuously updated throughout the audio.
150
- # Set to 0 to disable updating the noise profile.
151
- # """
152
- # wav, dtype = to_float(wav)
153
- # wav += np.finfo(np.float64).eps
154
- #
155
- # if window_size == 0:
156
- # window_size = int(math.floor(0.02 * sampling_rate))
157
- #
158
- # if window_size % 2 == 1:
159
- # window_size = window_size + 1
160
- #
161
- # perc = 50
162
- # len1 = int(math.floor(window_size * perc / 100))
163
- # len2 = int(window_size - len1)
164
- #
165
- # win = np.hanning(window_size)
166
- # win = win * len2 / np.sum(win)
167
- # n_fft = 2 * window_size
168
- #
169
- # wav_mean = np.zeros(n_fft)
170
- # n_frames = len(wav) // window_size
171
- # for j in range(0, window_size * n_frames, window_size):
172
- # wav_mean += np.absolute(np.fft.fft(win * wav[j:j + window_size], n_fft, axis=0))
173
- # noise_mu2 = (wav_mean / n_frames) ** 2
174
- #
175
- # wav, dtype = to_float(wav)
176
- # wav += np.finfo(np.float64).eps
177
- #
178
- # nframes = int(math.floor(len(wav) / len2) - math.floor(window_size / len2))
179
- # vad = np.zeros(nframes * len2, dtype=np.bool)
180
- #
181
- # aa = 0.98
182
- # mu = 0.98
183
- # ksi_min = 10 ** (-25 / 10)
184
- #
185
- # xk_prev = np.zeros(len1)
186
- # noise_mu2 = noise_mu2
187
- # for k in range(0, nframes * len2, len2):
188
- # insign = win * wav[k:k + window_size]
189
- #
190
- # spec = np.fft.fft(insign, n_fft, axis=0)
191
- # sig = np.absolute(spec)
192
- # sig2 = sig ** 2
193
- #
194
- # gammak = np.minimum(sig2 / noise_mu2, 40)
195
- #
196
- # if xk_prev.all() == 0:
197
- # ksi = aa + (1 - aa) * np.maximum(gammak - 1, 0)
198
- # else:
199
- # ksi = aa * xk_prev / noise_mu2 + (1 - aa) * np.maximum(gammak - 1, 0)
200
- # ksi = np.maximum(ksi_min, ksi)
201
- #
202
- # log_sigma_k = gammak * ksi / (1 + ksi) - np.log(1 + ksi)
203
- # vad_decision = np.sum(log_sigma_k) / window_size
204
- # if vad_decision < eta:
205
- # noise_mu2 = mu * noise_mu2 + (1 - mu) * sig2
206
- # print(vad_decision)
207
- #
208
- # a = ksi / (1 + ksi)
209
- # vk = a * gammak
210
- # ei_vk = 0.5 * expn(1, np.maximum(vk, 1e-8))
211
- # hw = a * np.exp(ei_vk)
212
- # sig = sig * hw
213
- # xk_prev = sig ** 2
214
- #
215
- # vad[k:k + len2] = vad_decision >= eta
216
- #
217
- # vad = np.pad(vad, (0, len(wav) - len(vad)), mode="constant")
218
- # return vad
219
-
220
-
221
def to_float(_input):
    """
    Converts a waveform to float64. Integer formats are rescaled by their full-scale value:
    uint8 is centered on 128 and divided by 128, int16 is divided by 32768 and int32 by
    2147483648.

    :param _input: a waveform as a numpy array of dtype float64, float32, uint8, int16 or int32
    :return: a tuple (waveform as a float64 array, dtype of the input). A float64 input is
    returned as is, without being copied.
    :raises ValueError: if the dtype of the input is not one of those listed above
    """
    if _input.dtype == np.float64:
        return _input, _input.dtype
    elif _input.dtype == np.float32:
        return _input.astype(np.float64), _input.dtype
    elif _input.dtype == np.uint8:
        # Convert before subtracting: in uint8 arithmetic, samples below 128 would wrap
        # around to large positive values instead of becoming negative
        return (_input.astype(np.float64) - 128) / 128., _input.dtype
    elif _input.dtype == np.int16:
        return _input / 32768., _input.dtype
    elif _input.dtype == np.int32:
        return _input / 2147483648., _input.dtype
    raise ValueError('Unsupported wave file format')
233
-
234
-
235
def from_float(_input, dtype):
    """
    Converts a float waveform back to the given dtype. This is the inverse of to_float().

    Samples that fall outside the range of an integer dtype once rescaled are clipped to that
    range rather than left to wrap around when cast.

    :param _input: a waveform as a numpy array of floats
    :param dtype: the dtype to convert to: float64, float32, uint8, int16 or int32
    :return: the waveform as a numpy array of the given dtype. A float64 waveform is returned
    as is, without being copied.
    :raises ValueError: if the dtype is not one of those listed above
    """
    if dtype == np.float64:
        # Return the array alone, like every other branch does
        return _input
    elif dtype == np.float32:
        return _input.astype(np.float32)
    elif dtype == np.uint8:
        return np.clip((_input * 128) + 128, 0, 255).astype(np.uint8)
    elif dtype == np.int16:
        return np.clip(_input * 32768, -32768, 32767).astype(np.int16)
    elif dtype == np.int32:
        return np.clip(_input * 2147483648, -2147483648, 2147483647).astype(np.int32)
    raise ValueError('Unsupported wave file format')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/profiler.py DELETED
@@ -1,45 +0,0 @@
1
- from time import perf_counter as timer
2
- from collections import OrderedDict
3
- import numpy as np
4
-
5
-
6
class Profiler:
    """
    Measures the time elapsed between successive calls to tick(), grouped by name, and
    periodically prints the mean and standard deviation of these times for each name.
    """

    def __init__(self, summarize_every=5, disabled=False):
        """
        :param summarize_every: the number of measurements a name must have accumulated for a
        summary to be printed (and all measurements discarded) on its next tick
        :param disabled: if True, tick() does nothing
        """
        self.last_tick = timer()
        self.logs = OrderedDict()
        self.summarize_every = summarize_every
        self.disabled = disabled

    def tick(self, name):
        """
        Logs under the given name the time elapsed since the previous tick (or since the last
        call to reset_timer()), then restarts the timer.

        :param name: the name to file the measurement under
        """
        if self.disabled:
            return

        # Log the time needed to execute that function
        deltas = self.logs.setdefault(name, [])
        if len(deltas) >= self.summarize_every:
            self.summarize()
            # The lists are cleared in place, so <deltas> still refers to the list in the logs
            self.purge_logs()
        deltas.append(timer() - self.last_tick)

        self.reset_timer()

    def purge_logs(self):
        """Discards all measurements. The names are kept."""
        for deltas in self.logs.values():
            deltas.clear()

    def reset_timer(self):
        """Restarts the timer, so that the next tick measures from now."""
        self.last_tick = timer()

    def summarize(self):
        """
        Prints the mean and standard deviation in milliseconds of the measurements of each
        name. Does nothing if nothing has been logged yet.
        """
        if not self.logs:
            return

        n = max(map(len, self.logs.values()))
        assert n == self.summarize_every
        print("\nAverage execution time over %d steps:" % n)

        name_msgs = ["%s (%d/%d):" % (name, len(deltas), n) for name, deltas in self.logs.items()]
        pad = max(map(len, name_msgs))
        for name_msg, deltas in zip(name_msgs, self.logs.values()):
            if deltas:
                print(" %s mean: %4.0fms std: %4.0fms" %
                      (name_msg.ljust(pad), np.mean(deltas) * 1000, np.std(deltas) * 1000))
            else:
                # A name that was not ticked since the last purge has no measurements. Taking
                # the mean of an empty list would print NaN and trigger a numpy warning.
                print(" %s no measurements" % name_msg.ljust(pad))
        print("", flush=True)
45
-