Commit 41b53f8 by lovemefan
1 Parent(s): 5118749

upload onnx model files

uvronnx/onnx/uvr-sim.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ceeb5f59af63e70ae9ef131844c2cf123b1bbec75f6866e633f7f3efee0bada7
+ size 127044627
uvronnx/src/__pycache__/config.cpython-38.pyc ADDED
Binary file (939 Bytes).
 
uvronnx/src/__pycache__/ortInferSession.cpython-38.pyc ADDED
Binary file (3.52 kB).
 
uvronnx/src/config.py ADDED
@@ -0,0 +1,65 @@
+ # -*- coding:utf-8 -*-
+ # @FileName :config.py
+ # @Time :2023/8/2 10:54
+ # @Author :lovemefan
+ # @Email :[email protected]
+
+ UVR_CONFIG = {
+     "bins": 672,
+     "unstable_bins": 8,
+     "reduction_bins": 637,
+     "band": {
+         1: {
+             "sr": 7350,
+             "hl": 80,
+             "n_fft": 640,
+             "crop_start": 0,
+             "crop_stop": 85,
+             "lpf_start": 25,
+             "lpf_stop": 53,
+             "res_type": "polyphase"
+         },
+         2: {
+             "sr": 7350,
+             "hl": 80,
+             "n_fft": 320,
+             "crop_start": 4,
+             "crop_stop": 87,
+             "hpf_start": 25,
+             "hpf_stop": 12,
+             "lpf_start": 31,
+             "lpf_stop": 62,
+             "res_type": "polyphase"
+         },
+         3: {
+             "sr": 14700,
+             "hl": 160,
+             "n_fft": 512,
+             "crop_start": 17,
+             "crop_stop": 216,
+             "hpf_start": 48,
+             "hpf_stop": 24,
+             "lpf_start": 139,
+             "lpf_stop": 210,
+             "res_type": "polyphase"
+         },
+         4: {
+             "sr": 44100,
+             "hl": 480,
+             "n_fft": 960,
+             "crop_start": 78,
+             "crop_stop": 383,
+             "hpf_start": 130,
+             "hpf_stop": 86,
+             "res_type": "kaiser_fast"
+         }
+     },
+     "sr": 44100,
+     "pre_filter_start": 668,
+     "pre_filter_stop": 672,
+     "mid_side": False,
+     "mid_side_b": False,
+     "mid_side_b2": False,
+     "stereo_w": False,
+     "reverse": False,
+ }
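
As a sanity check on these values: each band contributes crop_stop - crop_start rows to the combined spectrogram built by combine_spectrograms (in spec_utils.py below), and those widths must add up to the model's input height. A minimal sketch, assuming the package is importable:

    from uvronnx.src.config import UVR_CONFIG

    # Per-band bin widths: 85 + 83 + 199 + 305 == 672 == UVR_CONFIG["bins"]
    widths = [b["crop_stop"] - b["crop_start"] for b in UVR_CONFIG["band"].values()]
    assert sum(widths) == UVR_CONFIG["bins"]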
uvronnx/src/ortInferSession.py ADDED
@@ -0,0 +1,90 @@
+ # -*- coding:utf-8 -*-
+ # @FileName :ortInferSession.py
+ # @Time :2023/8/3 00:20
+ # @Author :lovemefan
+ # @Email :[email protected]
+
+ from pathlib import Path
+
+ import numpy as np
+ from onnxruntime import (GraphOptimizationLevel, InferenceSession,
+                          SessionOptions, get_available_providers, get_device)
+
+ from uvronnx.src.utils.logger import logger
+
+
+ class UVROrtInferSession:
+     def __init__(self, config):
+         sess_opt = SessionOptions()
+         sess_opt.log_severity_level = 4
+         sess_opt.enable_cpu_mem_arena = False
+         sess_opt.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
+
+         cuda_ep = "CUDAExecutionProvider"
+         cpu_ep = "CPUExecutionProvider"
+         cpu_provider_options = {
+             "arena_extend_strategy": "kSameAsRequested",
+         }
+
+         EP_list = []
+         if (
+             config["use_cuda"]
+             and get_device() == "GPU"
+             and cuda_ep in get_available_providers()
+         ):
+             EP_list = [(cuda_ep, config[cuda_ep])]
+         EP_list.append((cpu_ep, cpu_provider_options))
+
+         self._verify_model(config["model_path"])
+         logger.info(f"Loading onnx model at {str(config['model_path'])}")
+         self.session = InferenceSession(
+             str(config["model_path"]), sess_options=sess_opt, providers=EP_list
+         )
+
+         if config["use_cuda"] and cuda_ep not in self.session.get_providers():
+             logger.warning(
+                 f"{cuda_ep} is not available in the current environment; "
+                 f"inference automatically falls back to {cpu_ep}.\n"
+                 "Please ensure the installed onnxruntime-gpu version "
+                 "matches your CUDA and cuDNN versions; "
+                 "you can check their compatibility on the official site: "
+                 "https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html"
+             )
+
+     def __call__(self, input_chunk: np.ndarray) -> np.ndarray:
+         input_dict = {
+             "input": input_chunk,
+         }
+         return self.session.run(None, input_dict)[0]
+
+     def get_input_names(self):
+         return [v.name for v in self.session.get_inputs()]
+
+     def get_output_names(self):
+         return [v.name for v in self.session.get_outputs()]
+
+     def get_character_list(self, key: str = "character"):
+         return self.meta_dict[key].splitlines()
+
+     def have_key(self, key: str = "character") -> bool:
+         # Caches the model's custom metadata; call this before get_character_list.
+         self.meta_dict = self.session.get_modelmeta().custom_metadata_map
+         return key in self.meta_dict
+
+     @staticmethod
+     def _verify_model(model_path):
+         model_path = Path(model_path)
+         if not model_path.exists():
+             raise FileNotFoundError(f"{model_path} does not exist.")
+         if not model_path.is_file():
+             raise FileExistsError(f"{model_path} is not a file.")
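
For reference, a minimal usage sketch of this wrapper. The model path is illustrative, and the input layout (batch, stereo channels, bins + 1, frames) is an assumption inferred from combine_spectrograms and the 512-frame window used in uvr.py, not a verified model signature:

    import numpy as np
    from uvronnx.src.ortInferSession import UVROrtInferSession

    session = UVROrtInferSession({
        "model_path": "uvronnx/onnx/uvr-sim.onnx",  # illustrative path
        "use_cuda": False,
    })
    window = np.zeros((1, 2, 673, 512), dtype=np.float32)  # assumed input layout
    mask = session(window)  # returns the first output of session.run
    print(mask.shape)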
uvronnx/src/utils/AudioHelper.py ADDED
@@ -0,0 +1,87 @@
+ # -*- coding:utf-8 -*-
+ # @FileName :AudioHelper.py
+ # @Time :2023/8/3 00:34
+ # @Author :lovemefan
+ # @Email :[email protected]
+
+ import array
+ import struct
+
+ import numpy as np
+
+
+ class AudioReader:
+     """
+     read audio from a sanic request
+     """
+
+     def __init__(self):
+         pass
+
+     @staticmethod
+     def get_info(path: str):
+         with open(path, "rb") as f:
+             (
+                 name,
+                 data_lengths,
+                 _,
+                 _,
+                 _,
+                 _,
+                 channels,
+                 sample_rate,
+                 bit_rate,
+                 block_length,
+                 sample_bit,
+                 _,
+                 pcm_length,
+             ) = struct.unpack_from("<4sL4s4sLHHLLHH4sL", f.read(44))
+         assert sample_rate == 16000, "sample rate must be 16000"
+         nframes = pcm_length // (channels * 2)
+         return nframes
+
+     @staticmethod
+     def read_wav_bytes(data: bytes):
+         """
+         convert bytes into an array of pcm_s16le data
+         :param data: PCM format bytes
+         :return:
+         """
+         # header of the wav file
+         info = data[:44]
+         frames = data[44:]
+         (
+             name,
+             data_lengths,
+             _,
+             _,
+             _,
+             _,
+             channels,
+             sample_rate,
+             bit_rate,
+             block_length,
+             sample_bit,
+             _,
+             pcm_length,
+         ) = struct.unpack_from("<4sL4s4sLHHLLHH4sL", info)
+         # short array: each element is 16 bit
+         data = AudioReader.read_pcm_byte(frames)
+         return data, sample_rate
+
+     @staticmethod
+     def read_wav_file(audio_path: str):
+         with open(audio_path, "rb") as f:
+             data = f.read()
+         return AudioReader.read_wav_bytes(data)
+
+     @staticmethod
+     def read_pcm_byte(data: bytes):
+         short_array = array.array("h")
+         short_array.frombytes(data)
+         data = np.array(short_array, dtype="float16") / (1 << 15)
+         return data
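
A note on the header parsing above: the format string "<4sL4s4sLHHLLHH4sL" unpacks the canonical 44-byte little-endian RIFF/WAVE header (RIFF tag and chunk size, WAVE and fmt tags, fmt chunk size, audio format, channels, sample rate, byte rate, block align, bits per sample, data tag, data size). A quick check of the layout:

    import struct

    # The canonical PCM WAV header parsed by AudioReader is exactly 44 bytes.
    assert struct.calcsize("<4sL4s4sLHHLLHH4sL") == 44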
uvronnx/src/utils/__pycache__/AudioHelper.cpython-38.pyc ADDED
Binary file (2.09 kB).
 
uvronnx/src/utils/__pycache__/logger.cpython-38.pyc ADDED
Binary file (9.73 kB).
 
uvronnx/src/utils/__pycache__/spec_utils.cpython-38.pyc ADDED
Binary file (10.3 kB).
 
uvronnx/src/utils/logger.py ADDED
@@ -0,0 +1,299 @@
+ # -*- coding:utf-8 -*-
+ # @FileName :logger.py
+ # @Time :2023/8/1 10:44
+ # @Author :lovemefan
+ # @Email :[email protected]
+ """LOGGER Module"""
+ import logging
+ import logging.config
+ import logging.handlers
+ import os
+ import sys
+ from functools import wraps
+ from typing import List, Tuple, Union
+
+ logger_list = []
+ LEVEL = ('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL')
+ _LOG_FILE_DIR = os.path.expanduser('~/.cache/speech-webui/')
+ LOCAL_DEFAULT_LOG_FILE_DIR = os.path.join(
+     os.getenv("LOCAL_DEFAULT_PATH", _LOG_FILE_DIR), 'log')
+
+ DEFAULT_FILEHANDLER_FORMAT = '[%(levelname)s] %(asctime)s ' \
+                              '[%(pathname)s:%(lineno)d] %(funcName)s: %(message)s'
+ DEFAULT_STDOUT_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+ DEFAULT_REDIRECT_FILE_NAME = 'mindspore.log'
+
+
+ class StreamRedirector:
+     """Stream redirector for logs."""
+
+     def __init__(self, source_stream, target_stream):
+         """Redirects the source stream to the target stream.
+
+         Args:
+             source_stream: Source stream.
+             target_stream: Target stream.
+         """
+         super(StreamRedirector, self).__init__()
+
+         self.source_stream = source_stream
+         self.target_stream = target_stream
+
+         self.save_source_stream_fd = os.dup(self.source_stream.fileno())
+
+     def __call__(self, func):
+
+         @wraps(func)
+         def wrapper(*args, **kwargs):
+             self.start()
+             func(*args, **kwargs)
+             self.stop()
+
+         return wrapper
+
+     def __enter__(self):
+         self.start()
+
+     def __exit__(self, exc_type, exc_val, exc_tb):
+         self.stop()
+
+     def start(self):
+         """Start redirecting the source stream."""
+         self.source_stream.flush()
+         os.dup2(self.target_stream.fileno(), self.source_stream.fileno())
+
+     def stop(self):
+         """Stop redirecting and restore the source stream."""
+         self.source_stream.flush()
+         os.dup2(self.save_source_stream_fd, self.source_stream.fileno())
+         self.target_stream.flush()
+
+
+ def validate_nodes_devices_input(var_name: str, var):
+     """Check a list of nodes or devices.
+
+     Args:
+         var_name (str): Variable name.
+         var: The variable to be checked.
+
+     Returns:
+         None
+     """
+     if not (var is None or isinstance(var, (list, tuple, dict))):
+         raise TypeError('The value of {} can be None or a value of type tuple, '
+                         'list, or dict.'.format(var_name))
+     if isinstance(var, (list, tuple)):
+         for item in var:
+             if not isinstance(item, int):
+                 raise TypeError('The elements of a variable of type list or '
+                                 'tuple must be of type int.')
+
+
+ def validate_level(var_name: str, var):
+     """Verify that the log level is correct.
+
+     Args:
+         var_name (str): Variable name.
+         var: The variable to be checked.
+
+     Returns:
+         None
+     """
+     if not isinstance(var, str):
+         raise TypeError('The format of {} must be of type str.'.format(var_name))
+     if var not in LEVEL:
+         raise ValueError('{}={} needs to be in {}'.format(var_name, var, LEVEL))
+
+
+ def validate_std_input_format(to_std: bool, stdout_nodes: Union[List, Tuple, None],
+                               stdout_devices: Union[List, Tuple, None], stdout_level: str):
+     """Validate the stdout-related inputs of the get_logger function."""
+     if not isinstance(to_std, bool):
+         raise TypeError('The format of to_std must be of type bool.')
+
+     validate_nodes_devices_input('stdout_nodes', stdout_nodes)
+     validate_nodes_devices_input('stdout_devices', stdout_devices)
+     validate_level('stdout_level', stdout_level)
+
+
+ def validate_file_input_format(file_level: Union[List[str], Tuple[str]], file_save_dir: str,
+                                append_rank_dir: bool, file_name: Union[List[str], Tuple[str]]):
+     """Validate the file-related inputs of the get_logger function."""
+     if not isinstance(file_level, (tuple, list)):
+         raise TypeError('The value of file_level should be a list or a tuple.')
+     for level in file_level:
+         validate_level('level in file_level', level)
+
+     if not len(file_level) == len(file_name):
+         raise ValueError('The length of file_level and file_name should be equal.')
+
+     if not isinstance(file_save_dir, str):
+         raise TypeError('The value of file_save_dir should be a value of type str.')
+
+     if not isinstance(append_rank_dir, bool):
+         raise TypeError('The value of append_rank_dir should be a value of type bool.')
+
+     if not isinstance(file_name, (tuple, list)):
+         raise TypeError('The value of file_name should be a list or a tuple.')
+     for name in file_name:
+         if not isinstance(name, str):
+             raise TypeError('The value of name in file_name should be a value of type str.')
+
+
+ def _convert_level(level: str) -> int:
+     """Convert a user log level string to a logging level.
+
+     Args:
+         level (str): User log level.
+
+     Returns:
+         level (int): Logging level.
+     """
+     level_convert = {
+         'DEBUG': logging.DEBUG,
+         'INFO': logging.INFO,
+         'WARNING': logging.WARNING,
+         'ERROR': logging.ERROR,
+         'CRITICAL': logging.CRITICAL
+     }
+     return level_convert.get(level, logging.INFO)
+
+
+ def get_logger(logger_name: str = 'uvr-onnx', **kwargs) -> logging.Logger:
+     """Get the logger. Both computing centers and bare-metal servers are
+     supported.
+
+     Args:
+         logger_name (str): Logger name.
+         kwargs (dict): Other inputs.
+             to_std (bool): If set to True, output the log to stdout.
+             stdout_level (str): The level of the log output to stdout.
+                 The options are DEBUG, INFO, WARNING, ERROR, CRITICAL.
+             stdout_format (str): Log format.
+             file_level (list[str] or tuple[str]): The levels of the log output to file.
+                 e.g. ['INFO', 'ERROR'] means the logger writes logs at or above
+                 INFO and ERROR to the corresponding files.
+                 The length of the list needs to be the same as the length of file_name.
+             file_save_dir (str): The folder where the log files are stored.
+             append_rank_dir (bool): Whether to add a folder with the format rank{}.
+             file_name (list[str] or tuple[str]): A list of output file names.
+             max_file_size (int): The maximum size of a single log file. Unit: MB.
+             max_num_of_files (int): The maximum number of files to save.
+
+     Returns:
+         logger (logging.Logger): Logger.
+     """
+     mf_logger = logging.getLogger(logger_name)
+     if logger_name in logger_list:
+         return mf_logger
+
+     to_std = kwargs.get('to_std', True)
+     stdout_nodes = kwargs.get('stdout_nodes', None)
+
+     def get_stdout_devices():
+         if os.getenv("STDOUT_DEVICES"):
+             devices = os.getenv("STDOUT_DEVICES")
+             if devices.startswith(("(", "[")) and devices.endswith((")", "]")):
+                 devices = devices[1:-1]
+             devices = tuple(map(lambda x: int(x.strip()), devices.split(",")))
+         else:
+             devices = kwargs.get('stdout_devices', None)
+         return devices
+
+     stdout_devices = get_stdout_devices()
+     stdout_level = kwargs.get('stdout_level', 'INFO')
+     stdout_format = kwargs.get('stdout_format', '')
+     file_level = kwargs.get('file_level', ('INFO', 'ERROR'))
+     file_save_dir = kwargs.get('file_save_dir', '')
+     append_rank_dir = kwargs.get('append_rank_dir', True)
+     file_name = kwargs.get('file_name', ('info.log', 'error.log'))
+     max_file_size = kwargs.get('max_file_size', 50)
+     max_num_of_files = kwargs.get('max_num_of_files', 5)
+
+     validate_std_input_format(to_std, stdout_nodes, stdout_devices, stdout_level)
+     validate_file_input_format(file_level, file_save_dir, append_rank_dir, file_name)
+
+     if to_std:
+         if not stdout_format:
+             stdout_format = DEFAULT_STDOUT_FORMAT
+         stream_handler = logging.StreamHandler(sys.stdout)
+         stream_handler.setLevel(_convert_level(stdout_level))
+         stream_formatter = logging.Formatter(stdout_format)
+         stream_handler.setFormatter(stream_formatter)
+         mf_logger.addHandler(stream_handler)
+
+     logging_level = []
+     for level in file_level:
+         logging_level.append(_convert_level(level))
+
+     if not file_save_dir:
+         file_save_dir = LOCAL_DEFAULT_LOG_FILE_DIR
+
+     file_path = []
+     for name in file_name:
+         path = os.path.join(file_save_dir, name)
+         path = os.path.realpath(path)
+         base_dir = os.path.dirname(path)
+         if not os.path.exists(base_dir):
+             os.makedirs(base_dir, exist_ok=True)
+         file_path.append(path)
+
+     max_file_size = max_file_size * 1024 * 1024
+
+     file_formatter = logging.Formatter(DEFAULT_FILEHANDLER_FORMAT)
+     for i, level in enumerate(logging_level):
+         file_handler = logging.handlers.RotatingFileHandler(filename=file_path[i],
+                                                             maxBytes=max_file_size,
+                                                             backupCount=max_num_of_files)
+         file_handler.setLevel(level)
+         file_handler.setFormatter(file_formatter)
+         mf_logger.addHandler(file_handler)
+
+     mf_logger.setLevel(_convert_level('INFO'))
+     mf_logger.propagate = False
+     logger_list.append(logger_name)
+
+     return mf_logger
+
+
+ class _LogActionOnce:
+     """
+     A wrapper that temporarily replaces a logger's warning method with a no-op,
+     so that a given warning is only logged once instead of repeatedly.
+
+     Args:
+         m_logger (logging.Logger): The logger object.
+         key (str): A key identifying the warning to deduplicate.
+         no_warning (bool): If True, suppress the warning entirely.
+     """
+     is_logged = dict()
+
+     def __init__(self, m_logger, key, no_warning=False):
+         self.logger = m_logger
+         self.key = key
+         self.no_warning = no_warning
+
+     def __call__(self, func):
+         def wrapper(*args, **kwargs):
+             if not hasattr(self.logger, 'warning'):
+                 return func(*args, **kwargs)
+
+             old_func = self.logger.warning
+             if self.no_warning or self.key in _LogActionOnce.is_logged:
+                 self.logger.warning = lambda x: x
+             else:
+                 _LogActionOnce.is_logged[self.key] = True
+             res = func(*args, **kwargs)
+             if hasattr(self.logger, 'warning'):
+                 self.logger.warning = old_func
+             return res
+
+         return wrapper
+
+
+ logger = get_logger()
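
A minimal usage sketch of get_logger (the directory and file names here are illustrative):

    from uvronnx.src.utils.logger import get_logger

    # INFO-and-above goes to info.log, ERROR-and-above to error.log under ./logs.
    log = get_logger('uvr-demo', file_save_dir='./logs',
                     file_level=('INFO', 'ERROR'), file_name=('info.log', 'error.log'))
    log.info('separation started')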
uvronnx/src/utils/spec_utils.py ADDED
@@ -0,0 +1,388 @@
+ # -*- coding:utf-8 -*-
+ # @FileName :spec_utils.py
+ # @Time :2023/8/2 17:16
+ # @Author :lovemefan
+ # @Email :[email protected]
+ import hashlib
+ import json
+ import math
+ import os
+
+ import librosa
+ import numpy as np
+
+
+ def crop_center(h1, h2):
+     h1_shape = h1.size()
+     h2_shape = h2.size()
+
+     if h1_shape[3] == h2_shape[3]:
+         return h1
+     elif h1_shape[3] < h2_shape[3]:
+         raise ValueError('h1_shape[3] must be greater than h2_shape[3]')
+
+     s_time = (h1_shape[3] - h2_shape[3]) // 2
+     e_time = s_time + h2_shape[3]
+     h1 = h1[:, :, :, s_time:e_time]
+
+     return h1
+
+
+ def wave_to_spectrogram(wave, hop_length, n_fft, mid_side=False, mid_side_b2=False, reverse=False):
+     if reverse:
+         wave_left = np.flip(np.asfortranarray(wave[0]))
+         wave_right = np.flip(np.asfortranarray(wave[1]))
+     elif mid_side:
+         wave_left = np.asfortranarray(np.add(wave[0], wave[1]) / 2)
+         wave_right = np.asfortranarray(np.subtract(wave[0], wave[1]))
+     elif mid_side_b2:
+         wave_left = np.asfortranarray(np.add(wave[1], wave[0] * .5))
+         wave_right = np.asfortranarray(np.subtract(wave[0], wave[1] * .5))
+     else:
+         wave_left = np.asfortranarray(wave[0])
+         wave_right = np.asfortranarray(wave[1])
+
+     spec_left = librosa.stft(wave_left, n_fft=n_fft, hop_length=hop_length)
+     spec_right = librosa.stft(wave_right, n_fft=n_fft, hop_length=hop_length)
+
+     spec = np.asfortranarray([spec_left, spec_right])
+
+     return spec
+
+
+ def wave_to_spectrogram_mt(wave, hop_length, n_fft, mid_side=False, mid_side_b2=False, reverse=False):
+     import threading
+
+     if reverse:
+         wave_left = np.flip(np.asfortranarray(wave[0]))
+         wave_right = np.flip(np.asfortranarray(wave[1]))
+     elif mid_side:
+         wave_left = np.asfortranarray(np.add(wave[0], wave[1]) / 2)
+         wave_right = np.asfortranarray(np.subtract(wave[0], wave[1]))
+     elif mid_side_b2:
+         wave_left = np.asfortranarray(np.add(wave[1], wave[0] * .5))
+         wave_right = np.asfortranarray(np.subtract(wave[0], wave[1] * .5))
+     else:
+         wave_left = np.asfortranarray(wave[0])
+         wave_right = np.asfortranarray(wave[1])
+
+     def run_thread(**kwargs):
+         global spec_left
+         spec_left = librosa.stft(**kwargs)
+
+     # compute the left channel in a thread while the right channel runs here
+     thread = threading.Thread(target=run_thread, kwargs={'y': wave_left, 'n_fft': n_fft, 'hop_length': hop_length})
+     thread.start()
+     spec_right = librosa.stft(wave_right, n_fft=n_fft, hop_length=hop_length)
+     thread.join()
+
+     spec = np.asfortranarray([spec_left, spec_right])
+
+     return spec
+
+
+ def combine_spectrograms(specs, param):
+     l = min([specs[i].shape[2] for i in specs])
+     spec_c = np.zeros(shape=(2, param['bins'] + 1, l), dtype=np.complex64)
+     offset = 0
+     bands_n = len(param['band'])
+
+     for d in range(1, bands_n + 1):
+         h = param['band'][d]['crop_stop'] - param['band'][d]['crop_start']
+         spec_c[:, offset:offset + h, :l] = specs[d][:, param['band'][d]['crop_start']:param['band'][d]['crop_stop'], :l]
+         offset += h
+
+     if offset > param['bins']:
+         raise ValueError('Too many bins')
+
+     # lowpass filter
+     if param['pre_filter_start'] > 0:  # and mp.param['band'][bands_n]['res_type'] in ['scipy', 'polyphase']:
+         if bands_n == 1:
+             spec_c = fft_lp_filter(spec_c, param['pre_filter_start'], param['pre_filter_stop'])
+         else:
+             gp = 1
+             for b in range(param['pre_filter_start'] + 1, param['pre_filter_stop']):
+                 g = math.pow(10, -(b - param['pre_filter_start']) * (3.5 - gp) / 20.0)
+                 gp = g
+                 spec_c[:, b, :] *= g
+
+     return np.asfortranarray(spec_c)
+
+
+ def spectrogram_to_image(spec, mode='magnitude'):
+     if mode == 'magnitude':
+         if np.iscomplexobj(spec):
+             y = np.abs(spec)
+         else:
+             y = spec
+         y = np.log10(y ** 2 + 1e-8)
+     elif mode == 'phase':
+         if np.iscomplexobj(spec):
+             y = np.angle(spec)
+         else:
+             y = spec
+
+     y -= y.min()
+     y *= 255 / y.max()
+     img = np.uint8(y)
+
+     if y.ndim == 3:
+         img = img.transpose(1, 2, 0)
+         img = np.concatenate([
+             np.max(img, axis=2, keepdims=True), img
+         ], axis=2)
+
+     return img
+
+
+ def reduce_vocal_aggressively(X, y, softmask):
+     v = X - y
+     y_mag_tmp = np.abs(y)
+     v_mag_tmp = np.abs(v)
+
+     v_mask = v_mag_tmp > y_mag_tmp
+     y_mag = np.clip(y_mag_tmp - v_mag_tmp * v_mask * softmask, 0, np.inf)
+
+     return y_mag * np.exp(1.j * np.angle(y))
+
+
+ def mask_silence(mag, ref, thres=0.2, min_range=64, fade_size=32):
+     if min_range < fade_size * 2:
+         raise ValueError('min_range must be >= fade_size * 2')
+
+     mag = mag.copy()
+
+     idx = np.where(ref.mean(axis=(0, 1)) < thres)[0]
+     starts = np.insert(idx[np.where(np.diff(idx) != 1)[0] + 1], 0, idx[0])
+     ends = np.append(idx[np.where(np.diff(idx) != 1)[0]], idx[-1])
+     uninformative = np.where(ends - starts > min_range)[0]
+     if len(uninformative) > 0:
+         starts = starts[uninformative]
+         ends = ends[uninformative]
+         old_e = None
+         for s, e in zip(starts, ends):
+             if old_e is not None and s - old_e < fade_size:
+                 s = old_e - fade_size * 2
+
+             if s != 0:
+                 weight = np.linspace(0, 1, fade_size)
+                 mag[:, :, s:s + fade_size] += weight * ref[:, :, s:s + fade_size]
+             else:
+                 s -= fade_size
+
+             if e != mag.shape[2]:
+                 weight = np.linspace(1, 0, fade_size)
+                 mag[:, :, e - fade_size:e] += weight * ref[:, :, e - fade_size:e]
+             else:
+                 e += fade_size
+
+             mag[:, :, s + fade_size:e - fade_size] += ref[:, :, s + fade_size:e - fade_size]
+             old_e = e
+
+     return mag
+
+
+ def align_wave_head_and_tail(a, b):
+     l = min([a[0].size, b[0].size])
+
+     return a[:, :l], b[:, :l]
+
+
+ def cache_or_load(mix_path, inst_path, mp):
+     mix_basename = os.path.splitext(os.path.basename(mix_path))[0]
+     inst_basename = os.path.splitext(os.path.basename(inst_path))[0]
+
+     cache_dir = 'mph{}'.format(hashlib.sha1(json.dumps(mp.param, sort_keys=True).encode('utf-8')).hexdigest())
+     mix_cache_dir = os.path.join('cache', cache_dir)
+     inst_cache_dir = os.path.join('cache', cache_dir)
+
+     os.makedirs(mix_cache_dir, exist_ok=True)
+     os.makedirs(inst_cache_dir, exist_ok=True)
+
+     mix_cache_path = os.path.join(mix_cache_dir, mix_basename + '.npy')
+     inst_cache_path = os.path.join(inst_cache_dir, inst_basename + '.npy')
+
+     if os.path.exists(mix_cache_path) and os.path.exists(inst_cache_path):
+         X_spec_m = np.load(mix_cache_path)
+         y_spec_m = np.load(inst_cache_path)
+     else:
+         X_wave, y_wave, X_spec_s, y_spec_s = {}, {}, {}, {}
+
+         for d in range(len(mp.param['band']), 0, -1):
+             bp = mp.param['band'][d]
+
+             if d == len(mp.param['band']):  # high-end band
+                 X_wave[d], _ = librosa.load(
+                     mix_path, sr=bp['sr'], mono=False, dtype=np.float32, res_type=bp['res_type'])
+                 y_wave[d], _ = librosa.load(
+                     inst_path, sr=bp['sr'], mono=False, dtype=np.float32, res_type=bp['res_type'])
+             else:  # lower bands
+                 X_wave[d] = librosa.resample(X_wave[d + 1], orig_sr=mp.param['band'][d + 1]['sr'],
+                                              target_sr=bp['sr'], res_type=bp['res_type'])
+                 y_wave[d] = librosa.resample(y_wave[d + 1], orig_sr=mp.param['band'][d + 1]['sr'],
+                                              target_sr=bp['sr'], res_type=bp['res_type'])
+
+             X_wave[d], y_wave[d] = align_wave_head_and_tail(X_wave[d], y_wave[d])
+
+             X_spec_s[d] = wave_to_spectrogram(X_wave[d], bp['hl'], bp['n_fft'], mp.param['mid_side'],
+                                               mp.param['mid_side_b2'], mp.param['reverse'])
+             y_spec_s[d] = wave_to_spectrogram(y_wave[d], bp['hl'], bp['n_fft'], mp.param['mid_side'],
+                                               mp.param['mid_side_b2'], mp.param['reverse'])
+
+         del X_wave, y_wave
+
+         X_spec_m = combine_spectrograms(X_spec_s, mp.param)
+         y_spec_m = combine_spectrograms(y_spec_s, mp.param)
+
+         if X_spec_m.shape != y_spec_m.shape:
+             raise ValueError('The combined spectrograms are different: ' + mix_path)
+
+         _, ext = os.path.splitext(mix_path)
+
+         np.save(mix_cache_path, X_spec_m)
+         np.save(inst_cache_path, y_spec_m)
+
+     return X_spec_m, y_spec_m
+
+
+ def spectrogram_to_wave(spec, hop_length, mid_side, mid_side_b2, reverse):
+     spec_left = np.asfortranarray(spec[0])
+     spec_right = np.asfortranarray(spec[1])
+
+     wave_left = librosa.istft(spec_left, hop_length=hop_length)
+     wave_right = librosa.istft(spec_right, hop_length=hop_length)
+
+     if reverse:
+         return np.asfortranarray([np.flip(wave_left), np.flip(wave_right)])
+     elif mid_side:
+         return np.asfortranarray([np.add(wave_left, wave_right / 2), np.subtract(wave_left, wave_right / 2)])
+     elif mid_side_b2:
+         return np.asfortranarray(
+             [np.add(wave_right / 1.25, .4 * wave_left), np.subtract(wave_left / 1.25, .4 * wave_right)])
+     else:
+         return np.asfortranarray([wave_left, wave_right])
+
+
+ def cmb_spectrogram_to_wave(spec_m, param, extra_bins_h=None, extra_bins=None):
+     wave_band = {}
+     bands_n = len(param['band'])
+     offset = 0
+
+     for d in range(1, bands_n + 1):
+         bp = param['band'][d]
+         spec_s = np.ndarray(shape=(2, bp['n_fft'] // 2 + 1, spec_m.shape[2]), dtype=complex)
+         h = bp['crop_stop'] - bp['crop_start']
+         spec_s[:, bp['crop_start']:bp['crop_stop'], :] = spec_m[:, offset:offset + h, :]
+
+         offset += h
+         if d == bands_n:  # higher
+             if extra_bins_h:  # if --high_end_process bypass
+                 max_bin = bp['n_fft'] // 2
+                 spec_s[:, max_bin - extra_bins_h:max_bin, :] = extra_bins[:, :extra_bins_h, :]
+             if bp['hpf_start'] > 0:
+                 spec_s = fft_hp_filter(spec_s, bp['hpf_start'], bp['hpf_stop'] - 1)
+             if bands_n == 1:
+                 wave = spectrogram_to_wave(spec_s, bp['hl'], param['mid_side'], param['mid_side_b2'],
+                                            param['reverse'])
+             else:
+                 wave = np.add(wave, spectrogram_to_wave(spec_s, bp['hl'], param['mid_side'], param['mid_side_b2'],
+                                                         param['reverse']))
+         else:
+             sr = param['band'][d + 1]['sr']
+             if d == 1:  # lower
+                 spec_s = fft_lp_filter(spec_s, bp['lpf_start'], bp['lpf_stop'])
+                 wave = librosa.resample(
+                     spectrogram_to_wave(spec_s, bp['hl'], param['mid_side'], param['mid_side_b2'],
+                                         param['reverse']), orig_sr=bp['sr'], target_sr=sr, res_type="sinc_fastest")
+             else:  # mid
+                 spec_s = fft_hp_filter(spec_s, bp['hpf_start'], bp['hpf_stop'] - 1)
+                 spec_s = fft_lp_filter(spec_s, bp['lpf_start'], bp['lpf_stop'])
+                 wave2 = np.add(wave, spectrogram_to_wave(spec_s, bp['hl'], param['mid_side'], param['mid_side_b2'],
+                                                          param['reverse']))
+                 wave = librosa.resample(wave2, orig_sr=bp['sr'], target_sr=sr, res_type='scipy')
+
+     return wave.T
+
+
+ def fft_lp_filter(spec, bin_start, bin_stop):
+     g = 1.0
+     for b in range(bin_start, bin_stop):
+         g -= 1 / (bin_stop - bin_start)
+         spec[:, b, :] = g * spec[:, b, :]
+
+     spec[:, bin_stop:, :] *= 0
+
+     return spec
+
+
+ def fft_hp_filter(spec, bin_start, bin_stop):
+     g = 1.0
+     for b in range(bin_start, bin_stop, -1):
+         g -= 1 / (bin_start - bin_stop)
+         spec[:, b, :] = g * spec[:, b, :]
+
+     spec[:, 0:bin_stop + 1, :] *= 0
+
+     return spec
+
+
+ def mirroring(a, spec_m, input_high_end, param):
+     if 'mirroring' == a:
+         mirror = np.flip(np.abs(
+             spec_m[:, param['pre_filter_start'] - 10 - input_high_end.shape[1]:param['pre_filter_start'] - 10,
+             :]), 1)
+         mirror = mirror * np.exp(1.j * np.angle(input_high_end))
+
+         return np.where(np.abs(input_high_end) <= np.abs(mirror), input_high_end, mirror)
+
+     if 'mirroring2' == a:
+         mirror = np.flip(np.abs(
+             spec_m[:, param['pre_filter_start'] - 10 - input_high_end.shape[1]:param['pre_filter_start'] - 10,
+             :]), 1)
+         mi = np.multiply(mirror, input_high_end * 1.7)
+
+         return np.where(np.abs(input_high_end) <= np.abs(mi), input_high_end, mi)
+
+
+ def ensembling(a, specs):
+     for i in range(1, len(specs)):
+         if i == 1:
+             spec = specs[0]
+
+         ln = min([spec.shape[2], specs[i].shape[2]])
+         spec = spec[:, :, :ln]
+         specs[i] = specs[i][:, :, :ln]
+
+         if 'min_mag' == a:
+             spec = np.where(np.abs(specs[i]) <= np.abs(spec), specs[i], spec)
+         if 'max_mag' == a:
+             spec = np.where(np.abs(specs[i]) >= np.abs(spec), specs[i], spec)
+
+     return spec
+
+
+ def stft(wave, nfft, hl):
+     wave_left = np.asfortranarray(wave[0])
+     wave_right = np.asfortranarray(wave[1])
+     spec_left = librosa.stft(wave_left, n_fft=nfft, hop_length=hl)
+     spec_right = librosa.stft(wave_right, n_fft=nfft, hop_length=hl)
+     spec = np.asfortranarray([spec_left, spec_right])
+
+     return spec
+
+
+ def istft(spec, hl):
+     spec_left = np.asfortranarray(spec[0])
+     spec_right = np.asfortranarray(spec[1])
+
+     wave_left = librosa.istft(spec_left, hop_length=hl)
+     wave_right = librosa.istft(spec_right, hop_length=hl)
+     wave = np.asfortranarray([wave_left, wave_right])
+
+     return wave
+
+
+ def make_padding(width, cropsize, offset):
+     left = offset
+     roi_size = cropsize - left * 2
+     if roi_size == 0:
+         roi_size = cropsize
+     right = roi_size - (width % roi_size) + left
+     return left, right, roi_size
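
For intuition about make_padding: uvr.py (below) slides a window of cropsize frames over the spectrogram and keeps only the central roi_size = cropsize - 2 * offset frames of each model output, so pad_l/pad_r extend the input just enough that every original frame falls in some window's center. A small worked check with the values UVRModel uses:

    from uvronnx.src.utils.spec_utils import make_padding

    window_size, offset = 512, 128   # UVRModel's settings
    n_frame = 1000                   # hypothetical spectrogram length
    pad_l, pad_r, roi_size = make_padding(n_frame, window_size, offset)
    # roi_size = 512 - 2*128 = 256; pad_r = 256 - (1000 % 256) + 128 = 152
    assert (pad_l, pad_r, roi_size) == (128, 152, 256)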
uvronnx/src/uvr.py ADDED
@@ -0,0 +1,121 @@
+ # -*- coding:utf-8 -*-
+ # @FileName :uvr.py
+ # @Time :2023/8/2 10:47
+ # @Author :lovemefan
+ # @Email :[email protected]
+ import os.path
+
+ import librosa
+ import numpy as np
+ from tqdm import tqdm
+
+ from uvronnx.src.config import UVR_CONFIG
+ from uvronnx.src.ortInferSession import UVROrtInferSession
+ from uvronnx.src.utils import spec_utils
+ from uvronnx.src.utils.AudioHelper import AudioReader
+ from uvronnx.src.utils.spec_utils import make_padding
+
+
+ class UVRModel:
+     def __init__(self, model_path=None):
+         project_dir = os.path.dirname(os.path.dirname(__file__))
+         model_path = model_path or os.path.join(project_dir, 'onnx/uvr-sim.onnx')
+         assert os.path.exists(model_path), f"{model_path} does not exist"
+
+         self.model = UVROrtInferSession({
+             'model_path': model_path,
+             'use_cuda': False
+         })
+         self.offset = 128
+         self.window_size = 512
+
+     @staticmethod
+     def preprocess(x_spec):
+         x_mag = np.abs(x_spec)
+         x_phase = np.angle(x_spec)
+         return x_mag, x_phase
+
+     def separate_offline(self, mixed_audio, sample_rate=44100):
+         if isinstance(mixed_audio, str):
+             mixed_audio, sample_rate = AudioReader.read_wav_file(mixed_audio)
+
+         x_wave, y_wave, x_spec_s, y_spec_s = {}, {}, {}, {}
+         bands_n = len(UVR_CONFIG['band'])
+         for d in range(bands_n, 0, -1):
+             bp = UVR_CONFIG['band'][d]
+             if d == bands_n:  # high-end band
+                 x_wave[d] = mixed_audio
+                 if x_wave[d].ndim == 1:
+                     x_wave[d] = np.asfortranarray([x_wave[d], x_wave[d]])
+             else:  # lower bands
+                 x_wave[d] = librosa.resample(x_wave[d + 1], orig_sr=UVR_CONFIG['band'][d + 1]['sr'],
+                                              target_sr=bp['sr'], res_type=bp['res_type'])
+             # STFT of the wave source
+             x_spec_s[d] = spec_utils.wave_to_spectrogram_mt(x_wave[d], bp['hl'], bp['n_fft'], UVR_CONFIG['mid_side'],
+                                                             UVR_CONFIG['mid_side_b2'], UVR_CONFIG['reverse'])
+             if d == bands_n:
+                 input_high_end_h = (bp['n_fft'] // 2 - bp['crop_stop']) + (
+                     UVR_CONFIG['pre_filter_stop'] - UVR_CONFIG['pre_filter_start'])
+                 input_high_end = x_spec_s[d][:, bp['n_fft'] // 2 - input_high_end_h:bp['n_fft'] // 2, :]
+
+         x_spec_m = spec_utils.combine_spectrograms(x_spec_s, UVR_CONFIG)
+
+         x_mag, x_phase = self.preprocess(x_spec_m)
+
+         coef = x_mag.max()
+         x_mag_pre = x_mag / coef
+
+         n_frame = x_mag_pre.shape[2]
+         pad_l, pad_r, roi_size = make_padding(n_frame, self.window_size, self.offset)
+         n_window = int(np.ceil(n_frame / roi_size))
+
+         x_mag_pad = np.pad(
+             x_mag_pre, ((0, 0), (0, 0), (pad_l, pad_r)), mode='constant')
+
+         preds = []
+         for i in tqdm(range(n_window)):
+             start = i * roi_size
+             x_mag_window = x_mag_pad[None, :, :, start:start + self.window_size]
+
+             h = self.model(x_mag_window)
+             # keep only the central roi_size frames; the window edges are discarded
+             pred = h[:, :, :, self.offset:-self.offset]
+             assert pred.shape[3] > 0
+
+             preds.append(pred[0])
+
+         pred = np.concatenate(preds, axis=2)
+         pred = pred[:, :, :n_frame] * coef
+         x_phase = np.exp(1.j * x_phase)
+
+         y_spec_m = pred * x_phase
+         v_spec_m = x_spec_m - y_spec_m
+
+         input_high_end_ = spec_utils.mirroring('mirroring', y_spec_m, input_high_end, UVR_CONFIG)
+         wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, UVR_CONFIG, input_high_end_h,
+                                                             input_high_end_)
+         print('instruments done')
+
+         input_high_end_ = spec_utils.mirroring('mirroring', v_spec_m, input_high_end, UVR_CONFIG)
+         wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, UVR_CONFIG, input_high_end_h, input_high_end_)
+
+         return wav_instrument, wav_vocals
+
+
+ if __name__ == '__main__':
+     model = UVRModel()
+     audio, sample_rate = AudioReader.read_wav_file('/Users/cenglingfan/Downloads/晴天.wav_-4key_fumin.wav')
+     instrument, vocal = model.separate_offline(audio, sample_rate)
+     print(instrument)
+     print(vocal)
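
To round this out, a minimal sketch for saving both stems, assuming the third-party soundfile package (not part of this commit); the input path and output names are illustrative. cmb_spectrogram_to_wave returns (n_samples, 2) float arrays at the 44100 Hz output rate:

    import soundfile as sf
    from uvronnx.src.uvr import UVRModel

    model = UVRModel()
    instrument, vocal = model.separate_offline('mix.wav')  # illustrative input
    sf.write('instrument.wav', instrument, 44100)
    sf.write('vocal.wav', vocal, 44100)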