lorneluo committed on
Commit
60c7461
·
1 Parent(s): 75cd846

add inst.py

Browse files
Files changed (2) hide show
  1. args.py +419 -0
  2. inst.py +100 -0
args.py ADDED
@@ -0,0 +1,419 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ import yaml
4
+ from addict import Dict
5
+
6
+ from ml_collections import ConfigDict
7
+
8
+ from UVR import DENOISER_MODEL_PATH, DEVERBER_MODEL_PATH, MDX_C_CONFIG_PATH
9
+ from gui_data.constants import DEMUCS_2_SOURCE, DEMUCS_2_SOURCE_MAPPER, VR_ARCH_TYPE, MDX_ARCH_TYPE, DEMUCS_ARCH_TYPE
10
+
11
# Module-level caches, one per architecture; cached_source_callback searches
# their keys for a model name and returns the associated value.
vr_cache_source_mapper = {}
mdx_cache_source_mapper = {}
demucs_cache_source_mapper = {}


def cached_source_callback(process_method, model_name=None):
    """Look up cached sources for a model in the cache of one architecture.

    Args:
        process_method: Architecture identifier; compared against
            VR_ARCH_TYPE, MDX_ARCH_TYPE and DEMUCS_ARCH_TYPE to pick the
            module-level cache to search.
        model_name: Name to search for. A cache key matches when
            ``model_name in key`` is true.

    Returns:
        A ``(model, sources)`` tuple holding the last matching cache key and
        its value, or ``(None, None)`` when nothing matches, when
        ``model_name`` is ``None``, or when ``process_method`` is not one of
        the three known architectures.
    """
    # Nothing to search for. Previously ``None in key`` raised TypeError as
    # soon as a cache contained a string key.
    if model_name is None:
        return None, None

    # Default to an empty mapping so an unrecognised process_method yields
    # (None, None) instead of an UnboundLocalError.
    mapper = {}
    if process_method == VR_ARCH_TYPE:
        mapper = vr_cache_source_mapper
    if process_method == MDX_ARCH_TYPE:
        mapper = mdx_cache_source_mapper
    if process_method == DEMUCS_ARCH_TYPE:
        mapper = demucs_cache_source_mapper

    model, sources = None, None
    for key, value in mapper.items():
        if model_name in key:
            # No break: the last matching entry wins, as before.
            model, sources = key, value

    return model, sources
32
+
33
+
34
def cached_model_source_holder(self, process_method, sources, model_name=None):
    """Attach an updated source cache for the matching architecture to ``self``.

    For every architecture constant equal to ``process_method``, a new dict
    is built from the corresponding module-level cache plus the single entry
    ``{model_name: sources}`` and assigned to the attribute of the same name
    on ``self``. The module-level cache dicts themselves are not modified.

    NOTE(review): this is a module-level function that takes ``self`` as its
    first parameter -- confirm that callers really pass an object there.

    Args:
        self: Object that receives the ``*_cache_source_mapper`` attribute.
        process_method: Architecture identifier compared against
            VR_ARCH_TYPE, MDX_ARCH_TYPE and DEMUCS_ARCH_TYPE.
        sources: Value stored under ``model_name``.
        model_name: Key for the new cache entry.
    """
    new_entry = {model_name: sources}
    targets = (
        (VR_ARCH_TYPE, 'vr_cache_source_mapper', vr_cache_source_mapper),
        (MDX_ARCH_TYPE, 'mdx_cache_source_mapper', mdx_cache_source_mapper),
        (DEMUCS_ARCH_TYPE, 'demucs_cache_source_mapper', demucs_cache_source_mapper),
    )
    for arch_type, attr_name, base_mapper in targets:
        if process_method == arch_type:
            setattr(self, attr_name, {**base_mapper, **new_entry})
41
+
42
+
43
# MDX-C configuration file; the parsed result is used as the 'mdx_c_configs'
# value of the MDX23C model settings.
config_path = os.path.join(MDX_C_CONFIG_PATH, 'model_2_stem_full_band_8k.yaml')
# Read with an explicit encoding so parsing does not depend on the platform's
# locale default.
with open(config_path, encoding='utf-8') as f:
    # NOTE(review): yaml.FullLoader is kept as in the original; only use it on
    # trusted, project-shipped configuration files.
    config = ConfigDict(yaml.load(f, Loader=yaml.FullLoader))
46
+
47
def _common_model_data():
    """Return a fresh dict of the model settings whose keys all three presets share.

    Values are the ones most presets use; each preset overrides what differs.
    A new dict (with new list objects) is created on every call so the
    presets never share mutable values.
    """
    return {
        'DENOISER_MODEL': DENOISER_MODEL_PATH,
        'DEVERBER_MODEL': DEVERBER_MODEL_PATH,
        'all_models': [],
        'bv_model_rebalance': 0,
        'compensate': None,
        'demucs_4_stem_added_count': 0,
        'demucs_source_list': [],
        'demucs_stem_count': 0,
        'demucs_stems': 'All Stems',
        'deverb_vocal_opt': 'Vocals',
        'device_set': 'Default',
        'ensemble_primary_stem': 'Vocals',
        'ensemble_secondary_stem': 'Instrumental',
        'is_4_stem_ensemble': False,
        'is_bv_model': False,
        'is_change_def': False,
        'is_demucs_4_stem_secondaries': False,
        'is_demucs_combine_stems': True,
        'is_demucs_pre_proc_model_inst_mix': False,
        'is_denoise': False,
        'is_denoise_model': False,
        'is_deverb_vocals': False,
        'is_dry_check': False,
        'is_ensemble_mode': True,
        'is_get_hash_dir_only': False,
        'is_gpu_conversion': 0,
        'is_inst_only_voc_splitter': False,
        'is_invert_spec': False,
        'is_karaoke': False,
        'is_match_frequency_pitch': True,
        'is_mdx_c': False,
        'is_mdx_c_seg_def': False,
        'is_mdx_ckpt': False,
        'is_mdx_combine_stems': True,
        'is_mixer_mode': False,
        'is_multi_stem_ensemble': False,
        'is_normalization': False,
        'is_pitch_change': False,
        'is_pre_proc_model': False,
        'is_primary_model_primary_stem_only': False,
        'is_primary_model_secondary_stem_only': False,
        'is_primary_stem_only': False,
        'is_save_inst_vocal_splitter': False,
        'is_save_vocal_only': False,
        'is_sec_bv_rebalance': False,
        'is_secondary_model': False,
        'is_secondary_model_activated': False,
        'is_secondary_stem_only': False,
        'is_use_opencl': False,
        'is_vocal_split_model': False,
        'is_vocal_split_model_activated': False,
        'is_vr_51_model': False,
        'manual_download_Button': None,
        'mdx_batch_size': 1,
        'mdx_c_configs': None,
        'mdx_dim_f_set': None,
        'mdx_dim_t_set': None,
        'mdx_model_stems': [],
        'mdx_n_fft_scale_set': None,
        'mdx_stem_count': 1,
        'mdxnet_stem_select': 'All Stems',
        'model_capacity': (32, 128),
        'model_samplerate': 44100,
        'model_status': True,
        'mp3_bit_set': '320k',
        'overlap': 0.25,
        'overlap_mdx': 'Default',
        'overlap_mdx23': 8,
        'pre_proc_model': None,
        'pre_proc_model_activated': False,
        'primary_model_primary_stem': None,
        'primary_stem': 'Vocals',
        'primary_stem_native': None,
        'save_format': 'WAV',
        'secondary_model': None,
        'secondary_model_4_stem': [],
        'secondary_model_4_stem_model_names_list': [],
        'secondary_model_4_stem_names': [],
        'secondary_model_4_stem_scale': [],
        'secondary_model_bass': None,
        'secondary_model_drums': None,
        'secondary_model_other': None,
        'secondary_model_scale': None,
        'secondary_model_scale_bass': None,
        'secondary_model_scale_drums': None,
        'secondary_model_scale_other': None,
        'secondary_stem': 'Instrumental',
        'semitone_shift': 0.0,
        'vocal_split_model': None,
        'wav_type_set': 'PCM_16',
    }


def _build_process_data(model_name, model_data, audio_file_base):
    """Return the process-data dict for one preset.

    Args:
        model_name: Value stored under 'model_name'.
        model_data: Plain dict of model settings; wrapped in ``Dict``.
        audio_file_base: Value stored under 'audio_file_base'.
    """
    return {
        'model_name': model_name,
        'model_data': Dict(model_data),
        # NOTE(review): 'export_path', 'audio_file_base' and 'audio_file' are
        # machine-specific defaults; run_ensemble_models in inst.py replaces
        # all three before each run.
        'export_path': '/Users/taoluo/Downloads/Ensembled_Outputs_1702201464',
        'audio_file_base': audio_file_base,
        'audio_file': '/Users/taoluo/Downloads/test_audio.mp3',

        # Console stand-ins: progress and messages are simply printed.
        'set_progress_bar': lambda step, inference_iterations=0: print(
            f"iteration {inference_iterations} of step #{step}"),
        # Default is '' for every preset; two presets previously defaulted to
        # the literal placeholder string 'base_text', which got printed.
        'write_to_console': lambda progress_text, base_text='': print(
            f"{progress_text} {base_text}"),
        'process_iteration': lambda iteration: iteration + 1,
        'cached_source_callback': cached_source_callback,
        'cached_model_source_holder': cached_model_source_holder,

        'list_all_models': ['MDX23C-8KFFT-InstVoc_HQ', 'UVR-MDX-NET-Voc_FT',
                            'htdemucs_ft'],
        'is_ensemble_master': True,
        'is_4_stem_ensemble': False,
    }


# NOTE(review): the 'mixer_path', 'model_hash_dir' and 'model_path' values
# below are absolute paths on one developer machine and will not resolve
# elsewhere -- consider deriving them from the project's path constants.

# --- MDX23C-8KFFT-InstVoc_HQ (process_method 'MDX-Net', is_mdx_c True) ------
mdx23c_8kfft_instvoc_hq_model_data = {
    **_common_model_data(),
    'chunks': 0,
    'is_mdx_c': True,
    'margin': 44100,
    'mdx_c_configs': config,
    'mdx_model_stems': ['Vocals', 'Instrumental'],
    'mdx_segment_size': 256,
    'mdx_stem_count': 2,
    'mdxnet_stem_select': 'Vocals',
    'mixer_path': '/Users/taoluo/Workspace/github/ultimatevocalremovergui/models/MDX_Net_Models/mixer_val.ckpt',
    'model_and_process_tag': 'MDX-Net: MDX23C-InstVoc HQ',
    'model_basename': 'MDX23C-8KFFT-InstVoc_HQ',
    'model_data': {'config_yaml': 'model_2_stem_full_band_8k.yaml'},
    'model_hash': '99b6ceaae542265a3b6d657bf9fde79f',
    'model_hash_dir': '/Users/taoluo/Workspace/github/ultimatevocalremovergui/models/MDX_Net_Models/model_data/99b6ceaae542265a3b6d657bf9fde79f.json',
    'model_name': 'MDX23C-InstVoc HQ',
    'model_path': '/Users/taoluo/Workspace/github/ultimatevocalremovergui/models/MDX_Net_Models/MDX23C-8KFFT-InstVoc_HQ.ckpt',
    'process_method': 'MDX-Net',
}

mdx23c_8kfft_instvoc_hq_process_data = _build_process_data(
    'MDX23C Model: MDX23C-InstVoc HQ',
    mdx23c_8kfft_instvoc_hq_model_data,
    '1_test_audio_MDX23C-8KFFT-InstVoc_HQ',
)

# --- UVR-MDX-NET-Voc_FT (process_method 'MDX-Net', is_mdx_c False) ----------
uvr_mdx_net_voc_ft_model_data = {
    **_common_model_data(),
    'chunks': 0,
    'compensate': 1.021,
    'margin': 44100,
    'window_size': 512,
    'mdx_dim_f_set': 3072,
    'mdx_dim_t_set': 8,
    'mdx_n_fft_scale_set': 7680,
    'mdx_segment_size': 256,
    'mixer_path': '/Users/taoluo/Workspace/github/ultimatevocalremovergui/models/MDX_Net_Models/mixer_val.ckpt',
    'model_and_process_tag': 'MDX-Net: UVR-MDX-NET-Voc_FT',
    'model_basename': 'UVR-MDX-NET-Voc_FT',
    'model_data': {'compensate': 1.021,
                   'mdx_dim_f_set': 3072,
                   'mdx_dim_t_set': 8,
                   'mdx_n_fft_scale_set': 7680,
                   'primary_stem': 'Vocals'},
    'model_hash': '77d07b2667ddf05b9e3175941b4454a0',
    'model_hash_dir': '/Users/taoluo/Workspace/github/ultimatevocalremovergui/models/MDX_Net_Models/model_data/77d07b2667ddf05b9e3175941b4454a0.json',
    'model_name': 'UVR-MDX-NET-Voc_FT',
    'model_path': '/Users/taoluo/Workspace/github/ultimatevocalremovergui/models/MDX_Net_Models/UVR-MDX-NET-Voc_FT.onnx',
    'primary_stem_native': 'Vocals',
    'process_method': 'MDX-Net',
}

uvr_mdx_net_voc_ft_process_data = _build_process_data(
    'MDX-Net Model: UVR-MDX-NET Voc FT',
    uvr_mdx_net_voc_ft_model_data,
    '1_test_audio_UVR-MDX-NET-Voc_FT',
)

# --- htdemucs_ft (process_method 'Demucs') ----------------------------------
# This preset deliberately has no 'chunks', 'margin', 'mdx_segment_size',
# 'model_data' or 'model_hash' keys, exactly like the original literal.
htdemucs_ft_model_data = {
    **_common_model_data(),
    'chunks_demucs': 0,
    'demucs_source_list': DEMUCS_2_SOURCE,
    'demucs_source_map': DEMUCS_2_SOURCE_MAPPER,
    'demucs_stem_count': 2,
    'demucs_version': 'v4',
    'is_chunk_demucs': False,
    'is_split_mode': True,
    'margin_demucs': 44100,
    'mixer_path': '/Users/taoluo/Workspace/github/ultimatevocalremovergui/lib_v5/mixer.ckpt',
    'model_and_process_tag': 'Demucs: v4 | htdemucs_ft',
    'model_basename': 'htdemucs_ft',
    'model_hash_dir': None,
    'model_name': 'v4 | htdemucs_ft',
    'model_path': '/Users/taoluo/Workspace/github/ultimatevocalremovergui/models/Demucs_Models/v3_v4_repo/htdemucs_ft.yaml',
    'primary_stem': None,
    'process_method': 'Demucs',
    'secondary_stem': None,
    'segment': 'Default',
    'shifts': 2,
}

htdemucs_ft_process_data = _build_process_data(
    'Demucs v4: htdemucs_ft',
    htdemucs_ft_model_data,
    '1_test_audio_htdemucs_ft',
)
inst.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import hashlib
2
+ import os
3
+ import shutil
4
+ import sys
5
+ from datetime import datetime
6
+ from pathlib import Path
7
+
8
+ import click
9
+ import yaml
10
+ from ml_collections import ConfigDict
11
+
12
+ from UVR import ModelData, AudioTools, Ensembler, DENOISER_MODEL_PATH, DEVERBER_MODEL_PATH, MDX_MODELS_DIR, \
13
+ MDX_MIXER_PATH, load_model_hash_data, MDX_MODEL_NAME_SELECT, model_hash_table, MDX_HASH_DIR, MDX_C_CONFIG_PATH
14
+ from args import mdx23c_8kfft_instvoc_hq_process_data, htdemucs_ft_process_data, uvr_mdx_net_voc_ft_process_data
15
+ from download import download_model, get_model_file
16
+ from gui_data.constants import VR_ARCH_TYPE, MDX_ARCH_TYPE, DEMUCS_ARCH_TYPE, ENSEMBLE_MODE, TIME_STRETCH, \
17
+ MANUAL_ENSEMBLE, MATCH_INPUTS, ALIGN_INPUTS, ALL_STEMS, DEFAULT, VOCAL_STEM, MP3_BIT_RATES, WAV, DEMUCS_2_SOURCE, \
18
+ DEMUCS_2_SOURCE_MAPPER, INST_STEM, CKPT, ONNX, MDX_POP_NFFT, secondary_stem, PRIMARY_STEM, SECONDARY_STEM
19
+ from lib_v5 import spec_utils
20
+ from separate import (
21
+ SeperateDemucs, SeperateMDX, SeperateMDXC, SeperateVR, # Model-related
22
+ save_format, clear_gpu_cache, # Utility functions
23
+ cuda_available, mps_available, # directml_available,
24
+ )
25
+
26
+
27
def run_ensemble_models(audio_path, export_path, format=WAV, clean=True):
    """Separate ``audio_path`` with every preset model and ensemble the results.

    Each preset (MDX23C, UVR-MDX-NET Voc FT, htdemucs_ft) is run into a
    temporary sub-directory of ``export_path``; the per-model vocal and
    instrumental files are then combined with ``ensemble`` into
    ``<stem>.vocal.<ext>`` and ``<stem>.instrumental.<ext>`` inside
    ``export_path``.

    Args:
        audio_path: Path of the input audio file.
        export_path: Directory for the final files; created if missing.
        format: Format name; its lower-cased form is used as the file
            extension of the intermediate and final files.
        clean: When true, the temporary directory is removed afterwards,
            including when processing fails.

    Returns:
        ``(instrumental_final_path, vocals_final_path)`` as ``Path`` objects.

    Raises:
        ValueError: If a preset's ``process_method`` is not one of
            VR_ARCH_TYPE, MDX_ARCH_TYPE or DEMUCS_ARCH_TYPE.
    """
    # Local import keeps this block self-contained (tempfile is not imported
    # at the top of the file).
    import tempfile

    start = datetime.now()
    # Shallow copies: the per-run paths written below must not leak into the
    # module-level presets imported from args.
    process_datas = [
        dict(preset)
        for preset in (
            mdx23c_8kfft_instvoc_hq_process_data,
            uvr_mdx_net_voc_ft_process_data,
            htdemucs_ft_process_data,
        )
    ]

    # download models
    for process_data in process_datas:
        download_model(process_data['model_name'])

    # create folder
    os.makedirs(export_path, exist_ok=True)
    # The timestamp avoids ':' and spaces (invalid or awkward in file names on
    # some platforms); mkdtemp guarantees a unique directory even when two
    # runs start within the same second.
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    temp_export_path = tempfile.mkdtemp(prefix=f'uvr5_{timestamp}_', dir=export_path)
    print('temp_export_path', temp_export_path)

    input_stem = Path(audio_path).stem
    extension = format.lower()
    instrumental_export_paths = []
    vocals_export_paths = []

    try:
        for process_data in process_datas:
            current_model = process_data['model_data']
            audio_file_base = input_stem + '_' + current_model.model_basename
            process_data['export_path'] = temp_export_path
            process_data['audio_file_base'] = audio_file_base
            process_data['audio_file'] = audio_path

            if current_model.process_method == VR_ARCH_TYPE:
                seperator = SeperateVR(current_model, process_data)
            elif current_model.process_method == MDX_ARCH_TYPE:
                if current_model.is_mdx_c:
                    seperator = SeperateMDXC(current_model, process_data)
                else:
                    seperator = SeperateMDX(current_model, process_data)
            elif current_model.process_method == DEMUCS_ARCH_TYPE:
                seperator = SeperateDemucs(current_model, process_data,
                                           vocal_stem_path=(audio_path, audio_file_base))
            else:
                raise ValueError(
                    f'model not found: unsupported process method {current_model.process_method!r}')

            seperator.seperate()

            # File names the separators are expected to have written.
            instrumental_path = Path(temp_export_path) / f"{audio_file_base}_(Instrumental).{extension}"
            vocals_path = Path(temp_export_path) / f"{audio_file_base}_(Vocals).{extension}"
            instrumental_export_paths.append(str(instrumental_path))
            vocals_export_paths.append(str(vocals_path))

        # merge each model outputs
        vocals_final_path = Path(export_path) / f"{input_stem}.vocal.{extension}"
        instrumental_final_path = Path(export_path) / f"{input_stem}.instrumental.{extension}"

        # NOTE(review): ``format`` is not forwarded to ensemble(), which thus
        # uses its own default -- confirm this is intended for non-WAV formats.
        ensemble(vocals_export_paths, vocals_final_path)
        ensemble(instrumental_export_paths, instrumental_final_path)
    finally:
        if clean:
            # Best-effort cleanup that also runs when a separator raised, so
            # failed runs do not leave intermediate files behind.
            shutil.rmtree(temp_export_path, ignore_errors=True)

    print('instrumental_final_path', instrumental_final_path)
    print('vocals_final_path', vocals_final_path)
    print(f'Finished in {datetime.now() - start}')
    return instrumental_final_path, vocals_final_path
81
+
82
+
83
def ensemble(stem_outputs, stem_save_path, format=WAV):
    """Combine several stem files into one and apply the requested save format.

    Args:
        stem_outputs: Input files handed to ``spec_utils.ensemble_inputs``.
        stem_save_path: Destination path of the combined file.
        format: Format name forwarded to ``save_format``.
    """
    spec_utils.ensemble_inputs(
        stem_outputs,
        'Average',  # algorithm
        True,  # is_normalization
        'PCM_16',
        stem_save_path,
        is_wave=True,
    )
    save_format(stem_save_path, format, '320k')
88
+
89
+
90
+ # /Users/taoluo/Downloads/test/kimk_audio_MDX23C-8KFFT-InstVoc_HQ_(Instrumental).WAV
91
+ #
92
if __name__ == '__main__':
    # Usage: python inst.py <audio_file>
    # The results are written next to the input file.
    if len(sys.argv) < 2:
        # Clear usage message instead of a bare IndexError.
        raise SystemExit(f'usage: {sys.argv[0]} <audio_file>')

    audio_file = sys.argv[1]
    if not os.path.isfile(audio_file):
        raise FileNotFoundError(f"File {audio_file} not exist.")

    # abspath first: dirname() of a bare file name is '', which is not a
    # usable output directory.
    output_dir = os.path.dirname(os.path.abspath(audio_file))
    print(output_dir)
    run_ensemble_models(audio_file, output_dir)