|
import os |
|
|
|
import yaml |
|
from addict import Dict |
|
|
|
from ml_collections import ConfigDict |
|
|
|
from UVR import DENOISER_MODEL_PATH, DEVERBER_MODEL_PATH, MDX_C_CONFIG_PATH |
|
from gui_data.constants import DEMUCS_2_SOURCE, DEMUCS_2_SOURCE_MAPPER, VR_ARCH_TYPE, MDX_ARCH_TYPE, DEMUCS_ARCH_TYPE |
|
|
|
# One source cache per architecture family (VR, MDX-Net, Demucs).  Each starts
# out as its own empty dict; `cached_source_callback` reads from them.
(
    vr_cache_source_mapper,
    mdx_cache_source_mapper,
    demucs_cache_source_mapper,
) = {}, {}, {}
|
|
|
|
|
def cached_source_callback(process_method, model_name=None):
    """Look up cached sources for a model in the cache of one architecture.

    Args:
        process_method: Architecture identifier.  It is compared against
            VR_ARCH_TYPE, MDX_ARCH_TYPE and DEMUCS_ARCH_TYPE to choose which
            module-level ``*_cache_source_mapper`` dict is searched.
        model_name: Text to look for inside the cache keys (substring match
            via ``in``).  ``None`` means "nothing to look up".

    Returns:
        A ``(model, sources)`` tuple holding the matching cache key and its
        stored value.  ``(None, None)`` is returned when no key matches, when
        ``model_name`` is ``None``, or when ``process_method`` is not one of
        the three known architectures.  If several keys contain
        ``model_name``, the last one in iteration order wins.
    """
    # Start from an empty cache so an unrecognised process_method yields a
    # plain cache miss instead of an UnboundLocalError on `mapper`.
    mapper = {}
    for arch_type, arch_mapper in (
        (VR_ARCH_TYPE, vr_cache_source_mapper),
        (MDX_ARCH_TYPE, mdx_cache_source_mapper),
        (DEMUCS_ARCH_TYPE, demucs_cache_source_mapper),
    ):
        if process_method == arch_type:
            mapper = arch_mapper

    model, sources = None, None

    # `None in "some key"` raises TypeError, so the default argument must not
    # reach the substring test below.
    if model_name is None:
        return model, sources

    for key, value in mapper.items():
        if model_name in key:
            model, sources = key, value

    return model, sources
|
|
|
|
|
def cached_model_source_holder(self, process_method, sources, model_name=None):
    """Store ``sources`` for ``model_name`` on ``self`` for the matching architecture.

    For each of VR_ARCH_TYPE / MDX_ARCH_TYPE / DEMUCS_ARCH_TYPE that equals
    ``process_method``, a new dict is built from the corresponding module-level
    ``*_cache_source_mapper`` plus ``{model_name: sources}`` and assigned to
    the same-named attribute on ``self``.  The module-level dicts themselves
    are left untouched, and nothing happens for any other ``process_method``.

    NOTE(review): this is a module-level function that takes ``self``, and it
    merges from the module-level mappers rather than from
    ``self.*_cache_source_mapper``.  As written, entries do not accumulate
    between calls and the dicts read by ``cached_source_callback`` are never
    updated.  It looks like a method lifted out of a class -- confirm the
    intended behaviour against the caller before changing it.
    """
    targets = (
        (VR_ARCH_TYPE, 'vr_cache_source_mapper', vr_cache_source_mapper),
        (MDX_ARCH_TYPE, 'mdx_cache_source_mapper', mdx_cache_source_mapper),
        (DEMUCS_ARCH_TYPE, 'demucs_cache_source_mapper', demucs_cache_source_mapper),
    )
    for arch_type, attr_name, module_mapper in targets:
        if process_method == arch_type:
            setattr(self, attr_name, {**module_mapper, model_name: sources})
|
|
|
|
|
# Load the MDX-C YAML configuration used as 'mdx_c_configs' by the MDX23C
# model-data fixture in this module.
config_path = os.path.join(MDX_C_CONFIG_PATH, 'model_2_stem_full_band_8k.yaml')

# Explicit UTF-8 so that decoding does not depend on the platform's locale
# default encoding.
with open(config_path, encoding='utf-8') as f:
    # NOTE(review): FullLoader is kept from the original.  The file is read
    # from the project's own config directory, so it is presumably trusted;
    # prefer yaml.safe_load if this path can ever point at external input.
    config = ConfigDict(yaml.load(f, Loader=yaml.FullLoader))
|
|
|
# Model-data fixture for the MDX23C-InstVoc HQ model (process_method 'MDX-Net',
# is_mdx_c True).  Keys are kept in their original order.
# NOTE(review): 'mixer_path', 'model_hash_dir' and 'model_path' are hard-coded
# absolute paths under /Users/taoluo/... and need adjusting on another machine.
mdx23c_8kfft_instvoc_hq_model_data = {
    'DENOISER_MODEL': DENOISER_MODEL_PATH,
    'DEVERBER_MODEL': DEVERBER_MODEL_PATH,
    'all_models': [],
    'bv_model_rebalance': 0,
    'chunks': 0,
    'compensate': None,
    # demucs_* settings
    'demucs_4_stem_added_count': 0,
    'demucs_source_list': [],
    'demucs_stem_count': 0,
    'demucs_stems': 'All Stems',
    'deverb_vocal_opt': 'Vocals',
    'device_set': 'Default',
    'ensemble_primary_stem': 'Vocals',
    'ensemble_secondary_stem': 'Instrumental',
    # is_* switches
    'is_4_stem_ensemble': False,
    'is_bv_model': False,
    'is_change_def': False,
    'is_demucs_4_stem_secondaries': False,
    'is_demucs_combine_stems': True,
    'is_demucs_pre_proc_model_inst_mix': False,
    'is_denoise': False,
    'is_denoise_model': False,
    'is_deverb_vocals': False,
    'is_dry_check': False,
    'is_ensemble_mode': True,
    'is_get_hash_dir_only': False,
    'is_gpu_conversion': 0,
    'is_inst_only_voc_splitter': False,
    'is_invert_spec': False,
    'is_karaoke': False,
    'is_match_frequency_pitch': True,
    'is_mdx_c': True,
    'is_mdx_c_seg_def': False,
    'is_mdx_ckpt': False,
    'is_mdx_combine_stems': True,
    'is_mixer_mode': False,
    'is_multi_stem_ensemble': False,
    'is_normalization': False,
    'is_pitch_change': False,
    'is_pre_proc_model': False,
    'is_primary_model_primary_stem_only': False,
    'is_primary_model_secondary_stem_only': False,
    'is_primary_stem_only': False,
    'is_save_inst_vocal_splitter': False,
    'is_save_vocal_only': False,
    'is_sec_bv_rebalance': False,
    'is_secondary_model': False,
    'is_secondary_model_activated': False,
    'is_secondary_stem_only': False,
    'is_use_opencl': False,
    'is_vocal_split_model': False,
    'is_vocal_split_model_activated': False,
    'is_vr_51_model': False,
    'manual_download_Button': None,
    'margin': 44100,
    # mdx_* settings; 'mdx_c_configs' is the ConfigDict loaded at module level
    'mdx_batch_size': 1,
    'mdx_c_configs': config,
    'mdx_dim_f_set': None,
    'mdx_dim_t_set': None,
    'mdx_model_stems': ['Vocals', 'Instrumental'],
    'mdx_n_fft_scale_set': None,
    'mdx_segment_size': 256,
    'mdx_stem_count': 2,
    'mdxnet_stem_select': 'Vocals',
    'mixer_path': '/Users/taoluo/Workspace/github/ultimatevocalremovergui/models/MDX_Net_Models/mixer_val.ckpt',
    # model_* identification and location
    'model_and_process_tag': 'MDX-Net: MDX23C-InstVoc HQ',
    'model_basename': 'MDX23C-8KFFT-InstVoc_HQ',
    'model_capacity': (32, 128),
    'model_data': {
        'config_yaml': 'model_2_stem_full_band_8k.yaml',
    },
    'model_hash': '99b6ceaae542265a3b6d657bf9fde79f',
    'model_hash_dir': '/Users/taoluo/Workspace/github/ultimatevocalremovergui/models/MDX_Net_Models/model_data/99b6ceaae542265a3b6d657bf9fde79f.json',
    'model_name': 'MDX23C-InstVoc HQ',
    'model_path': '/Users/taoluo/Workspace/github/ultimatevocalremovergui/models/MDX_Net_Models/MDX23C-8KFFT-InstVoc_HQ.ckpt',
    'model_samplerate': 44100,
    'model_status': True,
    'mp3_bit_set': '320k',
    'overlap': 0.25,
    'overlap_mdx': 'Default',
    'overlap_mdx23': 8,
    'pre_proc_model': None,
    'pre_proc_model_activated': False,
    'primary_model_primary_stem': None,
    'primary_stem': 'Vocals',
    'primary_stem_native': None,
    'process_method': 'MDX-Net',
    'save_format': 'WAV',
    # secondary_model* settings (none configured)
    'secondary_model': None,
    'secondary_model_4_stem': [],
    'secondary_model_4_stem_model_names_list': [],
    'secondary_model_4_stem_names': [],
    'secondary_model_4_stem_scale': [],
    'secondary_model_bass': None,
    'secondary_model_drums': None,
    'secondary_model_other': None,
    'secondary_model_scale': None,
    'secondary_model_scale_bass': None,
    'secondary_model_scale_drums': None,
    'secondary_model_scale_other': None,
    'secondary_stem': 'Instrumental',
    'semitone_shift': 0.0,
    'vocal_split_model': None,
    'wav_type_set': 'PCM_16',
}
|
|
|
# Process-data fixture for the MDX23C-InstVoc HQ model: the model-data dict
# wrapped in an addict `Dict`, input/output locations, print-based callbacks
# and the two cache helpers defined in this module.
# NOTE(review): 'export_path' and 'audio_file' are hard-coded absolute paths
# under /Users/taoluo/Downloads and need adjusting on another machine.
mdx23c_8kfft_instvoc_hq_process_data = {
    'model_name': 'MDX23C Model: MDX23C-InstVoc HQ',
    'model_data': Dict(mdx23c_8kfft_instvoc_hq_model_data),
    'export_path': '/Users/taoluo/Downloads/Ensembled_Outputs_1702201464',
    'audio_file_base': '1_test_audio_MDX23C-8KFFT-InstVoc_HQ',
    'audio_file': '/Users/taoluo/Downloads/test_audio.mp3',

    # Callbacks: progress and console output are simply printed.
    'set_progress_bar': (
        lambda step, inference_iterations=0:
            print(f"iteration {inference_iterations} of step #{step}")
    ),
    'write_to_console': (
        lambda progress_text, base_text='':
            print(f"{progress_text} {base_text}")
    ),
    'process_iteration': lambda iteration: iteration + 1,
    'cached_source_callback': cached_source_callback,
    'cached_model_source_holder': cached_model_source_holder,

    'list_all_models': [
        'MDX23C-8KFFT-InstVoc_HQ',
        'UVR-MDX-NET-Voc_FT',
        'htdemucs_ft',
    ],
    'is_ensemble_master': True,
    'is_4_stem_ensemble': False,
}
|
|
|
# Model-data fixture for the UVR-MDX-NET-Voc_FT model (process_method
# 'MDX-Net', is_mdx_c False, .onnx model file).  Keys are kept in their
# original order.
# NOTE(review): 'mixer_path', 'model_hash_dir' and 'model_path' are hard-coded
# absolute paths under /Users/taoluo/... and need adjusting on another machine.
uvr_mdx_net_voc_ft_model_data = {
    'DENOISER_MODEL': DENOISER_MODEL_PATH,
    'DEVERBER_MODEL': DEVERBER_MODEL_PATH,
    'all_models': [],
    'bv_model_rebalance': 0,
    'chunks': 0,
    'compensate': 1.021,
    # demucs_* settings
    'demucs_4_stem_added_count': 0,
    'demucs_source_list': [],
    'demucs_stem_count': 0,
    'demucs_stems': 'All Stems',
    'deverb_vocal_opt': 'Vocals',
    'device_set': 'Default',
    'ensemble_primary_stem': 'Vocals',
    'ensemble_secondary_stem': 'Instrumental',
    # is_* switches
    'is_4_stem_ensemble': False,
    'is_bv_model': False,
    'is_change_def': False,
    'is_demucs_4_stem_secondaries': False,
    'is_demucs_combine_stems': True,
    'is_demucs_pre_proc_model_inst_mix': False,
    'is_denoise': False,
    'is_denoise_model': False,
    'is_deverb_vocals': False,
    'is_dry_check': False,
    'is_ensemble_mode': True,
    'is_get_hash_dir_only': False,
    'is_gpu_conversion': 0,
    'is_inst_only_voc_splitter': False,
    'is_invert_spec': False,
    'is_karaoke': False,
    'is_match_frequency_pitch': True,
    'is_mdx_c': False,
    'is_mdx_c_seg_def': False,
    'is_mdx_ckpt': False,
    'is_mdx_combine_stems': True,
    'is_mixer_mode': False,
    'is_multi_stem_ensemble': False,
    'is_normalization': False,
    'is_pitch_change': False,
    'is_pre_proc_model': False,
    'is_primary_model_primary_stem_only': False,
    'is_primary_model_secondary_stem_only': False,
    'is_primary_stem_only': False,
    'is_save_inst_vocal_splitter': False,
    'is_save_vocal_only': False,
    'is_sec_bv_rebalance': False,
    'is_secondary_model': False,
    'is_secondary_model_activated': False,
    'is_secondary_stem_only': False,
    'is_use_opencl': False,
    'is_vocal_split_model': False,
    'is_vocal_split_model_activated': False,
    'is_vr_51_model': False,
    'manual_download_Button': None,
    'margin': 44100,
    # mdx_* settings
    'mdx_batch_size': 1,
    # 'window_size' sits outside the alphabetical run; its position is kept so
    # the dict's key order is unchanged.
    'window_size': 512,
    'mdx_c_configs': None,
    'mdx_dim_f_set': 3072,
    'mdx_dim_t_set': 8,
    'mdx_model_stems': [],
    'mdx_n_fft_scale_set': 7680,
    'mdx_segment_size': 256,
    'mdx_stem_count': 1,
    'mdxnet_stem_select': 'All Stems',
    'mixer_path': '/Users/taoluo/Workspace/github/ultimatevocalremovergui/models/MDX_Net_Models/mixer_val.ckpt',
    # model_* identification and location
    'model_and_process_tag': 'MDX-Net: UVR-MDX-NET-Voc_FT',
    'model_basename': 'UVR-MDX-NET-Voc_FT',
    'model_capacity': (32, 128),
    # Same values as the top-level 'compensate', 'mdx_dim_f_set',
    # 'mdx_dim_t_set', 'mdx_n_fft_scale_set' and 'primary_stem' entries.
    'model_data': {
        'compensate': 1.021,
        'mdx_dim_f_set': 3072,
        'mdx_dim_t_set': 8,
        'mdx_n_fft_scale_set': 7680,
        'primary_stem': 'Vocals',
    },
    'model_hash': '77d07b2667ddf05b9e3175941b4454a0',
    'model_hash_dir': '/Users/taoluo/Workspace/github/ultimatevocalremovergui/models/MDX_Net_Models/model_data/77d07b2667ddf05b9e3175941b4454a0.json',
    'model_name': 'UVR-MDX-NET-Voc_FT',
    'model_path': '/Users/taoluo/Workspace/github/ultimatevocalremovergui/models/MDX_Net_Models/UVR-MDX-NET-Voc_FT.onnx',
    'model_samplerate': 44100,
    'model_status': True,
    'mp3_bit_set': '320k',
    'overlap': 0.25,
    'overlap_mdx': 'Default',
    'overlap_mdx23': 8,
    'pre_proc_model': None,
    'pre_proc_model_activated': False,
    'primary_model_primary_stem': None,
    'primary_stem': 'Vocals',
    'primary_stem_native': 'Vocals',
    'process_method': 'MDX-Net',
    'save_format': 'WAV',
    # secondary_model* settings (none configured)
    'secondary_model': None,
    'secondary_model_4_stem': [],
    'secondary_model_4_stem_model_names_list': [],
    'secondary_model_4_stem_names': [],
    'secondary_model_4_stem_scale': [],
    'secondary_model_bass': None,
    'secondary_model_drums': None,
    'secondary_model_other': None,
    'secondary_model_scale': None,
    'secondary_model_scale_bass': None,
    'secondary_model_scale_drums': None,
    'secondary_model_scale_other': None,
    'secondary_stem': 'Instrumental',
    'semitone_shift': 0.0,
    'vocal_split_model': None,
    'wav_type_set': 'PCM_16',
}
|
|
|
# Process-data fixture for the UVR-MDX-NET-Voc_FT model: the model-data dict
# wrapped in an addict `Dict`, input/output locations, print-based callbacks
# and the two cache helpers defined in this module.
# NOTE(review): 'export_path' and 'audio_file' are hard-coded absolute paths
# under /Users/taoluo/Downloads and need adjusting on another machine.
uvr_mdx_net_voc_ft_process_data = {
    'model_name': 'MDX-Net Model: UVR-MDX-NET Voc FT',
    'model_data': Dict(uvr_mdx_net_voc_ft_model_data),
    'export_path': '/Users/taoluo/Downloads/Ensembled_Outputs_1702201464',
    'audio_file_base': '1_test_audio_UVR-MDX-NET-Voc_FT',
    'audio_file': '/Users/taoluo/Downloads/test_audio.mp3',

    # Callbacks: progress and console output are simply printed.
    'set_progress_bar': lambda step, inference_iterations=0: print(
        f"iteration {inference_iterations} of step #{step}"),
    # base_text defaults to '' (as in mdx23c_8kfft_instvoc_hq_process_data) so
    # a one-argument call does not print the literal word "base_text".
    'write_to_console': lambda progress_text, base_text='': print(
        f"{progress_text} {base_text}"),
    'process_iteration': lambda iteration: iteration + 1,
    'cached_source_callback': cached_source_callback,
    'cached_model_source_holder': cached_model_source_holder,

    'list_all_models': ['MDX23C-8KFFT-InstVoc_HQ', 'UVR-MDX-NET-Voc_FT', 'htdemucs_ft'],
    'is_ensemble_master': True,
    'is_4_stem_ensemble': False,
}
|
|
|
# Model-data fixture for the Demucs v4 htdemucs_ft model (process_method
# 'Demucs').  Keys are kept in their original order.
# NOTE(review): 'mixer_path' and 'model_path' are hard-coded absolute paths
# under /Users/taoluo/... and need adjusting on another machine.
htdemucs_ft_model_data = {
    'DENOISER_MODEL': DENOISER_MODEL_PATH,
    'DEVERBER_MODEL': DEVERBER_MODEL_PATH,
    'all_models': [],
    'bv_model_rebalance': 0,
    'chunks_demucs': 0,
    'compensate': None,
    # demucs_* settings; source list and map come from gui_data.constants
    'demucs_4_stem_added_count': 0,
    'demucs_source_list': DEMUCS_2_SOURCE,
    'demucs_source_map': DEMUCS_2_SOURCE_MAPPER,
    'demucs_stem_count': 2,
    'demucs_stems': 'All Stems',
    'demucs_version': 'v4',
    'deverb_vocal_opt': 'Vocals',
    'device_set': 'Default',
    'ensemble_primary_stem': 'Vocals',
    'ensemble_secondary_stem': 'Instrumental',
    # is_* switches
    'is_4_stem_ensemble': False,
    'is_bv_model': False,
    'is_change_def': False,
    'is_chunk_demucs': False,
    'is_demucs_4_stem_secondaries': False,
    'is_demucs_combine_stems': True,
    'is_demucs_pre_proc_model_inst_mix': False,
    'is_denoise': False,
    'is_denoise_model': False,
    'is_deverb_vocals': False,
    'is_dry_check': False,
    'is_ensemble_mode': True,
    'is_get_hash_dir_only': False,
    'is_gpu_conversion': 0,
    'is_inst_only_voc_splitter': False,
    'is_invert_spec': False,
    'is_karaoke': False,
    'is_match_frequency_pitch': True,
    'is_mdx_c': False,
    'is_mdx_c_seg_def': False,
    'is_mdx_ckpt': False,
    'is_mdx_combine_stems': True,
    'is_mixer_mode': False,
    'is_multi_stem_ensemble': False,
    'is_normalization': False,
    'is_pitch_change': False,
    'is_pre_proc_model': False,
    'is_primary_model_primary_stem_only': False,
    'is_primary_model_secondary_stem_only': False,
    'is_primary_stem_only': False,
    'is_save_inst_vocal_splitter': False,
    'is_save_vocal_only': False,
    'is_sec_bv_rebalance': False,
    'is_secondary_model': False,
    'is_secondary_model_activated': False,
    'is_secondary_stem_only': False,
    'is_split_mode': True,
    'is_use_opencl': False,
    'is_vocal_split_model': False,
    'is_vocal_split_model_activated': False,
    'is_vr_51_model': False,
    'manual_download_Button': None,
    'margin_demucs': 44100,
    # mdx_* settings
    'mdx_batch_size': 1,
    'mdx_c_configs': None,
    'mdx_dim_f_set': None,
    'mdx_dim_t_set': None,
    'mdx_model_stems': [],
    'mdx_n_fft_scale_set': None,
    'mdx_stem_count': 1,
    'mdxnet_stem_select': 'All Stems',
    'mixer_path': '/Users/taoluo/Workspace/github/ultimatevocalremovergui/lib_v5/mixer.ckpt',
    # model_* identification and location
    'model_and_process_tag': 'Demucs: v4 | htdemucs_ft',
    'model_basename': 'htdemucs_ft',
    'model_capacity': (32, 128),
    'model_hash_dir': None,
    'model_name': 'v4 | htdemucs_ft',
    'model_path': '/Users/taoluo/Workspace/github/ultimatevocalremovergui/models/Demucs_Models/v3_v4_repo/htdemucs_ft.yaml',
    'model_samplerate': 44100,
    'model_status': True,
    'mp3_bit_set': '320k',
    'overlap': 0.25,
    'overlap_mdx': 'Default',
    'overlap_mdx23': 8,
    'pre_proc_model': None,
    'pre_proc_model_activated': False,
    'primary_model_primary_stem': None,
    'primary_stem': None,
    'primary_stem_native': None,
    'process_method': 'Demucs',
    'save_format': 'WAV',
    # secondary_model* settings (none configured)
    'secondary_model': None,
    'secondary_model_4_stem': [],
    'secondary_model_4_stem_model_names_list': [],
    'secondary_model_4_stem_names': [],
    'secondary_model_4_stem_scale': [],
    'secondary_model_bass': None,
    'secondary_model_drums': None,
    'secondary_model_other': None,
    'secondary_model_scale': None,
    'secondary_model_scale_bass': None,
    'secondary_model_scale_drums': None,
    'secondary_model_scale_other': None,
    'secondary_stem': None,
    'segment': 'Default',
    'semitone_shift': 0.0,
    'shifts': 2,
    'vocal_split_model': None,
    'wav_type_set': 'PCM_16',
}
|
|
|
# Process-data fixture for the Demucs v4 htdemucs_ft model: the model-data
# dict wrapped in an addict `Dict`, input/output locations, print-based
# callbacks and the two cache helpers defined in this module.
# NOTE(review): 'export_path' and 'audio_file' are hard-coded absolute paths
# under /Users/taoluo/Downloads and need adjusting on another machine.
htdemucs_ft_process_data = {
    'model_name': 'Demucs v4: htdemucs_ft',
    'model_data': Dict(htdemucs_ft_model_data),
    'export_path': '/Users/taoluo/Downloads/Ensembled_Outputs_1702201464',
    'audio_file_base': '1_test_audio_htdemucs_ft',
    'audio_file': '/Users/taoluo/Downloads/test_audio.mp3',

    # Callbacks: progress and console output are simply printed.
    'set_progress_bar': lambda step, inference_iterations=0: print(
        f"iteration {inference_iterations} of step #{step}"),
    # base_text defaults to '' (as in mdx23c_8kfft_instvoc_hq_process_data) so
    # a one-argument call does not print the literal word "base_text".
    'write_to_console': lambda progress_text, base_text='': print(
        f"{progress_text} {base_text}"),
    'process_iteration': lambda iteration: iteration + 1,
    'cached_source_callback': cached_source_callback,
    'cached_model_source_holder': cached_model_source_holder,

    'list_all_models': ['MDX23C-8KFFT-InstVoc_HQ', 'UVR-MDX-NET-Voc_FT', 'htdemucs_ft'],
    'is_ensemble_master': True,
    'is_4_stem_ensemble': False,
}
|
|