uvr5 / args.py
lorneluo's picture
add inst.py
60c7461
raw
history blame
15.7 kB
import os
import yaml
from addict import Dict
from ml_collections import ConfigDict
from UVR import DENOISER_MODEL_PATH, DEVERBER_MODEL_PATH, MDX_C_CONFIG_PATH
from gui_data.constants import DEMUCS_2_SOURCE, DEMUCS_2_SOURCE_MAPPER, VR_ARCH_TYPE, MDX_ARCH_TYPE, DEMUCS_ARCH_TYPE
# Module-level caches, one per architecture, mapping a model name to the
# sources stored for it. cached_source_callback searches these dicts.
(vr_cache_source_mapper,
 mdx_cache_source_mapper,
 demucs_cache_source_mapper) = {}, {}, {}
def cached_source_callback(process_method, model_name=None):
    """Look up cached sources for a model in the per-architecture cache.

    Args:
        process_method: Architecture identifier. It is compared against
            VR_ARCH_TYPE, MDX_ARCH_TYPE and DEMUCS_ARCH_TYPE to select which
            module-level cache dict is searched.
        model_name: Name (or name fragment) to search for. A cache entry
            matches when ``model_name in key`` is true for its key.

    Returns:
        A ``(model, sources)`` tuple taken from the last matching cache
        entry. ``(None, None)`` is returned when nothing matches, when
        ``process_method`` is not one of the three known architectures, or
        when ``model_name`` is ``None``.
    """
    mapper = None
    if process_method == VR_ARCH_TYPE:
        mapper = vr_cache_source_mapper
    if process_method == MDX_ARCH_TYPE:
        mapper = mdx_cache_source_mapper
    if process_method == DEMUCS_ARCH_TYPE:
        mapper = demucs_cache_source_mapper

    # An unknown architecture used to leave `mapper` unbound and crash with
    # UnboundLocalError; a missing model name cannot match anything either.
    if mapper is None or model_name is None:
        return None, None

    model, sources = None, None
    for key, value in mapper.items():
        # cached_model_source_holder defaults model_name to None, so a None
        # key can exist; skip it instead of raising TypeError on `in`.
        if key is not None and model_name in key:
            model, sources = key, value

    return model, sources
def cached_model_source_holder(self, process_method, sources, model_name=None):
    """Record the sources of a model in the cache for its architecture.

    The entry is written to the module-level cache dict that
    cached_source_callback reads, so that a later lookup can find it.
    Previously only an attribute on ``self`` was assigned and the
    module-level dicts stayed empty. The attribute on ``self`` is still
    assigned (as a fresh dict holding every cached entry) for callers that
    read it.

    Args:
        self: Object that receives the ``*_cache_source_mapper`` attribute.
            NOTE(review): this is a plain module-level function, so the
            owner must be passed explicitly as the first positional
            argument; verify the call sites do so.
        process_method: Architecture identifier, compared against
            VR_ARCH_TYPE, MDX_ARCH_TYPE and DEMUCS_ARCH_TYPE. Any other
            value makes the call a no-op.
        sources: Value stored under ``model_name``.
        model_name: Cache key for ``sources``.
    """
    if process_method == VR_ARCH_TYPE:
        vr_cache_source_mapper[model_name] = sources
        self.vr_cache_source_mapper = dict(vr_cache_source_mapper)
    if process_method == MDX_ARCH_TYPE:
        mdx_cache_source_mapper[model_name] = sources
        self.mdx_cache_source_mapper = dict(mdx_cache_source_mapper)
    if process_method == DEMUCS_ARCH_TYPE:
        demucs_cache_source_mapper[model_name] = sources
        self.demucs_cache_source_mapper = dict(demucs_cache_source_mapper)
# Parse the YAML file named in the MDX23C model table's 'config_yaml' entry
# once at import time; the result is stored there as 'mdx_c_configs'.
config_path = os.path.join(MDX_C_CONFIG_PATH, 'model_2_stem_full_band_8k.yaml')
with open(config_path) as f:
    # FullLoader is kept on purpose: the file is a project-supplied config,
    # not external input.
    _raw_config = yaml.load(f, Loader=yaml.FullLoader)
config = ConfigDict(_raw_config)
# Settings table for the MDX23C-InstVoc HQ model (process method 'MDX-Net',
# 'is_mdx_c' set). It is wrapped in an addict Dict and exposed as the
# 'model_data' entry of mdx23c_8kfft_instvoc_hq_process_data.
# Keys are kept in alphabetical order.
mdx23c_8kfft_instvoc_hq_model_data = {
    'DENOISER_MODEL': DENOISER_MODEL_PATH,
    'DEVERBER_MODEL': DEVERBER_MODEL_PATH,
    'all_models': [],
    'bv_model_rebalance': 0,
    'chunks': 0,
    'compensate': None,
    'demucs_4_stem_added_count': 0,
    'demucs_source_list': [],
    'demucs_stem_count': 0,
    'demucs_stems': 'All Stems',
    'deverb_vocal_opt': 'Vocals',
    'device_set': 'Default',
    'ensemble_primary_stem': 'Vocals',
    'ensemble_secondary_stem': 'Instrumental',
    'is_4_stem_ensemble': False,
    'is_bv_model': False,
    'is_change_def': False,
    'is_demucs_4_stem_secondaries': False,
    'is_demucs_combine_stems': True,
    'is_demucs_pre_proc_model_inst_mix': False,
    'is_denoise': False,
    'is_denoise_model': False,
    'is_deverb_vocals': False,
    'is_dry_check': False,
    'is_ensemble_mode': True,
    'is_get_hash_dir_only': False,
    'is_gpu_conversion': 0,
    'is_inst_only_voc_splitter': False,
    'is_invert_spec': False,
    'is_karaoke': False,
    'is_match_frequency_pitch': True,
    'is_mdx_c': True,
    'is_mdx_c_seg_def': False,
    'is_mdx_ckpt': False,
    'is_mdx_combine_stems': True,
    'is_mixer_mode': False,
    'is_multi_stem_ensemble': False,
    'is_normalization': False,
    'is_pitch_change': False,
    'is_pre_proc_model': False,
    'is_primary_model_primary_stem_only': False,
    'is_primary_model_secondary_stem_only': False,
    'is_primary_stem_only': False,
    'is_save_inst_vocal_splitter': False,
    'is_save_vocal_only': False,
    'is_sec_bv_rebalance': False,
    'is_secondary_model': False,
    'is_secondary_model_activated': False,
    'is_secondary_stem_only': False,
    'is_use_opencl': False,
    'is_vocal_split_model': False,
    'is_vocal_split_model_activated': False,
    'is_vr_51_model': False,
    'manual_download_Button': None,
    'margin': 44100,
    'mdx_batch_size': 1,
    # ConfigDict parsed at import time from model_2_stem_full_band_8k.yaml.
    'mdx_c_configs': config,
    'mdx_dim_f_set': None,
    'mdx_dim_t_set': None,
    'mdx_model_stems': ['Vocals', 'Instrumental'],
    'mdx_n_fft_scale_set': None,
    'mdx_segment_size': 256,
    'mdx_stem_count': 2,
    'mdxnet_stem_select': 'Vocals',
    # NOTE(review): absolute path inside one user's home directory;
    # presumably has to be adjusted per installation. Confirm.
    'mixer_path': '/Users/taoluo/Workspace/github/ultimatevocalremovergui/models/MDX_Net_Models/mixer_val.ckpt',
    'model_and_process_tag': 'MDX-Net: MDX23C-InstVoc HQ',
    'model_basename': 'MDX23C-8KFFT-InstVoc_HQ',
    'model_capacity': (32, 128),
    # Names the same YAML file that is loaded into `config`.
    'model_data': {'config_yaml': 'model_2_stem_full_band_8k.yaml'},
    'model_hash': '99b6ceaae542265a3b6d657bf9fde79f',
    # NOTE(review): hard-coded absolute path, see 'mixer_path'.
    'model_hash_dir': '/Users/taoluo/Workspace/github/ultimatevocalremovergui/models/MDX_Net_Models/model_data/99b6ceaae542265a3b6d657bf9fde79f.json',
    'model_name': 'MDX23C-InstVoc HQ',
    # NOTE(review): hard-coded absolute path, see 'mixer_path'.
    'model_path': '/Users/taoluo/Workspace/github/ultimatevocalremovergui/models/MDX_Net_Models/MDX23C-8KFFT-InstVoc_HQ.ckpt',
    'model_samplerate': 44100,
    'model_status': True,
    'mp3_bit_set': '320k',
    'overlap': 0.25,
    'overlap_mdx': 'Default',
    'overlap_mdx23': 8,
    'pre_proc_model': None,
    'pre_proc_model_activated': False,
    'primary_model_primary_stem': None,
    'primary_stem': 'Vocals',
    'primary_stem_native': None,
    'process_method': 'MDX-Net',
    'save_format': 'WAV',
    'secondary_model': None,
    'secondary_model_4_stem': [],
    'secondary_model_4_stem_model_names_list': [],
    'secondary_model_4_stem_names': [],
    'secondary_model_4_stem_scale': [],
    'secondary_model_bass': None,
    'secondary_model_drums': None,
    'secondary_model_other': None,
    'secondary_model_scale': None,
    'secondary_model_scale_bass': None,
    'secondary_model_scale_drums': None,
    'secondary_model_scale_other': None,
    'secondary_stem': 'Instrumental',
    'semitone_shift': 0.0,
    'vocal_split_model': None,
    'wav_type_set': 'PCM_16'}
# Per-run arguments for the MDX23C-InstVoc HQ model: the model settings
# (as an addict Dict), input/output locations, and console-printing callbacks.
# NOTE(review): presumably handed to the separation classes as their
# process_data argument. Confirm against the caller.
# NOTE(review): export_path / audio_file are absolute paths inside one
# user's home directory.
mdx23c_8kfft_instvoc_hq_process_data = dict(
    model_name='MDX23C Model: MDX23C-InstVoc HQ',
    model_data=Dict(mdx23c_8kfft_instvoc_hq_model_data),
    export_path='/Users/taoluo/Downloads/Ensembled_Outputs_1702201464',
    audio_file_base='1_test_audio_MDX23C-8KFFT-InstVoc_HQ',
    audio_file='/Users/taoluo/Downloads/test_audio.mp3',
    # The callbacks below only print to stdout.
    set_progress_bar=lambda step, inference_iterations=0: print(
        f"iteration {inference_iterations} of step #{step}"),
    write_to_console=lambda progress_text, base_text='': print(
        f"{progress_text} {base_text}"),
    process_iteration=lambda iteration: iteration + 1,
    cached_source_callback=cached_source_callback,
    cached_model_source_holder=cached_model_source_holder,
    list_all_models=[
        'MDX23C-8KFFT-InstVoc_HQ',
        'UVR-MDX-NET-Voc_FT',
        'htdemucs_ft',
    ],
    is_ensemble_master=True,
    is_4_stem_ensemble=False,
)
# Settings table for the UVR-MDX-NET-Voc_FT model (process method 'MDX-Net',
# 'is_mdx_c' not set, .onnx model file). It is wrapped in an addict Dict and
# exposed as the 'model_data' entry of uvr_mdx_net_voc_ft_process_data.
# Keys are mostly alphabetical ('window_size' is the exception).
uvr_mdx_net_voc_ft_model_data = {
    'DENOISER_MODEL': DENOISER_MODEL_PATH,
    'DEVERBER_MODEL': DEVERBER_MODEL_PATH,
    'all_models': [],
    'bv_model_rebalance': 0,
    'chunks': 0,
    'compensate': 1.021,
    'demucs_4_stem_added_count': 0,
    'demucs_source_list': [],
    'demucs_stem_count': 0,
    'demucs_stems': 'All Stems',
    'deverb_vocal_opt': 'Vocals',
    'device_set': 'Default',
    'ensemble_primary_stem': 'Vocals',
    'ensemble_secondary_stem': 'Instrumental',
    'is_4_stem_ensemble': False,
    'is_bv_model': False,
    'is_change_def': False,
    'is_demucs_4_stem_secondaries': False,
    'is_demucs_combine_stems': True,
    'is_demucs_pre_proc_model_inst_mix': False,
    'is_denoise': False,
    'is_denoise_model': False,
    'is_deverb_vocals': False,
    'is_dry_check': False,
    'is_ensemble_mode': True,
    'is_get_hash_dir_only': False,
    'is_gpu_conversion': 0,
    'is_inst_only_voc_splitter': False,
    'is_invert_spec': False,
    'is_karaoke': False,
    'is_match_frequency_pitch': True,
    'is_mdx_c': False,
    'is_mdx_c_seg_def': False,
    'is_mdx_ckpt': False,
    'is_mdx_combine_stems': True,
    'is_mixer_mode': False,
    'is_multi_stem_ensemble': False,
    'is_normalization': False,
    'is_pitch_change': False,
    'is_pre_proc_model': False,
    'is_primary_model_primary_stem_only': False,
    'is_primary_model_secondary_stem_only': False,
    'is_primary_stem_only': False,
    'is_save_inst_vocal_splitter': False,
    'is_save_vocal_only': False,
    'is_sec_bv_rebalance': False,
    'is_secondary_model': False,
    'is_secondary_model_activated': False,
    'is_secondary_stem_only': False,
    'is_use_opencl': False,
    'is_vocal_split_model': False,
    'is_vocal_split_model_activated': False,
    'is_vr_51_model': False,
    'manual_download_Button': None,
    'margin': 44100,
    'mdx_batch_size': 1,
    # Only this table defines 'window_size'; the other two model tables in
    # this file have no such key.
    'window_size': 512,
    'mdx_c_configs': None,
    'mdx_dim_f_set': 3072,
    'mdx_dim_t_set': 8,
    'mdx_model_stems': [],
    'mdx_n_fft_scale_set': 7680,
    'mdx_segment_size': 256,
    'mdx_stem_count': 1,
    'mdxnet_stem_select': 'All Stems',
    # NOTE(review): absolute path inside one user's home directory;
    # presumably has to be adjusted per installation. Confirm.
    'mixer_path': '/Users/taoluo/Workspace/github/ultimatevocalremovergui/models/MDX_Net_Models/mixer_val.ckpt',
    'model_and_process_tag': 'MDX-Net: UVR-MDX-NET-Voc_FT',
    'model_basename': 'UVR-MDX-NET-Voc_FT',
    'model_capacity': (32, 128),
    # Repeats the top-level compensate / mdx_dim_* / mdx_n_fft_scale_set /
    # primary_stem values of this table.
    'model_data': {'compensate': 1.021,
                   'mdx_dim_f_set': 3072,
                   'mdx_dim_t_set': 8,
                   'mdx_n_fft_scale_set': 7680,
                   'primary_stem': 'Vocals'},
    'model_hash': '77d07b2667ddf05b9e3175941b4454a0',
    # NOTE(review): hard-coded absolute path, see 'mixer_path'.
    'model_hash_dir': '/Users/taoluo/Workspace/github/ultimatevocalremovergui/models/MDX_Net_Models/model_data/77d07b2667ddf05b9e3175941b4454a0.json',
    'model_name': 'UVR-MDX-NET-Voc_FT',
    # NOTE(review): hard-coded absolute path, see 'mixer_path'.
    'model_path': '/Users/taoluo/Workspace/github/ultimatevocalremovergui/models/MDX_Net_Models/UVR-MDX-NET-Voc_FT.onnx',
    'model_samplerate': 44100,
    'model_status': True,
    'mp3_bit_set': '320k',
    'overlap': 0.25,
    'overlap_mdx': 'Default',
    'overlap_mdx23': 8,
    'pre_proc_model': None,
    'pre_proc_model_activated': False,
    'primary_model_primary_stem': None,
    'primary_stem': 'Vocals',
    'primary_stem_native': 'Vocals',
    'process_method': 'MDX-Net',
    'save_format': 'WAV',
    'secondary_model': None,
    'secondary_model_4_stem': [],
    'secondary_model_4_stem_model_names_list': [],
    'secondary_model_4_stem_names': [],
    'secondary_model_4_stem_scale': [],
    'secondary_model_bass': None,
    'secondary_model_drums': None,
    'secondary_model_other': None,
    'secondary_model_scale': None,
    'secondary_model_scale_bass': None,
    'secondary_model_scale_drums': None,
    'secondary_model_scale_other': None,
    'secondary_stem': 'Instrumental',
    'semitone_shift': 0.0,
    'vocal_split_model': None,
    'wav_type_set': 'PCM_16'}
# Per-run arguments for the UVR-MDX-NET-Voc_FT model: the model settings
# (as an addict Dict), input/output locations, and console-printing callbacks.
# NOTE(review): presumably handed to the separation classes as their
# process_data argument. Confirm against the caller.
# NOTE(review): export_path / audio_file are absolute paths inside one
# user's home directory.
uvr_mdx_net_voc_ft_process_data = {
    'model_name': 'MDX-Net Model: UVR-MDX-NET Voc FT',
    'model_data': Dict(uvr_mdx_net_voc_ft_model_data),
    'export_path': '/Users/taoluo/Downloads/Ensembled_Outputs_1702201464',
    'audio_file_base': '1_test_audio_UVR-MDX-NET-Voc_FT',
    'audio_file': '/Users/taoluo/Downloads/test_audio.mp3',
    # The callbacks below only print to stdout.
    'set_progress_bar': lambda step, inference_iterations=0: print(
        f"iteration {inference_iterations} of step #{step}"),
    # base_text defaults to '' (as in the MDX23C process table) so that a
    # one-argument call no longer prints the literal word "base_text".
    'write_to_console': lambda progress_text, base_text='': print(
        f"{progress_text} {base_text}"),
    'process_iteration': lambda iteration: iteration + 1,
    'cached_source_callback': cached_source_callback,
    'cached_model_source_holder': cached_model_source_holder,
    'list_all_models': ['MDX23C-8KFFT-InstVoc_HQ', 'UVR-MDX-NET-Voc_FT', 'htdemucs_ft'],
    'is_ensemble_master': True,
    'is_4_stem_ensemble': False}
# Settings table for the htdemucs_ft model (process method 'Demucs',
# demucs_version 'v4'). It is wrapped in an addict Dict and exposed as the
# 'model_data' entry of htdemucs_ft_process_data.
# Unlike the two MDX tables in this file, it uses 'chunks_demucs' and
# 'margin_demucs' instead of 'chunks' / 'margin', and defines no
# 'model_data', 'model_hash' or 'mdx_segment_size' keys.
htdemucs_ft_model_data = {
    'DENOISER_MODEL': DENOISER_MODEL_PATH,
    'DEVERBER_MODEL': DEVERBER_MODEL_PATH,
    'all_models': [],
    'bv_model_rebalance': 0,
    'chunks_demucs': 0,
    'compensate': None,
    'demucs_4_stem_added_count': 0,
    # Source list and mapper are the 2-source constants imported from
    # gui_data.constants.
    'demucs_source_list': DEMUCS_2_SOURCE,
    'demucs_source_map': DEMUCS_2_SOURCE_MAPPER,
    'demucs_stem_count': 2,
    'demucs_stems': 'All Stems',
    'demucs_version': 'v4',
    'deverb_vocal_opt': 'Vocals',
    'device_set': 'Default',
    'ensemble_primary_stem': 'Vocals',
    'ensemble_secondary_stem': 'Instrumental',
    'is_4_stem_ensemble': False,
    'is_bv_model': False,
    'is_change_def': False,
    'is_chunk_demucs': False,
    'is_demucs_4_stem_secondaries': False,
    'is_demucs_combine_stems': True,
    'is_demucs_pre_proc_model_inst_mix': False,
    'is_denoise': False,
    'is_denoise_model': False,
    'is_deverb_vocals': False,
    'is_dry_check': False,
    'is_ensemble_mode': True,
    'is_get_hash_dir_only': False,
    'is_gpu_conversion': 0,
    'is_inst_only_voc_splitter': False,
    'is_invert_spec': False,
    'is_karaoke': False,
    'is_match_frequency_pitch': True,
    'is_mdx_c': False,
    'is_mdx_c_seg_def': False,
    'is_mdx_ckpt': False,
    'is_mdx_combine_stems': True,
    'is_mixer_mode': False,
    'is_multi_stem_ensemble': False,
    'is_normalization': False,
    'is_pitch_change': False,
    'is_pre_proc_model': False,
    'is_primary_model_primary_stem_only': False,
    'is_primary_model_secondary_stem_only': False,
    'is_primary_stem_only': False,
    'is_save_inst_vocal_splitter': False,
    'is_save_vocal_only': False,
    'is_sec_bv_rebalance': False,
    'is_secondary_model': False,
    'is_secondary_model_activated': False,
    'is_secondary_stem_only': False,
    'is_split_mode': True,
    'is_use_opencl': False,
    'is_vocal_split_model': False,
    'is_vocal_split_model_activated': False,
    'is_vr_51_model': False,
    'manual_download_Button': None,
    'margin_demucs': 44100,
    'mdx_batch_size': 1,
    'mdx_c_configs': None,
    'mdx_dim_f_set': None,
    'mdx_dim_t_set': None,
    'mdx_model_stems': [],
    'mdx_n_fft_scale_set': None,
    'mdx_stem_count': 1,
    'mdxnet_stem_select': 'All Stems',
    # NOTE(review): absolute path inside one user's home directory;
    # presumably has to be adjusted per installation. Confirm.
    'mixer_path': '/Users/taoluo/Workspace/github/ultimatevocalremovergui/lib_v5/mixer.ckpt',
    'model_and_process_tag': 'Demucs: v4 | htdemucs_ft',
    'model_basename': 'htdemucs_ft',
    'model_capacity': (32, 128),
    'model_hash_dir': None,
    'model_name': 'v4 | htdemucs_ft',
    # NOTE(review): hard-coded absolute path, see 'mixer_path'.
    'model_path': '/Users/taoluo/Workspace/github/ultimatevocalremovergui/models/Demucs_Models/v3_v4_repo/htdemucs_ft.yaml',
    'model_samplerate': 44100,
    'model_status': True,
    'mp3_bit_set': '320k',
    'overlap': 0.25,
    'overlap_mdx': 'Default',
    'overlap_mdx23': 8,
    'pre_proc_model': None,
    'pre_proc_model_activated': False,
    'primary_model_primary_stem': None,
    'primary_stem': None,
    'primary_stem_native': None,
    'process_method': 'Demucs',
    'save_format': 'WAV',
    'secondary_model': None,
    'secondary_model_4_stem': [],
    'secondary_model_4_stem_model_names_list': [],
    'secondary_model_4_stem_names': [],
    'secondary_model_4_stem_scale': [],
    'secondary_model_bass': None,
    'secondary_model_drums': None,
    'secondary_model_other': None,
    'secondary_model_scale': None,
    'secondary_model_scale_bass': None,
    'secondary_model_scale_drums': None,
    'secondary_model_scale_other': None,
    'secondary_stem': None,
    'segment': 'Default',
    'semitone_shift': 0.0,
    'shifts': 2,
    'vocal_split_model': None,
    'wav_type_set': 'PCM_16'}
# Per-run arguments for the htdemucs_ft model: the model settings (as an
# addict Dict), input/output locations, and console-printing callbacks.
# NOTE(review): presumably handed to the separation classes as their
# process_data argument. Confirm against the caller.
# NOTE(review): export_path / audio_file are absolute paths inside one
# user's home directory.
htdemucs_ft_process_data = {
    'model_name': 'Demucs v4: htdemucs_ft',
    'model_data': Dict(htdemucs_ft_model_data),
    'export_path': '/Users/taoluo/Downloads/Ensembled_Outputs_1702201464',
    'audio_file_base': '1_test_audio_htdemucs_ft',
    'audio_file': '/Users/taoluo/Downloads/test_audio.mp3',
    # The callbacks below only print to stdout.
    'set_progress_bar': lambda step, inference_iterations=0: print(
        f"iteration {inference_iterations} of step #{step}"),
    # base_text defaults to '' (as in the MDX23C process table) so that a
    # one-argument call no longer prints the literal word "base_text".
    'write_to_console': lambda progress_text, base_text='': print(
        f"{progress_text} {base_text}"),
    'process_iteration': lambda iteration: iteration + 1,
    'cached_source_callback': cached_source_callback,
    'cached_model_source_holder': cached_model_source_holder,
    'list_all_models': ['MDX23C-8KFFT-InstVoc_HQ', 'UVR-MDX-NET-Voc_FT', 'htdemucs_ft'],
    'is_ensemble_master': True,
    'is_4_stem_ensemble': False}