|
import os
|
|
import shutil
|
|
import sys
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from time import sleep
|
|
|
|
from tqdm import tqdm
|
|
|
|
from args import mdx23c_8kfft_instvoc_hq_process_data, htdemucs_ft_process_data, uvr_mdx_net_voc_ft_process_data
|
|
from download import download_model, get_model_file
|
|
from gui_data.constants import VR_ARCH_TYPE, MDX_ARCH_TYPE, DEMUCS_ARCH_TYPE, ENSEMBLE_MODE, TIME_STRETCH, \
|
|
MANUAL_ENSEMBLE, MATCH_INPUTS, ALIGN_INPUTS, ALL_STEMS, DEFAULT, VOCAL_STEM, MP3_BIT_RATES, WAV, DEMUCS_2_SOURCE, \
|
|
DEMUCS_2_SOURCE_MAPPER, INST_STEM, CKPT, ONNX, MDX_POP_NFFT, secondary_stem, PRIMARY_STEM, SECONDARY_STEM
|
|
from lib_v5 import spec_utils
|
|
from separate import (
|
|
SeperateDemucs, SeperateMDX, SeperateMDXC, SeperateVR,
|
|
save_format, clear_gpu_cache,
|
|
cuda_available, mps_available,
|
|
)
|
|
|
|
|
|
def run_ensemble_models(audio_path, export_path, format=WAV, clean=True):
    """Run three separation models on one audio file and ensemble their stems.

    Every model in the fixed list below is first passed to ``download_model``.
    Each model is then run with its ``process_data`` dict pointed at a
    timestamped scratch directory inside ``export_path``. The per-model vocal
    files and the per-model instrumental files are finally combined with
    ``ensemble`` into two files placed directly in ``export_path``.

    Args:
        audio_path: Path of the audio file to separate.
        export_path: Directory that receives the final files; created when
            missing.
        format: Format name. Within this function only its lower-cased form
            is used, as the file extension of the intermediate and final
            paths.
        clean: When true, the scratch directory is removed before returning.

    Returns:
        Tuple ``(instrumental_final_path, vocals_final_path)`` of
        ``pathlib.Path`` objects.

    Raises:
        Exception: If a model's ``process_method`` is none of
            ``VR_ARCH_TYPE``, ``MDX_ARCH_TYPE`` or ``DEMUCS_ARCH_TYPE``.
    """
    start = datetime.now()
    process_datas = [
        mdx23c_8kfft_instvoc_hq_process_data,
        uvr_mdx_net_voc_ft_process_data,
        htdemucs_ft_process_data,
    ]

    for process_data in process_datas:
        download_model(process_data['model_name'])

    os.makedirs(export_path, exist_ok=True)
    temp_export_path = os.path.join(export_path, 'uvr5_' + datetime.now().strftime("%Y-%m-%d_%H%M%S"))
    os.makedirs(temp_export_path, exist_ok=True)
    print('temp_export_path', temp_export_path)

    input_stem = Path(audio_path).stem
    extension = format.lower()
    instrumental_export_paths = []
    vocals_export_paths = []

    for process_data in process_datas:
        model_name = process_data["model_name"]

        # The context manager closes the bar even when separation raises.
        with tqdm(total=100, desc=model_name, unit="%") as progress_bar:

            def set_progress_bar(step, inference_iterations=0):
                # `inference_iterations` is treated as a fraction of the total;
                # advance the bar by the difference to its current position.
                if inference_iterations > 0:
                    progress_bar.update(round(inference_iterations * 100, 2) - progress_bar.n)

            def write_to_console(progress_text, base_text=''):
                # Returns the message tagged with the model name instead of
                # printing it; blank messages yield None.
                text = f"{progress_text} {base_text}"
                if text.strip():
                    return f'{text} @ {model_name}'
                return None

            current_model = process_data['model_data']
            audio_file_base = input_stem + '_' + current_model.model_basename

            # The separators read their settings and callbacks from this dict.
            process_data['export_path'] = temp_export_path
            process_data['audio_file_base'] = audio_file_base
            process_data['audio_file'] = audio_path
            process_data['set_progress_bar'] = set_progress_bar
            process_data['write_to_console'] = write_to_console

            if current_model.process_method == VR_ARCH_TYPE:
                seperator = SeperateVR(current_model, process_data)
            elif current_model.process_method == MDX_ARCH_TYPE:
                if current_model.is_mdx_c:
                    seperator = SeperateMDXC(current_model, process_data)
                else:
                    seperator = SeperateMDX(current_model, process_data)
            elif current_model.process_method == DEMUCS_ARCH_TYPE:
                seperator = SeperateDemucs(current_model, process_data,
                                           vocal_stem_path=(audio_path, audio_file_base))
            else:
                raise Exception('model not found')

            seperator.seperate()

        # File names the separator is expected to have written into the
        # scratch directory -- presumably fixed by the separator classes;
        # verify against `separate`.
        instrumental_path = Path(temp_export_path) / f"{audio_file_base}_(Instrumental).{extension}"
        vocals_path = Path(temp_export_path) / f"{audio_file_base}_(Vocals).{extension}"
        instrumental_export_paths.append(str(instrumental_path))
        vocals_export_paths.append(str(vocals_path))

    vocals_final_path = Path(export_path) / f"{input_stem}.vocal.{extension}"
    instrumental_final_path = Path(export_path) / f"{input_stem}.instrumental.{extension}"

    # NOTE(review): `format` is not forwarded here, so ensemble() falls back to
    # its own default format -- confirm whether that is intended.
    ensemble(vocals_export_paths, vocals_final_path)
    ensemble(instrumental_export_paths, instrumental_final_path)

    print('instrumental_final_path', instrumental_final_path)
    print('vocals_final_path', vocals_final_path)
    print(f'Finished in {datetime.now() - start}')

    if clean:
        # NOTE(review): fixed 10 s pause before deleting the scratch directory;
        # presumably lets pending writers release their files -- confirm.
        sleep(10)
        shutil.rmtree(temp_export_path, ignore_errors=True)

    return instrumental_final_path, vocals_final_path
|
|
|
|
|
|
def ensemble(stem_outputs, stem_save_path, format=WAV):
    """Merge several stem files into one file at ``stem_save_path``.

    The inputs are handed to ``spec_utils.ensemble_inputs`` with the
    'Average' algorithm, the normalization flag set, the 'PCM_16' setting and
    ``is_wave=True``. The result is then passed to ``save_format`` together
    with ``format`` and a '320k' bit-rate setting.

    Args:
        stem_outputs: Iterable of input file paths (``str`` or path-like);
            each one is converted with ``str()``.
        stem_save_path: Destination path (``str`` or path-like).
        format: Format name forwarded unchanged to ``save_format``.
    """
    output_path = str(stem_save_path)
    input_paths = list(map(str, stem_outputs))

    # Positional order: inputs, algorithm, normalization flag, wav type, output.
    spec_utils.ensemble_inputs(input_paths, 'Average', True, 'PCM_16', output_path, is_wave=True)
    save_format(output_path, format, '320k')
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Command-line entry point: python <script> <audio_file>
    # The separated files are written next to the input file.
    if len(sys.argv) < 2:
        # Exit with a usage hint rather than an IndexError on sys.argv[1].
        sys.exit(f"Usage: {sys.argv[0]} <audio_file>")

    audio_file = sys.argv[1]
    if not os.path.isfile(audio_file):
        raise FileNotFoundError(f"File {audio_file} not exist.")

    # Make the path absolute first: dirname() of a bare file name is '', and
    # os.makedirs('') inside run_ensemble_models raises FileNotFoundError.
    output_dir = os.path.dirname(os.path.abspath(audio_file))
    print(output_dir)
    run_ensemble_models(audio_file, output_dir)
|
|
|