"""Run several UVR separation models on one audio file and ensemble the stems.

Each configured model writes its vocal and instrumental stems into a temporary
folder; the per-model stems are then combined with ``spec_utils.ensemble_inputs``
into one final vocal file and one final instrumental file.
"""
import os
import shutil
import sys
from datetime import datetime
from pathlib import Path
from time import sleep

import requests
from tqdm import tqdm

from args import mdx23c_8kfft_instvoc_hq_process_data, htdemucs_ft_process_data, uvr_mdx_net_voc_ft_process_data
from download import download_model, get_model_file
from gui_data.constants import VR_ARCH_TYPE, MDX_ARCH_TYPE, DEMUCS_ARCH_TYPE, ENSEMBLE_MODE, TIME_STRETCH, \
    MANUAL_ENSEMBLE, MATCH_INPUTS, ALIGN_INPUTS, ALL_STEMS, DEFAULT, VOCAL_STEM, MP3_BIT_RATES, WAV, DEMUCS_2_SOURCE, \
    DEMUCS_2_SOURCE_MAPPER, INST_STEM, CKPT, ONNX, MDX_POP_NFFT, secondary_stem, PRIMARY_STEM, SECONDARY_STEM
from lib_v5 import spec_utils
from separate import (
    SeperateDemucs, SeperateMDX, SeperateMDXC, SeperateVR,  # Model-related
    save_format, clear_gpu_cache,  # Utility functions
    cuda_available, mps_available,  # directml_available,
)


def _make_callbacks(progress_bar, model_name):
    """Build the progress and console callbacks handed to a separator.

    The bar and model name are passed in explicitly so that each pair of
    callbacks stays bound to its own model rather than to loop variables.
    """

    def set_progress_bar(step, inference_iterations=0):
        # Only positive values move the bar; the bar is advanced by the
        # difference between the reported percentage and its current position.
        if inference_iterations > 0:
            progress_bar.update(round(inference_iterations * 100, 2) - progress_bar.n)

    def write_to_console(progress_text, base_text=''):
        # Returns the message tagged with the model name, or None when the
        # combined text is blank. Nothing is printed here.
        text = f"{progress_text} {base_text}"
        if text.strip():
            return f'{text} @ {model_name}'
        return None

    return set_progress_bar, write_to_console


def _build_separator(current_model, process_data, audio_path, audio_file_base):
    """Return the separator object matching ``current_model.process_method``.

    Raises:
        Exception: if the process method is not VR, MDX or Demucs.
    """
    method = current_model.process_method
    if method == VR_ARCH_TYPE:
        return SeperateVR(current_model, process_data)
    if method == MDX_ARCH_TYPE:
        if current_model.is_mdx_c:
            return SeperateMDXC(current_model, process_data)
        return SeperateMDX(current_model, process_data)
    if method == DEMUCS_ARCH_TYPE:
        return SeperateDemucs(current_model, process_data, vocal_stem_path=(audio_path, audio_file_base))
    raise Exception('model not found')


def run_ensemble_models(audio_path, export_path, format=WAV, clean=True):
    """Separate ``audio_path`` with every configured model and ensemble the results.

    Args:
        audio_path: Path of the audio file to separate.
        export_path: Folder that receives the final files; created if missing.
            A timestamped ``uvr5_...`` sub-folder holds the per-model stems.
        format: Used (lower-cased) as the file extension of the per-model and
            final file names.
        clean: When true, the temporary sub-folder is removed at the end.

    Returns:
        Tuple ``(instrumental_final_path, vocals_final_path)`` of ``Path`` objects.

    Raises:
        Exception: if a model's process method is not recognised.
    """
    start = datetime.now()
    process_datas = [mdx23c_8kfft_instvoc_hq_process_data, uvr_mdx_net_voc_ft_process_data,
                     htdemucs_ft_process_data]

    # download models
    for process_data in process_datas:
        download_model(process_data['model_name'])

    # create folder
    os.makedirs(export_path, exist_ok=True)
    temp_export_path = os.path.join(export_path, 'uvr5_' + datetime.now().strftime("%Y-%m-%d_%H%M%S"))
    os.makedirs(temp_export_path, exist_ok=True)
    print('temp_export_path', temp_export_path)

    source_stem = Path(audio_path).stem
    extension = format.lower()
    instrumental_export_paths = []
    vocals_export_paths = []

    for process_data in process_datas:
        model_name = process_data["model_name"]
        progress_bar = tqdm(total=100, desc=model_name, unit="%")
        try:
            set_progress_bar, write_to_console = _make_callbacks(progress_bar, model_name)

            current_model = process_data['model_data']
            audio_file_base = source_stem + '_' + current_model.model_basename

            process_data['export_path'] = temp_export_path
            process_data['audio_file_base'] = audio_file_base
            process_data['audio_file'] = audio_path
            process_data['set_progress_bar'] = set_progress_bar
            process_data['write_to_console'] = write_to_console

            separator = _build_separator(current_model, process_data, audio_path, audio_file_base)
            separator.seperate()
        finally:
            # Always release the bar, even if separation fails.
            progress_bar.close()

        # File names the separator is expected to have written into the temp folder.
        instrumental_path = Path(temp_export_path) / f"{audio_file_base}_(Instrumental).{extension}"
        vocals_path = Path(temp_export_path) / f"{audio_file_base}_(Vocals).{extension}"
        instrumental_export_paths.append(str(instrumental_path))
        vocals_export_paths.append(str(vocals_path))

    # merge each model outputs
    vocals_final_path = Path(export_path) / f"{source_stem}.vocal.{extension}"
    instrumental_final_path = Path(export_path) / f"{source_stem}.instrumental.{extension}"

    # NOTE(review): `format` is not forwarded to ensemble(), so the final files
    # always go through save_format with the WAV default - confirm whether
    # non-WAV formats are meant to be supported here.
    ensemble(vocals_export_paths, vocals_final_path)
    ensemble(instrumental_export_paths, instrumental_final_path)

    print('instrumental_final_path', instrumental_final_path)
    print('vocals_final_path', vocals_final_path)
    print(f'Finished in {datetime.now() - start}')

    if clean:
        # NOTE(review): the reason for the 10 s pause before deleting the temp
        # folder is not visible here - presumably to let files be released; confirm.
        sleep(10)
        shutil.rmtree(temp_export_path, ignore_errors=True)

    return instrumental_final_path, vocals_final_path


def ensemble(stem_outputs, stem_save_path, format=WAV):
    """Combine several stem files into one file at ``stem_save_path``.

    The inputs are merged by ``spec_utils.ensemble_inputs`` using the
    'Average' algorithm with normalization enabled and 'PCM_16' output, then
    the result is passed to ``save_format`` with ``format`` and a '320k' bit rate.

    Args:
        stem_outputs: Iterable of input stem paths (converted to ``str``).
        stem_save_path: Destination path (converted to ``str``).
        format: Target format passed to ``save_format``.
    """
    stem_save_path = str(stem_save_path)
    stem_outputs = [str(s) for s in stem_outputs]

    algorithm = 'Average'
    is_normalization = True
    spec_utils.ensemble_inputs(stem_outputs, algorithm, is_normalization, 'PCM_16', stem_save_path, is_wave=True)
    save_format(stem_save_path, format, '320k')


def uvr_job(song_id, platform='netease'):
    """Download a song into the current directory and return its instrumental.

    Args:
        song_id: Identifier sent to the download API.
        platform: Source platform; 'youtube' downloads are saved as ``.m4a``,
            everything else as ``.mp3``.

    Returns:
        Path of the ensembled instrumental file.

    Raises:
        requests.HTTPError: if the download API answers with an error status.
    """
    audio_dir = os.getcwd()
    audio_file = f'{song_id}.m4a' if platform == 'youtube' else f'{song_id}.mp3'
    audio_path = os.path.join(audio_dir, audio_file)

    url = f"http://or.luotao.net/api/download_song?song_id={song_id}&platform={platform}"
    r = requests.get(url, allow_redirects=True)
    # Fail early instead of saving an error page as if it were audio.
    r.raise_for_status()
    with open(audio_path, 'wb') as audio_handle:
        audio_handle.write(r.content)

    instrumental_path, _ = run_ensemble_models(audio_file, audio_dir)
    return instrumental_path


# /Users/taoluo/Downloads/test/kimk_audio_MDX23C-8KFFT-InstVoc_HQ_(Instrumental).WAV
#
if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.exit(f"Usage: {sys.argv[0]} <audio_file>")

    audio_file = sys.argv[1]
    if not os.path.isfile(audio_file):
        raise FileNotFoundError(f"File {audio_file} not exist.")

    # A bare file name has an empty dirname, which os.makedirs rejects;
    # fall back to the current directory in that case.
    output_dir = os.path.dirname(audio_file) or '.'
    print(output_dir)
    run_ensemble_models(audio_file, output_dir)