File size: 6,093 Bytes
60c7461
 
 
 
 
8f14b67
60c7461
a090989
351457d
 
60c7461
 
 
 
 
 
 
 
 
 
 
 
 
 
9d26a29
 
 
 
 
60c7461
 
 
 
 
 
 
 
 
74a8d2d
60c7461
 
 
 
 
 
 
351457d
 
 
 
 
 
 
 
 
 
 
 
60c7461
 
 
 
 
351457d
 
60c7461
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8f14b67
 
218611c
60c7461
 
 
 
351457d
 
60c7461
 
 
 
 
 
a090989
 
 
 
 
9d26a29
 
 
 
a090989
 
 
 
 
60c7461
 
 
 
 
034c3ea
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import os
import shutil
import sys
from datetime import datetime
from pathlib import Path
from time import sleep

import requests
from tqdm import tqdm

from args import mdx23c_8kfft_instvoc_hq_process_data, htdemucs_ft_process_data, uvr_mdx_net_voc_ft_process_data
from download import download_model, get_model_file
from gui_data.constants import VR_ARCH_TYPE, MDX_ARCH_TYPE, DEMUCS_ARCH_TYPE, ENSEMBLE_MODE, TIME_STRETCH, \
    MANUAL_ENSEMBLE, MATCH_INPUTS, ALIGN_INPUTS, ALL_STEMS, DEFAULT, VOCAL_STEM, MP3_BIT_RATES, WAV, DEMUCS_2_SOURCE, \
    DEMUCS_2_SOURCE_MAPPER, INST_STEM, CKPT, ONNX, MDX_POP_NFFT, secondary_stem, PRIMARY_STEM, SECONDARY_STEM
from lib_v5 import spec_utils
from separate import (
    SeperateDemucs, SeperateMDX, SeperateMDXC, SeperateVR,  # Model-related
    save_format, clear_gpu_cache,  # Utility functions
    cuda_available, mps_available,  # directml_available,
)


def _build_separator(current_model, process_data, audio_path, audio_file_base):
    """Instantiate the separator class matching ``current_model.process_method``."""
    method = current_model.process_method
    if method == VR_ARCH_TYPE:
        return SeperateVR(current_model, process_data)
    if method == MDX_ARCH_TYPE:
        separator_cls = SeperateMDXC if current_model.is_mdx_c else SeperateMDX
        return separator_cls(current_model, process_data)
    if method == DEMUCS_ARCH_TYPE:
        return SeperateDemucs(current_model, process_data, vocal_stem_path=(audio_path, audio_file_base))
    raise Exception(f'model not found: unsupported process method {method!r}')


def run_ensemble_models(audio_path, export_path, format=WAV, clean=True):
    """Separate ``audio_path`` with three models and merge their stems.

    Each model writes its stems into a timestamped ``uvr5_*`` folder under
    ``export_path``; the per-model vocal and instrumental stems are then
    merged with ``ensemble`` into ``<stem>.vocal.<ext>`` and
    ``<stem>.instrumental.<ext>`` inside ``export_path``.

    If both final files already exist, nothing is recomputed.

    Args:
        audio_path: Path of the input audio file.
        export_path: Directory receiving the final files (created if missing).
        format: Output format name; its lowercase form is used as the file
            extension of the final and per-model stem paths.
        clean: When true, the temporary per-model folder is removed at the end.

    Returns:
        ``(instrumental_final_path, vocals_final_path)`` as ``Path`` objects.

    Raises:
        Exception: If a model's process method matches no known architecture.

    NOTE(review): ``format`` is not forwarded to ``ensemble``, which therefore
    runs with its own default format; non-WAV output looks unsupported here --
    confirm against ``save_format`` before relying on it.
    """
    track_name = Path(audio_path).stem
    extension = format.lower()
    vocals_final_path = Path(export_path) / f"{track_name}.vocal.{extension}"
    instrumental_final_path = Path(export_path) / f"{track_name}.instrumental.{extension}"
    # Both outputs already present: reuse them instead of re-running the models.
    if instrumental_final_path.is_file() and vocals_final_path.is_file():
        return instrumental_final_path, vocals_final_path

    start = datetime.now()
    process_datas = [mdx23c_8kfft_instvoc_hq_process_data, uvr_mdx_net_voc_ft_process_data,
                     htdemucs_ft_process_data]
    # Fetch every model up front so a download failure aborts before any separation runs.
    for process_data in process_datas:
        download_model(process_data['model_name'])

    # Per-run scratch folder for the individual model outputs.
    os.makedirs(export_path, exist_ok=True)
    temp_export_path = os.path.join(export_path, 'uvr5_' + datetime.now().strftime("%Y-%m-%d_%H%M%S"))
    os.makedirs(temp_export_path, exist_ok=True)
    print('temp_export_path', temp_export_path)

    instrumental_export_paths = []
    vocals_export_paths = []

    for process_data in process_datas:
        model_name = process_data["model_name"]
        current_model = process_data['model_data']
        audio_file_base = f"{track_name}_{current_model.model_basename}"

        # The context manager closes the bar even if separation raises.
        with tqdm(total=100, desc=model_name, unit="%") as progress_bar:

            def set_progress_bar(step, inference_iterations=0):
                # inference_iterations is treated as a 0..1 fraction; move the bar to that percentage.
                if inference_iterations > 0:
                    progress_bar.update(round(inference_iterations * 100, 2) - progress_bar.n)

            def write_to_console(progress_text, base_text=''):
                # Returns (does not print) the message tagged with the model name.
                text = f"{progress_text} {base_text}"
                if text.strip():
                    return f'{text} @ {model_name}'

            # The separator reads its job settings from process_data.
            process_data['export_path'] = temp_export_path
            process_data['audio_file_base'] = audio_file_base
            process_data['audio_file'] = audio_path
            process_data['set_progress_bar'] = set_progress_bar
            process_data['write_to_console'] = write_to_console

            seperator = _build_separator(current_model, process_data, audio_path, audio_file_base)
            seperator.seperate()

        # Stem file names the ensemble step expects each model to have produced.
        instrumental_path = Path(temp_export_path) / f"{audio_file_base}_(Instrumental).{extension}"
        vocals_path = Path(temp_export_path) / f"{audio_file_base}_(Vocals).{extension}"
        instrumental_export_paths.append(str(instrumental_path))
        vocals_export_paths.append(str(vocals_path))

    # merge each model outputs
    ensemble(vocals_export_paths, vocals_final_path)
    ensemble(instrumental_export_paths, instrumental_final_path)

    print('instrumental_final_path', instrumental_final_path)
    print('vocals_final_path', vocals_final_path)
    print(f'Finished in {datetime.now() - start}')
    if clean:
        # NOTE(review): the delay presumably lets pending file handles be released
        # before the folder is deleted -- confirm whether it is still needed.
        sleep(10)
        shutil.rmtree(temp_export_path, ignore_errors=True)
    return instrumental_final_path, vocals_final_path


def ensemble(stem_outputs, stem_save_path, format=WAV):
    """Merge several stem files into one file at ``stem_save_path``.

    The inputs are combined by ``spec_utils.ensemble_inputs`` using the
    'Average' algorithm with normalization enabled and 'PCM_16' output,
    then ``save_format`` is applied to the result with ``format`` and a
    '320k' bit rate.

    Args:
        stem_outputs: Iterable of input stem paths (converted to ``str``).
        stem_save_path: Destination path of the merged stem (converted to ``str``).
        format: Output format name handed to ``save_format``.
    """
    merged_file = str(stem_save_path)
    input_files = list(map(str, stem_outputs))
    spec_utils.ensemble_inputs(input_files, 'Average', True, 'PCM_16', merged_file, is_wave=True)
    save_format(merged_file, format, '320k')


def uvr_job(song_id, platform='netease'):
    """Download a song if needed and return the path of its instrumental.

    The audio file is looked up in the current working directory as
    ``<song_id>.m4a`` for the 'youtube' platform and ``<song_id>.mp3``
    otherwise. When it is missing it is fetched from the download API,
    then ``run_ensemble_models`` is run on it with the current working
    directory as export folder.

    Args:
        song_id: Identifier of the song on the given platform.
        platform: Source platform name passed to the download API.

    Returns:
        The instrumental path returned by ``run_ensemble_models``.

    Raises:
        requests.HTTPError: If the download API answers with an error status.
    """
    audio_dir = os.getcwd()
    extension = 'm4a' if platform == 'youtube' else 'mp3'
    audio_path = os.path.join(audio_dir, f'{song_id}.{extension}')

    if not os.path.isfile(audio_path):
        # Let requests build and URL-encode the query string.
        response = requests.get(
            "http://or.luotao.net/api/download_song",
            params={'song_id': song_id, 'platform': platform},
            allow_redirects=True,
        )
        # Fail here rather than saving an error page as the audio file.
        response.raise_for_status()
        with open(audio_path, 'wb') as audio_fh:
            audio_fh.write(response.content)

    instrumental_path, _vocals_path = run_ensemble_models(audio_path, audio_dir)
    return instrumental_path


# /Users/taoluo/Downloads/test/kimk_audio_MDX23C-8KFFT-InstVoc_HQ_(Instrumental).WAV
#
if __name__ == '__main__':
    # Usage: <script> <audio_file | song_id> [platform]
    # The first argument is treated as a local audio file when it exists on
    # disk, otherwise as a song id to download from ``platform``.
    if len(sys.argv) < 2:
        sys.exit(f'usage: {sys.argv[0]} <audio_file | song_id> [platform]')

    audio_file = sys.argv[1]
    platform = sys.argv[2] if len(sys.argv) > 2 else 'netease'

    # exist file
    if os.path.isfile(audio_file):
        # abspath first: dirname of a bare file name is '' and os.makedirs('') fails.
        output_dir = os.path.dirname(os.path.abspath(audio_file))
        instrumental_path, vocals_path = run_ensemble_models(audio_file, output_dir)
        print('instrumental_path: ', instrumental_path)
        sys.exit(0)

    # download from platform
    song_id = audio_file
    instrumental_path = uvr_job(song_id, platform)
    print('instrumental_path: ', instrumental_path)