print debug

src/main.py  (+25 -2)
@@ -38,7 +38,7 @@ def get_youtube_video_id(url, ignore_playlist=True):
     http://www.youtube.com/embed/SA2iWivDJiE
     http://www.youtube.com/v/SA2iWivDJiE?version=3&hl=en_US
     """
-    query = urlparse(url)
+    query = urlparse(url, allow_fragments=True)
     if query.hostname == 'youtu.be':
         if query.path[1:] == 'watch':
             return query.query[2:]
@@ -63,6 +63,7 @@ def get_youtube_video_id(url, ignore_playlist=True):
 
 
 def yt_download(link):
+    print("[~] Downloading YouTube audio...")
     ydl_opts = {
         'format': 'bestaudio',
         'outtmpl': '%(title)s',
@@ -77,6 +78,7 @@ def yt_download(link):
         result = ydl.extract_info(link, download=True)
         download_path = ydl.prepare_filename(result, outtmpl='%(title)s.mp3')
 
+    print(f"[+] YouTube audio downloaded: {download_path}")
     return download_path
 
 
@@ -88,6 +90,7 @@ def raise_exception(error_msg, is_webui):
 
 
 def get_rvc_model(voice_model, is_webui):
+    print(f"[~] Getting RVC model: {voice_model}")
     rvc_model_filename, rvc_index_filename = None, None
     model_dir = os.path.join(rvc_models_dir, voice_model)
     for file in os.listdir(model_dir):
@@ -101,10 +104,12 @@ def get_rvc_model(voice_model, is_webui):
         error_msg = f'No model file exists in {model_dir}.'
         raise_exception(error_msg, is_webui)
 
+    print(f"[+] RVC model found: {rvc_model_filename}")
     return os.path.join(model_dir, rvc_model_filename), os.path.join(model_dir, rvc_index_filename) if rvc_index_filename else ''
 
 
 def get_audio_paths(song_dir):
+    print(f"[~] Getting audio paths from: {song_dir}")
     orig_song_path = None
     instrumentals_path = None
     main_vocals_dereverb_path = None
@@ -121,10 +126,12 @@ def get_audio_paths(song_dir):
         elif file.endswith('_Vocals_Backup.wav'):
             backup_vocals_path = os.path.join(song_dir, file)
 
+    print(f"[+] Audio paths found: {orig_song_path}, {instrumentals_path}, {main_vocals_dereverb_path}, {backup_vocals_path}")
     return orig_song_path, instrumentals_path, main_vocals_dereverb_path, backup_vocals_path
 
 
 def convert_to_stereo(audio_path):
+    print(f"[~] Converting to stereo: {audio_path}")
     wave, sr = librosa.load(audio_path, mono=False, sr=44100)
 
     # check if mono
@@ -132,12 +139,15 @@ def convert_to_stereo(audio_path):
         stereo_path = f'{os.path.splitext(audio_path)[0]}_stereo.wav'
         command = shlex.split(f'ffmpeg -y -loglevel error -i "{audio_path}" -ac 2 -f wav "{stereo_path}"')
         subprocess.run(command)
+        print(f"[+] Converted to stereo: {stereo_path}")
         return stereo_path
     else:
+        print("[+] Audio already in stereo")
         return audio_path
 
 
 def pitch_shift(audio_path, pitch_change):
+    print(f"[~] Pitch shifting: {audio_path} by {pitch_change}")
     output_path = f'{os.path.splitext(audio_path)[0]}_p{pitch_change}.wav'
     if not os.path.exists(output_path):
         y, sr = sf.read(audio_path)
@@ -146,16 +156,20 @@ def pitch_shift(audio_path, pitch_change):
         y_shifted = tfm.build_array(input_array=y, sample_rate_in=sr)
         sf.write(output_path, y_shifted, sr)
 
+    print(f"[+] Pitch shifted audio saved: {output_path}")
     return output_path
 
 
 def get_hash(filepath):
+    print(f"[~] Generating hash for: {filepath}")
     with open(filepath, 'rb') as f:
         file_hash = hashlib.blake2b()
         while chunk := f.read(8192):
             file_hash.update(chunk)
 
-    return file_hash.hexdigest()[:11]
+    hash_value = file_hash.hexdigest()[:11]
+    print(f"[+] Hash generated: {hash_value}")
+    return hash_value
 
 
 def display_progress(message, percent, is_webui, progress=None):
@@ -166,6 +180,7 @@ def display_progress(message, percent, is_webui, progress=None):
 
 
 def preprocess_song(song_input, mdx_model_params, song_id, is_webui, input_type, progress=None):
+    print("[~] Starting song preprocessing...")
     keep_orig = False
     if input_type == 'yt':
         display_progress('[~] Downloading song...', 0, is_webui, progress)
@@ -189,10 +204,12 @@ def preprocess_song(song_input, mdx_model_params, song_id, is_webui, input_type,
     display_progress('[~] Applying DeReverb to Vocals...', 0.3, is_webui, progress)
     _, main_vocals_dereverb_path = run_mdx(mdx_model_params, song_output_dir, os.path.join(mdxnet_models_dir, 'Reverb_HQ_By_FoxJoy.onnx'), main_vocals_path, invert_suffix='DeReverb', exclude_main=True, denoise=True)
 
+    print("[+] Song preprocessing completed")
     return orig_song_path, vocals_path, instrumentals_path, main_vocals_path, backup_vocals_path, main_vocals_dereverb_path
 
 
 def voice_change(voice_model, vocals_path, output_path, pitch_change, f0_method, index_rate, filter_radius, rms_mix_rate, protect, crepe_hop_length, is_webui):
+    print(f"[~] Starting voice change: {voice_model}")
     rvc_model_path, rvc_index_path = get_rvc_model(voice_model, is_webui)
     device = 'cuda:0'
     config = Config(device, True)
@@ -203,9 +220,11 @@ def voice_change(voice_model, vocals_path, output_path, pitch_change, f0_method,
     rvc_infer(rvc_index_path, index_rate, vocals_path, output_path, pitch_change, f0_method, cpt, version, net_g, filter_radius, tgt_sr, rms_mix_rate, protect, crepe_hop_length, vc, hubert_model)
     del hubert_model, cpt
     gc.collect()
+    print(f"[+] Voice change completed: {output_path}")
 
 
 def add_audio_effects(audio_path, reverb_rm_size, reverb_wet, reverb_dry, reverb_damping):
+    print(f"[~] Adding audio effects: {audio_path}")
    output_path = f'{os.path.splitext(audio_path)[0]}_mixed.wav'
 
     # Initialize audio effects plugins
@@ -225,14 +244,17 @@ def add_audio_effects(audio_path, reverb_rm_size, reverb_wet, reverb_dry, reverb
             effected = board(chunk, f.samplerate, reset=False)
             o.write(effected)
 
+    print(f"[+] Audio effects added: {output_path}")
     return output_path
 
 
 def combine_audio(audio_paths, output_path, main_gain, backup_gain, inst_gain, output_format):
+    print(f"[~] Combining audio: {audio_paths}")
     main_vocal_audio = AudioSegment.from_wav(audio_paths[0]) - 4 + main_gain
     backup_vocal_audio = AudioSegment.from_wav(audio_paths[1]) - 6 + backup_gain
     instrumental_audio = AudioSegment.from_wav(audio_paths[2]) - 7 + inst_gain
     main_vocal_audio.overlay(backup_vocal_audio).overlay(instrumental_audio).export(output_path, format=output_format)
+    print(f"[+] Audio combined: {output_path}")
 
 @spaces.GPU
 def song_cover_pipeline(song_input, voice_model, pitch_change, keep_files,
@@ -241,6 +263,7 @@ def song_cover_pipeline(song_input, voice_model, pitch_change, keep_files,
                         reverb_rm_size=0.15, reverb_wet=0.2, reverb_dry=0.8, reverb_damping=0.7, output_format='mp3',
                         progress=gr.Progress()):
     try:
+        print("[~] Starting AI Cover Generation Pipeline...")
         if not song_input or not voice_model:
             raise_exception('Ensure that the song input field and voice model field is filled.', is_webui)
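A note on the first hunk: allow_fragments=True is already the default for urllib.parse.urlparse, so the rewritten call documents intent but should not change behavior. A quick check against one of the docstring's example URLs:

    from urllib.parse import urlparse

    # allow_fragments defaults to True, so both calls parse identically
    url = 'http://www.youtube.com/embed/SA2iWivDJiE'
    assert urlparse(url) == urlparse(url, allow_fragments=True)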
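Since the new [~]/[+] messages are bare print calls, they always write to stdout and can only be removed by editing the code again. Below is a minimal sketch of the same markers routed through the standard logging module, so the whole set can be silenced with one level change; the logger name and the log_step helper are illustrative, not part of this commit:

    import logging

    # One logger for all pipeline progress messages; raise the level to
    # logging.WARNING to mute them without touching the call sites.
    logging.basicConfig(format='%(message)s', level=logging.INFO)
    logger = logging.getLogger('song_cover_pipeline')

    def log_step(message):
        # Mirrors the prints in the diff: '[~]' marks a step starting, '[+]' a step done.
        logger.info(message)

    log_step('[~] Downloading YouTube audio...')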