Update app.py
app.py CHANGED
@@ -9,6 +9,7 @@ import torch
 from demucs import pretrained
 from demucs.apply import apply_model
 import torchaudio
+from pathlib import Path
 import matplotlib.pyplot as plt
 from io import BytesIO
 from PIL import Image
@@ -18,8 +19,8 @@ import librosa
 import warnings
 from faster_whisper import WhisperModel
 from TTS.api import TTS
-import pickle
 import base64
+import pickle
 
 # Suppress warnings
 warnings.filterwarnings("ignore")
@@ -156,39 +157,47 @@ def auto_eq(audio, genre="Pop"):
 
     return array_to_audiosegment(samples.astype(np.int16), sr, channels=audio.channels)
 
-# ===
-def
-…
+# === Vocal Isolation Helpers ===
+def load_track_local(path, sample_rate, channels=2):
+    sig, rate = torchaudio.load(path)
+    if rate != sample_rate:
+        sig = torchaudio.functional.resample(sig, rate, sample_rate)
+    if channels == 1:
+        sig = sig.mean(0)
+    return sig
+
+def save_track(path, wav, sample_rate):
+    path = Path(path)
+    torchaudio.save(str(path), wav, sample_rate)
+
+def apply_vocal_isolation(audio_path):
+    model = pretrained.get_model(name='htdemucs')
+    wav = load_track_local(audio_path, model.samplerate, channels=2)
+    ref = wav.mean(0)
+    wav -= ref[:, None]
+    sources = apply_model(model, wav[None])[0]
+    wav += ref[:, None]
+
+    vocal_track = sources[3].cpu()
+    out_path = os.path.join(tempfile.gettempdir(), "vocals.wav")
+    save_track(out_path, vocal_track, model.samplerate)
     return out_path
 
-# ===
-def
-…
+# === Stem Splitting (Drums, Bass, Other, Vocals) — Now Defined! ===
+def stem_split(audio_path):
+    model = pretrained.get_model(name='htdemucs')
+    wav = load_track_local(audio_path, model.samplerate, channels=2)
+    sources = apply_model(model, wav[None])[0]
+
+    output_dir = tempfile.mkdtemp()
+    stem_paths = []
 
-…
-    elif saturation_type == "Console":
-        saturated = np.clip(samples, -32768, 32768) * intensity
-    elif saturation_type == "Mix Bus":
-        saturated = np.log1p(np.abs(samples)) * np.sign(samples) * intensity
-    else:
-        saturated = samples
+    for i, name in enumerate(['drums', 'bass', 'other', 'vocals']):
+        path = os.path.join(output_dir, f"{name}.wav")
+        save_track(path, sources[i].cpu(), model.samplerate)
+        stem_paths.append(gr.File(value=path))
 
-    return
+    return stem_paths
 
 # === Process Audio Function ===
 def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, export_format):
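Review note on `apply_vocal_isolation` above: for a stereo tensor of shape (channels, time), `ref = wav.mean(0)` has shape (time,), so `wav -= ref[:, None]` tries to broadcast (time, 1) against (2, time) and should fail at runtime. A minimal sketch of the scalar normalization the demucs examples use instead (the helper name is mine, not from the commit):

```python
import torch
from demucs.apply import apply_model

def separate_normalized(model, wav: torch.Tensor) -> torch.Tensor:
    # wav: (channels, time) at model.samplerate.
    ref = wav.mean(0)                           # mono reference, shape (time,)
    norm = (wav - ref.mean()) / ref.std()       # scalar mean/std, stereo-safe
    sources = apply_model(model, norm[None])[0]
    return sources * ref.std() + ref.mean()     # undo normalization on the stems
```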
@@ -246,7 +255,7 @@ def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, export_format):
         status = f"❌ Error: {str(e)}"
         return None, None, status, "", status
 
-# === Waveform
+# === Visualize Waveform ===
 def show_waveform(audio_file):
     try:
         audio = AudioSegment.from_file(audio_file)
@@ -301,25 +310,60 @@ preset_choices = {
 
 preset_names = list(preset_choices.keys())
 
-# ===
-…
+# === Batch Processing Function ===
+def batch_process_audio(files, selected_effects, isolate_vocals, preset_name, export_format):
+    status = "🔄 Loading files..."
+    try:
+        output_dir = tempfile.mkdtemp()
+        results = []
+        session_logs = []
+
+        for file in files:
+            processed_path, _, log, _, _ = process_audio(file.name, selected_effects, isolate_vocals, preset_name, export_format)
+            results.append(processed_path)
+            session_logs.append(log)
+
+        zip_path = os.path.join(output_dir, "batch_output.zip")
+        with zipfile.ZipFile(zip_path, 'w') as zipf:
+            for i, res in enumerate(results):
+                filename = f"processed_{i}.{export_format.lower()}"
+                zipf.write(res, filename)
+                zipf.writestr(f"session_info_{i}.json", session_logs[i])
+
+        return zip_path, "📦 ZIP created successfully!"
+
+    except Exception as e:
+        return None, f"❌ Batch processing failed: {str(e)}"
+
+# === Vocal Pitch Correction — Auto-Tune Style ===
+def auto_tune_vocal(audio_path, target_key="C"):
+    try:
+        # Placeholder for real-time pitch detection
+        return apply_pitch_shift(AudioSegment.from_file(audio_path), 0.2)
+    except Exception as e:
+        return None
+
+# === Real-Time Spectrum Analyzer + Live EQ Preview ===
+def visualize_spectrum(audio_path):
+    y, sr = torchaudio.load(audio_path)
+    y_np = y.numpy().flatten()
+    stft = librosa.stft(y_np)
+    db = librosa.amplitude_to_db(abs(stft))
+
+    plt.figure(figsize=(10, 4))
+    img = librosa.display.specshow(db, sr=sr, x_axis="time", y_axis="hz", cmap="magma")
+    plt.colorbar(img, format="%+2.0f dB")
+    plt.title("Frequency Spectrum")
+    plt.tight_layout()
+    buf = BytesIO()
+    plt.savefig(buf, format="png")
+    plt.close()
+    buf.seek(0)
+    return Image.open(buf)
 
 # === Main UI ===
 with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
-    gr.HTML(
+    gr.HTML('<div class="studio-header"><img src="logo.png" width="400" /></div>')
     gr.Markdown("### Upload, edit, export — powered by AI!")
 
     with gr.Tab("🎵 Single File Studio"):
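Review note on `visualize_spectrum` above: `y.numpy().flatten()` on a (channels, time) tensor concatenates the channels end to end before the STFT, which garbles the spectrogram for stereo input, and `librosa.display` may need an explicit import depending on the librosa version. A sketch of a mono-mix variant (the helper name is mine):

```python
import librosa
import librosa.display  # explicit import; not always pulled in by `import librosa`
import torchaudio

def mono_db_spectrogram(audio_path):
    y, sr = torchaudio.load(audio_path)   # y: (channels, time)
    y_np = y.mean(0).numpy()              # average channels instead of flatten()
    stft = librosa.stft(y_np)
    return librosa.amplitude_to_db(abs(stft)), sr
```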
@@ -344,6 +388,23 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
             output_audio, waveform_img, session_log_out, genre_out, status_box
         ])
 
+    # --- Remix Mode ---
+    with gr.Tab("🔀 Remix Mode"):
+        gr.Interface(
+            fn=stem_split,
+            inputs=gr.Audio(label="Upload Music Track", type="filepath"),
+            outputs=[
+                gr.File(label="Vocals"),
+                gr.File(label="Drums"),
+                gr.File(label="Bass"),
+                gr.File(label="Other")
+            ],
+            title="Split Into Drums, Bass, Vocals, and More",
+            description="Use AI to separate musical elements like vocals, drums, and bass.",
+            flagging_mode="never",
+            clear_btn=None
+        )
+
     # --- AI Mastering Chain Tab ===
     with gr.Tab("🎧 AI Mastering Chain"):
         gr.Interface(
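Review note on the Remix Mode wiring: `stem_split` appends stems in htdemucs source order (drums, bass, other, vocals), while the outputs above are labeled Vocals, Drums, Bass, Other, so each file would land under the wrong label. A sketch of a reordering shim, assuming `stem_split` is changed to return plain paths (which `gr.File` outputs accept, making the `gr.File(value=...)` wrappers unnecessary):

```python
def stem_split_for_ui(audio_path):
    # Assumes the app's stem_split returns four plain paths in htdemucs
    # order: drums, bass, other, vocals.
    drums, bass, other, vocals = stem_split(audio_path)
    # Reorder to match the gr.File output labels: Vocals, Drums, Bass, Other.
    return vocals, drums, bass, other
```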
@@ -376,9 +437,19 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
     # --- Preset Cards Gallery ===
     with gr.Tab("🎨 Preset Gallery"):
         gr.Markdown("### Select a preset visually")
-        preset_gallery = gr.Gallery(value=
+        preset_gallery = gr.Gallery(value=[
+            ("images/pop_card.png", "Pop"),
+            ("images/edm_card.png", "EDM"),
+            ("images/rock_card.png", "Rock"),
+            ("images/hiphop_card.png", "Hip-Hop"),
+            ("images/acoustic_card.png", "Acoustic"),
+            ("images/stage_mode_card.png", "Stage Mode"),
+            ("images/vocal_distortion_card.png", "Vocal Distortion"),
+            ("images/tube_saturation_card.png", "Tube Saturation")
+        ], label="Preset Cards", columns=4, height="auto")
+
         preset_name_out = gr.Dropdown(choices=preset_names, label="Selected Preset")
-        preset_effects_out = gr.CheckboxGroup(choices=list(preset_choices
+        preset_effects_out = gr.CheckboxGroup(choices=list(preset_choices["Default"]), label="Effects")
 
         def load_preset_by_card(evt: gr.SelectData):
             index = evt.index % len(preset_names)
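Review note: `list(preset_choices["Default"])` assumes the dict actually contains a "Default" key (the dict's definition is outside this diff); if it does not, the tab fails to build with a KeyError. A defensive sketch with a stand-in dict for illustration:

```python
import gradio as gr

preset_choices = {"Pop": ["Compression", "Reverb"]}  # stand-in for the app's dict

# Fall back to the first preset's effect list if "Default" is absent.
default_effects = preset_choices.get("Default") or next(iter(preset_choices.values()), [])
preset_effects_out = gr.CheckboxGroup(choices=list(default_effects), label="Effects")
```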
@@ -397,23 +468,6 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
             description="Enhance vocals with doubling or harmony"
         )
 
-    # --- Remix Mode ---
-    with gr.Tab("🔀 Remix Mode"):
-        gr.Interface(
-            fn=stem_split,
-            inputs=gr.Audio(label="Upload Music Track", type="filepath"),
-            outputs=[
-                gr.File(label="Vocals"),
-                gr.File(label="Drums"),
-                gr.File(label="Bass"),
-                gr.File(label="Other")
-            ],
-            title="Split Into Drums, Bass, Vocals, and More",
-            description="Use AI to separate musical elements like vocals, drums, and bass.",
-            flagging_mode="never",
-            clear_btn=None
-        )
-
     # --- Batch Processing ---
     with gr.Tab("🔁 Batch Processing"):
         gr.Interface(
@@ -450,23 +504,6 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
         )
 
     # --- Real-Time Spectrum Analyzer + Live EQ Preview ===
-    def visualize_spectrum(audio_path):
-        y, sr = torchaudio.load(audio_path)
-        y_np = y.numpy().flatten()
-        stft = librosa.stft(y_np)
-        db = librosa.amplitude_to_db(abs(stft))
-
-        plt.figure(figsize=(10, 4))
-        img = librosa.display.specshow(db, sr=sr, x_axis="time", y_axis="hz", cmap="magma")
-        plt.colorbar(img, format="%+2.0f dB")
-        plt.title("Frequency Spectrum")
-        plt.tight_layout()
-        buf = BytesIO()
-        plt.savefig(buf, format="png")
-        plt.close()
-        buf.seek(0)
-        return Image.open(buf)
-
     with gr.Tab("📊 Frequency Spectrum"):
         gr.Interface(
             fn=visualize_spectrum,
@@ -490,6 +527,22 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
         )
 
     # --- Save/Load Mix Session (.aiproj) ===
+    def save_project(audio, preset, effects):
+        project_data = {
+            "audio": AudioSegment.from_file(audio).raw_data,
+            "preset": preset,
+            "effects": effects
+        }
+        out_path = os.path.join(tempfile.gettempdir(), "project.aiproj")
+        with open(out_path, "wb") as f:
+            pickle.dump(project_data, f)
+        return out_path
+
+    def load_project(project_file):
+        with open(project_file.name, "rb") as f:
+            data = pickle.load(f)
+        return data["preset"], data["effects"]
+
     with gr.Tab("📁 Save/Load Project"):
         gr.Interface(
             fn=save_project,
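Review note on the save/load pair: `load_project` unpickles whatever `.aiproj` file a user uploads, and `pickle.load` on untrusted data can execute arbitrary code. A sketch of a JSON container with the audio bytes base64-encoded instead (function names are mine; `base64` is already imported in this commit):

```python
import base64
import json
import os
import tempfile

def save_project_json(audio_path, preset, effects):
    # Same payload as save_project, but in a format that is safe to load.
    with open(audio_path, "rb") as f:
        audio_b64 = base64.b64encode(f.read()).decode("ascii")
    out_path = os.path.join(tempfile.gettempdir(), "project.aiproj")
    with open(out_path, "w") as f:
        json.dump({"audio": audio_b64, "preset": preset, "effects": effects}, f)
    return out_path

def load_project_json(project_file):
    # json.load cannot execute code, unlike pickle.load on a hostile file.
    with open(project_file.name) as f:
        data = json.load(f)
    return data["preset"], data["effects"]
```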