Update app.py
app.py
CHANGED
@@ -5,12 +5,10 @@ import numpy as np
 import tempfile
 import os
 import noisereduce as nr
-import json
 import torch
 from demucs import pretrained
 from demucs.apply import apply_model
 import torchaudio
-from pathlib import Path
 import matplotlib.pyplot as plt
 from io import BytesIO
 from PIL import Image
@@ -19,9 +17,8 @@ import datetime
 import librosa
 import warnings
 from faster_whisper import WhisperModel
-from mutagen.mp3 import MP3
-from mutagen.id3 import ID3, TIT2, TPE1, TALB, TYER
 from TTS.api import TTS
+import base64
 import pickle
 
 # Suppress warnings
@@ -58,17 +55,8 @@ def apply_reverb(audio):
 def apply_pitch_shift(audio, semitones=-2):
     new_frame_rate = int(audio.frame_rate * (2 ** (semitones / 12)))
     samples = np.array(audio.get_array_of_samples())
-    resampled = np.interp(
-
-        np.arange(len(samples)),
-        samples
-    ).astype(np.int16)
-    return AudioSegment(
-        resampled.tobytes(),
-        frame_rate=new_frame_rate,
-        sample_width=audio.sample_width,
-        channels=audio.channels
-    )
+    resampled = np.interp(np.arange(0, len(samples), 2 ** (semitones / 12)), np.arange(len(samples)), samples).astype(np.int16)
+    return AudioSegment(resampled.tobytes(), frame_rate=new_frame_rate, sample_width=audio.sample_width, channels=audio.channels)
 
 def apply_echo(audio, delay_ms=500, decay=0.5):
     echo = audio - 10
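The rewritten apply_pitch_shift steps through the sample array at a stride of 2^(semitones/12) and then reinterprets the result at a scaled frame rate, so both speed and pitch move together. For comparison, a duration-preserving shift is available from librosa's built-in; this is a sketch for reference, not part of the commit, and pitch_shift_wav is our name:

    import numpy as np
    import librosa

    def pitch_shift_wav(y: np.ndarray, sr: int, semitones: float) -> np.ndarray:
        # librosa time-stretches and resamples internally, so the output
        # keeps the original duration while only the pitch moves.
        return librosa.effects.pitch_shift(y=y, sr=sr, n_steps=semitones)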
@@ -138,41 +126,17 @@ def match_loudness(audio_path, target_lufs=-14.0):
     adjusted.export(out_path, format="wav")
     return out_path
 
-# === AI Mastering Chain – Genre EQ + Loudness Match + Limiting ===
-def ai_mastering_chain(audio_path, genre="Pop", target_lufs=-14.0):
-    audio = AudioSegment.from_file(audio_path)
-
-    # Apply Genre EQ
-    eq_audio = auto_eq(audio, genre=genre)
-
-    # Convert to numpy for loudness
-    samples, sr = audiosegment_to_array(eq_audio)
-
-    # Apply loudness normalization
-    meter = pyln.Meter(sr)
-    loudness = meter.integrated_loudness(samples.astype(np.float64) / 32768.0)
-    gain_db = target_lufs - loudness
-    final_audio = eq_audio + gain_db
-
-    # Apply final limiting
-    final_audio = apply_limiter(final_audio)
-
-    out_path = os.path.join(tempfile.gettempdir(), "mastered_output.wav")
-    final_audio.export(out_path, format="wav")
-    return out_path
-
 # === Auto-EQ per Genre ===
 def auto_eq(audio, genre="Pop"):
     eq_map = {
-        "Pop": [(200, 500, -3), (2000, 4000, +4)],
-        "EDM": [(60, 250, +6), (8000, 12000, +3)],
-        "Rock": [(1000, 3000, +4), (7000, 10000, -3)],
-        "Hip-Hop": [(20, 100, +6), (7000, 10000, -4)],
-        "Acoustic": [(100, 300, -3), (4000, 8000, +2)],
-        "Metal": [(100, 500, -4), (2000, 5000, +6), (7000, 12000, -3)],
-        "Trap": [(80, 120, +6), (3000, 6000, -4)],
-        "LoFi": [(20, 200, +3), (1000, 3000, -2)]
-        "Default": []
+        "Pop": [(200, 500, -3), (2000, 4000, +4)],
+        "EDM": [(60, 250, +6), (8000, 12000, +3)],
+        "Rock": [(1000, 3000, +4), (7000, 10000, -3)],
+        "Hip-Hop": [(20, 100, +6), (7000, 10000, -4)],
+        "Acoustic": [(100, 300, -3), (4000, 8000, +2)],
+        "Metal": [(100, 500, -4), (2000, 5000, +6), (7000, 12000, -3)],
+        "Trap": [(80, 120, +6), (3000, 6000, -4)],
+        "LoFi": [(20, 200, +3), (1000, 3000, -2)]
+    }
 
     from scipy.signal import butter, sosfilt
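Each eq_map entry is a list of (low_hz, high_hz, gain_db) bands that auto_eq applies with scipy's butter/sosfilt (the body sits in an unchanged part of the file). A minimal sketch of one such band, assuming float samples; apply_band_gain is a hypothetical helper, not taken from app.py:

    import numpy as np
    from scipy.signal import butter, sosfilt

    def apply_band_gain(samples: np.ndarray, sr: int, low_hz: float, high_hz: float, gain_db: float) -> np.ndarray:
        # Isolate the band with a band-pass filter, scale it, and mix it back
        # onto the original signal: a rough shelf-free "EQ band".
        sos = butter(2, [low_hz, high_hz], btype="band", fs=sr, output="sos")
        band = sosfilt(sos, samples.astype(np.float64))
        return samples + band * (10 ** (gain_db / 20) - 1)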
@@ -191,6 +155,23 @@ def auto_eq(audio, genre="Pop"):
 
     return array_to_audiosegment(samples.astype(np.int16), sr, channels=audio.channels)
 
+# === AI Mastering Chain – Genre EQ + Loudness Match + Limiting ===
+def ai_mastering_chain(audio_path, genre="Pop", target_lufs=-14.0):
+    audio = AudioSegment.from_file(audio_path)
+    eq_audio = auto_eq(audio, genre=genre)
+    samples, sr = audiosegment_to_array(eq_audio)
+
+    # Apply loudness normalization
+    meter = pyln.Meter(sr)
+    loudness = meter.integrated_loudness(samples.astype(np.float64) / 32768.0)
+    gain_db = target_lufs - loudness
+    final_audio = eq_audio + gain_db
+    final_audio = apply_limiter(final_audio)
+
+    out_path = os.path.join(tempfile.gettempdir(), "mastered_output.wav")
+    final_audio.export(out_path, format="wav")
+    return out_path
+
 # === Harmonic Saturation / Exciter ===
 def harmonic_saturation(audio, saturation_type="Tube", intensity=0.2):
     samples = np.array(audio.get_array_of_samples()).astype(np.float32)
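ai_mastering_chain measures integrated loudness with pyloudnorm, applies the gain difference to reach the target LUFS, then calls apply_limiter, which is defined in an unchanged part of the file. A minimal sketch of what such a peak limiter can look like in pydub, assuming the limit_dB convention used in the effect map below; apply_limiter_sketch is our name:

    from pydub import AudioSegment

    def apply_limiter_sketch(audio: AudioSegment, limit_dB: float = -1.0) -> AudioSegment:
        # If the peak exceeds the ceiling, pull the whole segment down so the
        # new peak sits exactly at limit_dB. A brick-wall trim, not a true
        # look-ahead limiter.
        if audio.max_dBFS > limit_dB:
            audio = audio.apply_gain(limit_dB - audio.max_dBFS)
        return audio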
@@ -208,81 +189,6 @@ def harmonic_saturation(audio, saturation_type="Tube", intensity=0.2):
 
     return array_to_audiosegment(saturated.astype(np.int16), audio.frame_rate, channels=audio.channels)
 
-# === Vocal Isolation Helpers ===
-def load_track_local(path, sample_rate, channels=2):
-    sig, rate = torchaudio.load(path)
-    if rate != sample_rate:
-        sig = torchaudio.functional.resample(sig, rate, sample_rate)
-    if channels == 1:
-        sig = sig.mean(0)
-    return sig
-
-def save_track(path, wav, sample_rate):
-    path = Path(path)
-    torchaudio.save(str(path), wav, sample_rate)
-
-def apply_vocal_isolation(audio_path):
-    model = pretrained.get_model(name='htdemucs')
-    wav = load_track_local(audio_path, model.samplerate, channels=2)
-    ref = wav.mean(0)
-    wav -= ref[:, None]
-    sources = apply_model(model, wav[None])[0]
-    wav += ref[:, None]
-
-    vocal_track = sources[3].cpu()
-    out_path = os.path.join(tempfile.gettempdir(), "vocals.wav")
-    save_track(out_path, vocal_track, model.samplerate)
-    return out_path
-
-# === Stem Splitting (Drums, Bass, Other, Vocals) ===
-def stem_split(audio_path):
-    model = pretrained.get_model(name='htdemucs')
-    wav = load_track_local(audio_path, model.samplerate, channels=2)
-    sources = apply_model(model, wav[None])[0]
-
-    output_dir = tempfile.mkdtemp()
-    stem_paths = []
-
-    for i, name in enumerate(['drums', 'bass', 'other', 'vocals']):
-        path = os.path.join(output_dir, f"{name}.wav")
-        save_track(path, sources[i].cpu(), model.samplerate)
-        stem_paths.append(gr.File(value=path))
-
-    return stem_paths
-
-# === Save/Load Project File (.aiproj) ===
-def save_project(vocals, drums, bass, other, vol_vocals, vol_drums, vol_bass, vol_other):
-    project_data = {
-        "vocals": AudioSegment.from_file(vocals).raw_data,
-        "drums": AudioSegment.from_file(drums).raw_data,
-        "bass": AudioSegment.from_file(bass).raw_data,
-        "other": AudioSegment.from_file(other).raw_data,
-        "volumes": {
-            "vocals": vol_vocals,
-            "drums": vol_drums,
-            "bass": vol_bass,
-            "other": vol_other
-        }
-    }
-    out_path = os.path.join(tempfile.gettempdir(), "mix_session.aiproj")
-    with open(out_path, "wb") as f:
-        pickle.dump(project_data, f)
-    return out_path
-
-def load_project(project_file):
-    with open(project_file.name, "rb") as f:
-        data = pickle.load(f)
-    return (
-        array_to_audiosegment(data["vocals"], 44100),
-        array_to_audiosegment(data["drums"], 44100),
-        array_to_audiosegment(data["bass"], 44100),
-        array_to_audiosegment(data["other"], 44100),
-        data["volumes"]["vocals"],
-        data["volumes"]["drums"],
-        data["volumes"]["bass"],
-        data["volumes"]["other"]
-    )
-
 # === Process Audio Function ===
 def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, export_format):
     status = "🔊 Loading audio..."
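The deleted vocal-isolation and stem-split helpers follow the standard Demucs pattern: load, resample to the model rate, run apply_model, and index the returned stems. A condensed equivalent for reference; separate_stems is our name, and model.sources is the model's own stem ordering:

    import torch
    import torchaudio
    from demucs import pretrained
    from demucs.apply import apply_model

    def separate_stems(path: str) -> dict:
        model = pretrained.get_model(name="htdemucs")
        wav, sr = torchaudio.load(path)
        wav = torchaudio.functional.resample(wav, sr, model.samplerate)
        with torch.no_grad():
            sources = apply_model(model, wav[None])[0]  # (stem, channel, time)
        return dict(zip(model.sources, sources))        # e.g. {"vocals": tensor, ...}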
@@ -302,7 +208,6 @@ def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, export_format):
         "Normalize": apply_normalize,
         "Noise Gate": lambda x: apply_noise_gate(x, threshold=-50.0),
         "Limiter": lambda x: apply_limiter(x, limit_dB=-1),
-        "Phaser": lambda x: apply_phaser(x),
         "Flanger": lambda x: apply_phaser(x, rate=1.2, depth=0.9, mix=0.7),
         "Bitcrusher": lambda x: apply_bitcrush(x, bit_depth=8),
         "Auto Gain": lambda x: apply_auto_gain(x, target_dB=-20),
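The effect map pairs a display name with a one-argument callable, so applying the user's selection is a left-to-right fold over the chosen names. A sketch of that dispatch pattern; apply_chain is a hypothetical helper, not taken from app.py:

    def apply_chain(audio, selected_names, effect_map):
        # Apply each selected effect in order; unknown names pass through.
        for name in selected_names:
            audio = effect_map.get(name, lambda x: x)(audio)
        return audio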
@@ -340,7 +245,7 @@ def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, export_format):
         status = f"❌ Error: {str(e)}"
         return None, None, status, "", status
 
-# === Waveform
+# === Visualize Waveform ===
 def show_waveform(audio_file):
     try:
         audio = AudioSegment.from_file(audio_file)
@@ -364,7 +269,6 @@ def detect_genre(audio_path):
     except Exception:
         return "Unknown"
 
-# === Session Info Export ===
 def generate_session_log(audio_path, effects, isolate_vocals, export_format, genre):
     log = {
         "timestamp": str(datetime.datetime.now()),
@@ -396,34 +300,50 @@ preset_choices = {
 
 preset_names = list(preset_choices.keys())
 
-# ===
+# === Preset Cards Gallery ===
+def get_preset_cards():
+    card_paths = []
+    for name in preset_names:
+        card_paths.append(f"https://via.placeholder.com/150x100?text={name}")
+    return card_paths
+
+# === Load Preset by Name ===
+def load_preset_by_card(name_index):
+    name = preset_names[name_index]
+    return name, preset_choices[name]
+
+# === Logo Embedding (Base64 or file) ===
+def get_logo():
+    try:
+        with open("logo.png", "rb") as img_file:
+            return "data:image/png;base64," + base64.b64encode(img_file.read()).decode()
+    except FileNotFoundError:
+        return "https://via.placeholder.com/400x100?text=AI+Audio+Studio"
+
+# === Main UI ===
 with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
-    gr.
+    gr.HTML(f'<div class="studio-header"><img src="{get_logo()}" width="400" /></div>')
+    gr.Markdown("### Upload, edit, export — powered by AI!")
 
-    # --- Single File Studio ---
     with gr.Tab("🎵 Single File Studio"):
-        gr.
-
-
-        gr.
-
-        gr.
-        gr.Dropdown(choices=
-        gr.
-
-
-        gr.Audio(label="Processed Audio", type="filepath")
-        gr.Image(label="Waveform Preview")
-        gr.Textbox(label="
-        gr.Textbox(label="
-
-
-
-
-        flagging_mode="never",
-        submit_btn="Process Audio",
-        clear_btn=None
-        )
+        with gr.Row():
+            with gr.Column(min_width=300):
+                input_audio = gr.Audio(label="Upload Audio", type="filepath")
+                effect_checkbox = gr.CheckboxGroup(choices=preset_choices.get("Default", []),
+                                                   label="Apply Effects in Order")
+                preset_dropdown = gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0])
+                export_format = gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
+                isolate_vocals = gr.Checkbox(label="Isolate Vocals After Effects")
+                submit_btn = gr.Button("Process Audio")
+            with gr.Column(min_width=300):
+                output_audio = gr.Audio(label="Processed Audio", type="filepath")
+                waveform_img = gr.Image(label="Waveform Preview")
+                genre_out = gr.Textbox(label="Detected Genre")
+                status_box = gr.Textbox(label="Status", value="✅ Ready", lines=1)
+
+        submit_btn.click(fn=process_audio, inputs=[
+            input_audio, effect_checkbox, isolate_vocals, preset_dropdown, export_format
+        ], outputs=[output_audio, waveform_img, _, genre_out, status_box])
 
 # --- AI Mastering Chain Tab ===
 with gr.Tab("🎧 AI Mastering Chain"):
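One catch in the new wiring: the bare `_` in the outputs list is not a defined component, so building the Blocks graph would raise a NameError. Every slot in outputs must be a Gradio component. A hypothetical fix is to give that return value (the status/log string from process_audio) its own textbox; session_log_box is our name:

    session_log_box = gr.Textbox(label="Session Log", visible=False)

    submit_btn.click(fn=process_audio, inputs=[
        input_audio, effect_checkbox, isolate_vocals, preset_dropdown, export_format
    ], outputs=[output_audio, waveform_img, session_log_box, genre_out, status_box])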
@@ -454,6 +374,44 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
             description="Enhance clarity and presence using saturation styles like Tube or Tape."
         )
 
+    # --- Preset Cards Gallery ===
+    with gr.Tab("🎛 Preset Gallery"):
+        gr.Markdown("### Select a preset visually")
+
+        preset_images = [
+            ("https://via.placeholder.com/150x100?text=Pop", "Pop"),
+            ("https://via.placeholder.com/150x100?text=EDM", "EDM"),
+            ("https://via.placeholder.com/150x100?text=Rock", "Rock"),
+            ("https://via.placeholder.com/150x100?text=Hip-Hop", "Hip-Hop"),
+            ("https://via.placeholder.com/150x100?text=Acoustic", "Acoustic"),
+            ("https://via.placeholder.com/150x100?text=Tube+Saturation", "Tube"),
+            ("https://via.placeholder.com/150x100?text=Stage+Mode", "Stage Mode"),
+            ("https://via.placeholder.com/150x100?text=Vocal+Distortion", "Vocal Distortion")
+        ]
+
+        preset_gallery = gr.Gallery(value=preset_images, label="Preset Cards", columns=4, height="auto")
+        preset_name_out = gr.Dropdown(choices=preset_names, label="Selected Preset")
+        preset_effects_out = gr.CheckboxGroup(choices=[e for e in preset_choices["Default"]], label="Effects")
+
+        def select_preset(evt: gr.SelectData):
+            selected = evt.index
+            name = preset_names[selected % len(preset_names)]
+            effects = preset_choices.get(name, [])
+            return name, effects
+
+        preset_gallery.select(fn=select_preset, inputs=[], outputs=[preset_name_out, preset_effects_out])
+
+    # --- Vocal Doubler / Harmonizer ===
+    with gr.Tab("🎧 Vocal Doubler / Harmonizer"):
+        gr.Interface(
+            fn=lambda x: apply_harmony(x),
+            inputs=gr.Audio(label="Upload Vocal Clip", type="filepath"),
+            outputs=gr.Audio(label="Doubled Output", type="filepath"),
+            title="Add Vocal Doubling / Harmony",
+            description="Enhance vocals with doubling or harmony",
+            allow_flagging="never"
+        )
+
     # --- Remix Mode ---
     with gr.Tab("🎛 Remix Mode"):
         gr.Interface(
@@ -506,37 +464,14 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
             description="Correct vocal pitch automatically"
         )
 
-    # --- Create Karaoke Video from Audio + Lyrics ===
-    with gr.Tab("📹 Create Karaoke Video"):
-        gr.Interface(
-            fn=create_karaoke_video,
-            inputs=[
-                gr.Audio(label="Upload Track", type="filepath"),
-                gr.Textbox(label="Lyrics", lines=10),
-                gr.File(label="Background (Optional)")
-            ],
-            outputs=gr.Video(label="Karaoke Video"),
-            title="Make Karaoke Videos from Audio + Lyrics",
-            description="Generate karaoke-style videos with real-time sync."
-        )
-
-    # --- Vocal Doubler / Harmonizer ===
-    with gr.Tab("🎧 Vocal Doubler / Harmonizer"):
-        gr.Interface(
-            fn=vocal_doubler,
-            inputs=gr.Audio(label="Upload Vocal Clip", type="filepath"),
-            outputs=gr.Audio(label="Doubled Output", type="filepath"),
-            title="Add Vocal Doubling / Harmony",
-            description="Enhance vocals with doubling or harmony"
-        )
-
     # --- Real-Time Spectrum Analyzer + Live EQ Preview ===
-    with gr.Tab("📊
+    with gr.Tab("📊 Frequency Spectrum"):
         gr.Interface(
             fn=visualize_spectrum,
             inputs=gr.Audio(label="Upload Track", type="filepath"),
             outputs=gr.Image(label="Spectrum Analysis"),
-            title="
+            title="Real-Time Spectrum Analyzer",
+            description="See the frequency breakdown of your audio"
         )
 
     # --- Loudness Graph Tab ===
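The spectrum tab's visualize_spectrum (its duplicated definition is deleted further down; a copy presumably remains in an unchanged part of the file) renders through librosa.display.specshow. Worth noting: in most librosa versions the display submodule is not imported automatically, so the file needs an explicit import for that call to resolve:

    import librosa.display  # `import librosa` alone does not expose specshow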
@@ -567,40 +502,28 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
         )
 
     # --- Save/Load Mix Session (.aiproj) ===
-    with gr.Tab("📁 Save/Load
+    with gr.Tab("📁 Save/Load Project"):
         gr.Interface(
             fn=save_project,
             inputs=[
-                gr.File(label="
-                gr.
-                gr.
-                gr.File(label="Other"),
-                gr.Slider(minimum=-10, maximum=10, value=0, label="Vocals Volume"),
-                gr.Slider(minimum=-10, maximum=10, value=0, label="Drums Volume"),
-                gr.Slider(minimum=-10, maximum=10, value=0, label="Bass Volume"),
-                gr.Slider(minimum=-10, maximum=10, value=0, label="Other Volume")
+                gr.File(label="Original Audio"),
+                gr.Dropdown(choices=preset_names, label="Used Preset", value=preset_names[0]),
+                gr.CheckboxGroup(choices=[e for e in preset_choices.get("Default", [])], label="Applied Effects")
             ],
             outputs=gr.File(label="Project File (.aiproj)"),
-            title="Save
-            description="Save
+            title="Save Everything Together",
+            description="Save your session, effects, and settings in one file to reuse later."
         )
 
         gr.Interface(
             fn=load_project,
             inputs=gr.File(label="Upload .aiproj File"),
             outputs=[
-                gr.
-                gr.
-                gr.File(label="Bass"),
-                gr.File(label="Other"),
-                gr.Slider(label="Vocals Volume"),
-                gr.Slider(label="Drums Volume"),
-                gr.Slider(label="Bass Volume"),
-                gr.Slider(label="Other Volume")
+                gr.Dropdown(choices=preset_names, label="Loaded Preset"),
+                gr.CheckboxGroup(choices=[e for e in preset_choices.get("Default", [])], label="Loaded Effects")
             ],
-            title="Resume Last
-            description="Load saved
-            allow_flagging="never"
+            title="Resume Last Project",
+            description="Load your saved session"
         )
 
     # --- Prompt-Based Editing Tab ===
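The tab now feeds save_project three inputs (file, preset name, effect list) and expects load_project to return two values, but the old eight-argument save_project/load_project pair was deleted in the `@@ -208,81` hunk above, so these fn= references dangle. Hypothetical replacements matching the new UI shape; the names and fields below are ours, inferred from the components:

    import os
    import pickle
    import tempfile

    def save_project(original_audio, preset_name, applied_effects):
        data = {
            "audio_path": getattr(original_audio, "name", None),
            "preset": preset_name,
            "effects": applied_effects,
        }
        out_path = os.path.join(tempfile.gettempdir(), "mix_session.aiproj")
        with open(out_path, "wb") as f:
            pickle.dump(data, f)
        return out_path

    def load_project(project_file):
        with open(project_file.name, "rb") as f:
            data = pickle.load(f)
        return data["preset"], data["effects"]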
@@ -651,144 +574,4 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
             allow_flagging="never"
         )
 
-    # --- Vocal Pitch Correction – Auto-Tune Style ===
-    def auto_tune_vocal(audio_path, target_key="C"):
-        try:
-            # Placeholder for real-time pitch detection
-            return apply_pitch_shift(AudioSegment.from_file(audio_path), 0.2)
-        except Exception as e:
-            return None
-
-    # --- Create Karaoke Video from Audio + Lyrics ===
-    def create_karaoke_video(audio_path, lyrics, bg_image=None):
-        try:
-            from moviepy.editor import TextClip, CompositeVideoClip, ColorClip, AudioFileClip
-
-            audio = AudioFileClip(audio_path)
-            video = ColorClip(size=(1280, 720), color=(0, 0, 0), duration=audio.duration_seconds)
-            words = [(word.strip(), i * 3, (i+1)*3) for i, word in enumerate(lyrics.split())]
-
-            text_clips = [
-                TextClip(word, fontsize=60, color='white').set_position('center').set_duration(end - start).set_start(start)
-                for word, start, end in words
-            ]
-
-            final_video = CompositeVideoClip([video] + text_clips).set_audio(audio)
-            out_path = os.path.join(tempfile.gettempdir(), "karaoke.mp4")
-            final_video.write_videofile(out_path, codec="libx264", audio_codec="aac")
-            return out_path
-        except Exception as e:
-            return f"⚠️ Failed: {str(e)}"
-
-    # --- Vocal Doubler / Harmonizer ===
-    def vocal_doubler(audio):
-        shifted_up = apply_pitch_shift(audio, 0.3)
-        shifted_down = apply_pitch_shift(audio, -0.3)
-        return audio.overlay(shifted_up).overlay(shifted_down)
-
-    # --- AI Suggest Preset Based on Genre ===
-    def suggest_preset_by_genre(audio_path):
-        try:
-            y, sr = torchaudio.load(audio_path)
-            mfccs = librosa.feature.mfcc(y=y.numpy().flatten(), sr=sr, n_mfcc=13).mean(axis=1).reshape(1, -1)
-            return ["Vocal Clarity", "Limiter", "Stereo Expansion"]
-        except Exception:
-            return ["Default"]
-
-    # --- AI Suggest Preset Based on Genre ===
-    with gr.Tab("🧠 AI Suggest Preset"):
-        gr.Interface(
-            fn=suggest_preset_by_genre,
-            inputs=gr.Audio(label="Upload Track", type="filepath"),
-            outputs=gr.Dropdown(choices=preset_names, label="Recommended Preset"),
-            title="Let AI Recommend Best Preset",
-            description="Upload a track and let AI recommend the best preset based on genre."
-        )
-
-    # --- Prompt-Based Editing ===
-    def process_prompt(audio_path, prompt):
-        audio = AudioSegment.from_file(audio_path)
-
-        if "noise" in prompt.lower() or "clean" in prompt.lower():
-            audio = apply_noise_reduction(audio)
-
-        if "normalize" in prompt.lower() or "loud" in prompt.lower():
-            audio = apply_normalize(audio)
-
-        if "bass" in prompt.lower() and ("boost" in prompt.lower()):
-            audio = apply_bass_boost(audio)
-
-        if "treble" in prompt.lower() or "high" in prompt.lower():
-            audio = apply_treble_boost(audio)
-
-        if "echo" in prompt.lower() or "reverb" in prompt.lower():
-            audio = apply_reverb(audio)
-
-        if "pitch" in prompt.lower() and "correct" in prompt.lower():
-            audio = apply_pitch_correction(audio)
-
-        if "harmony" in prompt.lower() or "double" in prompt.lower():
-            audio = apply_harmony(audio)
-
-        out_path = os.path.join(tempfile.gettempdir(), "prompt_output.wav")
-        audio.export(out_path, format="wav")
-        return out_path
-
-    # --- Prompt-Based Editing Tab ===
-    with gr.Tab("🧠 Prompt-Based Editing"):
-        gr.Interface(
-            fn=process_prompt,
-            inputs=[
-                gr.File(label="Upload Audio", type="filepath"),
-                gr.Textbox(label="Describe What You Want", lines=5)
-            ],
-            outputs=gr.Audio(label="Edited Output", type="filepath"),
-            title="Type Your Edits – AI Does the Rest",
-            description="Say what you want done and let AI handle it.",
-            allow_flagging="never"
-        )
-
-    # --- Vocal Pitch Correction (Auto-Tune) ===
-    def apply_pitch_correction(audio, target_key="C"):
-        return apply_pitch_shift(audio, 0.2)
-
-    with gr.Tab("🧬 Vocal Pitch Correction"):
-        gr.Interface(
-            fn=auto_tune_vocal,
-            inputs=[
-                gr.File(label="Source Voice Clip"),
-                gr.Textbox(label="Target Key", value="C", lines=1)
-            ],
-            outputs=gr.Audio(label="Pitch-Corrected Output", type="filepath"),
-            title="Auto-Tune Style Pitch Correction",
-            description="Correct vocal pitch automatically"
-        )
-
-    # --- Real-Time Spectrum Analyzer + EQ Preview ===
-    def visualize_spectrum(audio_path):
-        y, sr = torchaudio.load(audio_path)
-        y_np = y.numpy().flatten()
-        stft = librosa.stft(y_np)
-        db = librosa.amplitude_to_db(abs(stft))
-
-        plt.figure(figsize=(10, 4))
-        img = librosa.display.specshow(db, sr=sr, x_axis="time", y_axis="hz", cmap="magma")
-        plt.colorbar(img, format="%+2.0f dB")
-        plt.title("Frequency Spectrum")
-        plt.tight_layout()
-        buf = BytesIO()
-        plt.savefig(buf, format="png")
-        plt.close()
-        buf.seek(0)
-        return Image.open(buf)
-
-    with gr.Tab("📊 Frequency Spectrum"):
-        gr.Interface(
-            fn=visualize_spectrum,
-            inputs=gr.Audio(label="Upload Track", type="filepath"),
-            outputs=gr.Image(label="Spectrum Analysis"),
-            title="Real-Time Spectrum Analyzer",
-            description="See the frequency breakdown of your audio"
-        )
-
 demo.launch()