tee342 committed
Commit 94c3b1e · verified · 1 Parent(s): aa065d9

Update app.py
Files changed (1)
  1. app.py +136 -83
app.py CHANGED
@@ -9,6 +9,7 @@ import torch
 from demucs import pretrained
 from demucs.apply import apply_model
 import torchaudio
+from pathlib import Path
 import matplotlib.pyplot as plt
 from io import BytesIO
 from PIL import Image
@@ -18,8 +19,8 @@ import librosa
 import warnings
 from faster_whisper import WhisperModel
 from TTS.api import TTS
-import pickle
 import base64
+import pickle
 
 # Suppress warnings
 warnings.filterwarnings("ignore")
@@ -156,39 +157,47 @@ def auto_eq(audio, genre="Pop"):
 
     return array_to_audiosegment(samples.astype(np.int16), sr, channels=audio.channels)
 
-# === AI Mastering Chain – Genre EQ + Loudness Match + Limiting ===
-def ai_mastering_chain(audio_path, genre="Pop", target_lufs=-14.0):
-    audio = AudioSegment.from_file(audio_path)
-    eq_audio = auto_eq(audio, genre=genre)
-    samples, sr = audiosegment_to_array(eq_audio)
-
-    # Apply loudness normalization
-    meter = pyln.Meter(sr)
-    loudness = meter.integrated_loudness(samples.astype(np.float64) / 32768.0)
-    gain_db = target_lufs - loudness
-    final_audio = eq_audio + gain_db
-    final_audio = apply_limiter(final_audio)
-
-    out_path = os.path.join(tempfile.gettempdir(), "mastered_output.wav")
-    final_audio.export(out_path, format="wav")
+# === Vocal Isolation Helpers ===
+def load_track_local(path, sample_rate, channels=2):
+    sig, rate = torchaudio.load(path)
+    if rate != sample_rate:
+        sig = torchaudio.functional.resample(sig, rate, sample_rate)
+    if channels == 1:
+        sig = sig.mean(0)
+    return sig
+
+def save_track(path, wav, sample_rate):
+    path = Path(path)
+    torchaudio.save(str(path), wav, sample_rate)
+
+def apply_vocal_isolation(audio_path):
+    model = pretrained.get_model(name='htdemucs')
+    wav = load_track_local(audio_path, model.samplerate, channels=2)
+    ref = wav.mean(0)
+    wav -= ref[:, None]
+    sources = apply_model(model, wav[None])[0]
+    wav += ref[:, None]
+
+    vocal_track = sources[3].cpu()
+    out_path = os.path.join(tempfile.gettempdir(), "vocals.wav")
+    save_track(out_path, vocal_track, model.samplerate)
     return out_path
 
-# === Harmonic Saturation / Exciter ===
-def harmonic_saturation(audio, saturation_type="Tube", intensity=0.2):
-    samples = np.array(audio.get_array_of_samples()).astype(np.float32)
-
-    if saturation_type == "Tube":
-        saturated = np.tanh(intensity * samples)
-    elif saturation_type == "Tape":
-        saturated = np.where(samples > 0, 1 - np.exp(-intensity * samples), -1 + np.exp(intensity * samples))
-    elif saturation_type == "Console":
-        saturated = np.clip(samples, -32768, 32768) * intensity
-    elif saturation_type == "Mix Bus":
-        saturated = np.log1p(np.abs(samples)) * np.sign(samples) * intensity
-    else:
-        saturated = samples
-
-    return array_to_audiosegment(saturated.astype(np.int16), audio.frame_rate, channels=audio.channels)
+# === Stem Splitting (Drums, Bass, Other, Vocals) – Now Defined! ===
+def stem_split(audio_path):
+    model = pretrained.get_model(name='htdemucs')
+    wav = load_track_local(audio_path, model.samplerate, channels=2)
+    sources = apply_model(model, wav[None])[0]
+
+    output_dir = tempfile.mkdtemp()
+    stem_paths = []
+
+    for i, name in enumerate(['drums', 'bass', 'other', 'vocals']):
+        path = os.path.join(output_dir, f"{name}.wav")
+        save_track(path, sources[i].cpu(), model.samplerate)
+        stem_paths.append(gr.File(value=path))
+
+    return stem_paths
 
 # === Process Audio Function ===
 def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, export_format):
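Review note on `apply_vocal_isolation` above: `wav -= ref[:, None]` subtracts a `[time, 1]` tensor from the `[channels, time]` mix, which PyTorch cannot broadcast, so the call raises at runtime. A minimal corrected sketch using the usual Demucs scalar normalization (assuming the `load_track_local` and `save_track` helpers from this commit):

```python
import os
import tempfile
from demucs import pretrained
from demucs.apply import apply_model

def apply_vocal_isolation_fixed(audio_path):
    model = pretrained.get_model(name='htdemucs')
    wav = load_track_local(audio_path, model.samplerate, channels=2)

    ref = wav.mean(0)                       # mono reference, shape [time]
    wav = (wav - ref.mean()) / ref.std()    # normalize with scalars, not tensors
    sources = apply_model(model, wav[None])[0]
    sources = sources * ref.std() + ref.mean()  # undo the normalization

    vocal_track = sources[3].cpu()          # htdemucs order: drums, bass, other, vocals
    out_path = os.path.join(tempfile.gettempdir(), "vocals.wav")
    save_track(out_path, vocal_track, model.samplerate)
    return out_path
```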
@@ -246,7 +255,7 @@ def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, export_format):
         status = f"❌ Error: {str(e)}"
         return None, None, status, "", status
 
-# === Waveform + Spectrogram Generator ===
+# === Visualize Waveform ===
 def show_waveform(audio_file):
     try:
         audio = AudioSegment.from_file(audio_file)
@@ -301,25 +310,60 @@ preset_choices = {
 
 preset_names = list(preset_choices.keys())
 
-# === Preset Cards Gallery ===
-preset_cards = [
-    ("images/pop_card.png", "Pop"),
-    ("images/edm_card.png", "EDM"),
-    ("images/rock_card.png", "Rock"),
-    ("images/hiphop_card.png", "Hip-Hop"),
-    ("images/acoustic_card.png", "Acoustic"),
-    ("images/stage_mode_card.png", "Stage Mode"),
-    ("images/vocal_distortion_card.png", "Vocal Distortion"),
-    ("images/tube_saturation_card.png", "Tube Saturation")
-]
-
-# === Logo Embedding (Base64 or file) ===
-def get_logo():
-    return "logo.png"
+# === Batch Processing Function ===
+def batch_process_audio(files, selected_effects, isolate_vocals, preset_name, export_format):
+    status = "🔊 Loading files..."
+    try:
+        output_dir = tempfile.mkdtemp()
+        results = []
+        session_logs = []
+
+        for file in files:
+            processed_path, _, log, _, _ = process_audio(file.name, selected_effects, isolate_vocals, preset_name, export_format)
+            results.append(processed_path)
+            session_logs.append(log)
+
+        zip_path = os.path.join(output_dir, "batch_output.zip")
+        with zipfile.ZipFile(zip_path, 'w') as zipf:
+            for i, res in enumerate(results):
+                filename = f"processed_{i}.{export_format.lower()}"
+                zipf.write(res, filename)
+                zipf.writestr(f"session_info_{i}.json", session_logs[i])
+
+        return zip_path, "📦 ZIP created successfully!"
+
+    except Exception as e:
+        return None, f"❌ Batch processing failed: {str(e)}"
+
+# === Vocal Pitch Correction – Auto-Tune Style ===
+def auto_tune_vocal(audio_path, target_key="C"):
+    try:
+        # Placeholder for real-time pitch detection
+        return apply_pitch_shift(AudioSegment.from_file(audio_path), 0.2)
+    except Exception as e:
+        return None
+
+# === Real-Time Spectrum Analyzer + Live EQ Preview ===
+def visualize_spectrum(audio_path):
+    y, sr = torchaudio.load(audio_path)
+    y_np = y.numpy().flatten()
+    stft = librosa.stft(y_np)
+    db = librosa.amplitude_to_db(abs(stft))
+
+    plt.figure(figsize=(10, 4))
+    img = librosa.display.specshow(db, sr=sr, x_axis="time", y_axis="hz", cmap="magma")
+    plt.colorbar(img, format="%+2.0f dB")
+    plt.title("Frequency Spectrum")
+    plt.tight_layout()
+    buf = BytesIO()
+    plt.savefig(buf, format="png")
+    plt.close()
+    buf.seek(0)
+    return Image.open(buf)
 
 # === Main UI ===
 with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
-    gr.HTML(f'<div class="studio-header"><img src="{get_logo()}" width="400" /></div>')
+    gr.HTML('<div class="studio-header"><img src="logo.png" width="400" /></div>')
     gr.Markdown("### Upload, edit, export — powered by AI!")
 
     with gr.Tab("🎵 Single File Studio"):
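Review note on the hunk above: `visualize_spectrum` flattens the `[channels, time]` tensor from `torchaudio.load`, which concatenates the channels of a stereo file back-to-back and doubles the apparent duration; averaging to mono is the usual fix. `librosa.display` also needs an explicit submodule import in many librosa versions, and `batch_process_audio` relies on `zipfile`, which must be imported elsewhere in app.py (not visible in this diff). A hedged sketch of the mono fix:

```python
import librosa
import librosa.display  # explicit import; `import librosa` alone may not expose it
import torchaudio

def load_mono_for_spectrum(audio_path):
    # torchaudio.load returns ([channels, time], sample_rate)
    y, sr = torchaudio.load(audio_path)
    return y.mean(0).numpy(), sr  # average the channels instead of flatten()
```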
@@ -344,6 +388,23 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
             output_audio, waveform_img, session_log_out, genre_out, status_box
         ])
 
+    # --- Remix Mode ---
+    with gr.Tab("🎛 Remix Mode"):
+        gr.Interface(
+            fn=stem_split,
+            inputs=gr.Audio(label="Upload Music Track", type="filepath"),
+            outputs=[
+                gr.File(label="Vocals"),
+                gr.File(label="Drums"),
+                gr.File(label="Bass"),
+                gr.File(label="Other")
+            ],
+            title="Split Into Drums, Bass, Vocals, and More",
+            description="Use AI to separate musical elements like vocals, drums, and bass.",
+            flagging_mode="never",
+            clear_btn=None
+        )
+
     # --- AI Mastering Chain Tab ===
     with gr.Tab("🎧 AI Mastering Chain"):
         gr.Interface(
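Review note: `stem_split` writes and returns the stems in htdemucs source order (drums, bass, other, vocals), but the outputs above are labeled Vocals, Drums, Bass, Other, so the downloaded files end up mislabeled. Reordering the labels to match the return order would fix it:

```python
outputs=[
    gr.File(label="Drums"),
    gr.File(label="Bass"),
    gr.File(label="Other"),
    gr.File(label="Vocals")
],
```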
@@ -376,9 +437,19 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
     # --- Preset Cards Gallery ===
     with gr.Tab("🎛 Preset Gallery"):
         gr.Markdown("### Select a preset visually")
-        preset_gallery = gr.Gallery(value=preset_cards, label="Preset Cards", columns=4, height="auto")
+        preset_gallery = gr.Gallery(value=[
+            ("images/pop_card.png", "Pop"),
+            ("images/edm_card.png", "EDM"),
+            ("images/rock_card.png", "Rock"),
+            ("images/hiphop_card.png", "Hip-Hop"),
+            ("images/acoustic_card.png", "Acoustic"),
+            ("images/stage_mode_card.png", "Stage Mode"),
+            ("images/vocal_distortion_card.png", "Vocal Distortion"),
+            ("images/tube_saturation_card.png", "Tube Saturation")
+        ], label="Preset Cards", columns=4, height="auto")
+
         preset_name_out = gr.Dropdown(choices=preset_names, label="Selected Preset")
-        preset_effects_out = gr.CheckboxGroup(choices=list(preset_choices.keys())[0:], label="Effects")
+        preset_effects_out = gr.CheckboxGroup(choices=list(preset_choices["Default"]), label="Effects")
 
         def load_preset_by_card(evt: gr.SelectData):
             index = evt.index % len(preset_names)
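Review note: `list(preset_choices["Default"])` assumes preset_choices contains a "Default" key, which this diff does not show. A fallback that cannot raise KeyError (hypothetical replacement, not in the commit):

```python
# Use the "Default" entry if present, otherwise the first preset's effect list.
default_effects = preset_choices.get("Default", next(iter(preset_choices.values())))
preset_effects_out = gr.CheckboxGroup(choices=list(default_effects), label="Effects")
```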
@@ -397,23 +468,6 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
             description="Enhance vocals with doubling or harmony"
         )
 
-    # --- Remix Mode ---
-    with gr.Tab("🎛 Remix Mode"):
-        gr.Interface(
-            fn=stem_split,
-            inputs=gr.Audio(label="Upload Music Track", type="filepath"),
-            outputs=[
-                gr.File(label="Vocals"),
-                gr.File(label="Drums"),
-                gr.File(label="Bass"),
-                gr.File(label="Other")
-            ],
-            title="Split Into Drums, Bass, Vocals, and More",
-            description="Use AI to separate musical elements like vocals, drums, and bass.",
-            flagging_mode="never",
-            clear_btn=None
-        )
-
     # --- Batch Processing ---
     with gr.Tab("🔊 Batch Processing"):
         gr.Interface(
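(The block removed here is the Remix Mode tab, relocated to just after the Single File Studio tab in the +388 hunk above.)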
@@ -450,23 +504,6 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
         )
 
     # --- Real-Time Spectrum Analyzer + Live EQ Preview ===
-    def visualize_spectrum(audio_path):
-        y, sr = torchaudio.load(audio_path)
-        y_np = y.numpy().flatten()
-        stft = librosa.stft(y_np)
-        db = librosa.amplitude_to_db(abs(stft))
-
-        plt.figure(figsize=(10, 4))
-        img = librosa.display.specshow(db, sr=sr, x_axis="time", y_axis="hz", cmap="magma")
-        plt.colorbar(img, format="%+2.0f dB")
-        plt.title("Frequency Spectrum")
-        plt.tight_layout()
-        buf = BytesIO()
-        plt.savefig(buf, format="png")
-        plt.close()
-        buf.seek(0)
-        return Image.open(buf)
-
     with gr.Tab("📊 Frequency Spectrum"):
         gr.Interface(
             fn=visualize_spectrum,
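(Likewise, the `visualize_spectrum` definition removed here was moved to module level in the +310 hunk, so it is defined before the Frequency Spectrum tab references it.)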
@@ -490,6 +527,22 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
         )
 
     # --- Save/Load Mix Session (.aiproj) ===
+    def save_project(audio, preset, effects):
+        project_data = {
+            "audio": AudioSegment.from_file(audio).raw_data,
+            "preset": preset,
+            "effects": effects
+        }
+        out_path = os.path.join(tempfile.gettempdir(), "project.aiproj")
+        with open(out_path, "wb") as f:
+            pickle.dump(project_data, f)
+        return out_path
+
+    def load_project(project_file):
+        with open(project_file.name, "rb") as f:
+            data = pickle.load(f)
+        return data["preset"], data["effects"]
+
     with gr.Tab("📁 Save/Load Project"):
         gr.Interface(
             fn=save_project,
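Review note on `save_project`/`load_project` above: `pickle.load` on a user-supplied .aiproj file can execute arbitrary code, and storing `raw_data` alone drops the sample rate and channel layout needed to reconstruct the audio. A pickle-free sketch (assumption: presets and effects are JSON-serializable, and the audio is referenced by path rather than embedded):

```python
import json
import os
import tempfile

def save_project_json(audio_path, preset, effects):
    # Store a reference to the source file instead of raw sample bytes.
    out_path = os.path.join(tempfile.gettempdir(), "project.aiproj")
    with open(out_path, "w") as f:
        json.dump({"audio_path": audio_path, "preset": preset, "effects": effects}, f)
    return out_path

def load_project_json(project_file):
    with open(project_file.name) as f:
        data = json.load(f)
    return data["preset"], data["effects"]
```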