tee342 committed on
Commit
98f6048
·
verified ·
1 Parent(s): f6738b1

Update app.py

Files changed (1)
  1. app.py +218 -73
app.py CHANGED
@@ -99,15 +99,6 @@ def apply_limiter(audio, limit_dB=-1):
     limiter = audio._spawn(audio.raw_data, overrides={"frame_rate": audio.frame_rate})
     return limiter.apply_gain(limit_dB)
 
-def apply_phaser(audio):
-    return audio._spawn(audio.raw_data, overrides={"frame_rate": int(audio.frame_rate * 1.1)})
-
-def apply_bitcrush(audio, bit_depth=8):
-    samples = np.array(audio.get_array_of_samples()).astype(np.float32)
-    max_val = np.iinfo(np.int16).max
-    crushed = ((samples / max_val) * (2 ** bit_depth)).astype(np.int16)
-    return array_to_audiosegment(crushed, audio.frame_rate, channels=audio.channels)
-
 def apply_auto_gain(audio, target_dB=-20):
     change = target_dB - audio.dBFS
     return audio.apply_gain(change)
@@ -158,67 +149,46 @@ def auto_eq(audio, genre="Pop"):
 
     return array_to_audiosegment(samples.astype(np.int16), sr, channels=audio.channels)
 
-# === AI Voice Effects – Harmony / Doubling / Tuning ===
-def pitch_correct(audio, target_key="C", semitones=None):
-    if semitones is None:
-        # Detect key and calculate needed shift
-        semitones = 0  # Placeholder
-    return apply_pitch_shift(audio, semitones)
-
-def vocal_doubling(audio):
-    double1 = apply_pitch_shift(audio, 0.3)
-    double2 = apply_pitch_shift(audio, -0.3)
-    return audio.overlay(double1).overlay(double2)
-
-# === Prompt-Based Editing ===
-def process_prompt(audio_path, prompt):
-    prompt = prompt.lower()
-    audio = AudioSegment.from_file(audio_path)
-
-    if "noise" in prompt or "clean" in prompt:
-        audio = apply_noise_reduction(audio)
-
-    if "normalize" in prompt or "loud" in prompt:
-        audio = apply_normalize(audio)
-
-    if "bass" in prompt and ("boost" in prompt or "up" in prompt):
-        audio = apply_bass_boost(audio)
-
-    if "treble" in prompt or "highs" in prompt:
-        audio = apply_treble_boost(audio)
-
-    if "echo" in prompt or "reverb" in prompt:
-        audio = apply_reverb(audio)
-
-    if "pitch" in prompt and "correct" in prompt:
-        audio = pitch_correct(audio)
-
-    if "harmony" in prompt or "double" in prompt:
-        audio = vocal_doubling(audio)
-
-    out_path = os.path.join(tempfile.gettempdir(), "prompt_output.wav")
+# === Real-Time EQ Sliders ===
+def real_time_eq(audio, low_gain=0, mid_gain=0, high_gain=0):
+    samples, sr = audiosegment_to_array(audio)
+    samples = samples.astype(np.float64)
+
+    # Low EQ: 20–500Hz
+    sos_low = butter(10, [20, 500], btype='band', output='sos', fs=sr)
+    low = sosfilt(sos_low, samples) * (10 ** (low_gain / 20))
+
+    # Mid EQ: 500–4000Hz
+    sos_mid = butter(10, [500, 4000], btype='band', output='sos', fs=sr)
+    mid = sosfilt(sos_mid, samples) * (10 ** (mid_gain / 20))
+
+    # High EQ: 4kHz and up (a highpass takes a single cutoff, not a band)
+    sos_high = butter(10, 4000, btype='high', output='sos', fs=sr)
+    high = sosfilt(sos_high, samples) * (10 ** (high_gain / 20))
+
+    # Sum the independently gained bands (filtering the running sum would
+    # stack the filters) and clip before the int16 cast to avoid wrap-around
+    samples = np.clip(low + mid + high, np.iinfo(np.int16).min, np.iinfo(np.int16).max)
+    return array_to_audiosegment(samples.astype(np.int16), sr, channels=audio.channels)
+
+# === AI Suggest Presets Based on Genre ===
+genre_preset_map = {
+    "Speech": ["Clean Podcast", "Normalize"],
+    "Pop": ["Vocal Clarity", "Limiter", "Stereo Expansion"],
+    "EDM": ["Heavy Bass", "Stereo Expansion", "Limiter", "Phaser"],
+    "Rock": ["Distortion", "Punchy Mids", "Reverb"],
+    "Hip-Hop": ["Deep Bass", "Vocal Presence", "Saturation"]
+}
+
+def suggest_preset_by_genre(genre):
+    return genre_preset_map.get(genre, ["Default"])
+
+# === Create Karaoke Video from Audio + Lyrics ===
+def create_karaoke_video(audio_path, lyrics, bg_image=None):
+    # Placeholder for video generation
+    print(f"Creating karaoke video with lyrics: {lyrics}")
+    out_path = os.path.join(tempfile.gettempdir(), "karaoke_output.wav")
+    audio = AudioSegment.from_file(audio_path)
     audio.export(out_path, format="wav")
     return out_path
 
-# === Spectrum Analyzer + EQ Visualizer ===
-def visualize_spectrum(audio_path):
-    y, sr = torchaudio.load(audio_path)
-    y_np = y.numpy().flatten()
-
-    stft = librosa.stft(y_np)
-    db = librosa.amplitude_to_db(abs(stft))
-
-    plt.figure(figsize=(10, 4))
-    img = librosa.display.specshow(db, sr=sr, x_axis="time", y_axis="hz", cmap="magma")
-    plt.colorbar(img, format="%+2.0f dB")
-    plt.title("Frequency Spectrum")
-    plt.tight_layout()
-    buf = BytesIO()
-    plt.savefig(buf, format="png")
-    plt.close()
-    buf.seek(0)
-    return Image.open(buf)
-
 # === Vocal Isolation Helpers ===
 def load_track_local(path, sample_rate, channels=2):
     sig, rate = torchaudio.load(path)
@@ -433,7 +403,7 @@ def transcribe_audio(audio_path):
 # === TTS Tab ===
 tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False)
 
-def generate_tTS(text):
+def generate_tts(text):
     out_path = os.path.join(tempfile.gettempdir(), "tts_output.wav")
     tts.tts_to_file(text=text, file_path=out_path)
     return out_path
@@ -527,6 +497,28 @@ def diarize_and_transcribe(audio_path):
     except Exception as e:
         return f"⚠️ Diarization failed: {str(e)}"
 
+# === Real-Time Spectrum Analyzer + EQ Visualizer ===
+def visualize_spectrum(audio_path):
+    y, sr = torchaudio.load(audio_path)
+    y_np = y.numpy().flatten()
+    stft = librosa.stft(y_np)
+    db = librosa.amplitude_to_db(abs(stft))
+
+    plt.figure(figsize=(10, 4))
+    img = librosa.display.specshow(db, sr=sr, x_axis="time", y_axis="hz", cmap="magma")
+    plt.colorbar(img, format="%+2.0f dB")
+    plt.title("Frequency Spectrum")
+    plt.tight_layout()
+    buf = BytesIO()
+    plt.savefig(buf, format="png")
+    plt.close()
+    buf.seek(0)
+    return Image.open(buf)
+
+# === Real-Time EQ Sliders ===
+def real_time_eq_slider(audio, low_gain, mid_gain, high_gain):
+    return real_time_eq(audio, low_gain, mid_gain, high_gain)
+
 # === UI ===
 effect_options = [
     "Noise Reduction",
@@ -619,16 +611,41 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
     # --- Genre Mastering Tab ===
     with gr.Tab("🎧 Genre Mastering"):
         gr.Interface(
-            fn=lambda audio, genre: apply_genre_preset(audio, genre),
+            fn=lambda audio, genre: auto_eq(audio, genre),
             inputs=[
                 gr.Audio(label="Upload Track", type="filepath"),
-                gr.Dropdown(choices=list(genre_presets.keys()), label="Select Genre", value="Pop")
+                gr.Dropdown(choices=list(genre_preset_map.keys()), label="Select Genre", value="Pop")
             ],
             outputs=gr.Audio(label="Mastered Output", type="filepath"),
             title="Genre-Specific Mastering",
             description="Apply professionally tuned mastering settings for popular music genres."
         )
 
+    # --- Real-Time EQ ===
+    with gr.Tab("🎛 Real-Time EQ"):
+        gr.Interface(
+            fn=real_time_eq_slider,
+            inputs=[
+                gr.Audio(label="Upload Track", type="filepath"),
+                gr.Slider(minimum=-12, maximum=12, value=0, label="Low Gain (20–500Hz)"),
+                gr.Slider(minimum=-12, maximum=12, value=0, label="Mid Gain (500Hz–4kHz)"),
+                gr.Slider(minimum=-12, maximum=12, value=0, label="High Gain (4kHz+)"),
+            ],
+            outputs=gr.Audio(label="EQ'd Output", type="filepath"),
+            title="Adjust Frequency Bands Live",
+            description="Fine-tune your sound using real-time sliders for low, mid, and high frequencies."
+        )
+
+    # --- Spectrum Visualizer ===
+    with gr.Tab("📊 Frequency Spectrum"):
+        gr.Interface(
+            fn=visualize_spectrum,
+            inputs=gr.Audio(label="Upload Track", type="filepath"),
+            outputs=gr.Image(label="Spectrum Analysis"),
+            title="Real-Time Spectrum Analyzer",
+            description="See the frequency breakdown of your audio"
+        )
+
     # --- Prompt-Based Editing Tab ===
     with gr.Tab("🧠 Prompt-Based Editing"):
         gr.Interface(
@@ -643,14 +660,37 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
             allow_flagging="never"
         )
 
-    # --- Spectrum Analyzer Tab ===
-    with gr.Tab("📊 Frequency Spectrum"):
+    # --- Vocal Presets for Singers ===
+    with gr.Tab("🎤 Vocal Presets for Singers"):
         gr.Interface(
-            fn=visualize_spectrum,
-            inputs=gr.Audio(label="Upload Track", type="filepath"),
-            outputs=gr.Image(label="Spectrum Analysis"),
-            title="Real-Time Spectrum Analyzer",
-            description="See the frequency breakdown of your audio",
+            fn=process_audio,
+            inputs=[
+                gr.Audio(label="Upload Vocal Track", type="filepath"),
+                gr.CheckboxGroup(choices=[
+                    "Noise Reduction",
+                    "Normalize",
+                    "Compress Dynamic Range",
+                    "Bass Boost",
+                    "Treble Boost",
+                    "Reverb",
+                    "Auto Gain",
+                    "Vocal Distortion",
+                    "Harmony",
+                    "Stage Mode"
+                ]),
+                gr.Checkbox(label="Isolate Vocals After Effects"),
+                gr.Dropdown(choices=preset_names, label="Select Vocal Preset", value=preset_names[0]),
+                gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
+            ],
+            outputs=[
+                gr.Audio(label="Processed Vocal", type="filepath"),
+                gr.Image(label="Waveform Preview"),
+                gr.Textbox(label="Session Log (JSON)", lines=5),
+                gr.Textbox(label="Detected Genre", lines=1),
+                gr.Textbox(label="Status", value="✅ Ready", lines=1)
+            ],
+            title="Create Studio-Quality Vocal Tracks",
+            description="Apply singer-friendly presets and effects to enhance vocals.",
             allow_flagging="never"
         )
 
@@ -668,7 +708,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
             description="Clone voice from source to target speaker using AI"
         )
 
-    # --- Speaker Diarization (Who Spoke When?) ===
+    # --- Speaker Diarization ("Who Spoke When?") ===
     if diarize_pipeline:
         with gr.Tab("🧍‍♂️ Who Spoke When?"):
             gr.Interface(
@@ -738,4 +778,109 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
             description="Detect and trim silence at start/end or between words"
         )
 
+    # --- Save/Load Project File (.aiproj) ===
+    with gr.Tab("📁 Save/Load Project"):
+        gr.Interface(
+            fn=save_project,
+            inputs=[
+                gr.File(label="Original Audio"),
+                gr.Dropdown(choices=preset_names, label="Used Preset", value=preset_names[0]),
+                gr.CheckboxGroup(choices=effect_options, label="Applied Effects")
+            ],
+            outputs=gr.File(label="Project File (.aiproj)"),
+            title="Save Everything Together",
+            description="Save your session, effects, and settings in one file to reuse later."
+        )
+
+        gr.Interface(
+            fn=load_project,
+            inputs=gr.File(label="Upload .aiproj File"),
+            outputs=[
+                gr.Dropdown(choices=preset_names, label="Loaded Preset"),
+                gr.CheckboxGroup(choices=effect_options, label="Loaded Effects")
+            ],
+            title="Resume Last Project",
+            description="Load your saved session"
+        )
+
+    # --- Cloud Project Sync (Premium Feature) ===
+    with gr.Tab("☁️ Cloud Project Sync"):
+        gr.Markdown("Save your projects online and resume them from any device.")
+
+        project_id = gr.Textbox(label="Project ID (optional)")
+        project_name = gr.Textbox(label="Project Name")
+        project_data = gr.State()
+
+        def cloud_save_project(audio, preset, effects, name, project_id=""):
+            # Simulated cloud saving; relies on `import pickle` at the top of app.py
+            project_data = {
+                "audio": AudioSegment.from_file(audio).raw_data,
+                "preset": preset,
+                "effects": effects
+            }
+            project_path = os.path.join(tempfile.gettempdir(), f"{name}.aiproj")
+            with open(project_path, "wb") as f:
+                pickle.dump(project_data, f)
+            return project_path, f"✅ Saved as '{name}'"
+
+        def cloud_load_project(project_id):
+            # Simulated cloud loading; values follow the output order below:
+            # no stored audio, then preset, then effect list
+            if not project_id:
+                return None, None, None
+            return None, "Default", ["Noise Reduction", "Normalize"]
+
+        gr.Interface(
+            fn=cloud_save_project,
+            inputs=[
+                gr.File(label="Upload Audio", type="filepath"),
+                gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0]),
+                gr.CheckboxGroup(choices=effect_options, label="Effects"),
+                gr.Textbox(label="Project Name"),
+                gr.Textbox(label="Project ID (Optional)")
+            ],
+            outputs=[
+                gr.File(label="Downloadable Project File"),
+                gr.Textbox(label="Status", value="✅ Ready", lines=1)
+            ],
+            title="Save to Cloud",
+            description="Save your project online and share it across devices."
+        )
+
+        gr.Interface(
+            fn=cloud_load_project,
+            inputs=gr.Textbox(label="Enter Project ID"),
+            outputs=[
+                gr.Audio(label="Loaded Audio", type="filepath"),
+                gr.Dropdown(choices=preset_names, label="Loaded Preset"),
+                gr.CheckboxGroup(choices=effect_options, label="Loaded Effects")
+            ],
+            title="Load from Cloud",
+            description="Resume a project from the cloud",
+            allow_flagging="never"
+        )
+
+    # --- AI Suggest Presets Based on Genre ===
+    with gr.Tab("🧠 AI Suggest Preset"):
+        gr.Interface(
+            fn=suggest_preset_by_genre,
+            inputs=gr.Audio(label="Upload Track", type="filepath"),
+            outputs=gr.Dropdown(choices=preset_names, label="Recommended Preset"),
+            title="AI Recommends Best Preset",
+            description="Upload a track and let AI recommend the best preset based on genre."
+        )
+
+    # --- Create Karaoke Video from Audio + Lyrics ===
+    with gr.Tab("📹 Create Karaoke Video"):
+        gr.Interface(
+            fn=create_karaoke_video,
+            inputs=[
+                gr.Audio(label="Upload Track", type="filepath"),
+                gr.Textbox(label="Lyrics", lines=10),
+                gr.File(label="Background (Optional)")
+            ],
+            outputs=gr.Video(label="Karaoke Video"),
+            title="Make Karaoke Videos from Audio + Lyrics",
+            description="Generate karaoke-style videos with real-time sync."
+        )
+
 demo.launch()
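
A note on the Genre Mastering and Real-Time EQ tabs: gr.Audio(type="filepath") passes a path string into auto_eq and real_time_eq_slider, but both underlying functions operate on pydub AudioSegments and return AudioSegments, while the outputs expect a filepath back. A minimal adapter sketch, reusing imports already in app.py; the helper name eq_from_path is illustrative and not part of this commit:

def eq_from_path(path, low_gain, mid_gain, high_gain):
    # Load the upload into pydub, run the AudioSegment-based EQ,
    # then export a temp WAV so gr.Audio(type="filepath") can serve it.
    audio = AudioSegment.from_file(path)
    processed = real_time_eq(audio, low_gain, mid_gain, high_gain)
    out_path = os.path.join(tempfile.gettempdir(), "eq_output.wav")
    processed.export(out_path, format="wav")
    return out_path

Passing fn=eq_from_path (and an equivalent wrapper around auto_eq) keeps both tab signatures unchanged.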
 