Spaces:

rc19477
/

Speech_Enhancement_Mamba

Running on Zero

App Files Files Community

roychao19477 committed 28 days ago

Commit

1fdb610

1 Parent(s): 2bbe7e3

Upload

Browse files

Files changed (1) hide show

app.py +34 -23

app.py CHANGED Viewed

@@ -20,6 +20,7 @@ import gradio as gr
 import torch
 import yaml
 import librosa
 from huggingface_hub import hf_hub_download
 from models.stfts    import mag_phase_stft, mag_phase_istft
 from models.generator import SEMamba
@@ -56,7 +57,6 @@ model.eval()
 @spaces.GPU
-# --- Inference ---
 def enhance(audio):
     if audio is None: return None, None
     orig_sr, wav_np = audio
@@ -73,31 +73,42 @@ def enhance(audio):
     if orig_sr != sr:
         out = librosa.resample(out, sr, orig_sr)
-    # draw spectrum
     D = librosa.stft(out, n_fft=1024, hop_length=512)
     S_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)
-    fig, ax = plt.subplots()
     librosa.display.specshow(S_db, sr=orig_sr, hop_length=512, x_axis='time', y_axis='hz', ax=ax)
     ax.set_title("Enhanced Spectrogram")
-    plt.colorbar(format="%+2.0f dB")
     return (orig_sr, out), fig
-# --- Interface ---
-se_demo = gr.Interface(
-    fn=enhance,
-    inputs=gr.Audio(sources=["upload", "microphone"], type="numpy", label="Input Audio"),
-    outputs=[
-        gr.Audio(label="Enhanced Audio", type="numpy"),
-        gr.Plot(label="Spectrogram")
-    ],
-    title="<a href='https://github.com/RoyChao19477/SEMamba' target='_blank'>SEMamba</a>: Speech Enhancement",
-    description="SEMamba is a state-space model for real-world noisy speech enhancement. Upload or record a noisy sample to hear the result and view the spectrogram.",
-    article="<p style='text-align: center'><a href='https://arxiv.org/abs/2405.15144' target='_blank'>SEMamba: Mamba for Long-Context Speech Enhancement (SLT 2024)</a></p>",
-    examples=[
-        ["examples/noisy_sample_16k.wav"]
-    ],
-    cache_examples=True
-)
-# --- Launch ---
-se_demo.launch()

 import torch
 import yaml
 import librosa
+import librosa.display
 from huggingface_hub import hf_hub_download
 from models.stfts    import mag_phase_stft, mag_phase_istft
 from models.generator import SEMamba
 @spaces.GPU
 def enhance(audio):
     if audio is None: return None, None
     orig_sr, wav_np = audio
     if orig_sr != sr:
         out = librosa.resample(out, sr, orig_sr)
+    # spectrogram
     D = librosa.stft(out, n_fft=1024, hop_length=512)
     S_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)
+    fig, ax = plt.subplots(figsize=(6, 3))
     librosa.display.specshow(S_db, sr=orig_sr, hop_length=512, x_axis='time', y_axis='hz', ax=ax)
     ax.set_title("Enhanced Spectrogram")
+    plt.colorbar(format="%+2.0f dB", ax=ax)
     return (orig_sr, out), fig
+# --- Layout with Blocks ---
+with gr.Blocks(css=".gr-box {border: none !important}") as demo:
+    gr.Markdown("<h1 style='text-align: center;'>🎧 <a href='https://github.com/RoyChao19477/SEMamba' target='_blank'>SEMamba</a>: Speech Enhancement</h1>")
+    gr.Markdown("Enhance real-world noisy speech using Mamba. Upload or record an audio clip and view the spectrogram.")
+    with gr.Row():
+        with gr.Column():
+            audio_input = gr.Audio(sources=["upload", "microphone"], type="numpy", label="Upload or Record", elem_id="input-audio")
+            run_btn = gr.Button("Enhance Now 🚀", variant="primary")
+        with gr.Column():
+            enhanced_audio = gr.Audio(label="Enhanced Output", type="numpy")
+            spec_plot = gr.Plot(label="Spectrogram")
+    run_btn.click(enhance, inputs=audio_input, outputs=[enhanced_audio, spec_plot])
+    gr.Examples(
+        examples=[
+            ["examples/noisy_sample_16k.wav"],
+        ],
+        inputs=audio_input,
+        outputs=[enhanced_audio, spec_plot],
+        fn=enhance,
+        cache_examples=True,
+        label="📂 Try These Examples"
+    )
+    gr.Markdown("<p style='text-align: center'><a href='https://arxiv.org/abs/2405.15144' target='_blank'>📄 SEMamba: Mamba for Long-Context Speech Enhancement (SLT 2024)</a></p>")
+demo.launch()