Spaces:
Running
on
Zero
Running
on
Zero
roychao19477
committed on
Commit
·
179e88e
1
Parent(s):
7279258
Upload
Browse files
app.py
CHANGED
@@ -59,59 +59,54 @@ model.eval()
|
|
59 |
|
60 |
@spaces.GPU
|
61 |
def enhance(path):
|
62 |
-
|
63 |
-
|
64 |
-
wav =
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
|
|
|
|
|
|
|
|
|
|
71 |
sf.write("enhanced.wav", out, orig_sr)
|
72 |
-
|
73 |
D = librosa.stft(out, n_fft=1024, hop_length=512)
|
74 |
S = librosa.amplitude_to_db(np.abs(D), ref=np.max)
|
75 |
fig,ax=plt.subplots(figsize=(6,3))
|
76 |
-
librosa.display.specshow(S,sr=orig_sr,hop_length=512,x_axis="time",y_axis="hz",ax=ax)
|
77 |
ax.set_title("Enhanced Spectrogram"); plt.colorbar(format="%+2.0fβdB",ax=ax)
|
78 |
return "enhanced.wav", fig
|
79 |
|
80 |
-
# — custom CSS to remove all borders and style panels —
|
81 |
CSS = """
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
}
|
86 |
-
#input-audio label, #output-audio label, #spec-plot label {font-weight:600; color:#333;}
|
87 |
-
#run-btn {width:100%; margin-top:1rem;}
|
88 |
"""
|
89 |
|
90 |
-
with gr.Blocks(css=CSS, theme=
|
91 |
-
gr.HTML("<
|
92 |
-
gr.HTML("<p
|
|
|
93 |
|
94 |
with gr.Row():
|
95 |
-
with gr.Column():
|
96 |
-
|
97 |
sources=["upload","microphone"],
|
98 |
type="filepath",
|
99 |
-
label="
|
100 |
-
elem_id="
|
101 |
-
)
|
102 |
-
run_btn = gr.Button("Enhance", variant="primary", elem_id="run-btn")
|
103 |
-
|
104 |
-
with gr.Column():
|
105 |
-
output_audio = gr.Audio(
|
106 |
-
type="filepath",
|
107 |
-
label="Enhanced Audio",
|
108 |
-
elem_id="output-audio"
|
109 |
-
)
|
110 |
-
spec_plot = gr.Plot(
|
111 |
-
label="Enhanced Spectrogram",
|
112 |
-
elem_id="spec-plot"
|
113 |
)
|
|
|
|
|
|
|
|
|
114 |
|
115 |
-
run_btn.click(enhance, inputs=
|
116 |
|
117 |
demo.launch()
|
|
|
59 |
|
60 |
@spaces.GPU
def enhance(path):
    """Enhance the audio file at *path* and plot the result's spectrogram.

    The clip is loaded at its native sample rate, resampled to the
    module-level ``SR`` if the rates differ, RMS-normalised, passed through
    ``mag_phase_stft`` -> ``model`` -> ``mag_phase_istft``, de-normalised,
    resampled back to the native rate and written to ``enhanced.wav``.

    Args:
        path: Filesystem path of the input audio (Gradio ``type="filepath"``).

    Returns:
        A ``(wav_path, figure)`` tuple: the string ``"enhanced.wav"`` and a
        matplotlib figure holding the dB-scaled spectrogram of the output.

    NOTE(review): the output filename is fixed, so every call overwrites the
    previous result; confirm this is acceptable if requests can overlap.
    """
    # Load at the file's own rate (sr=None) so the output can be restored to it.
    wav, orig_sr = librosa.load(path, sr=None)
    if orig_sr != SR:
        # Keyword arguments: librosa >= 0.10 rejects positional sample rates.
        wav = librosa.resample(wav, orig_sr=orig_sr, target_sr=SR)

    # Normalise to unit RMS and add a batch dimension.
    x = torch.from_numpy(wav).float().to(device)
    energy = torch.sum(x ** 2)
    if energy > 0:
        norm = torch.sqrt(len(x) / energy)
    else:
        # Silent or empty input: skip scaling instead of dividing by zero,
        # which would otherwise propagate inf/NaN into the written file.
        norm = torch.ones((), device=x.device)
    x = (x * norm).unsqueeze(0)

    # STFT -> model -> ISTFT. Inference only, so no autograd graph is needed
    # (this also keeps the later .numpy() call valid).
    with torch.no_grad():
        amp, pha, _ = mag_phase_stft(
            x, **stft_cfg, compress_factor=model_cfg["compress_factor"]
        )
        amp2, pha2 = model(amp, pha)
        out = mag_phase_istft(
            amp2, pha2, **stft_cfg, compress_factor=model_cfg["compress_factor"]
        )
    # Undo the input normalisation and drop the batch dimension.
    out = (out / norm).squeeze().cpu().numpy()

    # Back to the original sample rate.
    if orig_sr != SR:
        out = librosa.resample(out, orig_sr=SR, target_sr=orig_sr)

    # Save.
    sf.write("enhanced.wav", out, orig_sr)

    # Spectrogram of the enhanced signal, in dB relative to its peak.
    D = librosa.stft(out, n_fft=1024, hop_length=512)
    S = librosa.amplitude_to_db(np.abs(D), ref=np.max)
    fig, ax = plt.subplots(figsize=(6, 3))
    img = librosa.display.specshow(
        S, sr=orig_sr, hop_length=512, x_axis="time", y_axis="hz", ax=ax
    )
    ax.set_title("Enhanced Spectrogram")
    # Pass the mappable explicitly rather than relying on pyplot's implicit
    # "current image", which may be unset when an explicit Axes is used.
    fig.colorbar(img, ax=ax, format="%+2.0f dB")
    return "enhanced.wav", fig
|
84 |
|
|
|
85 |
# Page-level CSS: centre the heading text, centre the duplicate button,
# outline the input audio widget, and stretch the run button to full width.
CSS = """
#title, #subtitle {text-align:center;}
.duplicate-button {margin:1em auto; display:block;}
#in-audio .gradio-component {border:2px dashed #888; border-radius:8px;}
#run {width:100%; margin-top:0.5em;}
"""

# Two-column layout: input clip and trigger on the left, enhanced audio and
# its spectrogram on the right.
with gr.Blocks(css=CSS, theme="soft") as demo:
    gr.HTML("<h1 id='title'>🎧 SEMamba: Speech Enhancement</h1>")
    gr.HTML("<p id='subtitle'>Upload or record your noisy clip, hit Enhance, and see the spectrogram.</p>")
    gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")

    with gr.Row():
        with gr.Column(scale=1):
            audio_in = gr.Audio(
                sources=["upload", "microphone"],
                type="filepath",
                label="Your Noisy Audio",
                elem_id="in-audio",
            )
            # NOTE(review): the trailing emoji was lost to an encoding error in
            # the source; the rocket is a best guess, confirm the intended glyph.
            run_btn = gr.Button("Enhance Now 🚀", variant="primary", elem_id="run")
        with gr.Column(scale=1):
            audio_out = gr.Audio(type="filepath", label="Enhanced Audio")
            spec_out = gr.Plot(label="Spectrogram")

    # enhance() returns (wav_path, figure), matching the two outputs in order.
    run_btn.click(enhance, inputs=audio_in, outputs=[audio_out, spec_out])

demo.launch()
|