Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -5,6 +5,23 @@ import spaces
|
|
5 |
from clearvoice import ClearVoice
|
6 |
import os
|
7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
@spaces.GPU
|
9 |
def fn_clearvoice_se(input_wav, sr):
|
10 |
if sr == "16000 Hz":
|
@@ -65,19 +82,17 @@ def fn_clearvoice_tse(input_video):
|
|
65 |
|
66 |
demo = gr.Blocks()
|
67 |
|
68 |
-
|
69 |
-
fn=
|
70 |
inputs = [
|
71 |
gr.Audio(label="Input Audio", type="filepath"),
|
72 |
-
gr.
|
73 |
-
["16000 Hz", "48000 Hz"], value="16000 Hz", multiselect=False, info="Choose a sampling rate for your output."
|
74 |
-
),
|
75 |
],
|
76 |
outputs = [
|
77 |
gr.Audio(label="Output Audio", type="filepath"),
|
78 |
],
|
79 |
title = "<a href='https://github.com/modelscope/ClearerVoice-Studio/tree/main/clearvoice' target='_blank'>ClearVoice<a/>: Speech Enhancement",
|
80 |
-
description = ("
|
81 |
"To try it, simply upload your audio, or click one of the examples. "),
|
82 |
article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2206.07293' target='_blank'>FRCRN: Boosting Feature Representation Using Frequency Recurrence for Monaural Speech Enhancement</a> </p>"
|
83 |
"<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation</a> </p>"),
|
@@ -130,6 +145,6 @@ tse_demo = gr.Interface(
|
|
130 |
)
|
131 |
|
132 |
with demo:
|
133 |
-
gr.TabbedInterface([se_demo
|
134 |
|
135 |
demo.launch()
|
|
|
5 |
from clearvoice import ClearVoice
|
6 |
import os
|
7 |
|
8 |
+
@spaces.GPU
|
9 |
+
def fn_clearvoice_sr(input_wav, sr):
|
10 |
+
if sr == "16000 Hz":
|
11 |
+
myClearVoice = ClearVoice(task='speech_enhancement', model_names=['FRCRN_SE_16K'])
|
12 |
+
fs = 16000
|
13 |
+
else:
|
14 |
+
myClearVoice = ClearVoice(task='speech_enhancement', model_names=['MossFormer2_SE_48K'])
|
15 |
+
fs = 48000
|
16 |
+
output_wav_dict = myClearVoice(input_path=input_wav, online_write=False)
|
17 |
+
if isinstance(output_wav_dict, dict):
|
18 |
+
key = next(iter(output_wav_dict))
|
19 |
+
output_wav = output_wav_dict[key]
|
20 |
+
else:
|
21 |
+
output_wav = output_wav_dict
|
22 |
+
sf.write('enhanced.wav', output_wav, fs)
|
23 |
+
return 'enhanced.wav'
|
24 |
+
|
25 |
@spaces.GPU
|
26 |
def fn_clearvoice_se(input_wav, sr):
|
27 |
if sr == "16000 Hz":
|
|
|
82 |
|
83 |
demo = gr.Blocks()
|
84 |
|
85 |
+
sr_demo = gr.Interface(
|
86 |
+
fn=fn_clearvoice_sr,
|
87 |
inputs = [
|
88 |
gr.Audio(label="Input Audio", type="filepath"),
|
89 |
+
gr.Checkbox(["Apply Enhancement"], label="Apply_SE"),
|
|
|
|
|
90 |
],
|
91 |
outputs = [
|
92 |
gr.Audio(label="Output Audio", type="filepath"),
|
93 |
],
|
94 |
title = "<a href='https://github.com/modelscope/ClearerVoice-Studio/tree/main/clearvoice' target='_blank'>ClearVoice<a/>: Speech Enhancement",
|
95 |
+
description = ("ClearVoice ([GitHub Repo](https://github.com/modelscope/ClearerVoice-Studio/tree/main/clearvoice)) is AI-powered and transforms low-resolution audio (effective sampling rate ≥ 16 kHz) into crystal-clear, high-resolution audio at 48 kHz. It supports most audio types. "
|
96 |
"To try it, simply upload your audio, or click one of the examples. "),
|
97 |
article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2206.07293' target='_blank'>FRCRN: Boosting Feature Representation Using Frequency Recurrence for Monaural Speech Enhancement</a> </p>"
|
98 |
"<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation</a> </p>"),
|
|
|
145 |
)
|
146 |
|
147 |
with demo:
|
148 |
+
gr.TabbedInterface([se_demo], ["Task 4: Speech Super Resolution"])
|
149 |
|
150 |
demo.launch()
|