ClearVoice-SR

Running on Zero

App Files Files Community

alibabasglab committed on Jan 14

Commit

6e4d760

verified ·

1 Parent(s): 4b383e5

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -7

app.py CHANGED Viewed

@@ -5,6 +5,23 @@ import spaces
 from clearvoice import ClearVoice
 import os
 @spaces.GPU
 def fn_clearvoice_se(input_wav, sr):
     if sr == "16000 Hz":
@@ -65,19 +82,17 @@ def fn_clearvoice_tse(input_video):
 demo = gr.Blocks()
-se_demo = gr.Interface(
-    fn=fn_clearvoice_se,
     inputs = [
         gr.Audio(label="Input Audio", type="filepath"),
-        gr.Dropdown(
-            ["16000 Hz", "48000 Hz"], value="16000 Hz", multiselect=False, info="Choose a sampling rate for your output."
-        ),
     ],
     outputs = [
         gr.Audio(label="Output Audio", type="filepath"),
     ],
     title = "<a href='https://github.com/modelscope/ClearerVoice-Studio/tree/main/clearvoice' target='_blank'>ClearVoice<a/>: Speech Enhancement",
-    description = ("ClearerVoice-Studio ([Github Repo](https://github.com/modelscope/ClearerVoice-Studio/tree/main/clearvoice)) is AI-powered and extracts clear speech from background noise for enhanced speech quality. It supports both 16 kHz and 48 kHz audio outputs. "
                    "To try it, simply upload your audio, or click one of the examples. "),
     article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2206.07293' target='_blank'>FRCRN: Boosting Feature Representation Using Frequency Recurrence for Monaural Speech Enhancement</a> </p>"
               "<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation</a> </p>"),
@@ -130,6 +145,6 @@ tse_demo = gr.Interface(
 )
 with demo:
-    gr.TabbedInterface([se_demo, ss_demo, tse_demo], ["Task 1: Speech Enhancement", "Task 2: Speech Separation", "Task 3: Audio-Visual Speaker Extraction"])
 demo.launch()

 from clearvoice import ClearVoice
 import os
+# Handler wired to sr_demo (fn=fn_clearvoice_sr): runs a ClearVoice model on the audio
+# file at `input_wav`, writes the result to 'enhanced.wav' and returns that path.
+# NOTE(review): despite the `_sr` name, both branches request task='speech_enhancement'
+# with SE model names — confirm this is the intended model for the super-resolution tab.
+@spaces.GPU
+def fn_clearvoice_sr(input_wav, sr):
+    # Only the exact label "16000 Hz" selects the 16 kHz model; any other value of `sr`
+    # falls through to the 48 kHz branch.
+    # NOTE(review): the sr_demo interface in this diff supplies a gr.Checkbox as the second
+    # input, so `sr` is presumably not a string and the 16 kHz branch may never run — confirm.
+    if sr == "16000 Hz":
+        myClearVoice = ClearVoice(task='speech_enhancement', model_names=['FRCRN_SE_16K'])
+        fs = 16000
+    else:
+        myClearVoice = ClearVoice(task='speech_enhancement', model_names=['MossFormer2_SE_48K'])
+        fs = 48000
+    # online_write=False: the result is taken from the return value and written below.
+    output_wav_dict = myClearVoice(input_path=input_wav, online_write=False)
+    # The call may return a dict or the output directly; for a dict, use the value stored
+    # under its first key.
+    if isinstance(output_wav_dict, dict):
+        key = next(iter(output_wav_dict))
+        output_wav = output_wav_dict[key]
+    else:
+        output_wav = output_wav_dict
+    # `sf` is not imported in the visible lines — presumably soundfile; verify.
+    # NOTE(review): the output path is a fixed relative filename, so every call writes to
+    # the same file — confirm this is acceptable if requests can overlap.
+    sf.write('enhanced.wav', output_wav, fs)
+    return 'enhanced.wav'
 @spaces.GPU
 def fn_clearvoice_se(input_wav, sr):
     if sr == "16000 Hz":
 demo = gr.Blocks()
+sr_demo = gr.Interface(
+    fn=fn_clearvoice_sr,
     inputs = [
         gr.Audio(label="Input Audio", type="filepath"),
+        gr.Checkbox(["Apply Enhancement"], label="Apply_SE"),
     ],
     outputs = [
         gr.Audio(label="Output Audio", type="filepath"),
     ],
     title = "<a href='https://github.com/modelscope/ClearerVoice-Studio/tree/main/clearvoice' target='_blank'>ClearVoice<a/>: Speech Enhancement",
+    description = ("ClearVoice ([Github Repo](https://github.com/modelscope/ClearerVoice-Studio/tree/main/clearvoice)) is AI-powered and transform low-resolution audio (effective sampling rate ≥ 16 kHz) into crystal-clear, high-resolution audio at 48 kHz. It supports most of audio types. "
                    "To try it, simply upload your audio, or click one of the examples. "),
     article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2206.07293' target='_blank'>FRCRN: Boosting Feature Representation Using Frequency Recurrence for Monaural Speech Enhancement</a> </p>"
               "<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation</a> </p>"),
 )
 with demo:
+    gr.TabbedInterface([se_demo], ["Task 4: Speech Super Resolution"])
 demo.launch()