alibabasglab committed on
Commit
6e4d760
·
verified ·
1 Parent(s): 4b383e5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -7
app.py CHANGED
@@ -5,6 +5,23 @@ import spaces
5
  from clearvoice import ClearVoice
6
  import os
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  @spaces.GPU
9
  def fn_clearvoice_se(input_wav, sr):
10
  if sr == "16000 Hz":
@@ -65,19 +82,17 @@ def fn_clearvoice_tse(input_video):
65
 
66
  demo = gr.Blocks()
67
 
68
- se_demo = gr.Interface(
69
- fn=fn_clearvoice_se,
70
  inputs = [
71
  gr.Audio(label="Input Audio", type="filepath"),
72
- gr.Dropdown(
73
- ["16000 Hz", "48000 Hz"], value="16000 Hz", multiselect=False, info="Choose a sampling rate for your output."
74
- ),
75
  ],
76
  outputs = [
77
  gr.Audio(label="Output Audio", type="filepath"),
78
  ],
79
  title = "<a href='https://github.com/modelscope/ClearerVoice-Studio/tree/main/clearvoice' target='_blank'>ClearVoice<a/>: Speech Enhancement",
80
- description = ("ClearerVoice-Studio ([Github Repo](https://github.com/modelscope/ClearerVoice-Studio/tree/main/clearvoice)) is AI-powered and extracts clear speech from background noise for enhanced speech quality. It supports both 16 kHz and 48 kHz audio outputs. "
81
  "To try it, simply upload your audio, or click one of the examples. "),
82
  article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2206.07293' target='_blank'>FRCRN: Boosting Feature Representation Using Frequency Recurrence for Monaural Speech Enhancement</a> </p>"
83
  "<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation</a> </p>"),
@@ -130,6 +145,6 @@ tse_demo = gr.Interface(
130
  )
131
 
132
  with demo:
133
- gr.TabbedInterface([se_demo, ss_demo, tse_demo], ["Task 1: Speech Enhancement", "Task 2: Speech Separation", "Task 3: Audio-Visual Speaker Extraction"])
134
 
135
  demo.launch()
 
5
  from clearvoice import ClearVoice
6
  import os
7
 
8
@spaces.GPU
def fn_clearvoice_sr(input_wav, sr):
    """Run a ClearVoice speech-enhancement model on a file and save the result.

    Args:
        input_wav: Value forwarded to ClearVoice as ``input_path``.
        sr: Rate selector. The exact string ``"16000 Hz"`` picks the
            ``FRCRN_SE_16K`` model and a 16000 Hz output rate; any other
            value picks ``MossFormer2_SE_48K`` and 48000 Hz.

    Returns:
        The relative path ``'enhanced.wav'``. Every call writes to this same
        path.
    """
    if sr == "16000 Hz":
        model_name, sample_rate = 'FRCRN_SE_16K', 16000
    else:
        model_name, sample_rate = 'MossFormer2_SE_48K', 48000

    enhancer = ClearVoice(task='speech_enhancement', model_names=[model_name])
    result = enhancer(input_path=input_wav, online_write=False)

    # The model may hand back either a dict or the audio itself; when it is a
    # dict, only its first entry is used.
    samples = next(iter(result.values())) if isinstance(result, dict) else result

    # NOTE(review): `sf` is presumably the soundfile module imported above the
    # visible part of the file — confirm.
    sf.write('enhanced.wav', samples, sample_rate)
    return 'enhanced.wav'
24
+
25
  @spaces.GPU
26
  def fn_clearvoice_se(input_wav, sr):
27
  if sr == "16000 Hz":
 
82
 
83
  demo = gr.Blocks()
84
 
85
+ sr_demo = gr.Interface(
86
+ fn=fn_clearvoice_sr,
87
  inputs = [
88
  gr.Audio(label="Input Audio", type="filepath"),
89
+ gr.Checkbox(["Apply Enhancement"], label="Apply_SE"),
 
 
90
  ],
91
  outputs = [
92
  gr.Audio(label="Output Audio", type="filepath"),
93
  ],
94
  title = "<a href='https://github.com/modelscope/ClearerVoice-Studio/tree/main/clearvoice' target='_blank'>ClearVoice<a/>: Speech Enhancement",
95
+ description = ("ClearVoice ([Github Repo](https://github.com/modelscope/ClearerVoice-Studio/tree/main/clearvoice)) is AI-powered and transform low-resolution audio (effective sampling rate 16 kHz) into crystal-clear, high-resolution audio at 48 kHz. It supports most of audio types. "
96
  "To try it, simply upload your audio, or click one of the examples. "),
97
  article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2206.07293' target='_blank'>FRCRN: Boosting Feature Representation Using Frequency Recurrence for Monaural Speech Enhancement</a> </p>"
98
  "<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation</a> </p>"),
 
145
  )
146
 
147
  with demo:
148
+ gr.TabbedInterface([se_demo], ["Task 4: Speech Super Resolution"])
149
 
150
  demo.launch()