Hendrik Schroeter committed on
Commit 1e7ab6c
Parent(s): d619a71

Add spectrogram

Files changed (2)
  1. app.py +153 -20
  2. requirements.txt +1 -0
app.py CHANGED
@@ -1,8 +1,11 @@
 import math
+import tempfile

 import gradio
 import gradio.inputs
 import gradio.outputs
+import matplotlib.pyplot as plt
+import numpy as np
 import torch
 from df import config
 from df.enhance import enhance, init_df, load_audio, save_audio
@@ -42,57 +45,187 @@ def mix_at_snr(clean, noise, snr, eps=1e-10):
     return clean, noise, mixture


-def as_gradio_audio(x):
-    sr = config("sr", 48000, int, section="df")
-    return sr, (x / 0x7FFF).to(torch.int16).cpu().numpy()
-
-
 def mix_and_denoise(speech, speech_alt, noise, snr):
+    print(speech, noise, snr)
     if noise is None:
         noise = "samples/dkitchen.wav"
-    if speech is None:
-        if speech_alt is None:
-            speech = "samples/p232_013_clean.wav"
-        speech = speech_alt
+    if speech is None or speech == "":
+        speech = "samples/p232_013_clean.wav"
+    if speech_alt is not None:
+        speech = speech_alt
     print(speech, noise, snr)
     sr = config("sr", 48000, int, section="df")
     speech, _ = load_audio(speech, sr)
     noise, _ = load_audio(noise, sr)
     speech, noise, noisy = mix_at_snr(speech, noise, snr)
     enhanced = enhance(model, df, noisy)
-    save_audio("clean.wav", speech, sr)
-    save_audio("noisy.wav", noisy, sr)
-    save_audio("enhanced.wav", enhanced, sr)
-    return "clean.wav", "noisy.wav", "enhanced.wav"
+    lim = torch.linspace(0.0, 1.0, int(sr * 0.15)).unsqueeze(0)
+    lim = torch.cat((lim, torch.ones(1, enhanced.shape[1] - lim.shape[1])), dim=1)
+    print("lim", lim.shape, enhanced.shape)
+    enhanced = enhanced * lim
+    noisy_fn = tempfile.NamedTemporaryFile(suffix="noisy.wav", delete=False).name
+    save_audio(noisy_fn, noisy, sr)
+    enhanced_fn = tempfile.NamedTemporaryFile(suffix="enhanced.wav", delete=False).name
+    save_audio(enhanced_fn, enhanced, sr)
+    return (
+        noisy_fn,
+        spec_figure(noisy, sr=sr),
+        enhanced_fn,
+        spec_figure(enhanced, sr=sr),
+    )
+
+
+def specshow(
+    spec,
+    ax=None,
+    title=None,
+    xlabel=None,
+    ylabel=None,
+    sr=48000,
+    n_fft=None,
+    hop=None,
+    t=None,
+    f=None,
+    vmin=-100,
+    vmax=0,
+    xlim=None,
+    ylim=None,
+    cmap="viridis",
+):
+    """Plots a spectrogram of shape [F, T]"""
+    spec_np = spec.cpu().numpy() if isinstance(spec, torch.Tensor) else spec
+    if ax is not None:
+        set_title = ax.set_title
+        set_xlabel = ax.set_xlabel
+        set_ylabel = ax.set_ylabel
+        set_xlim = ax.set_xlim
+        set_ylim = ax.set_ylim
+    else:
+        ax = plt
+        set_title = plt.title
+        set_xlabel = plt.xlabel
+        set_ylabel = plt.ylabel
+        set_xlim = plt.xlim
+        set_ylim = plt.ylim
+    if n_fft is None:
+        if spec.shape[0] % 2 == 0:
+            n_fft = spec.shape[0] * 2
+        else:
+            n_fft = (spec.shape[0] - 1) * 2
+    hop = hop or n_fft // 4
+    if t is None:
+        t = np.arange(0, spec_np.shape[-1]) * hop / sr
+    if f is None:
+        f = np.arange(0, spec_np.shape[0]) * sr // 2 / (n_fft // 2) / 1000
+    im = ax.pcolormesh(
+        t, f, spec_np, rasterized=True, shading="auto", vmin=vmin, vmax=vmax, cmap=cmap
+    )
+    if title is not None:
+        set_title(title)
+    if xlabel is not None:
+        set_xlabel(xlabel)
+    if ylabel is not None:
+        set_ylabel(ylabel)
+    if xlim is not None:
+        set_xlim(xlim)
+    if ylim is not None:
+        set_ylim(ylim)
+    return im
+
+
+def spec_figure(
+    audio: torch.Tensor,
+    figsize=(15, 5),
+    colorbar=False,
+    colorbar_format=None,
+    figure=None,
+    return_im=False,
+    labels=True,
+    **kwargs,
+) -> plt.Figure:
+    audio = torch.as_tensor(audio)
+    if labels:
+        kwargs.setdefault("xlabel", "Time [s]")
+        kwargs.setdefault("ylabel", "Frequency [Hz]")
+    n_fft = kwargs.setdefault("n_fft", 1024)
+    hop = kwargs.setdefault("hop", 512)
+    w = torch.hann_window(n_fft, device=audio.device)
+    spec = torch.stft(audio, n_fft, hop, window=w, return_complex=False)
+    spec = spec.div_(w.pow(2).sum())
+    spec = torch.view_as_complex(spec).abs().clamp_min(1e-12).log10().mul(10)
+    kwargs.setdefault("vmax", max(0.0, spec.max().item()))
+
+    if figure is None:
+        figure = plt.figure(figsize=figsize)
+        figure.set_tight_layout(True)
+    if spec.dim() > 2:
+        spec = spec.squeeze(0)
+    im = specshow(spec, **kwargs)
+    if colorbar:
+        ckwargs = {}
+        if "ax" in kwargs:
+            if colorbar_format is None:
+                if (
+                    kwargs.get("vmin", None) is not None
+                    or kwargs.get("vmax", None) is not None
+                ):
+                    colorbar_format = "%+2.0f dB"
+            ckwargs = {"ax": kwargs["ax"]}
+        plt.colorbar(im, format=colorbar_format, **ckwargs)
+    if return_im:
+        return im
+    return figure


 inputs = [
     gradio.inputs.Audio(
-        source="microphone", type="filepath", optional=True, label="Record your own voice"
+        source="microphone",
+        type="filepath",
+        optional=True,
+        label="Record your own voice",
+    ),
+    gradio.inputs.Audio(
+        source="upload",
+        type="filepath",
+        optional=True,
+        label="Alternative: Upload speech sample",
     ),
     gradio.inputs.Audio(
-        source="upload", type="filepath", optional=True, label="Alternative: Upload speech sample"
+        source="upload", type="filepath", optional=True, label="Upload noise sample"
     ),
-    gradio.inputs.Audio(source="upload", type="filepath", optional=True, label="Upload noise sample"),
     gradio.inputs.Slider(minimum=-20, maximum=40, step=5, default=10),
 ]
 examples = [
-    ["samples/p232_013_clean.wav", "samples/dkitchen.wav", 10],
-    ["samples/p232_019_clean.wav", "samples/dliving.wav", 10],
+    [
+        "samples/p232_013_clean.wav",
+        "samples/p232_013_clean.wav",
+        "samples/dkitchen.wav",
+        10,
+    ],
+    [
+        "samples/p232_013_clean.wav",
+        "samples/p232_019_clean.wav",
+        "samples/dliving.wav",
+        10,
+    ],
 ]
 outputs = [
-    gradio.outputs.Audio(label="Clean"),
     gradio.outputs.Audio(label="Noisy"),
+    gradio.outputs.Image(type="plot"),
     gradio.outputs.Audio(label="Enhanced"),
+    gradio.outputs.Image(type="plot"),
 ]
 description = (
     "This demo denoises audio files using DeepFilterNet. Try it with your own voice!"
 )
 iface = gradio.Interface(
     fn=mix_and_denoise,
+    title="DeepFilterNet Demo",
     inputs=inputs,
     outputs=outputs,
     examples=examples,
     description=description,
+    layout="horizontal",
+    allow_flagging="never",
 )
-iface.launch()
+iface.launch(cache_examples=False)
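Both versions of mix_and_denoise lean on mix_at_snr, whose body lies outside this hunk; only its signature and its final `return clean, noise, mixture` are visible above. For context, a generic sketch of SNR mixing under that signature; it illustrates the technique, not the repository's actual implementation:

import torch


def mix_at_snr_sketch(clean, noise, snr, eps=1e-10):
    """Scale `noise` so the clean-to-noise power ratio equals `snr` dB.

    Expects [channels, time] tensors, matching what load_audio returns.
    """
    # Tile, then truncate, the noise to the clean signal's length.
    if noise.shape[-1] < clean.shape[-1]:
        reps = -(-clean.shape[-1] // noise.shape[-1])  # ceiling division
        noise = noise.repeat(1, reps)
    noise = noise[..., : clean.shape[-1]]
    p_clean = clean.pow(2).mean()
    p_noise = noise.pow(2).mean()
    # Solve snr = 10 * log10(p_clean / (g**2 * p_noise)) for the gain g.
    g = torch.sqrt(p_clean / (p_noise * 10 ** (snr / 10) + eps))
    return clean, noise * g, clean + noise * g

At the slider default of 10 dB, the gain leaves the clean speech with ten times the power of the mixed-in noise.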
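The new spec_figure helper computes an STFT with a Hann window, normalizes by the window energy, and maps magnitudes to dB via 10 * log10(|X|) before handing the [F, T] matrix to specshow. A minimal usage sketch, assuming both helpers are in scope (e.g. pasted into a session; importing app.py directly would launch the interface); the test tone and output filename are illustrative, not part of the commit:

import torch

sr = 48000
t = torch.arange(2 * sr) / sr                    # two seconds of samples
audio = 0.5 * torch.sin(2 * torch.pi * 440 * t)  # 440 Hz test tone
# spec_figure accepts audio shaped like load_audio's output: [channels, time]
fig = spec_figure(audio.unsqueeze(0), sr=sr, colorbar=True)
fig.savefig("spectrogram.png")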
requirements.txt CHANGED
@@ -1,4 +1,5 @@
 torch
 torchaudio
 deepfilternet
+matplotlib
 gradio
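requirements.txt gains matplotlib for the new spectrogram figures. A hypothetical smoke test, not part of the commit, to confirm the demo's imports resolve after `pip install -r requirements.txt`; note that the deepfilternet package is imported as `df`, as app.py does above:

import importlib

# Module names mirror the imports used in app.py.
for mod in ("torch", "torchaudio", "df", "matplotlib", "gradio"):
    importlib.import_module(mod)
print("all demo dependencies import cleanly")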