Spaces:
Runtime error
Runtime error
Hendrik Schroeter
committed on
Commit
•
0f79c5b
1
Parent(s):
d446ca4
Initial working space
Browse files
- app.py +26 -9
- samples/noise_freesound_573577.wav → clean.wav +2 -2
- enhanced.wav +3 -0
- noisy.wav +3 -0
- samples/p232_013_clean.wav +3 -0
app.py
CHANGED
@@ -1,10 +1,16 @@
|
|
|
|
|
|
|
|
1 |
import gradio
|
2 |
import gradio.inputs
|
3 |
import gradio.outputs
|
4 |
import torch
|
5 |
-
from df
|
|
|
6 |
|
7 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
|
|
|
8 |
|
9 |
|
10 |
def mix_at_snr(clean, noise, snr, eps=1e-10):
|
@@ -21,22 +27,34 @@ def mix_at_snr(clean, noise, snr, eps=1e-10):
|
|
21 |
mix: 1D Tensor with added clean and noise signals.
|
22 |
|
23 |
"""
|
24 |
-
clean = torch.as_tensor(clean)
|
25 |
-
noise = torch.as_tensor(noise)
|
|
|
|
|
|
|
26 |
E_speech = torch.mean(clean.pow(2)) + eps
|
27 |
E_noise = torch.mean(noise.pow(2))
|
28 |
K = torch.sqrt((E_noise / E_speech) * 10 ** (snr / 10) + eps)
|
29 |
noise = noise / K
|
30 |
mixture = clean + noise
|
31 |
-
assert torch.isfinite(mixture)
|
32 |
return clean, noise, mixture
|
33 |
|
|
|
|
|
|
|
34 |
|
35 |
def mix_and_denoise(speech, noise, snr):
|
36 |
-
|
|
|
|
|
|
|
37 |
speech, noise, noisy = mix_at_snr(speech, noise, snr)
|
38 |
-
enhanced = enhance(model
|
39 |
-
|
|
|
|
|
|
|
40 |
|
41 |
|
42 |
inputs = [
|
@@ -49,8 +67,7 @@ inputs = [
|
|
49 |
gradio.inputs.Slider(minimum=-10, maximum=40, step=5, default=10),
|
50 |
]
|
51 |
examples = [
|
52 |
-
[],
|
53 |
-
["samples/noise_freesound_2530.wav", "samples/noise_freesound_573577.wav"],
|
54 |
]
|
55 |
outputs = [
|
56 |
gradio.outputs.Audio(label="Clean"),
|
|
|
1 |
+
import math
|
2 |
+
|
3 |
+
import numpy as np
|
4 |
import gradio
|
5 |
import gradio.inputs
|
6 |
import gradio.outputs
|
7 |
import torch
|
8 |
+
from df import config
|
9 |
+
from df.enhance import enhance, init_df, load_audio, save_audio
|
10 |
|
11 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
12 |
+
model, df, _ = init_df()
|
13 |
+
model = model.to(device=device).eval()
|
14 |
|
15 |
|
16 |
def mix_at_snr(clean, noise, snr, eps=1e-10):
|
|
|
27 |
mix: 1D Tensor with added clean and noise signals.
|
28 |
|
29 |
"""
|
30 |
+
clean = torch.as_tensor(clean).mean(0, keepdim=True)
|
31 |
+
noise = torch.as_tensor(noise).mean(0, keepdim=True)
|
32 |
+
if noise.shape[1] < clean.shape[1]:
|
33 |
+
noise = noise.repeat((1, int(math.ceil(clean.shape[1] / noise.shape[1]))))
|
34 |
+
noise = noise[:, : clean.shape[1]]
|
35 |
E_speech = torch.mean(clean.pow(2)) + eps
|
36 |
E_noise = torch.mean(noise.pow(2))
|
37 |
K = torch.sqrt((E_noise / E_speech) * 10 ** (snr / 10) + eps)
|
38 |
noise = noise / K
|
39 |
mixture = clean + noise
|
40 |
+
assert torch.isfinite(mixture).all()
|
41 |
return clean, noise, mixture
|
42 |
|
43 |
+
def as_gradio_audio(x):
    """Convert an audio tensor to a ``(sample_rate, int16 ndarray)`` pair.

    Args:
        x: Audio tensor. NOTE(review): assumed to hold floating-point samples
            in roughly [-1, 1] -- confirm against the caller.

    Returns:
        Tuple ``(sr, samples)`` where ``sr`` is read from the ``"sr"`` option
        of the ``"df"`` config section and ``samples`` is ``x`` rescaled to
        16-bit integers as a NumPy array.
    """
    sr = config.get("sr", "df", int)
    # Scale *up* to the int16 range. The previous `x / 0x7fff` shrank the
    # samples, so the integer cast truncated essentially everything to 0.
    scaled = x * 0x7FFF
    # Clamp before casting so slightly out-of-range samples saturate instead
    # of wrapping around on the int16 conversion.
    pcm = scaled.clamp(-0x8000, 0x7FFF).to(torch.int16)
    return sr, pcm.cpu().numpy()
|
46 |
|
47 |
def mix_and_denoise(speech, noise, snr):
    """Mix speech with noise at the requested SNR and run the enhancer on it.

    Args:
        speech: Speech source handed to ``load_audio``.
        noise: Noise source handed to ``load_audio``.
        snr: Signal-to-noise ratio forwarded to ``mix_at_snr``.

    Returns:
        Tuple of the three file names written by this call:
        ``("clean.wav", "noisy.wav", "enhanced.wav")``.
    """
    print(speech, noise, snr)
    sample_rate = config.get("sr", "df", int)

    # Load both inputs at the configured sample rate; the second return
    # value of load_audio is not needed here.
    speech_wav, _ = load_audio(speech, sample_rate)
    noise_wav, _ = load_audio(noise, sample_rate)

    speech_wav, noise_wav, noisy_wav = mix_at_snr(speech_wav, noise_wav, snr)
    enhanced_wav = enhance(model, df, noisy_wav)

    # Write the results in a fixed order: clean, noisy, enhanced.
    written = (
        ("clean.wav", speech_wav),
        ("noisy.wav", noisy_wav),
        ("enhanced.wav", enhanced_wav),
    )
    for file_name, signal in written:
        save_audio(file_name, signal, sample_rate)
    return tuple(file_name for file_name, _ in written)
|
58 |
|
59 |
|
60 |
inputs = [
|
|
|
67 |
gradio.inputs.Slider(minimum=-10, maximum=40, step=5, default=10),
|
68 |
]
|
69 |
examples = [
|
70 |
+
["samples/p232_013_clean.wav", "samples/noise_freesound_2530.wav", 10],
|
|
|
71 |
]
|
72 |
outputs = [
|
73 |
gradio.outputs.Audio(label="Clean"),
|
samples/noise_freesound_573577.wav → clean.wav
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d7a51b4fdfb02657cf9410dbd34b4ea165acbec48581a8a074e1d45fdd3b3334
|
3 |
+
size 378612
|
enhanced.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:97ff9dc5db07e3a2410f0dd416d9bccdcdc9bd173ed46f415e405208a4105d04
|
3 |
+
size 378284
|
noisy.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e3b658209be05042ce017aa2b3db444e56e84c3cc6f58535599ff8887c9ee5f7
|
3 |
+
size 378612
|
samples/p232_013_clean.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d7a51b4fdfb02657cf9410dbd34b4ea165acbec48581a8a074e1d45fdd3b3334
|
3 |
+
size 378612
|