Spaces:
Runtime error
Runtime error
File size: 2,758 Bytes
0f79c5b d446ca4 0f79c5b 4674b6c d446ca4 0f79c5b 4674b6c d446ca4 0f79c5b d446ca4 0f79c5b d446ca4 a52c38e 0f79c5b 7d50d5d a52c38e d446ca4 0f79c5b 7d50d5d 0f79c5b d446ca4 0f79c5b d446ca4 0f79c5b baa5010 d446ca4 a52c38e d446ca4 a52c38e d446ca4 4674b6c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
import math
import gradio
import gradio.inputs
import gradio.outputs
import torch
from df import config
from df.enhance import enhance, init_df, load_audio, save_audio
# Run inference on GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Load the pretrained DeepFilterNet model and its DF state/config
# (the third return value is unused here); switch to eval/inference mode.
model, df, _ = init_df()
model = model.to(device=device).eval()
def mix_at_snr(clean, noise, snr, eps=1e-10):
    """Mix clean and noise signal at a given signal-to-noise ratio.

    Args:
        clean: Tensor of shape [C, N] (or [N]) with the clean signal;
            channels are averaged to mono.
        noise: Tensor of shape [C, N'] (or [N']) with the noise signal;
            tiled/truncated to the length of ``clean``.
        snr: Target signal-to-noise ratio in dB.
        eps: Small constant guarding against division by zero.

    Returns:
        clean: [1, N] Tensor, mono-averaged clean signal.
        noise: [1, N] Tensor, noise scaled to reach the target SNR.
        mix: [1, N] Tensor, sum of clean and scaled noise.
    """
    # atleast_2d also accepts plain 1D signals, as the docstring promised;
    # the original code raised an IndexError for 1D input.
    clean = torch.atleast_2d(torch.as_tensor(clean)).mean(0, keepdim=True)
    noise = torch.atleast_2d(torch.as_tensor(noise)).mean(0, keepdim=True)
    # Tile the noise until it is at least as long as the clean signal,
    # then truncate to exactly the same length.
    if noise.shape[1] < clean.shape[1]:
        noise = noise.repeat((1, int(math.ceil(clean.shape[1] / noise.shape[1]))))
    noise = noise[:, : clean.shape[1]]
    # Scale the noise so that E_speech / E_noise == 10^(snr/10).
    E_speech = torch.mean(clean.pow(2)) + eps
    E_noise = torch.mean(noise.pow(2))
    K = torch.sqrt((E_noise / E_speech) * 10 ** (snr / 10) + eps)
    noise = noise / K
    mixture = clean + noise
    assert torch.isfinite(mixture).all()
    return clean, noise, mixture
def as_gradio_audio(x):
    """Convert a waveform tensor to the ``(sample_rate, int16 ndarray)`` pair gradio expects.

    Args:
        x: Float Tensor with samples in [-1, 1] — assumed, as produced by
           ``load_audio``; TODO confirm with callers.

    Returns:
        Tuple of (sample rate from the df config, numpy int16 array).
    """
    sr = config("sr", 48000, int, section="df")
    # Scale [-1, 1] floats up to the int16 range. The original divided by
    # 0x7FFF, which collapses a normalized signal to all-zero (silent) audio.
    return sr, (x * 0x7FFF).to(torch.int16).cpu().numpy()
def mix_and_denoise(speech, noise, snr):
    """Mix a speech and a noise recording at the given SNR and denoise the result.

    Args:
        speech: Path to the speech audio file.
        noise: Path to the noise audio file.
        snr: Target signal-to-noise ratio in dB.

    Returns:
        Paths of the written clean, noisy and enhanced wav files.
    """
    print(speech, noise, snr)
    sr = config("sr", 48000, int, section="df")
    clean_sig, _ = load_audio(speech, sr)
    noise_sig, _ = load_audio(noise, sr)
    clean_sig, noise_sig, noisy_sig = mix_at_snr(clean_sig, noise_sig, snr)
    denoised = enhance(model, df, noisy_sig)
    paths = ("clean.wav", "noisy.wav", "enhanced.wav")
    for path, signal in zip(paths, (clean_sig, noisy_sig, denoised)):
        save_audio(path, signal, sr)
    return paths
# Gradio input widgets: two microphone recordings (speech + noise, both
# optional, passed as file paths) and an SNR slider in dB.
inputs = [
    gradio.inputs.Audio(
        source="microphone", type="filepath", optional=True, label="Speech"
    ),
    gradio.inputs.Audio(
        source="microphone", type="filepath", optional=True, label="Noise"
    ),
    gradio.inputs.Slider(minimum=-10, maximum=40, step=5, default=10),
]
# Pre-baked example [speech, noise, snr] triples shown below the demo.
examples = [
    ["samples/p232_013_clean.wav", "samples/noise_freesound_2530.wav", 10],
    ["samples/p232_019_clean.wav", "samples/dliving.wav", 10],
]
# One audio player per wav path returned by mix_and_denoise.
outputs = [
    gradio.outputs.Audio(label="Clean"),
    gradio.outputs.Audio(label="Noisy"),
    gradio.outputs.Audio(label="Enhanced"),
]
description = (
    "This demo denoises audio files using DeepFilterNet. Try it with your own voice!"
)
# NOTE(review): gradio.inputs / gradio.outputs is the legacy (pre-3.x) gradio
# API; on newer gradio versions these live at gradio.Audio / gradio.Slider.
iface = gradio.Interface(
    fn=mix_and_denoise,
    inputs=inputs,
    outputs=outputs,
    examples=examples,
    description=description,
)
# Blocks and serves the web UI.
iface.launch()
|