# DeepFilterNet / app.py
import math
import gradio
import gradio.inputs
import gradio.outputs
import torch
from df import config
from df.enhance import enhance, init_df, load_audio, save_audio
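
# Initialize the DeepFilterNet model and DF state once at startup; run on the GPU when available.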
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model, df, _ = init_df()
model = model.to(device=device).eval()
def mix_at_snr(clean, noise, snr, eps=1e-10):
"""Mix clean and noise signal at a given SNR.
Args:
clean: 1D Tensor with the clean signal to mix.
noise: 1D Tensor of shape.
snr: Signal to noise ratio.
Returns:
clean: 1D Tensor with gain changed according to the snr.
noise: 1D Tensor with the combined noise channels.
mix: 1D Tensor with added clean and noise signals.
"""
clean = torch.as_tensor(clean).mean(0, keepdim=True)
noise = torch.as_tensor(noise).mean(0, keepdim=True)
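    # Tile the noise so it covers the whole clean signal, then truncate to the same length.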
if noise.shape[1] < clean.shape[1]:
noise = noise.repeat((1, int(math.ceil(clean.shape[1] / noise.shape[1]))))
noise = noise[:, : clean.shape[1]]
E_speech = torch.mean(clean.pow(2)) + eps
E_noise = torch.mean(noise.pow(2))
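    # Scale factor so that mean(clean**2) / mean((noise / K)**2) is approximately 10 ** (snr / 10).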
K = torch.sqrt((E_noise / E_speech) * 10 ** (snr / 10) + eps)
noise = noise / K
mixture = clean + noise
assert torch.isfinite(mixture).all()
return clean, noise, mixture
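
# Illustrative sanity check (not executed here): after mixing at 10 dB, the mean
# energies of the returned clean and noise tensors should differ by roughly 10 dB.
# The shapes below are arbitrary; mix_at_snr tiles the shorter noise signal.
#
#   _c, _n, _mix = mix_at_snr(torch.randn(1, 16000), torch.randn(1, 8000), snr=10)
#   _snr_db = 10 * torch.log10(_c.pow(2).mean() / _n.pow(2).mean())
#   # _snr_db is close to 10 (up to the eps regularization)
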
def as_gradio_audio(x):
sr = config.get("sr", "df", int)
    # load_audio returns float32 in [-1, 1]; scale to the int16 range for Gradio playback.
    return sr, (x * 0x7FFF).to(torch.int16).cpu().numpy()
def mix_and_denoise(speech, noise, snr):
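    """Mix the uploaded speech and noise at the requested SNR, denoise the mixture
    with DeepFilterNet, and return paths to the clean, noisy, and enhanced wav files."""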
print(speech, noise, snr)
sr = config.get("sr", "df", int)
speech, _ = load_audio(speech, sr)
noise, _ = load_audio(noise, sr)
speech, noise, noisy = mix_at_snr(speech, noise, snr)
enhanced = enhance(model, df, noisy)
save_audio("clean.wav", speech, sr)
save_audio("noisy.wav", noisy, sr)
save_audio("enhanced.wav", enhanced, sr)
return "clean.wav", "noisy.wav", "enhanced.wav"
inputs = [
gradio.inputs.Audio(
source="microphone", type="filepath", optional=True, label="Speech"
),
gradio.inputs.Audio(
source="microphone", type="filepath", optional=True, label="Noise"
),
    gradio.inputs.Slider(minimum=-10, maximum=40, step=5, default=10, label="SNR [dB]"),
]
examples = [
["samples/p232_013_clean.wav", "samples/noise_freesound_2530.wav", 10],
["samples/p232_019_clean.wav", "samples/DLIVING_combined.wav", 10],
]
outputs = [
gradio.outputs.Audio(label="Clean"),
gradio.outputs.Audio(label="Noisy"),
gradio.outputs.Audio(label="Enhanced"),
]
description = (
"This demo denoises audio files using DeepFilterNet. Try it with your own voice!"
)
iface = gradio.Interface(
fn=mix_and_denoise,
inputs=inputs,
outputs=outputs,
examples=examples,
description=description,
)
iface.launch()