DeepFilterNet / app.py
Hendrik Schroeter
add random noise start
df38a50 unverified
raw
history blame
2.9 kB
import math
import gradio
import gradio.inputs
import gradio.outputs
import torch
from df import config
from df.enhance import enhance, init_df, load_audio, save_audio
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# `init_df` returns three values; only the model and the `df` state object
# are used by this script.
model, df, _ = init_df()
model = model.to(device=device)
model = model.eval()
def mix_at_snr(clean, noise, snr, eps=1e-10):
    """Mix a clean signal with noise at a given signal-to-noise ratio.

    Both inputs are down-mixed to a single channel by averaging over the
    first (channel) dimension. If the noise is shorter than the clean signal
    it is tiled until it is long enough. A randomly positioned excerpt with
    the length of the clean signal is then cut out of the noise and scaled so
    that the clean/noise energy ratio matches ``snr``.

    Args:
        clean: Tensor-like of shape [C, T] (a 1D [T] signal is also accepted
            and treated as a single channel).
        noise: Tensor-like of shape [C, T'] (or 1D [T']).
        snr: Target signal-to-noise ratio in dB.
        eps: Small constant guarding the energy division and the square root.

    Returns:
        clean: Tensor [1, T] with the mono clean signal (gain unchanged).
        noise: Tensor [1, T] with the mono noise excerpt, scaled for ``snr``.
        mixture: Tensor [1, T] with the sum of ``clean`` and scaled ``noise``.

    Raises:
        ValueError: If ``noise`` contains no samples.
    """
    clean = torch.as_tensor(clean)
    noise = torch.as_tensor(noise)
    # Accept plain 1D signals by treating them as one channel.
    if clean.dim() == 1:
        clean = clean.unsqueeze(0)
    if noise.dim() == 1:
        noise = noise.unsqueeze(0)
    # Down-mix all channels to mono.
    clean = clean.mean(0, keepdim=True)
    noise = noise.mean(0, keepdim=True)

    n_clean = clean.shape[1]
    n_noise = noise.shape[1]
    if n_noise == 0:
        raise ValueError("noise must contain at least one sample")
    if n_noise < n_clean:
        # Tile the noise so that it covers the whole clean signal.
        noise = noise.repeat((1, int(math.ceil(n_clean / n_noise))))

    # Pick a random excerpt. `torch.randint` requires an explicit size and
    # its upper bound is exclusive, hence `max_start + 1`; this also keeps
    # the call valid when both signals have the same length (max_start == 0).
    max_start = int(noise.shape[1] - n_clean)
    start = int(torch.randint(0, max_start + 1, (1,)).item())
    noise = noise[:, start : start + n_clean]

    E_speech = torch.mean(clean.pow(2)) + eps
    E_noise = torch.mean(noise.pow(2))
    # Dividing the noise by K yields E_speech / E_noise_scaled == 10^(snr/10).
    K = torch.sqrt((E_noise / E_speech) * 10 ** (snr / 10) + eps)
    noise = noise / K
    mixture = clean + noise
    # Sanity check: NaN/Inf here would indicate broken input audio.
    assert torch.isfinite(mixture).all()
    return clean, noise, mixture
def as_gradio_audio(x):
    """Convert an audio tensor into a ``(sample_rate, int16 samples)`` pair.

    Args:
        x: Audio tensor. Assumed to hold float samples nominally in [-1, 1]
            -- TODO confirm against what `load_audio`/`enhance` return.

    Returns:
        Tuple ``(sr, samples)`` where ``sr`` is the value of the ``sr``
        setting in the ``df`` config section (default 48000) and ``samples``
        is a NumPy int16 array with the same shape as ``x``.
    """
    sr = config("sr", 48000, int, section="df")
    # Scale *up* to the int16 range (dividing would truncate everything to
    # ~0) and clamp so samples slightly outside [-1, 1] saturate rather than
    # wrap around.
    pcm = (x * 0x7FFF).clamp(-0x8000, 0x7FFF).to(torch.int16)
    return sr, pcm.cpu().numpy()
def mix_and_denoise(speech, noise, snr):
    """Mix speech with noise at the given SNR and denoise the mixture.

    Args:
        speech: Path of the speech audio file, or None to use the bundled
            sample "samples/p232_013_clean.wav".
        noise: Path of the noise audio file, or None to use the bundled
            sample "samples/dkitchen.wav".
        snr: Signal-to-noise ratio forwarded to `mix_at_snr`.

    Returns:
        The paths of the three wav files written by this call, in the order
        clean, noisy, enhanced.
    """
    if speech is None:
        # The speech widget is declared optional, so no upload/recording
        # arrives as None; fall back to a bundled sample like for the noise.
        speech = "samples/p232_013_clean.wav"
    if noise is None:
        noise = "samples/dkitchen.wav"
    print(speech, noise, snr)
    sr = config("sr", 48000, int, section="df")
    speech, _ = load_audio(speech, sr)
    noise, _ = load_audio(noise, sr)
    speech, noise, noisy = mix_at_snr(speech, noise, snr)
    enhanced = enhance(model, df, noisy)
    # NOTE(review): the output file names are fixed, so overlapping requests
    # would overwrite each other's files.
    save_audio("clean.wav", speech, sr)
    save_audio("noisy.wav", noisy, sr)
    save_audio("enhanced.wav", enhanced, sr)
    return "clean.wav", "noisy.wav", "enhanced.wav"
# Input widgets, in the positional order of `mix_and_denoise`'s parameters
# (speech, noise, snr). Both audio widgets hand over a file path and may be
# left empty.
inputs = [
    gradio.inputs.Audio(
        label="Speech",
        type="filepath",
        source="microphone,upload",
        optional=True,
    ),
    gradio.inputs.Audio(
        label="Noise",
        type="filepath",
        source="upload",
        optional=True,
    ),
    gradio.inputs.Slider(default=10, minimum=-10, maximum=40, step=5),
]
# Pre-filled example rows: [speech file, noise file, snr].
examples = [
    [
        "samples/p232_013_clean.wav",
        "samples/dkitchen.wav",
        10,
    ],
    [
        "samples/p232_019_clean.wav",
        "samples/dliving.wav",
        10,
    ],
]
# One audio player per value returned by `mix_and_denoise`, in return order.
outputs = [
    gradio.outputs.Audio(label=title) for title in ("Clean", "Noisy", "Enhanced")
]
# Short blurb passed to the interface as its description.
description = (
    "This demo denoises audio files using DeepFilterNet. "
    "Try it with your own voice!"
)
# Wire the processing function to its widgets and start the web app.
iface = gradio.Interface(
    description=description,
    examples=examples,
    inputs=inputs,
    outputs=outputs,
    fn=mix_and_denoise,
)
iface.launch()