Spaces:
Runtime error
Runtime error
import math | |
import gradio | |
import gradio.inputs | |
import gradio.outputs | |
import torch | |
from df import config | |
from df.enhance import enhance, init_df, load_audio, save_audio | |
# Pick the compute device: CUDA when torch reports it as available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# init_df() yields three values; only the first two are used in this file.
model, df, _ = init_df()
# Move the network to the selected device, then switch it to evaluation mode.
model = model.to(device=device)
model = model.eval()
def mix_at_snr(clean, noise, snr, eps=1e-10):
    """Mix a clean and a noise signal at a given SNR.

    Both inputs are averaged over dim 0 (kept as a singleton dim), so every
    returned tensor has shape ``(1, samples)`` where ``samples`` is the
    length of ``clean`` along dim 1. A random excerpt of the noise with that
    length is used; noise that is too short is tiled first.

    Args:
        clean: 2D tensor-like holding the clean signal; dim 1 is indexed as
            the sample axis.
        noise: 2D tensor-like holding the noise signal, same layout.
        snr: Signal-to-noise ratio in dB.
        eps: Small constant guarding the division and the square root.

    Returns:
        clean: The dim-0 averaged clean signal (its gain is not changed).
        noise: The dim-0 averaged noise excerpt, scaled to reach ``snr``.
        mixture: Sum of the returned ``clean`` and ``noise``.
    """
    clean = torch.as_tensor(clean).mean(0, keepdim=True)
    noise = torch.as_tensor(noise).mean(0, keepdim=True)
    n_samples = clean.shape[1]
    if noise.shape[1] < n_samples:
        # Tile the noise until it is at least as long as the clean signal.
        noise = noise.repeat((1, int(math.ceil(n_samples / noise.shape[1]))))
    max_start = int(noise.shape[1] - n_samples)
    # torch.randint's upper bound is exclusive, so use max_start + 1: this
    # keeps the call valid when both signals have the same length
    # (max_start == 0) and makes the last possible offset reachable.
    start = torch.randint(0, max_start + 1, ()).item()
    noise = noise[:, start : start + n_samples]
    E_speech = torch.mean(clean.pow(2)) + eps
    E_noise = torch.mean(noise.pow(2))
    # Dividing the noise by K makes E_speech / E_noise equal 10^(snr / 10).
    K = torch.sqrt((E_noise / E_speech) * 10 ** (snr / 10) + eps)
    noise = noise / K
    mixture = clean + noise
    assert torch.isfinite(mixture).all()
    return clean, noise, mixture
def as_gradio_audio(x):
    """Convert an audio tensor into a ``(sample_rate, int16 array)`` pair.

    Args:
        x: Audio tensor. Assumed to hold float samples normalised to
            [-1, 1] -- TODO confirm against what ``load_audio``/``enhance``
            return.

    Returns:
        Tuple of the sample rate read from the ``df`` config section
        (falling back to 48000) and the samples as an int16 numpy array.
    """
    sr = config("sr", 48000, int, section="df")
    # Scale up to the int16 range. Dividing by 0x7FFF (as before) truncates
    # every normalised sample to 0 on the integer cast. Clamp first so that
    # out-of-range samples saturate instead of wrapping around.
    pcm = (x.clamp(-1.0, 1.0) * 0x7FFF).to(torch.int16)
    return sr, pcm.cpu().numpy()
def mix_and_denoise(speech, speech_alt, noise, snr):
    """Mix a speech sample with noise at ``snr`` and denoise the mixture.

    Args:
        speech: Path of the primary speech file, or None.
        speech_alt: Path of an alternative speech file, used only when
            ``speech`` is None.
        noise: Path of the noise file, or None to use
            ``samples/dkitchen.wav``.
        snr: Signal-to-noise ratio forwarded to ``mix_at_snr``.

    Returns:
        The paths ``("clean.wav", "noisy.wav", "enhanced.wav")`` of the
        files written to the working directory.
    """
    if noise is None:
        noise = "samples/dkitchen.wav"
    if speech is None:
        # Prefer the alternative sample and fall back to the bundled one.
        # The previous code assigned speech_alt unconditionally, which
        # discarded the fallback and passed None on to load_audio.
        if speech_alt is None:
            speech = "samples/p232_013_clean.wav"
        else:
            speech = speech_alt
    print(speech, noise, snr)
    sr = config("sr", 48000, int, section="df")
    speech, _ = load_audio(speech, sr)
    noise, _ = load_audio(noise, sr)
    speech, noise, noisy = mix_at_snr(speech, noise, snr)
    enhanced = enhance(model, df, noisy)
    # NOTE(review): fixed output names are shared by all requests.
    save_audio("clean.wav", speech, sr)
    save_audio("noisy.wav", noisy, sr)
    save_audio("enhanced.wav", enhanced, sr)
    return "clean.wav", "noisy.wav", "enhanced.wav"
# Input components, in the positional order of
# mix_and_denoise(speech, speech_alt, noise, snr).
inputs = [
    gradio.inputs.Audio(
        source="microphone", type="filepath", optional=True, label="Record your own voice"
    ),
    gradio.inputs.Audio(
        source="upload", type="filepath", optional=True, label="Alternative: Upload speech sample"
    ),
    gradio.inputs.Audio(source="upload", type="filepath", optional=True, label="Upload noise sample"),
    gradio.inputs.Slider(minimum=-20, maximum=40, step=5, default=10),
]
# Each example row needs one value per input component. The rows used to hold
# only three values, which put the noise file into the speech-upload slot and
# the SNR into the noise slot. None keeps the microphone slot empty so the
# speech sample goes to the upload slot.
# NOTE(review): confirm the installed gradio version accepts None in examples.
examples = [
    [None, "samples/p232_013_clean.wav", "samples/dkitchen.wav", 10],
    [None, "samples/p232_019_clean.wav", "samples/dliving.wav", 10],
]
# Output components, matching the three file paths returned by mix_and_denoise.
outputs = [
    gradio.outputs.Audio(label="Clean"),
    gradio.outputs.Audio(label="Noisy"),
    gradio.outputs.Audio(label="Enhanced"),
]
description = (
    "This demo denoises audio files using DeepFilterNet. Try it with your own voice!"
)
iface = gradio.Interface(
    fn=mix_and_denoise,
    inputs=inputs,
    outputs=outputs,
    examples=examples,
    description=description,
)
iface.launch()