"""Gradio demo: mix a speech clip with noise at a chosen SNR and enhance it.

The enhancement model is the one returned by ``df.enhance.init_df``.
"""

import math

import gradio
import gradio.inputs
import gradio.outputs
import torch
from df import config
from df.enhance import enhance, init_df, load_audio, save_audio

# Prefer the GPU when one is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# init_df() returns three values; the third one is not needed by this demo.
model, df, _ = init_df()
model = model.to(device=device).eval()


def mix_at_snr(clean, noise, snr, eps=1e-10):
    """Mix a clean and a noise signal at a given SNR.

    Both inputs are averaged over their first dimension, so they are expected
    to be laid out as ``(channels, samples)``. The noise is tiled or truncated
    to the length of the clean signal and then rescaled; the clean signal is
    only down-mixed, its gain is left untouched.

    Args:
        clean: Tensor-like of shape ``(channels, samples)`` with the clean signal.
        noise: Tensor-like of shape ``(channels, samples)`` with the noise signal.
        snr: Target signal-to-noise ratio in dB.
        eps: Small constant guarding against division by zero.

    Returns:
        clean: Tensor of shape ``(1, samples)``, the channel-averaged clean signal.
        noise: Tensor of shape ``(1, samples)``, the channel-averaged noise,
            length-matched and scaled to reach the requested SNR.
        mixture: Tensor of shape ``(1, samples)``, the sum of ``clean`` and ``noise``.

    Raises:
        ValueError: If the noise signal is empty but must be tiled.
        AssertionError: If the resulting mixture contains non-finite values.
    """
    clean = torch.as_tensor(clean).mean(0, keepdim=True)
    noise = torch.as_tensor(noise).mean(0, keepdim=True)
    n_clean = clean.shape[1]
    n_noise = noise.shape[1]
    if n_noise < n_clean:
        if n_noise == 0:
            # Tiling an empty signal would otherwise divide by zero below.
            raise ValueError("noise signal is empty")
        # Tile the noise until it is at least as long as the clean signal.
        noise = noise.repeat((1, int(math.ceil(n_clean / n_noise))))
    # Truncate so that both signals have exactly the same length.
    noise = noise[:, :n_clean]
    E_speech = torch.mean(clean.pow(2)) + eps
    E_noise = torch.mean(noise.pow(2))
    # Dividing the noise by K leaves it with energy E_speech / 10^(snr / 10).
    K = torch.sqrt((E_noise / E_speech) * 10 ** (snr / 10) + eps)
    noise = noise / K
    mixture = clean + noise
    assert torch.isfinite(mixture).all(), "mixture contains non-finite values"
    return clean, noise, mixture


def as_gradio_audio(x):
    """Convert an audio tensor to a ``(sample_rate, int16 array)`` tuple.

    Args:
        x: Audio tensor. Assumed to hold floating point samples normalized to
            ``[-1, 1]``; values outside that range are clipped.

    Returns:
        Tuple of the sample rate read from the ``df`` config and the samples
        as a 16-bit integer NumPy array.
    """
    sr = config.get("sr", "df", int)
    # Scale *up* to the int16 range. Dividing by 0x7FFF (as was done before)
    # truncated every sample to zero. Clamp first so that out-of-range samples
    # do not overflow when cast to int16.
    pcm = (x.clamp(-1.0, 1.0) * 0x7FFF).to(torch.int16)
    return sr, pcm.cpu().numpy()


def mix_and_denoise(speech, noise, snr):
    """Mix speech and noise at the given SNR and run the enhancement model.

    Args:
        speech: Path to the speech audio file.
        noise: Path to the noise audio file.
        snr: Target signal-to-noise ratio in dB for the noisy mixture.

    Returns:
        Paths of the written clean, noisy, and enhanced audio files.

    Raises:
        ValueError: If either the speech or the noise input is missing.
    """
    print(speech, noise, snr)
    # Both inputs are declared optional in the UI, so either may be None.
    if speech is None or noise is None:
        raise ValueError("Both a speech and a noise recording are required.")
    sr = config.get("sr", "df", int)
    speech, _ = load_audio(speech, sr)
    noise, _ = load_audio(noise, sr)
    speech, noise, noisy = mix_at_snr(speech, noise, snr)
    enhanced = enhance(model, df, noisy)
    # NOTE(review): fixed file names are shared between concurrent requests.
    save_audio("clean.wav", speech, sr)
    save_audio("noisy.wav", noisy, sr)
    save_audio("enhanced.wav", enhanced, sr)
    return "clean.wav", "noisy.wav", "enhanced.wav"


# Two audio inputs (speech and noise) plus the target SNR in dB.
inputs = [
    gradio.inputs.Audio(
        source="microphone", type="filepath", optional=True, label="Speech"
    ),
    gradio.inputs.Audio(
        source="microphone", type="filepath", optional=True, label="Noise"
    ),
    gradio.inputs.Slider(minimum=-10, maximum=40, step=5, default=10),
]
# Example rows in the same order as `inputs`: speech file, noise file, SNR.
examples = [
    ["samples/p232_013_clean.wav", "samples/noise_freesound_2530.wav", 10],
    ["samples/p232_019_clean.wav", "samples/DLIVING_combined.wav", 10],
]
# Outputs in the same order as the paths returned by `mix_and_denoise`.
outputs = [
    gradio.outputs.Audio(label="Clean"),
    gradio.outputs.Audio(label="Noisy"),
    gradio.outputs.Audio(label="Enhanced"),
]
iface = gradio.Interface(
    fn=mix_and_denoise, inputs=inputs, outputs=outputs, examples=examples
)
iface.launch()