import gradio
import gradio.inputs
import gradio.outputs
import torch

from df.enhance import enhance, init_df, load_audio

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the DeepFilterNet model and state once at startup instead of on every request.
model, df_state, _ = init_df()
model = model.to(device=device).eval()


def mix_at_snr(clean, noise, snr, eps=1e-10):
    """Mix a clean and a noise signal at a given SNR.

    Args:
        clean: 1D Tensor with the clean signal to mix.
        noise: 1D Tensor with the noise signal to mix.
        snr: Signal-to-noise ratio in dB.

    Returns:
        clean: 1D Tensor with the clean signal.
        noise: 1D Tensor with the noise scaled according to the snr.
        mixture: 1D Tensor with the sum of the clean and scaled noise signals.
    """
    clean = torch.as_tensor(clean)
    noise = torch.as_tensor(noise)
    E_speech = torch.mean(clean.pow(2)) + eps
    E_noise = torch.mean(noise.pow(2))
    # Scale the noise so that the speech-to-noise energy ratio matches the requested SNR (dB).
    K = torch.sqrt((E_noise / E_speech) * 10 ** (snr / 10) + eps)
    noise = noise / K
    mixture = clean + noise
    assert torch.isfinite(mixture).all()
    return clean, noise, mixture


def as_gradio_audio(audio, sr):
    """Convert a mono audio Tensor to the (sample_rate, int16 ndarray) tuple Gradio expects."""
    audio = torch.clamp(audio.squeeze().detach().cpu(), -1.0, 1.0)
    return sr, (audio * 32767).to(torch.int16).numpy()


def mix_and_denoise(speech, noise, snr):
    if speech is None or noise is None:
        raise ValueError("Please provide both a speech and a noise recording.")
    sr = df_state.sr()
    # The Audio inputs use type="filepath", so load the files and resample to the model's rate.
    speech, _ = load_audio(speech, sr)
    noise, _ = load_audio(noise, sr)
    speech, noise, noisy = mix_at_snr(speech, noise, snr)
    enhanced = enhance(model, df_state, noisy)
    return (
        as_gradio_audio(speech, sr),
        as_gradio_audio(noisy, sr),
        as_gradio_audio(enhanced, sr),
    )


inputs = [
    gradio.inputs.Audio(
        source="microphone", type="filepath", optional=True, label="Speech"
    ),
    gradio.inputs.Audio(
        source="microphone", type="filepath", optional=True, label="Noise"
    ),
    gradio.inputs.Slider(minimum=-10, maximum=40, step=5, default=10, label="SNR (dB)"),
]
# Each example row provides a value for every input: speech file, noise file, SNR.
examples = [
    ["samples/noise_freesound_2530.wav", "samples/noise_freesound_573577.wav", 10],
]
outputs = [
    gradio.outputs.Audio(label="Clean"),
    gradio.outputs.Audio(label="Noisy"),
    gradio.outputs.Audio(label="Enhanced"),
]
iface = gradio.Interface(
    fn=mix_and_denoise, inputs=inputs, outputs=outputs, examples=examples
)
iface.launch()