|
import gradio as gr |
|
import torch |
|
import torchaudio |
|
from speechbrain.inference.enhancement import SpectralMaskEnhancement |
|
|
|
|
|
# Load the pretrained MetricGAN+ speech-enhancement model once at import time
# so every request reuses the same instance. The fetched files are stored
# under ``savedir``.
enhance_model = SpectralMaskEnhancement.from_hparams(
    savedir="tmpdir_metricgan",
    source="speechbrain/metricgan-plus-voicebank",
)
|
|
|
|
|
def enhance_speech(audio):
    """Enhance a noisy recording with the module-level MetricGAN+ model.

    Args:
        audio: Filesystem path of the noisy audio file (the Gradio input
            component is configured with ``type="filepath"``). ``None`` or
            empty when the user submitted nothing.

    Returns:
        Path of a freshly written WAV file holding the enhanced audio.

    Raises:
        gr.Error: If no audio file was provided.
    """
    # Local import keeps this block self-contained; tempfile is stdlib.
    import tempfile

    # Fail with a readable message in the UI instead of an opaque loader
    # traceback when the form is submitted without a file.
    if not audio:
        raise gr.Error("Please upload an audio file to enhance.")

    # Sample rate written to the output file.
    # NOTE(review): presumably matches the rate load_audio normalizes to
    # for this model (16 kHz) - confirm against the model's hparams.
    sample_rate = 16000

    # Add a leading batch dimension, since enhance_batch works on batches.
    noisy = enhance_model.load_audio(audio).unsqueeze(0)

    # One entry per batch item; 1.0 follows SpeechBrain's relative-length
    # convention (the whole signal is valid, no padding).
    lengths = torch.tensor([1.0])

    # Pure inference: no autograd bookkeeping is needed.
    with torch.no_grad():
        enhanced = enhance_model.enhance_batch(noisy, lengths)

    # Reserve a unique file per call so concurrent or successive requests
    # never overwrite each other's result (the old fixed "enhanced.wav" did).
    # The handle is closed before saving so the path can be reopened on any
    # platform; delete=False leaves the file in place for Gradio to serve.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        output_path = handle.name

    torchaudio.save(output_path, enhanced.detach().cpu(), sample_rate)
    return output_path
|
|
|
|
|
# Web UI: a single audio upload in, a single audio player out. Both
# components exchange file paths with ``enhance_speech``.
iface = gr.Interface(
    title="Speech Enhancement",
    description="Upload a noisy audio file to enhance it using MetricGAN.",
    fn=enhance_speech,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.Audio(type="filepath"),
)

# Start the Gradio server as soon as the script is executed.
iface.launch()
|
|