Spaces:
Sleeping
Sleeping
Hendrik Schroeter
committed on
Commit
·
e8d5f8b
1
Parent(s):
f81803d
gradio compat
Browse files
app.py
CHANGED
|
@@ -15,7 +15,7 @@ from torchaudio.backend.common import AudioMetaData
|
|
| 15 |
|
| 16 |
from df import config
|
| 17 |
from df.enhance import enhance, init_df, load_audio, save_audio
|
| 18 |
-
from df.
|
| 19 |
|
| 20 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 21 |
model, df, _ = init_df("./DeepFilterNet2", config_allow_defaults=True)
|
|
@@ -99,28 +99,21 @@ def load_audio_gradio(
|
|
| 99 |
return audio, meta
|
| 100 |
|
| 101 |
|
| 102 |
-
def demo_fn(
|
| 103 |
-
speech_rec: Union[str, Tuple[int, np.ndarray]], speech_upl: str, noise_type: str, snr: int
|
| 104 |
-
):
|
| 105 |
sr = config("sr", 48000, int, section="df")
|
| 106 |
-
logger.info(
|
| 107 |
-
|
| 108 |
-
)
|
| 109 |
noise_fn = NOISES[noise_type]
|
| 110 |
meta = AudioMetaData(-1, -1, -1, -1, "")
|
| 111 |
max_s = 10 # limit to 10 seconds
|
| 112 |
-
if
|
| 113 |
-
sample, meta = load_audio("samples/p232_013_clean.wav", sr)
|
| 114 |
-
elif speech_upl is not None:
|
| 115 |
sample, meta = load_audio(speech_upl, sr)
|
| 116 |
max_len = max_s * sr
|
| 117 |
if sample.shape[-1] > max_len:
|
| 118 |
start = torch.randint(0, sample.shape[-1] - max_len, ()).item()
|
| 119 |
sample = sample[..., start : start + max_len]
|
| 120 |
else:
|
| 121 |
-
|
| 122 |
-
assert tmp is not None
|
| 123 |
-
sample, meta = tmp
|
| 124 |
sample = sample[..., : max_s * sr]
|
| 125 |
if sample.dim() > 1 and sample.shape[0] > 1:
|
| 126 |
assert (
|
|
@@ -274,15 +267,15 @@ inputs = [
|
|
| 274 |
),
|
| 275 |
gradio.inputs.Dropdown(
|
| 276 |
label="Noise Level (SNR)",
|
| 277 |
-
choices=[-5, 0, 10, 20],
|
| 278 |
-
default=10,
|
| 279 |
),
|
| 280 |
]
|
| 281 |
outputs = [
|
| 282 |
-
gradio.
|
| 283 |
-
gradio.
|
| 284 |
-
gradio.
|
| 285 |
-
gradio.
|
| 286 |
]
|
| 287 |
description = "This demo denoises audio files using DeepFilterNet. Try it with your own voice!"
|
| 288 |
iface = gradio.Interface(
|
|
|
|
| 15 |
|
| 16 |
from df import config
|
| 17 |
from df.enhance import enhance, init_df, load_audio, save_audio
|
| 18 |
+
from df.io import resample
|
| 19 |
|
| 20 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 21 |
model, df, _ = init_df("./DeepFilterNet2", config_allow_defaults=True)
|
|
|
|
| 99 |
return audio, meta
|
| 100 |
|
| 101 |
|
| 102 |
+
def demo_fn(speech_upl: str, noise_type: str, snr: int):
|
|
|
|
|
|
|
| 103 |
sr = config("sr", 48000, int, section="df")
|
| 104 |
+
logger.info(f"Got parameters speech_upl: {speech_upl}, noise: {noise_type}, snr: {snr}")
|
| 105 |
+
snr = int(snr)
|
|
|
|
| 106 |
noise_fn = NOISES[noise_type]
|
| 107 |
meta = AudioMetaData(-1, -1, -1, -1, "")
|
| 108 |
max_s = 10 # limit to 10 seconds
|
| 109 |
+
if speech_upl is not None:
|
|
|
|
|
|
|
| 110 |
sample, meta = load_audio(speech_upl, sr)
|
| 111 |
max_len = max_s * sr
|
| 112 |
if sample.shape[-1] > max_len:
|
| 113 |
start = torch.randint(0, sample.shape[-1] - max_len, ()).item()
|
| 114 |
sample = sample[..., start : start + max_len]
|
| 115 |
else:
|
| 116 |
+
sample, meta = load_audio("samples/p232_013_clean.wav", sr)
|
|
|
|
|
|
|
| 117 |
sample = sample[..., : max_s * sr]
|
| 118 |
if sample.dim() > 1 and sample.shape[0] > 1:
|
| 119 |
assert (
|
|
|
|
| 267 |
),
|
| 268 |
gradio.inputs.Dropdown(
|
| 269 |
label="Noise Level (SNR)",
|
| 270 |
+
choices=["-5", "0", "10", "20"],
|
| 271 |
+
default="10",
|
| 272 |
),
|
| 273 |
]
|
| 274 |
outputs = [
|
| 275 |
+
gradio.Audio(type="filepath", label="Noisy audio"),
|
| 276 |
+
gradio.Plot(label="Noisy spectrogram"),
|
| 277 |
+
gradio.Audio(type="filepath", label="Enhanced audio"),
|
| 278 |
+
gradio.Plot(label="Enhanced spectrogram"),
|
| 279 |
]
|
| 280 |
description = "This demo denoises audio files using DeepFilterNet. Try it with your own voice!"
|
| 281 |
iface = gradio.Interface(
|