Hendrik Schroeter committed on
Commit
e2efa2c
1 Parent(s): 5ce2a93

Resample enhanced audio to input sample rate

Browse files
Files changed (1) hide show
  1. app.py +10 -3
app.py CHANGED
@@ -1,5 +1,5 @@
1
- import math
2
  import tempfile
 
3
 
4
  import gradio
5
  import gradio.inputs
@@ -9,6 +9,7 @@ import markdown
9
  import numpy as np
10
  import torch
11
  from df import config
 
12
  from df.enhance import enhance, init_df, load_audio, save_audio
13
 
14
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -61,14 +62,17 @@ def mix_and_denoise(speech_rec, speech_upl, noise, snr):
61
  if speech_rec is None or "none" in speech_rec:
62
  speech = "samples/p232_013_clean.wav"
63
  if speech_upl is not None and "none" not in speech_upl:
64
- print("using speech_alt")
65
  speech = speech_upl
66
  else:
67
  speech = speech_rec
68
  sp_kwargs = {"frame_offset": 4800}
69
- speech, _ = load_audio(speech, sr, **sp_kwargs)
70
  print(f"Loaded speech with shape {speech.shape}")
71
  noise, _ = load_audio(noise, sr)
 
 
 
72
  print(f"Loaded noise with shape {noise.shape}")
73
  speech, noise, noisy = mix_at_snr(speech, noise, snr)
74
  print("Start denoising audio")
@@ -77,6 +81,9 @@ def mix_and_denoise(speech_rec, speech_upl, noise, snr):
77
  lim = torch.linspace(0.0, 1.0, int(sr * 0.15)).unsqueeze(0)
78
  lim = torch.cat((lim, torch.ones(1, enhanced.shape[1] - lim.shape[1])), dim=1)
79
  enhanced = enhanced * lim
 
 
 
80
  noisy_fn = tempfile.NamedTemporaryFile(suffix="noisy.wav", delete=False).name
81
  save_audio(noisy_fn, noisy, sr)
82
  enhanced_fn = tempfile.NamedTemporaryFile(suffix="enhanced.wav", delete=False).name
 
 
1
  import tempfile
2
+ from df.utils import resample
3
 
4
  import gradio
5
  import gradio.inputs
 
9
  import numpy as np
10
  import torch
11
  from df import config
12
+ import math
13
  from df.enhance import enhance, init_df, load_audio, save_audio
14
 
15
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
62
  if speech_rec is None or "none" in speech_rec:
63
  speech = "samples/p232_013_clean.wav"
64
  if speech_upl is not None and "none" not in speech_upl:
65
+ print("using speech_upl")
66
  speech = speech_upl
67
  else:
68
  speech = speech_rec
69
  sp_kwargs = {"frame_offset": 4800}
70
+ speech, meta = load_audio(speech, sr, **sp_kwargs)
71
  print(f"Loaded speech with shape {speech.shape}")
72
  noise, _ = load_audio(noise, sr)
73
+ if meta.sample_rate != sr:
74
+ # Low pass filter by resampling
75
+ noise = resample(resample(noise, sr, meta.sample_rate), meta.sample_rate, sr)
76
  print(f"Loaded noise with shape {noise.shape}")
77
  speech, noise, noisy = mix_at_snr(speech, noise, snr)
78
  print("Start denoising audio")
 
81
  lim = torch.linspace(0.0, 1.0, int(sr * 0.15)).unsqueeze(0)
82
  lim = torch.cat((lim, torch.ones(1, enhanced.shape[1] - lim.shape[1])), dim=1)
83
  enhanced = enhanced * lim
84
+ if meta.sample_rate != sr:
85
+ enhanced = resample(enhanced, sr, meta.sample_rate)
86
+ noisy = resample(noisy, sr, meta.sample_rate)
87
  noisy_fn = tempfile.NamedTemporaryFile(suffix="noisy.wav", delete=False).name
88
  save_audio(noisy_fn, noisy, sr)
89
  enhanced_fn = tempfile.NamedTemporaryFile(suffix="enhanced.wav", delete=False).name