kevinwang676 committed on
Commit
b1f2c4f
·
1 Parent(s): 08fda23

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -1
app.py CHANGED
@@ -22,6 +22,14 @@ import numpy as np
22
  # from IPython.display import Audio
23
 
24
  import torch
 
 
 
 
 
 
 
 
25
 
26
  from TTS.tts.utils.synthesis import synthesis
27
  from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
@@ -163,7 +171,14 @@ def voice_conversion(ta, ra, da):
163
  # print("Reference Audio after decoder:")
164
  # IPython.display.display(Audio(ref_wav_voc, rate=ap.sample_rate))
165
 
166
- return (ap.sample_rate, ref_wav_voc)
 
 
 
 
 
 
 
167
 
168
 
169
  def generate_text_to_speech(text_prompt, selected_speaker, text_temp, waveform_temp):
 
22
  # from IPython.display import Audio
23
 
24
  import torch
25
+ import torchaudio
26
+ from speechbrain.pretrained import SpectralMaskEnhancement
27
+
28
+ enhance_model = SpectralMaskEnhancement.from_hparams(
29
+ source="speechbrain/metricgan-plus-voicebank",
30
+ savedir="pretrained_models/metricgan-plus-voicebank",
31
+ run_opts={"device":"cuda"},
32
+ )
33
 
34
  from TTS.tts.utils.synthesis import synthesis
35
  from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
 
171
  # print("Reference Audio after decoder:")
172
  # IPython.display.display(Audio(ref_wav_voc, rate=ap.sample_rate))
173
 
174
+ noisy = enhance_model.load_audio(
175
+ ref_wav_voc
176
+ ).unsqueeze(0)
177
+
178
+ enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))
179
+ torchaudio.save("enhanced.wav", enhanced.cpu(), 16000)
180
+
181
+ return "enhanced.wav"
182
 
183
 
184
  def generate_text_to_speech(text_prompt, selected_speaker, text_temp, waveform_temp):