kevinwang676
committed on
Commit
·
b1f2c4f
1
Parent(s):
08fda23
Update app.py
Browse files
app.py
CHANGED
@@ -22,6 +22,14 @@ import numpy as np
|
|
22 |
# from IPython.display import Audio
|
23 |
|
24 |
import torch
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
|
26 |
from TTS.tts.utils.synthesis import synthesis
|
27 |
from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
|
@@ -163,7 +171,14 @@ def voice_conversion(ta, ra, da):
|
|
163 |
# print("Reference Audio after decoder:")
|
164 |
# IPython.display.display(Audio(ref_wav_voc, rate=ap.sample_rate))
|
165 |
|
166 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
167 |
|
168 |
|
169 |
def generate_text_to_speech(text_prompt, selected_speaker, text_temp, waveform_temp):
|
|
|
22 |
# from IPython.display import Audio
|
23 |
|
24 |
import torch
|
25 |
+
import torchaudio
|
26 |
+
from speechbrain.pretrained import SpectralMaskEnhancement
|
27 |
+
|
28 |
+
enhance_model = SpectralMaskEnhancement.from_hparams(
|
29 |
+
source="speechbrain/metricgan-plus-voicebank",
|
30 |
+
savedir="pretrained_models/metricgan-plus-voicebank",
|
31 |
+
run_opts={"device":"cuda"},
|
32 |
+
)
|
33 |
|
34 |
from TTS.tts.utils.synthesis import synthesis
|
35 |
from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
|
|
|
171 |
# print("Reference Audio after decoder:")
|
172 |
# IPython.display.display(Audio(ref_wav_voc, rate=ap.sample_rate))
|
173 |
|
174 |
+
noisy = enhance_model.load_audio(
|
175 |
+
ref_wav_voc
|
176 |
+
).unsqueeze(0)
|
177 |
+
|
178 |
+
enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))
|
179 |
+
torchaudio.save("enhanced.wav", enhanced.cpu(), 16000)
|
180 |
+
|
181 |
+
return "enhanced.wav"
|
182 |
|
183 |
|
184 |
def generate_text_to_speech(text_prompt, selected_speaker, text_temp, waveform_temp):
|