|
from TTSInferencing import TTSInferencing |
|
from speechbrain.inference.vocoders import HIFIGAN |
|
|
|
import streamlit as st |
|
import numpy as np |
|
|
|
|
|
@st.cache_resource
def _load_models():
    """Build the text-to-mel model and the HiFi-GAN vocoder once.

    Streamlit re-executes this script from the top on every widget
    interaction. Without caching, both checkpoints would be loaded again on
    each rerun; `st.cache_resource` keeps the two objects alive and returns
    the same instances on later reruns.

    Returns:
        A `(tts_model, hifi_gan)` tuple: the `TTSInferencing` instance built
        from the local hyperparameter/module files, and the `HIFIGAN`
        vocoder built from the "speechbrain/tts-hifigan-ljspeech" source.
    """
    acoustic_model = TTSInferencing.from_hparams(
        source="./",
        hparams_file='./hyperparams.yaml',
        pymodule_file='./module_classes.py',
    )
    vocoder = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech")
    return acoustic_model, vocoder


# Module-level names used by the synthesis step further down the script.
tts_model, hifi_gan = _load_models()
|
|
|
|
|
|
|
|
|
# Text box holding the sentence to synthesize.
text_input = st.text_input("Enter your text here")

# `text` is what gets passed to `encode_batch` later: a lone string is wrapped
# in a one-element list, any other value is passed through unchanged.
text = [text_input] if isinstance(text_input, str) else text_input
|
|
|
# Synthesis runs only on the rerun triggered by clicking the button.
if st.button("Synthesize Speech"):
    # `text` is a list (a lone string is wrapped as [text_input]) and [""] is
    # truthy, so testing the list itself can never reach the error branch.
    # Look at the strings inside it instead, treating blank input as empty.
    if any(sentence.strip() for sentence in text):
        mel_outputs = tts_model.encode_batch(text)
        waveforms = hifi_gan.decode_batch(mel_outputs)

        # Take the first item of the batch and hand it to NumPy for st.audio.
        # NOTE(review): presumably decode_batch returns (batch, 1, time), in
        # which case squeeze(1) on the indexed item is a no-op and the array
        # stays (1, time) -- confirm against the vocoder's documentation.
        waveform = waveforms[0].squeeze(1).numpy()

        # Rescale only when the signal leaves [-1, 1]; compute the peak once.
        peak = np.max(np.abs(waveform))
        if peak > 1.0:
            waveform /= peak

        # NOTE(review): 22050 Hz is assumed to match the vocoder's output
        # rate -- confirm against the checkpoint's hyperparameters.
        st.audio(waveform, format="audio/wav", sample_rate=22050)
    else:
        st.error("Please enter text to get the speech.")