"""Streamlit app that converts user-entered text to speech with SpeechBrain."""

import streamlit as st
# NOTE(review): newer SpeechBrain releases appear to expose these classes under
# `speechbrain.inference` -- confirm against the installed version.
from speechbrain.pretrained import Tacotron2, HIFIGAN
from scipy.io.wavfile import write

# Sample rate handed to the WAV writer (value carried over from the original
# script; presumably the native rate of the LJSpeech models -- confirm).
SAMPLE_RATE = 22050

# File the synthesized audio is written to; overwritten on every request.
OUTPUT_PATH = "output.wav"


# Load the TTS and Vocoder models
@st.cache_resource
def load_models():
    """Load the Tacotron2 text-to-mel model and the HiFi-GAN vocoder.

    Decorated with ``st.cache_resource`` so the models are not reloaded on
    every Streamlit rerun.

    Returns:
        A ``(tacotron2, hifi_gan)`` tuple of the loaded SpeechBrain models.
    """
    tacotron2 = Tacotron2.from_hparams(
        source="speechbrain/tts-tacotron2-ljspeech",
        savedir="tmpdir_tts",
    )
    hifi_gan = HIFIGAN.from_hparams(
        source="speechbrain/tts-hifigan-ljspeech",
        savedir="tmpdir_vocoder",
    )
    return tacotron2, hifi_gan


tacotron2, hifi_gan = load_models()


# Text-to-Speech function
def text_to_speech(text: str) -> str:
    """Synthesize *text* to speech and save it as a WAV file.

    Args:
        text: The text to convert to speech.

    Returns:
        Path of the WAV file that was written (``OUTPUT_PATH``).
    """
    mel_output, mel_length, alignment = tacotron2.encode_text(text)

    # decode_batch returns a single waveform tensor (documented by SpeechBrain
    # as [batch, 1, time]); unpacking it into three names would iterate the
    # batch axis and fail for a single input text.
    waveforms = hifi_gan.decode_batch(mel_output)

    # scipy's writer interprets 2-D data as (Nsamples, Nchannels), so collapse
    # the singleton batch/channel axes to get a 1-D mono signal.
    samples = waveforms.squeeze().detach().cpu().numpy()

    write(OUTPUT_PATH, SAMPLE_RATE, samples)
    return OUTPUT_PATH


# Streamlit App UI
st.title("Text-to-Speech Chatbot")

# Input text box
text = st.text_input("Enter text to convert to speech:", "")

if st.button("Generate Speech"):
    if text.strip():
        st.write("Generating speech...")
        audio_file = text_to_speech(text)
        st.audio(audio_file, format="audio/wav")
    else:
        # Whitespace-only input is treated the same as empty input.
        st.warning("Please enter some text.")

st.write("Powered by SpeechBrain and Streamlit.")