import streamlit as st
from espnet2.bin.tts_inference import Text2Speech

@st.cache_resource
def _load_model():
    """Build the pretrained FastSpeech2 Text2Speech model.

    Wrapped in ``st.cache_resource`` so the model is constructed once per
    server process rather than on every script rerun.
    """
    return Text2Speech.from_pretrained("kan-bayashi/ljspeech_fastspeech2")


# Streamlit re-executes this script top to bottom on every widget
# interaction; going through the cached loader avoids reloading the model
# each time. NOTE(review): st.cache_resource needs a reasonably recent
# Streamlit release (1.18+, to be confirmed against the deployment).
model = _load_model()

def generate_audio(text):
    """Synthesize speech for *text* with the module-level ``model``.

    Args:
        text: The text to synthesize.

    Returns:
        The waveform produced by the model (the ``"wav"`` entry when the
        model returns a dict, otherwise the first element of the returned
        sequence).
    """
    with st.spinner("Generating Speech..."):
        output = model(text)

    # Recent ESPnet2 releases return a dict of tensors; unpacking a dict
    # would yield its *keys*, so the waveform must be looked up by name.
    if isinstance(output, dict):
        return output["wav"]

    # Older releases return a tuple whose first element is the waveform.
    speech, *_ = output
    return speech

def main():
    """Render the text-to-speech page and play the synthesized audio.

    Shows a text area and a button; on click, non-blank text is passed to
    ``generate_audio`` and the result is played back, while blank input
    produces a warning instead.
    """
    st.title("Text to Speech with ESPnet2")

    text_input = st.text_area("Enter the text to generate speech:", "")
    if not st.button("Generate Speech"):
        return

    # Treat whitespace-only input the same as empty input.
    if not text_input.strip():
        st.warning("Please enter some text.")
        return

    waveform = generate_audio(text_input)

    # st.audio accepts raw samples only as a NumPy array, so convert a
    # torch tensor (detected by its ``detach`` method) before playback.
    if hasattr(waveform, "detach"):
        waveform = waveform.detach().cpu().numpy()

    # Raw sample arrays carry no rate of their own; use the model's.
    st.audio(waveform, format="audio/wav", sample_rate=model.fs)

# Run the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()