import os
import subprocess
import sys

# Install the runtime dependencies with the pip that belongs to the interpreter
# running this script. A bare `pip` resolved through the shell may point at a
# different Python installation, in which case the imports below would still
# fail after a "successful" install.
# check=False keeps the step best-effort: a failed install is not fatal here and
# will surface as an ImportError on the imports that follow.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "speechbrain", "torchaudio", "gradio"],
    check=False,
)

import torchaudio
from speechbrain.pretrained import Tacotron2, HIFIGAN
import gradio as gr

# Instantiate the two pretrained SpeechBrain models once, at start-up, so that
# every request handled later reuses the same objects.
print("Loading SpeechBrain models...")

# Model used to turn input text into a mel spectrogram.
tacotron2 = Tacotron2.from_hparams(
    source="speechbrain/tts-tacotron2-ljspeech",
    savedir="models/tacotron2",
)

# Model used to turn a mel spectrogram into an audio waveform.
hifi_gan = HIFIGAN.from_hparams(
    source="speechbrain/tts-hifigan-ljspeech",
    savedir="models/hifigan",
)

print("Models loaded! Ready to generate speech.")

# Generate speech from text
def text_to_speech(text):
    """Synthesize ``text`` to speech and return the path of the written WAV file.

    The text is encoded to a mel spectrogram by the module-level ``tacotron2``
    model, converted to a waveform by ``hifi_gan``, and saved with
    ``torchaudio.save`` at a sample rate of 22050.

    Args:
        text: The sentence(s) to speak.

    Returns:
        Path (``str``) of a newly created ``.wav`` file. Each call writes to its
        own temporary file; the file is not deleted by this function.

    Raises:
        ValueError: If ``text`` is ``None``, empty, or only whitespace.
    """
    import tempfile  # local import: keeps this function self-contained

    # Reject blank input up front instead of letting the model fail with an
    # opaque internal error.
    if text is None or not text.strip():
        raise ValueError("Please enter some text to synthesize.")

    mel_output, _, _ = tacotron2.encode_text(text)
    waveforms = hifi_gan.decode_batch(mel_output)

    # Use a unique file per call: a single fixed file name would let
    # overlapping requests overwrite (or be served) each other's audio.
    # delete=False because the caller reads the file after we return.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_file = tmp.name

    # squeeze(1) drops dimension 1 of the vocoder output so a 2-D tensor is
    # handed to torchaudio.save -- presumably (batch, 1, time) -> (1, time);
    # TODO confirm against the SpeechBrain HIFIGAN documentation.
    torchaudio.save(output_file, waveforms.squeeze(1), 22050)
    return output_file  # Return the audio file

# Build the Gradio front end: a single text box as input and an audio
# component (fed with a file path) as output.
text_input = gr.Textbox(label="Text to speak")
speech_output = gr.Audio(type="filepath", label="Generated Speech")

iface = gr.Interface(
    fn=text_to_speech,
    inputs=text_input,
    outputs=speech_output,
    title="SpeechBrain TTS Demo",
    description="Enter text and get an AI-generated voice output!",
)

# Start serving the interface.
iface.launch()