import os os.system("pip install speechbrain torchaudio gradio") import torchaudio from speechbrain.pretrained import Tacotron2, HIFIGAN import gradio as gr # Load SpeechBrain models print("Loading SpeechBrain models...") tacotron2 = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech", savedir="models/tacotron2") hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="models/hifigan") print("Models loaded! Ready to generate speech.") # Generate speech from text def text_to_speech(text): mel_output, _, _ = tacotron2.encode_text(text) waveforms = hifi_gan.decode_batch(mel_output) output_file = "output_speech.wav" torchaudio.save(output_file, waveforms.squeeze(1), 22050) return output_file # Return the audio file # Create Gradio UI iface = gr.Interface( fn=text_to_speech, inputs=gr.Textbox(label="Text to speak"), outputs=gr.Audio(type="filepath", label="Generated Speech"), title="SpeechBrain TTS Demo", description="Enter text and get an AI-generated voice output!" ) # Launch the app iface.launch()