Spaces:

Musawir19
/

Taxt_to_speach

Sleeping

File size: 1,663 Bytes

59a82b8
16aac4b
 
 
e0d22a9
59a82b8
 
 
 
 
16aac4b
e0d22a9
 
59a82b8
e0d22a9
59a82b8
e0d22a9
16aac4b
26a9711
16aac4b
a972636
26a9711
 
a972636
b41b0e5
 
 
 
 
26a9711
59a82b8
b41b0e5
16aac4b
 
e0d22a9
 
 
59a82b8
 
 
 
26a9711
 
 
 
 
59a82b8

import streamlit as st
from speechbrain.pretrained import Tacotron2, HIFIGAN
from scipy.io.wavfile import write

# Build the text-to-mel model and the mel-to-waveform vocoder.
@st.cache_resource
def load_models():
    """Return the pretrained ``(tacotron2, hifi_gan)`` model pair.

    The Tacotron2 model is loaded from ``speechbrain/tts-tacotron2-ljspeech``
    with ``tmpdir_tts`` as its save directory, and the HiFi-GAN vocoder from
    ``speechbrain/tts-hifigan-ljspeech`` with ``tmpdir_vocoder``.  The
    function is decorated with ``st.cache_resource`` so that Streamlit can
    reuse the returned objects instead of rebuilding them.
    """
    synthesizer = Tacotron2.from_hparams(
        source="speechbrain/tts-tacotron2-ljspeech",
        savedir="tmpdir_tts",
    )
    vocoder = HIFIGAN.from_hparams(
        source="speechbrain/tts-hifigan-ljspeech",
        savedir="tmpdir_vocoder",
    )
    return synthesizer, vocoder

# Load models. The spinner shows the waiting message only while loading is
# in progress, so no stale "please wait" text stays on the page afterwards.
with st.spinner("Loading models... Please wait ⏳"):
    tacotron2, hifi_gan = load_models()
st.success("Models loaded successfully!")

# TTS function
def text_to_speech(
    text: str,
    audio_path: str = "output.wav",
    sample_rate: int = 22050,
) -> str:
    """Synthesize ``text`` into a 16-bit mono WAV file and return its path.

    Args:
        text: Text to speak; must contain at least one non-whitespace
            character.
        audio_path: Destination WAV file.  Defaults to ``"output.wav"``.
        sample_rate: Sample rate written to the WAV file.  Defaults to
            22050.

    Returns:
        ``audio_path``, the location the audio was written to.

    Raises:
        ValueError: If ``text`` is empty or whitespace only.
    """
    if not text or not text.strip():
        raise ValueError("text must contain at least one non-whitespace character")

    # Generate mel spectrogram; the lengths and alignments are not used here.
    mel_output, _mel_length, _alignment = tacotron2.encode_text(text)

    # Decode mel spectrogram to waveform
    waveforms = hifi_gan.decode_batch(mel_output)

    # Flatten to a 1-D sample vector.  Assuming the vocoder returns
    # (batch, 1, time) as SpeechBrain documents, squeezing only axis 1 would
    # leave a 2-D (1, time) array, which scipy interprets as
    # (samples, channels), i.e. one sample with `time` channels.
    waveform = waveforms.detach().cpu().numpy().reshape(-1)

    # Normalize to [-1, 1] using a scalar peak.  The builtin max() over a 2-D
    # array yields a row rather than a scalar, so the ndarray method is used.
    # Silent or empty output is left unscaled to avoid dividing by zero.
    peak = float(abs(waveform).max()) if waveform.size else 0.0
    if peak > 0:
        waveform = waveform / peak

    # Scale to the int16 range expected for 16-bit PCM.
    pcm = (waveform * 32767).astype("int16")

    # Save waveform as audio file
    write(audio_path, sample_rate, pcm)
    return audio_path

# Streamlit UI: a title, one text box, and a button that triggers synthesis.
st.title("🗣️ Text-to-Speech App")
text = st.text_input("Enter text to convert to speech:")

generate_clicked = st.button("Generate Speech")
if generate_clicked and not text.strip():
    # Button pressed with nothing (or only whitespace) typed in.
    st.warning("Please enter some text.")
elif generate_clicked:
    st.write("Generating speech...")
    try:
        wav_path = text_to_speech(text)
        st.audio(wav_path, format="audio/wav")
    except Exception as exc:
        # Report any synthesis failure on the page instead of crashing the app.
        st.error(f"Error during TTS generation: {exc}")