# Hugging Face Space (status when captured: Sleeping)
import streamlit as st
import torch
from transformers import AutoTokenizer, VitsModel
# Title and Description
# Rendered at the top of the page on every script run.
st.title("Text-to-Speech with VitsModel")
st.write("Enter some English text, and I'll generate audio for you!")
# Load Model and Tokenizer
# Cache the model for efficiency: Streamlit re-executes the whole script on
# every widget interaction, so without caching the checkpoint would be
# reloaded each time the user types or clicks.
@st.cache_resource
def load_tts_model():
    """Load the pretrained text-to-speech model and its tokenizer.

    Both objects are loaded from the ``facebook/mms-tts-eng`` checkpoint.

    Returns:
        A ``(model, tokenizer)`` tuple: the ``VitsModel`` instance and the
        tokenizer returned by ``AutoTokenizer`` for the same checkpoint.
    """
    checkpoint = "facebook/mms-tts-eng"
    model = VitsModel.from_pretrained(checkpoint)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    return model, tokenizer
model, tokenizer = load_tts_model()

# User Input
user_text = st.text_input("Enter your text here:")

# Generate Audio on Button Click
if st.button("Generate Speech"):
    # Treat whitespace-only input like empty input: there is nothing to
    # synthesize, so warn instead of running the model.
    if not user_text.strip():
        st.warning("Please enter some text.")
    else:
        inputs = tokenizer(user_text, return_tensors="pt")
        # Inference only; skip gradient tracking.
        with torch.no_grad():
            output = model(**inputs).waveform

        # Play the Audio Directly
        # st.audio needs an explicit sample rate when handed a raw NumPy
        # array; use the rate recorded in the model's configuration.
        st.audio(output[0].numpy(), sample_rate=model.config.sampling_rate)