import streamlit as st
from transformers import pipeline
import sounddevice as sd
import numpy as np
# Page header. Each piece of text spans several lines, so it must be a
# triple-quoted string; a single-quoted literal cannot contain raw newlines.
# `unsafe_allow_html=True` lets any HTML placed in these strings render as-is.
st.markdown(
    """
🎤 Text-to-Speech with Bark Model 🎶
""",
    unsafe_allow_html=True,
)

# Subtitle describing what the app does.
st.markdown(
    """
Convert your text into lifelike speech instantly!
""",
    unsafe_allow_html=True,
)

# Horizontal rule separating the header from the controls below.
st.markdown("---")
# Streamlit re-executes this script from top to bottom on every widget
# interaction. Building the pipeline through a cached factory means the Bark
# model is constructed once per server process instead of on every rerun.
@st.cache_resource
def load_synthesizer():
    """Return the Bark text-to-speech pipeline, cached across script reruns."""
    return pipeline("text-to-speech", model="suno/bark")


# Module-level handle used by the rest of the script.
synthesizer = load_synthesizer()
# Multi-line input box holding the text the user wants spoken.
# The widget returns its current contents, which are bound to `text`.
_PROMPT_LABEL = "Enter the text you want to convert to speech:"
_PROMPT_PLACEHOLDER = "Type something interesting..."
_PROMPT_HEIGHT_PX = 150

text = st.text_area(
    label=_PROMPT_LABEL,
    placeholder=_PROMPT_PLACEHOLDER,
    height=_PROMPT_HEIGHT_PX,
)
def _to_int16_pcm(audio):
    """Peak-normalize *audio* and convert it to 16-bit PCM samples.

    Args:
        audio: Array-like of floating-point samples. Singleton axes are
            dropped before conversion (assumes mono output with an optional
            leading batch/channel axis of length 1 — TODO confirm against the
            pipeline's return shape).

    Returns:
        A NumPy ``int16`` array with at least one dimension. Silent or empty
        input yields zeros / an empty array instead of dividing by zero.
    """
    samples = np.atleast_1d(np.squeeze(np.asarray(audio, dtype=np.float32)))
    # np.max raises on an empty array, so treat "no samples" as zero peak.
    peak = float(np.max(np.abs(samples))) if samples.size else 0.0
    if peak == 0.0:
        # All-zero (or empty) audio: scaling would be 0/0 -> NaN, and casting
        # NaN to int16 is undefined, so return silence directly.
        return np.zeros(samples.shape, dtype=np.int16)
    return (samples / peak * 32767).astype(np.int16)


# Button that generates speech for the entered text and plays it back.
if st.button("🎙️ Convert to Speech"):
    if not text.strip():
        # Nothing but whitespace was entered; ask the user for real input.
        st.error("Please enter some text before converting!")
    else:
        with st.spinner("Generating speech... 🎶"):
            # Run the text-to-speech pipeline on the user's text.
            speech = synthesizer(text)

            # Normalize and convert the generated samples to int16 PCM.
            audio_data = _to_int16_pcm(speech["audio"])
            sampling_rate = speech["sampling_rate"]

            # Play through sounddevice and block until playback finishes.
            # NOTE: this uses the audio output of the machine running the
            # script, not the viewer's browser.
            if audio_data.size:
                sd.play(audio_data, sampling_rate)
                sd.wait()

        # Success message
        st.success("🎉 Speech generation complete!")
# Footer slot. The markup string is empty in this file; HTML placed here is
# rendered as-is because `unsafe_allow_html` is enabled.
_FOOTER_HTML = ""
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)