Spaces:
Running
Running
File size: 1,579 Bytes
84b3aae d1ac783 3127b1b 84b3aae d1ac783 ed7cb82 84b3aae d1ac783 84b3aae eee2253 3127b1b eee2253 084f05a 3127b1b eee2253 084f05a 3127b1b 84b3aae d1ac783 b8320e7 d1ac783 b8320e7 84b3aae 3127b1b 084f05a 3127b1b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
import streamlit as st
from TTS.api import TTS
import tempfile
import os
from pydub import AudioSegment
@st.cache_resource
def load_tts_model():
    """Load the XTTS v2 model once and reuse it across script reruns.

    Streamlit re-executes this script from the top on every widget
    interaction; caching the loader avoids constructing the model again
    on each rerun.

    Returns:
        The ``TTS`` instance for
        ``tts_models/multilingual/multi-dataset/xtts_v2``.
    """
    return TTS("tts_models/multilingual/multi-dataset/xtts_v2")


# Initialize the TTS model (cached; shared by the generation step below)
tts = load_tts_model()
# --- Main page header ---
st.title("XTTS v2 Speech Synthesis")
st.write("Enter text below to generate speech.")

# --- Sidebar: upload widget for the voice-cloning reference clip ---
voice_sidebar = st.sidebar
voice_sidebar.title("Voice Cloning")

# File extensions the uploader widget will accept.
accepted_audio_types = ["wav", "mp3", "ogg", "flac", "m4a"]
reference_audio = voice_sidebar.file_uploader(
    "Upload a reference audio (any format)",
    type=accepted_audio_types,
)
def convert_to_wav(audio_file):
    """Convert an audio file to WAV and return the path of the new file.

    Args:
        audio_file: Input passed straight to ``AudioSegment.from_file``;
            in this script it is the object returned by the sidebar
            file uploader.

    Returns:
        Path of a newly created temporary ``.wav`` file. The file is
        created with ``delete=False``, so the caller is responsible for
        removing it when done.

    Raises:
        Exception: Anything raised while decoding or exporting the audio
            is re-raised after the temporary file has been removed.
    """
    # Only the unique path is needed. Close our own handle immediately so
    # the descriptor is not leaked and the file can be reopened for writing
    # (an open NamedTemporaryFile cannot be reopened by name on Windows).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
        wav_path = temp_audio.name

    try:
        audio = AudioSegment.from_file(audio_file)
        # export() returns the output file handle; close it explicitly so
        # the data is flushed before anyone reads the file by path.
        audio.export(wav_path, format="wav").close()
    except Exception:
        # Do not leave an orphaned temp file behind on failure.
        os.remove(wav_path)
        raise
    return wav_path
# Temporary file paths created while handling a "Generate Speech" click.
# They stay None on reruns where nothing is generated.
ref_audio_path = None
output_path = None

text_input = st.text_area("Text to convert to speech:", "Hello, this is an AI-generated voice.")

if st.button("Generate Speech"):
    if not text_input.strip():
        # Nothing to synthesize; avoid handing empty text to the model.
        st.warning("Please enter some text to convert to speech.")
    else:
        try:
            with st.spinner("Generating audio..."):
                # Convert the reference clip only when it is actually needed,
                # not on every script rerun.
                if reference_audio:
                    ref_audio_path = convert_to_wav(reference_audio)

                # Use a unique output file per request so concurrent sessions
                # cannot overwrite each other's audio.
                with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as output_file:
                    output_path = output_file.name

                # Generate speech using XTTS v2
                # NOTE(review): speaker_wav is None when no reference audio is
                # uploaded; confirm the model accepts that.
                tts.tts_to_file(
                    text=text_input,
                    file_path=output_path,
                    speaker_wav=ref_audio_path,
                    language="en",
                )

                # Read the result into memory so the temp file can be removed
                # safely once this block finishes.
                with open(output_path, "rb") as generated_file:
                    audio_bytes = generated_file.read()

            # Play the audio in the Streamlit app
            st.audio(audio_bytes, format="audio/wav")
            st.success("Speech generated successfully!")
        finally:
            # Clean up temporary files even if conversion or synthesis failed.
            for temp_path in (ref_audio_path, output_path):
                if temp_path and os.path.exists(temp_path):
                    os.remove(temp_path)
|