Spaces:
Running
Running
import streamlit as st | |
import outetts | |
from scipy.io.wavfile import write | |
# Initialize model configuration
model_config = outetts.HFModelConfig_v1(
    model_path="OuteAI/OuteTTS-0.2-500M",
    language="en",  # Supported languages: en, zh, ja, ko
)


@st.cache_resource
def _load_interface():
    """Build the OuteTTS interface once and reuse it across reruns.

    Streamlit re-executes this script on every widget interaction; without
    caching, the model would be re-initialized each time. The cached object
    is shared by all sessions of the server process.
    """
    return outetts.InterfaceHF(model_version="0.2", cfg=model_config)


def _synthesize(text, reference_audio=None, transcript=None):
    """Generate speech for ``text`` and return the result as WAV bytes.

    Args:
        text: Text to convert to speech.
        reference_audio: Optional uploaded WAV file (as returned by
            ``st.file_uploader``) whose voice should be cloned.
        transcript: Text spoken in ``reference_audio``; only used when a
            reference clip is supplied.

    Returns:
        The synthesized audio, read back from the WAV file written by
        ``output.save``.
    """
    # A private temporary directory keeps concurrent sessions from
    # overwriting each other's reference/output files.
    with tempfile.TemporaryDirectory() as workdir:
        speaker = None
        if reference_audio is not None:
            ref_audio_path = Path(workdir) / "reference.wav"
            # getvalue() returns the full upload regardless of the current
            # read position of the file-like object.
            ref_audio_path.write_bytes(reference_audio.getvalue())
            # Voice cloning goes through a speaker profile: generate() takes
            # ``speaker=``, not a path to a wav file.
            speaker = interface.create_speaker(
                audio_path=str(ref_audio_path),
                transcript=transcript,
            )

        output = interface.generate(
            text=text,
            temperature=0.1,
            repetition_penalty=1.1,
            max_length=4096,
            speaker=speaker,
        )

        # Save the synthesized speech, then load it into memory so the
        # temporary directory can be removed before playback.
        output_path = Path(workdir) / "output.wav"
        output.save(str(output_path))
        return output_path.read_bytes()


# Initialize the interface
interface = _load_interface()

# Streamlit UI
st.title("OuteTTS Speech Synthesis")
st.write("Enter text below to generate speech.")

# Sidebar for reference voice
st.sidebar.title("Voice Cloning")
reference_audio = st.sidebar.file_uploader(
    "Upload a reference audio (wav)", type=["wav"]
)
# NOTE(review): some outetts releases can auto-transcribe the clip when no
# transcript is given; requiring one here is the conservative choice that
# should work across 0.2.x releases -- confirm against the installed version.
reference_transcript = st.sidebar.text_area("Transcript of the reference audio:")

text_input = st.text_area(
    "Text to convert to speech:", "Hello, this is an AI-generated voice."
)

if st.button("Generate Speech"):
    if not text_input.strip():
        st.warning("Please enter some text to convert to speech.")
    elif reference_audio is not None and not reference_transcript.strip():
        st.warning("Please provide the transcript of the reference audio.")
    else:
        with st.spinner("Generating audio..."):
            audio_bytes = _synthesize(
                text_input,
                reference_audio,
                reference_transcript.strip(),
            )
        # Play the audio in the Streamlit app
        st.audio(audio_bytes, format="audio/wav")
        st.success("Speech generated successfully!")