Spaces:
Running
Running
File size: 1,521 Bytes
84b3aae dbe86d4 b8320e7 dbe86d4 b8320e7 84b3aae eee2253 84b3aae eee2253 b8320e7 eee2253 b8320e7 84b3aae b8320e7 84b3aae |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
import streamlit as st
import outetts
from scipy.io.wavfile import write
# Model configuration: which OuteTTS checkpoint to load and which language
# it should synthesise. Kept at module level so the name stays available
# to the rest of the script.
model_config = outetts.HFModelConfig_v1(
    model_path="OuteAI/OuteTTS-0.2-500M",
    language="en",  # Supported languages: en, zh, ja, ko
)


@st.cache_resource
def _load_interface():
    """Build the OuteTTS interface once and reuse it across reruns.

    Streamlit re-executes this script from the top on every widget
    interaction. Without caching, the interface (and the model behind it)
    would be constructed again on each rerun; ``st.cache_resource`` keeps a
    single instance alive for the lifetime of the server process.
    """
    return outetts.InterfaceHF(model_version="0.2", cfg=model_config)


# Initialize the interface (served from the cache after the first run).
interface = _load_interface()
# Streamlit UI: page header.
st.title("OuteTTS Speech Synthesis")
st.write("Enter text below to generate speech.")

# Sidebar for the optional reference voice used for cloning.
st.sidebar.title("Voice Cloning")
reference_audio = st.sidebar.file_uploader("Upload a reference audio (wav)", type=["wav"])

# Path of the reference clip on disk, or None when nothing was uploaded.
ref_audio_path = None
if reference_audio is not None:
    ref_audio_path = "reference.wav"
    with open(ref_audio_path, "wb") as f:
        # getvalue() returns the complete upload regardless of the buffer's
        # current read position; read() would yield b"" if the stream had
        # already been consumed, silently truncating the reference file.
        f.write(reference_audio.getvalue())
# Optional transcript of the reference clip, used when building the speaker
# profile. NOTE(review): whether a missing transcript is tolerated (e.g. by
# automatic transcription) depends on the installed outetts version - confirm.
ref_transcript = st.sidebar.text_area("Reference transcript (optional)", "")

text_input = st.text_area("Text to convert to speech:", "Hello, this is an AI-generated voice.")

if st.button("Generate Speech"):
    if not text_input.strip():
        # Nothing to synthesise; avoid running the model on empty input.
        st.warning("Please enter some text to convert to speech.")
    else:
        with st.spinner("Generating audio..."):
            # generate() takes a speaker profile via `speaker=`, not a wav
            # path, so build the profile from the uploaded reference clip.
            # With speaker=None the model picks the voice itself.
            speaker = None
            if ref_audio_path:
                speaker = interface.create_speaker(
                    audio_path=ref_audio_path,
                    transcript=ref_transcript.strip() or None,
                )

            output = interface.generate(
                text=text_input,
                temperature=0.1,
                repetition_penalty=1.1,
                max_length=4096,
                speaker=speaker,
            )

            # Save the synthesized speech to a file
            output_path = "output.wav"
            output.save(output_path)

        # Play the audio in the Streamlit app
        st.audio(output_path, format="audio/wav")
        st.success("Speech generated successfully!")
|