File size: 1,521 Bytes
84b3aae
 
 
 
dbe86d4
 
 
b8320e7
dbe86d4
 
b8320e7
 
84b3aae
 
 
 
 
eee2253
 
 
 
 
 
 
 
 
 
 
84b3aae
 
 
 
eee2253
b8320e7
 
 
 
eee2253
 
b8320e7
 
 
84b3aae
b8320e7
 
 
84b3aae
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import streamlit as st
import outetts
from scipy.io.wavfile import write

# Initialize model configuration
model_config = outetts.HFModelConfig_v1(
    model_path="OuteAI/OuteTTS-0.2-500M",
    language="en",  # Supported languages: en, zh, ja, ko
)


@st.cache_resource
def load_interface() -> "outetts.InterfaceHF":
    """Build the OuteTTS interface once and share it across script reruns.

    Streamlit re-executes this script from top to bottom on every widget
    interaction. Without caching, the model would be constructed again on
    each rerun (including the rerun triggered by the "Generate Speech"
    button), so the expensive construction is wrapped in
    ``st.cache_resource``.

    Returns:
        The interface created from the module-level ``model_config``.
    """
    return outetts.InterfaceHF(model_version="0.2", cfg=model_config)


# Initialize the interface (cached after the first run)
interface = load_interface()

# Streamlit UI
st.title("OuteTTS Speech Synthesis")
st.write("Enter text below to generate speech.")

# Sidebar: optional reference recording used for voice cloning
st.sidebar.title("Voice Cloning")
reference_audio = st.sidebar.file_uploader("Upload a reference audio (wav)", type=["wav"])

# Path of the reference clip on disk, or None when nothing has been uploaded.
ref_audio_path = None
if reference_audio is not None:
    ref_audio_path = "reference.wav"
    with open(ref_audio_path, "wb") as f:
        # getvalue() returns the complete upload regardless of the current
        # stream position; read() advances that position and can yield empty
        # bytes if the same upload object is read again on a later rerun.
        f.write(reference_audio.getvalue())

text_input = st.text_area("Text to convert to speech:", "Hello, this is an AI-generated voice.")

# Transcript of the uploaded reference clip. Per the OuteTTS 0.2 README, a
# speaker profile is built from an audio file together with its transcript,
# so the transcript is collected next to the uploader in the sidebar.
ref_transcript = st.sidebar.text_area("Transcript of the reference audio:")

if st.button("Generate Speech"):
    if not text_input.strip():
        # Nothing to synthesize; avoid starting a generation for empty text.
        st.warning("Please enter some text to convert to speech.")
    elif ref_audio_path and not ref_transcript.strip():
        # A reference clip was uploaded but cannot be used without its transcript.
        st.error("Please provide the transcript of the reference audio to clone its voice.")
    else:
        with st.spinner("Generating audio..."):
            # Build a speaker profile only when a reference clip was uploaded;
            # speaker=None lets the model pick the voice itself.
            speaker = None
            if ref_audio_path:
                speaker = interface.create_speaker(
                    audio_path=ref_audio_path,
                    transcript=ref_transcript.strip(),
                )

            # generate() takes the profile through its `speaker` argument;
            # it has no `speaker_wav` keyword.
            output = interface.generate(
                text=text_input,
                temperature=0.1,
                repetition_penalty=1.1,
                max_length=4096,
                speaker=speaker,
            )

            # Save the synthesized speech to a file
            output_path = "output.wav"
            output.save(output_path)

        # Play the audio in the Streamlit app
        st.audio(output_path, format="audio/wav")
        st.success("Speech generated successfully!")