File size: 1,579 Bytes
84b3aae
d1ac783
3127b1b
 
 
84b3aae
d1ac783
ed7cb82
84b3aae
 
d1ac783
84b3aae
 
eee2253
 
3127b1b
 
 
 
 
 
 
 
eee2253
084f05a
3127b1b
eee2253
084f05a
3127b1b
84b3aae
 
 
 
d1ac783
 
 
 
 
b8320e7
d1ac783
 
 
b8320e7
 
 
84b3aae
 
3127b1b
 
084f05a
3127b1b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import streamlit as st
from TTS.api import TTS
import tempfile
import os
from pydub import AudioSegment

# Initialize the TTS model.
# Streamlit re-executes this script from top to bottom on every user
# interaction, so the loader is wrapped in st.cache_resource to keep one
# loaded model per server process instead of reloading it on each rerun.
@st.cache_resource
def _load_tts_model():
    """Load and return the XTTS v2 model (cached across script reruns)."""
    return TTS("tts_models/multilingual/multi-dataset/xtts_v2")


tts = _load_tts_model()

# Main page: heading and a one-line usage hint
st.title("XTTS v2 Speech Synthesis")
st.write("Enter text below to generate speech.")

# Sidebar: heading plus the uploader for the reference voice sample
with st.sidebar:
    st.title("Voice Cloning")
    reference_audio = st.file_uploader(
        "Upload a reference audio (any format)",
        type=["wav", "mp3", "ogg", "flac", "m4a"],
    )

# Function to convert audio to WAV format
def convert_to_wav(audio_file):
    """Convert an audio file to WAV and return the path of the new file.

    Args:
        audio_file: Anything ``AudioSegment.from_file`` accepts; in this
            script it is the object returned by the Streamlit file uploader.

    Returns:
        Path of a newly created temporary ``.wav`` file. The file is created
        with ``delete=False``, so the caller is responsible for removing it.

    Raises:
        Exception: Whatever ``AudioSegment.from_file`` or ``export`` raises
            is propagated after the temporary file has been removed.
    """
    # Only the reserved path is needed. Closing the handle right away avoids
    # leaking a file descriptor and lets the exporter re-open the path on
    # platforms that forbid opening a file twice.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
        wav_path = temp_audio.name

    try:
        audio = AudioSegment.from_file(audio_file)
        # export() hands back the output file handle; close it explicitly
        # rather than relying on garbage collection.
        exported = audio.export(wav_path, format="wav")
        exported.close()
    except Exception:
        # Do not leave an orphaned temp file behind when conversion fails.
        os.remove(wav_path)
        raise
    return wav_path

# Path of the converted reference clip. It is only populated while a
# generation request is being processed, so plain reruns (typing text,
# uploading a file) no longer convert the upload and delete it again.
ref_audio_path = None

text_input = st.text_area("Text to convert to speech:", "Hello, this is an AI-generated voice.")

if st.button("Generate Speech"):
    if not text_input.strip():
        st.warning("Please enter some text to convert to speech.")
    elif reference_audio is None:
        # XTTS v2 is a multi-speaker voice-cloning model: without a speaker
        # reference the TTS API raises instead of synthesizing, so ask for
        # one up front rather than showing a stack trace.
        st.warning("Please upload a reference audio file in the sidebar first.")
    else:
        output_path = None
        try:
            with st.spinner("Generating audio..."):
                # Rewind in case the upload buffer was already consumed by
                # an earlier run of the script.
                reference_audio.seek(0)
                ref_audio_path = convert_to_wav(reference_audio)

                # A unique output file per request keeps concurrent sessions
                # from overwriting each other's audio (previously a shared
                # "output.wav" in the working directory).
                with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as out_file:
                    output_path = out_file.name

                # Generate speech using XTTS v2
                tts.tts_to_file(
                    text=text_input,
                    file_path=output_path,
                    speaker_wav=ref_audio_path,
                    language="en",
                )

                # Read the result into memory so the file can be deleted
                # below without affecting playback.
                with open(output_path, "rb") as generated:
                    audio_bytes = generated.read()

            # Play the audio in the Streamlit app
            st.audio(audio_bytes, format="audio/wav")
            st.success("Speech generated successfully!")
        finally:
            # Clean up temporary files even when conversion or synthesis fails.
            for leftover in (ref_audio_path, output_path):
                if leftover is None:
                    continue
                try:
                    os.remove(leftover)
                except FileNotFoundError:
                    # convert_to_wav already removes its file on failure.
                    pass