"""Streamlit front-end for voice cloning with the Coqui XTTS v2 model.

The user uploads a WAV sample of the voice to clone, types the text to
speak, and the app plays back the synthesized audio.
"""

import os
import tempfile

import streamlit as st
import torch
from TTS.api import TTS

# By using XTTS you agree to CPML license https://coqui.ai/cpml
os.environ["COQUI_TOS_AGREED"] = "1"

# Model identifier and target device.
model = "tts_models/multilingual/multi-dataset/xtts_v2"
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Page header texts, passed to st.markdown as-is.
_TITLE_MARKDOWN = """

Voice Clone

"""

_SUBTITLE_MARKDOWN = """

Make your favorite characters say anything!

"""


@st.cache_resource
def _load_tts(model_name: str, device_name: str) -> TTS:
    """Load the TTS model once and reuse it across Streamlit reruns.

    Streamlit re-executes the whole script on every widget interaction;
    without caching, the model would be re-created each time.
    """
    return TTS(model_name).to(device_name)


# Initialize model
tts = _load_tts(model, device)


def _synthesize(text: str, speaker_wav_bytes: bytes, language: str = 'en') -> bytes:
    """Synthesize ``text`` in the voice of the given WAV sample.

    Args:
        text: The text to speak.
        speaker_wav_bytes: Raw bytes of the reference WAV recording.
        language: Language code passed to the model.

    Returns:
        The synthesized audio as WAV bytes.
    """
    # The TTS API takes file paths, so stage both the reference sample and
    # the output in a private temp directory. This also keeps concurrent
    # sessions from overwriting a shared default output file.
    with tempfile.TemporaryDirectory() as workdir:
        speaker_path = os.path.join(workdir, 'speaker.wav')
        output_path = os.path.join(workdir, 'output.wav')

        with open(speaker_path, 'wb') as speaker_file:
            speaker_file.write(speaker_wav_bytes)

        tts.tts_to_file(
            text=text,
            speaker_wav=speaker_path,
            language=language,
            file_path=output_path,
        )

        with open(output_path, 'rb') as output_file:
            return output_file.read()


def main() -> None:
    """Render the page and run synthesis when the user requests it."""
    # Title
    st.markdown(_TITLE_MARKDOWN, unsafe_allow_html=True)

    # Subtitle
    st.markdown(_SUBTITLE_MARKDOWN, unsafe_allow_html=True)

    # Upload audio file
    uploaded_file = st.file_uploader(
        'Add an audio file of the voice you want to clone...',
        type=['wav'],
    )
    if uploaded_file is not None:
        # Only show the preview player once there is something to play.
        st.audio(uploaded_file, format='audio/wav')

    # Input text
    text_input = st.text_input('Enter the text to synthesize')

    if not st.button('Synthesize'):
        return

    # Compare by value/truthiness; `is not ""` tests object identity and is
    # not a reliable emptiness check.
    if uploaded_file is None or not text_input.strip():
        st.warning('Please upload a WAV file and enter some text first.')
        return

    with st.spinner('Synthesizing...'):
        output_audio = _synthesize(text_input, uploaded_file.getvalue())
    st.audio(output_audio, format='audio/wav')


if __name__ == '__main__':
    main()