Spaces:

Dewiin
/

voice_clone

Running

File size: 1,208 Bytes

941f415
33bea70
941f415
 
 
33bea70
 
 
941f415
 
33bea70
 
941f415
4ebf4f4
 
 
 
941f415
4ebf4f4
 
 
941f415
4ebf4f4
 
 
941f415
4ebf4f4
 
 
 
 
 
941f415
350cb58
aad4adb
23b908d

import torch
import os
import streamlit as st
from TTS.api import TTS

# By using XTTS you agree to CPML license https://coqui.ai/cpml
os.environ["COQUI_TOS_AGREED"] = "1"

# Initialize model
model = "tts_models/multilingual/multi-dataset/xtts_v2"
device = 'cuda' if torch.cuda.is_available() else 'cpu'


@st.cache_resource
def _load_tts(model_name, device_name):
  """Build the TTS model once and move it to the requested device.

  Streamlit re-executes this script from the top on every widget
  interaction; caching the loaded model as a shared resource avoids
  re-instantiating it on each rerun.

  Args:
    model_name: Model identifier passed to ``TTS``.
    device_name: Device string passed to ``.to`` ('cuda' or 'cpu' here).

  Returns:
    The object returned by ``TTS(model_name).to(device_name)``.
  """
  return TTS(model_name).to(device_name)


tts = _load_tts(model, device)

def main():
  """Render the voice-clone page and synthesize speech on request.

  Displays a title and subtitle, a WAV uploader for the reference voice,
  and a text box. When the 'Synthesize' button is pressed and both a file
  and non-blank text are present, the module-level ``tts`` model is asked
  to write the synthesized speech to a file, and the value it returns is
  handed to ``st.audio`` for playback. If either input is missing, a
  warning is shown instead.
  """
  # Title
  title = '<h1 align="center" style="font-size: 2rem;">Voice Clone</h1>'
  st.markdown(title, unsafe_allow_html=True)

  # Subtitle
  subtitle = '<h2 align="center" style="font-size: 1.5rem;">Make your favorite characters say anything!</h2>'
  st.markdown(subtitle, unsafe_allow_html=True)

  # Upload audio file
  uploaded_file = st.file_uploader('Add an audio file of the voice you want to clone...', type=['wav'])
  if uploaded_file is not None:
    # Only preview once something has actually been uploaded.
    st.audio(uploaded_file, format='audio/wav')

  # Input text
  text_input = st.text_input('Enter the text to synthesize')

  # The button is always rendered; everything below runs only on a click.
  if not st.button('Synthesize'):
    return

  # Compare by value, not identity, and treat whitespace-only text as empty.
  if uploaded_file is None or not text_input.strip():
    st.warning('Please upload a WAV file and enter some text before synthesizing.')
    return

  with st.spinner('Synthesizing...'):
    # NOTE(review): the uploaded file object is passed straight through as
    # speaker_wav, as in the original code - confirm the TTS API accepts a
    # file-like object rather than requiring a filesystem path.
    output_audio = tts.tts_to_file(text=text_input, speaker_wav=uploaded_file, language='en')

  st.audio(output_audio, format='audio/wav')

# Build the page only when this file is executed as the main script,
# not when it is imported as a module.
if __name__ == '__main__':
  main()