Spaces:

Dewiin
/

voice_clone

Running

File size: 2,023 Bytes

941f415
33bea70
941f415
 
ba596b5
941f415
33bea70
 
 
60a8de0
 
 
 
 
 
ba596b5
 
 
 
 
941f415
4ebf4f4
 
 
 
941f415
4ebf4f4
60a8de0
4ebf4f4
941f415
4ebf4f4
 
941f415
ba596b5
a2f1abf
cba51d2
 
 
1f2644a
cba51d2
 
1f2644a
cba51d2
1f2644a
cba51d2
1f2644a
 
 
 
 
 
cba51d2
 
1f2644a
 
aad4adb
23b908d

import torch
import os
import streamlit as st
from TTS.api import TTS
from tempfile import NamedTemporaryFile

# By using XTTS you agree to the CPML license: https://coqui.ai/cpml
# NOTE(review): this is set at import time, before any model is loaded;
# presumably Coqui TTS reads it to treat the license terms as already accepted
# instead of asking interactively — confirm against the TTS documentation.
os.environ["COQUI_TOS_AGREED"] = "1"

def generate_audio(audio_file, text_input, language='en'):
  """Synthesize `text_input` in the voice of `audio_file` and return a WAV path.

  Args:
    audio_file: Reference recording of the voice to clone. It is forwarded
      unchanged to XTTS as `speaker_wav` (main() passes the Streamlit upload).
    text_input: The text to be spoken.
    language: Language code forwarded to XTTS. Defaults to 'en', which is the
      value this function previously hard-coded.

  Returns:
    Path of a temporary `.wav` file containing the synthesized speech. The
    file is created with `delete=False`, so the caller owns it and should
    remove it once it is no longer needed.

  Raises:
    Any exception raised while loading the model or synthesizing. If
    synthesis fails, the temporary file is removed before re-raising.
  """
  # Initialize model
  model = "tts_models/multilingual/multi-dataset/xtts_v2"
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
  tts = TTS(model).to(device)

  # Only reserve a unique path here. The handle is closed before TTS writes to
  # it, because re-opening a NamedTemporaryFile by name while it is still open
  # is not portable (it fails on Windows).
  with NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
    output_path = tmp_file.name

  try:
    tts.tts_to_file(text=text_input, speaker_wav=audio_file, language=language, file_path=output_path)
  except BaseException:
    # Do not leave an empty/partial file behind when synthesis fails.
    try:
      os.remove(output_path)
    except OSError:
      pass
    raise

  return output_path

def main():
  """Render the voice-clone page and run synthesis when requested.

  Flow: show the title and subtitle, ask for a reference `.wav` upload, play
  it back in the left column, read the text to speak, and on 'Synthesize'
  call generate_audio() and play the result in the right column. Nothing
  below the uploader is rendered until a file has been uploaded.
  """
  import logging  # local import: only needed for the failure path below

  # Title
  title = """<h1 align="center" style="font-size: 2rem;">Voice Clone</h1>"""
  st.markdown(title, unsafe_allow_html=True)

  # Subtitle
  subtitle = """<h2 align="center" style="font-size: 1.2rem; margin-bottom: 2rem;">Make your favorite characters say anything!</h2>"""
  st.markdown(subtitle, unsafe_allow_html=True)

  # Upload audio file
  uploaded_file = st.file_uploader('Add an audio file of the voice you want to clone...', type=['wav'])
  if uploaded_file is None:
    return

  reference_audio, synthesized_audio = st.columns(2)
  with reference_audio:
    st.header('Reference Audio')
    st.audio(uploaded_file, format='audio/wav')

  # Input text
  text_input = st.text_input('What do you want your character to say? (Alphabet letters only, DO NOT INCLUDE PUNCTUATION)')

  if not st.button('Synthesize'):
    return

  # Whitespace-only input counts as empty.
  if not text_input.strip():
    st.error('Please provide a text input!')
    return

  try:
    with st.spinner('Synthesizing...'):
      output_path = generate_audio(uploaded_file, text_input)

    # Load the result into memory and delete the temp file right away, so
    # repeated syntheses do not accumulate .wav files in the temp directory.
    try:
      with open(output_path, 'rb') as wav_file:
        audio_bytes = wav_file.read()
    finally:
      os.remove(output_path)
  except Exception:
    # `except Exception` (not a bare `except:`) lets KeyboardInterrupt,
    # SystemExit and other BaseException subclasses propagate. The traceback
    # is logged so the real cause is not lost behind the generic message.
    logging.getLogger(__name__).exception('Voice synthesis failed')
    st.error('There was an issue synthesizing the text. Please check the input and try again. Remember, do not include punctuation.')
    return

  with synthesized_audio:
    st.header('Synthesized Audio')
    st.audio(audio_bytes, format='audio/wav')

# Build the page only when this file is executed as the main script,
# not when it is imported as a module.
if __name__ == '__main__':
  main()