File size: 1,208 Bytes
941f415
33bea70
941f415
 
 
33bea70
 
 
941f415
 
33bea70
 
941f415
4ebf4f4
 
 
 
941f415
4ebf4f4
 
 
941f415
4ebf4f4
 
 
941f415
4ebf4f4
 
 
 
 
 
941f415
350cb58
aad4adb
23b908d
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import os
import tempfile

import streamlit as st
import torch
from TTS.api import TTS

# By using XTTS you agree to CPML license https://coqui.ai/cpml
# Set before the model is constructed; presumably TTS reads this flag while
# fetching the model so it does not stop to ask for confirmation.
os.environ["COQUI_TOS_AGREED"] = "1"

# Initialize model
model = "tts_models/multilingual/multi-dataset/xtts_v2"
device = 'cuda' if torch.cuda.is_available() else 'cpu'


@st.cache_resource
def _load_tts(model_name, device_name):
  """Build the TTS model once and share it across Streamlit reruns.

  Streamlit re-executes this script from the top on every widget
  interaction; without caching the model would be constructed and moved to
  the device again each time.

  Args:
    model_name: Model identifier passed to ``TTS``.
    device_name: Device string passed to ``.to()`` (``'cuda'`` or ``'cpu'``).

  Returns:
    The ``TTS`` instance placed on ``device_name``.
  """
  return TTS(model_name).to(device_name)


tts = _load_tts(model, device)

def main():
  """Render the voice-cloning page and synthesize speech on request.

  Shows a title and subtitle, a WAV uploader for the reference voice, and a
  text box. When the "Synthesize" button is pressed with both inputs
  present, the module-level ``tts`` model generates speech in the uploaded
  voice and the result is played back. If an input is missing, a warning is
  shown instead.
  """
  # Title
  title = """<h1 align="center" style="font-size: 2rem;">Voice Clone</h1>"""
  st.markdown(title, unsafe_allow_html=True)

  # Subtitle
  subtitle = """<h2 align="center" style="font-size: 1.5rem;">Make your favorite characters say anything!</h2>"""
  st.markdown(subtitle, unsafe_allow_html=True)

  # Upload audio file
  uploaded_file = st.file_uploader('Add an audio file of the voice you want to clone...', type=['wav'])
  if uploaded_file is not None:
    # Only offer a preview player once there is something to play.
    st.audio(uploaded_file, format='audio/wav')

  # Input text
  text_input = st.text_input('Enter the text to synthesize')

  # The button is always rendered; everything below runs only on a click.
  if not st.button('Synthesize'):
    return

  if uploaded_file is None:
    st.warning('Please upload a WAV file of the voice to clone first.')
    return

  # Compare by value (not identity) and treat whitespace-only text as empty.
  text = text_input.strip()
  if not text:
    st.warning('Please enter some text to synthesize.')
    return

  with st.spinner('Synthesizing...'):
    # Use a private scratch directory: the reference voice is handed to TTS
    # as a real file path, and the output does not collide with other
    # sessions writing to a shared default file in the working directory.
    with tempfile.TemporaryDirectory() as workdir:
      reference_path = os.path.join(workdir, 'reference.wav')
      output_path = os.path.join(workdir, 'output.wav')

      with open(reference_path, 'wb') as reference_file:
        reference_file.write(uploaded_file.getvalue())

      tts.tts_to_file(
        text=text,
        speaker_wav=reference_path,
        language='en',
        file_path=output_path,
      )

      # Read the result into memory before the scratch directory is removed.
      with open(output_path, 'rb') as output_file:
        audio_bytes = output_file.read()

  st.audio(audio_bytes, format='audio/wav')

# Script entry point: build the page only when this file is run directly,
# not when it is imported as a module.
if __name__ == "__main__":
  main()