voice_clone / app.py
Devin Xie
changed formatting, and added try except blocks
a2f1abf
raw
history blame
2.02 kB
import torch
import os
import streamlit as st
from TTS.api import TTS
from tempfile import NamedTemporaryFile
# Using XTTS means agreeing to the CPML license: https://coqui.ai/cpml
# The flag below states that agreement through the process environment
# (presumably read by the TTS library -- verify against its documentation).
os.environ.update(COQUI_TOS_AGREED="1")
def generate_audio(audio_file, text_input):
    """Synthesize ``text_input`` in the voice of ``audio_file``.

    Builds the XTTS v2 model (on CUDA when available, otherwise on CPU) and
    writes English speech to a newly created temporary ``.wav`` file.

    Args:
        audio_file: Reference recording of the voice to clone. It is passed
            unchanged to ``tts_to_file`` as ``speaker_wav``.
        text_input: The text to be spoken.

    Returns:
        The path of the temporary WAV file holding the synthesized speech.
        The file is created with ``delete=False``, so it is not removed
        here on success; the caller is responsible for it.

    Raises:
        Whatever model construction or synthesis raises is propagated
        unchanged. If synthesis fails, the temporary file is removed first.
    """
    # Initialize model
    # NOTE(review): the model is rebuilt on every call; if start-up time
    # matters, consider caching it (e.g. with st.cache_resource) -- confirm
    # the deployed Streamlit version supports that before changing.
    model = "tts_models/multilingual/multi-dataset/xtts_v2"
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    tts = TTS(model).to(device)

    # Only a unique file name is needed here. The handle is closed before
    # synthesis so the library can reopen the path by name on any platform.
    with NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
        output_path = tmp_file.name

    try:
        tts.tts_to_file(
            text=text_input,
            speaker_wav=audio_file,
            language='en',
            file_path=output_path,
        )
    except BaseException:
        # delete=False means nothing else will clean this up; do not leave
        # an orphaned temp file behind when synthesis fails.
        try:
            os.remove(output_path)
        except OSError:
            pass
        raise

    return output_path
def main():
    """Render the Voice Clone page.

    Shows a title and subtitle, then a WAV file uploader. Once a file is
    uploaded, the page is split into two columns: the left one plays the
    reference audio, and the right one receives the synthesized audio after
    the user enters text and presses the "Synthesize" button. Problems are
    reported to the user with ``st.error``.
    """
    # Local import keeps this block self-contained; logging is stdlib.
    import logging

    # Title
    title = """<h1 align="center" style="font-size: 2rem;">Voice Clone</h1>"""
    st.markdown(title, unsafe_allow_html=True)

    # Subtitle
    subtitle = """<h2 align="center" style="font-size: 1.2rem; margin-bottom: 2rem;">Make your favorite characters say anything!</h2>"""
    st.markdown(subtitle, unsafe_allow_html=True)

    # Upload audio file
    uploaded_file = st.file_uploader('Add an audio file of the voice you want to clone...', type=['wav'])
    if uploaded_file is None:
        # Nothing more to show until a reference recording is provided.
        return

    reference_audio, synthesized_audio = st.columns(2)

    with reference_audio:
        st.header('Reference Audio')
        st.audio(uploaded_file, format='audio/wav')

    # Input text
    text_input = st.text_input('What do you want your character to say? (Alphabet letters only, DO NOT INCLUDE PUNCTUATION)')

    if not st.button('Synthesize'):
        return

    # Treat whitespace-only input the same as no input at all.
    text_to_speak = text_input.strip() if text_input else ''
    if not text_to_speak:
        st.error('Please provide a text input!')
        return

    try:
        with st.spinner('Synthesizing...'):
            output_path = generate_audio(uploaded_file, text_to_speak)

        with synthesized_audio:
            st.header('Synthesized Audio')
            st.audio(output_path, format='audio/wav')
    except Exception:
        # Catch Exception rather than using a bare except, so that
        # BaseException subclasses (KeyboardInterrupt, SystemExit, ...) are
        # not swallowed. Log the traceback so the real cause is not lost.
        logging.getLogger(__name__).exception('Voice synthesis failed')
        st.error('There was an issue synthesizing the text. Please check the input and try again. Remember, do not include punctuation.')
# Script entry point: build the page only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()