Spaces:
Running
Running
File size: 2,319 Bytes
941f415 33bea70 941f415 ba596b5 941f415 33bea70 60a8de0 64d37cb 60a8de0 ba596b5 941f415 4ebf4f4 941f415 4ebf4f4 60a8de0 4ebf4f4 941f415 d0b52af 6e383be d0b52af 4ebf4f4 2c4fd8d d0b52af 2c4fd8d 941f415 d0b52af 32d3b7e 1f2644a cba51d2 ab33db6 1f2644a cba51d2 1f2644a cba51d2 1f2644a d0b52af 1f2644a d0b52af cba51d2 ab33db6 1f2644a aad4adb 23b908d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
import torch
import os
import streamlit as st
from TTS.api import TTS
from tempfile import NamedTemporaryFile
# By using XTTS you agree to CPML license https://coqui.ai/cpml
# NOTE(review): this flag presumably pre-accepts the license so the Coqui
# model download does not stop at an interactive prompt -- confirm against
# the TTS library before relying on it.
os.environ["COQUI_TOS_AGREED"] = "1"
def _discard_file(path):
    """Delete ``path``, ignoring the error if it is already gone or locked."""
    try:
        os.remove(path)
    except OSError:
        pass


@st.cache_resource(show_spinner=False)
def _load_tts(model_name, device):
    """Build the TTS model once and reuse it across Streamlit reruns.

    Streamlit re-executes the whole script on every interaction, so without
    this cache the model would be constructed again for every request.
    NOTE(review): the cached model is shared by all sessions; confirm that
    concurrent synthesis calls on one model instance are acceptable.
    """
    return TTS(model_name).to(device)


def generate_audio(audio_file, text_input):
    """Synthesize ``text_input`` in the voice heard in ``audio_file``.

    Args:
        audio_file: Reference voice. Either a path to a .wav file or a
            file-like object holding .wav bytes (e.g. a Streamlit upload).
        text_input: Text to speak; synthesis is requested in English.

    Returns:
        Path of a newly created temporary .wav file containing the
        synthesized speech. The file is not deleted here; the caller owns it.

    Raises:
        ValueError: If no reference audio was supplied.
        Exception: Whatever the TTS library raises when synthesis fails.
    """
    if audio_file is None or audio_file == '':
        raise ValueError('A reference audio file is required.')

    model = "tts_models/multilingual/multi-dataset/xtts_v2"  # coqui/XTTS-v2
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    tts = _load_tts(model, device)

    # The library takes the reference voice as a file path, so a file-like
    # object (an upload) is first written out to a temporary .wav file.
    reference_path = audio_file
    spilled_path = None
    if not isinstance(audio_file, (str, os.PathLike)):
        if hasattr(audio_file, 'getvalue'):
            wav_bytes = audio_file.getvalue()
        else:
            wav_bytes = audio_file.read()
        with NamedTemporaryFile(delete=False, suffix='.wav') as ref_file:
            ref_file.write(wav_bytes)
            spilled_path = ref_file.name
        reference_path = spilled_path

    # Only reserve a unique name here; the handle is closed before the
    # library writes to the path, since some platforms refuse to open a
    # file by name while another handle to it is still open.
    with NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
        output_path = tmp_file.name

    try:
        tts.tts_to_file(
            text=text_input,
            speaker_wav=reference_path,
            language='en',
            file_path=output_path,
        )
    except Exception:
        # Do not leave an empty or partial output file behind on failure.
        _discard_file(output_path)
        raise
    finally:
        if spilled_path is not None:
            _discard_file(spilled_path)

    return output_path
def _consume_audio_file(path):
    """Return the bytes stored at ``path`` and delete the file afterwards."""
    try:
        with open(path, 'rb') as audio:
            return audio.read()
    finally:
        try:
            os.remove(path)
        except OSError:
            pass


def main():
    """Render the Voice Clone page and run synthesis on request.

    The page lets the user upload a reference .wav file or pick one of the
    bundled samples, enter the text to speak, and press "Synthesize". The
    result is played back inline; problems are reported with ``st.error``.
    """
    import logging  # local import: the file's import block does not have it

    # Title
    title = """<h1 align="center" style="font-size: 2rem;">Voice Clone</h1>"""
    st.markdown(title, unsafe_allow_html=True)

    # Subtitle
    subtitle = """<h2 align="center" style="font-size: 1.2rem; margin-bottom: 2rem;">Make your favorite characters say anything!</h2>"""
    st.markdown(subtitle, unsafe_allow_html=True)

    # The empty entry is the "nothing selected" choice of the select box.
    sample_files = {
        '': '',
        'Stewie Griffin': 'sample_inputs/stewie.wav',
        'Donald Trump': 'sample_inputs/trump.wav',
        'Joe Rogan': 'sample_inputs/rogan.wav',
    }

    # Upload audio file; the sample picker is only offered without an upload.
    uploaded_file = st.file_uploader('Add an audio (.wav) file of the voice you want to clone...', type=['wav'])
    if uploaded_file is not None:
        speaker_file = uploaded_file
    else:
        selected_sample = st.selectbox('Or choose a sample:', list(sample_files.keys()))
        speaker_file = sample_files[selected_sample]

    if speaker_file:
        st.header('Reference Audio')
        st.audio(speaker_file, format='audio/wav')

    # Input text
    text_input = st.text_area('What do you want your character to say? Try to keep the prompt around 2 sentences.')

    if not st.button('Synthesize'):
        return

    if not text_input or not text_input.strip():
        st.error('Please provide a text input!')
        return

    if not speaker_file:
        st.error('Please upload a .wav file or choose a sample voice first!')
        return

    try:
        with st.spinner('Synthesizing...'):
            output_path = generate_audio(speaker_file, text_input)
            # Load the result into memory so the temporary file can be
            # removed instead of accumulating on disk after every request.
            audio_bytes = _consume_audio_file(output_path)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so interpreter-level
        # signals (KeyboardInterrupt, SystemExit, ...) are not swallowed.
        logging.getLogger(__name__).exception('Voice synthesis failed')
        st.error('There was an issue synthesizing the text. Please check the input and try again. Try to keep the input around 2 sentences, and less than 200 characters.')
        return

    st.header('Synthesized Audio')
    st.audio(audio_bytes, format='audio/wav')
# Build the page only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
|