Spaces:
Running
Running
File size: 1,208 Bytes
941f415 33bea70 941f415 33bea70 941f415 33bea70 941f415 4ebf4f4 941f415 4ebf4f4 941f415 4ebf4f4 941f415 4ebf4f4 941f415 350cb58 aad4adb 23b908d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
import torch
import os
import streamlit as st
from TTS.api import TTS
# By using XTTS you agree to the CPML license: https://coqui.ai/cpml
# The flag is set before the model is constructed below.
# NOTE(review): presumably this is what lets Coqui TTS skip its interactive
# license prompt -- confirm against the TTS documentation.
os.environ["COQUI_TOS_AGREED"] = "1"

# Initialize model
model = "tts_models/multilingual/multi-dataset/xtts_v2"
# Prefer the GPU when torch reports one, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'


@st.cache_resource
def _load_tts(model_name, device_name):
    """Construct the TTS model once and move it to ``device_name``.

    Streamlit re-executes the whole script on every widget interaction.
    Without caching, the model would be rebuilt on each rerun; with
    ``st.cache_resource`` the same instance is reused for identical
    arguments.

    Args:
        model_name: Model identifier passed to ``TTS(...)``.
        device_name: Device string passed to ``.to(...)``.

    Returns:
        The object returned by ``TTS(model_name).to(device_name)``.
    """
    return TTS(model_name).to(device_name)


tts = _load_tts(model, device)
def main():
    """Render the voice-cloning page and run synthesis on request.

    The page shows a title and subtitle, a WAV file uploader with an audio
    player for the uploaded file, and a text box. When the "Synthesize"
    button is pressed while a file is uploaded and the text is non-empty,
    the module-level ``tts`` object is asked to synthesize the text (language
    ``'en'``) using the upload as ``speaker_wav``, and the value it returns
    is handed to ``st.audio``.
    """
    # Title
    title = """<h1 align="center" style="font-size: 2rem";>Voice Clone</h1>"""
    st.markdown(title, unsafe_allow_html=True)

    # Subtitle
    subtitle = """<h2 align="center" style="font-size: 1.5rem";>Make your favorite characters say anything!</h2>"""
    st.markdown(subtitle, unsafe_allow_html=True)

    # Upload audio file
    uploaded_file = st.file_uploader('Add an audio file of the voice you want to clone...', type=['wav'])
    st.audio(uploaded_file, format='audio/wav')

    # Input text
    text_input = st.text_input('Enter the text to synthesize')

    # Compare the text by value: `is not ""` tests object identity, which is
    # implementation-dependent for strings and raises a SyntaxWarning.
    if st.button('Synthesize') and uploaded_file is not None and text_input != "":
        with st.spinner('Synthesizing...'):
            # NOTE(review): the uploaded file object is passed straight through
            # as speaker_wav -- confirm the TTS API accepts a file-like object
            # here and not only a filesystem path.
            output_audio = tts.tts_to_file(text=text_input, speaker_wav=uploaded_file, language='en')
        st.audio(output_audio, format='audio/wav')
# Script entry point: build the page only when this file is executed as the
# main module, not when it is imported.
if __name__ == "__main__":
    main()
|