voice_generator / app.py
mrestrepo's picture
Acept tts request by default
370f126
raw
history blame
2.04 kB
import gradio as gr
import uuid
import os
from datetime import timedelta
from TTS.api import TTS
import locale
locale.getpreferredencoding = lambda: "UTF-8"
os.environ["COQUI_STUDIO_TOS"] = "Y"
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
tts = TTS("xtts_v2.0.2", gpu=True)
async def generate_audio(text_input: str, creator: str) -> str:
refer_voices = ''
unique_id = str(uuid.uuid4())
output_file = f'{unique_id}.wav'
match creator:
case 'Roomie':
refer_voices = ["./assets/roomie/roomie_emocionado_base_1.wav", "./assets/roomie/ref_12.wav", "./assets/roomie/ref_11.wav", "./assets/roomie/ref_10.wav",
"./assets/roomie/ref_1.wav", "./assets/roomie/ref_6.wav", "./assets/roomie/ref_7.wav", "./assets/roomie/ref_8.wav", "./assets/roomie/roomie_emocionado_base_2.wav",]
case 'Xavy':
refer_voices = ["./assets/xavy/neutro_3.wav",
"./assets/xavy/neutro_1.wav", "./assets/xavy/neutro_2.wav"]
case 'Bella':
refer_voices = ["./assets/bella/neutro_2.wav",
"./assets/bella/neutro_1.wav", "./assets/bella/neutro_3.wav"]
case 'Julia':
refer_voices = ["assets/julia/neutro_4_Final_fast.wav", "assets/julia/enfadado_1_Final.wav", "assets/julia/enfadado_2_Final.wav",
"assets/julia/enfadado_3_Final.wav", "assets/julia/emocionada_1.wav", "assets/julia/emocionada_2_Final.wav"]
tts.tts_to_file(text=text_input,
file_path=output_file,
speaker_wav=refer_voices,
language="en",
split_sentences=True,
)
source_audio_file_name = output_file
return gr.Audio(value=source_audio_file_name)
app = gr.Interface(
fn=generate_audio,
inputs=[gr.Textbox(label='Text to Speach'), gr.Dropdown(
['Roomie', 'Xavy', 'Bella', 'Julia'], label="Coice your creator")],
outputs=['audio']
)
app.launch()