File size: 2,393 Bytes
914c7b4
ff92946
370f126
ff92946
b308ed7
 
86b63a1
914c7b4
b308ed7
86b63a1
b308ed7
17da063
e937e1b
 
914c7b4
4fa54b8
 
 
914c7b4
854e562
914c7b4
b308ed7
 
914c7b4
86b63a1
914c7b4
 
b308ed7
 
914c7b4
b308ed7
 
914c7b4
b308ed7
 
ff92946
b308ed7
 
 
ff92946
b308ed7
 
 
 
 
854e562
b308ed7
ff92946
b308ed7
ff92946
33c11bc
914c7b4
 
 
 
 
854e562
 
b308ed7
914c7b4
 
 
4fa54b8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import gradio as gr
import uuid
import os
from datetime import timedelta
from TTS.api import TTS
import locale
import torch

# Override the locale lookup so anything calling locale.getpreferredencoding()
# gets "UTF-8" regardless of the host configuration.
locale.getpreferredencoding = lambda: "UTF-8"
device = "cuda" if torch.cuda.is_available() else "cpu"

# Set before the model is constructed.
# NOTE(review): presumably this pre-accepts Coqui's terms-of-service prompt so
# the model download does not block waiting for input -- confirm.
os.environ["COQUI_TOS_AGREED"] = "1"
# Load the model exactly once and place it on the selected device. Previously
# a second `TTS("xtts_v2.0.2")` call replaced the first instance, so the model
# actually used never received `.to(device)` and the first load was wasted.
tts = TTS("xtts_v2.0.2").to(device)

# Login credentials for the web UI. They can be overridden through environment
# variables of the same name; the literals are only kept as a fallback so that
# existing deployments keep working.
# NOTE(review): a password committed to source control should be rotated and
# the fallback removed once the environment variables are configured.
SECRET_USER = os.environ.get("SECRET_USER", "admin_kr")
SECRET_PASSWORD = os.environ.get("SECRET_PASSWORD", "9NLa2)597,J^")


# Reference recordings passed to the model as `speaker_wav`, keyed by the
# creator name offered in the UI dropdown.
_REFERENCE_VOICES = {
    'Roomie': [
        "./assets/roomie/roomie_emocionado_base_1.wav",
        "./assets/roomie/ref_12.wav",
        "./assets/roomie/ref_11.wav",
        "./assets/roomie/ref_10.wav",
        "./assets/roomie/ref_1.wav",
        "./assets/roomie/ref_6.wav",
        "./assets/roomie/ref_7.wav",
        "./assets/roomie/ref_8.wav",
        "./assets/roomie/roomie_emocionado_base_2.wav",
    ],
    'Xavy': [
        "./assets/xavy/neutro_3.wav",
        "./assets/xavy/neutro_1.wav",
        "./assets/xavy/neutro_2.wav",
    ],
    'Bella': [
        "./assets/bella/neutro_2.wav",
        "./assets/bella/neutro_1.wav",
        "./assets/bella/neutro_3.wav",
    ],
    'Julia': [
        "assets/julia/neutro_4_Final_fast.wav",
        "assets/julia/enfadado_1_Final.wav",
        "assets/julia/enfadado_2_Final.wav",
        "assets/julia/enfadado_3_Final.wav",
        "assets/julia/emocionada_1.wav",
        "assets/julia/emocionada_2_Final.wav",
    ],
}


async def generate_audio(text_input: str, creator: str, top_k_input: int) -> "gr.Audio":
    """Synthesize ``text_input`` in the voice of ``creator`` and return the audio.

    Args:
        text_input: Text to be spoken.
        creator: One of the keys of ``_REFERENCE_VOICES``; selects the
            reference recordings used as ``speaker_wav``.
        top_k_input: Value forwarded to the model as ``top_k``.

    Returns:
        A ``gr.Audio`` component whose value is the path of the generated
        WAV file.

    Raises:
        gr.Error: If the text is empty or ``creator`` is not a known voice.
    """
    import asyncio
    import tempfile

    if not text_input or not text_input.strip():
        raise gr.Error("Please enter some text to synthesize.")

    # Fail with a readable message instead of handing an empty speaker
    # reference to the model when nothing (or something unknown) is selected.
    refer_voices = _REFERENCE_VOICES.get(creator)
    if refer_voices is None:
        raise gr.Error(f"Unknown creator: {creator!r}. Please choose one from the list.")

    # One file per request: a shared fixed file name would let concurrent
    # requests overwrite each other's output.
    output_file = os.path.join(
        tempfile.gettempdir(), f"output_{uuid.uuid4()}.wav"
    )

    # Synthesis is a long blocking call; run it in a worker thread so this
    # coroutine does not stall the event loop while the model is working.
    await asyncio.to_thread(
        tts.tts_to_file,
        text=text_input,
        file_path=output_file,
        speaker_wav=refer_voices,
        # NOTE(review): language is fixed to English although the UI help text
        # is Spanish -- confirm this is intended.
        language="en",
        split_sentences=True,
        # Coerce to int in case the slider delivers a float.
        top_k=int(top_k_input),
    )

    return gr.Audio(value=output_file)


# Web UI: the three inputs are handed to generate_audio in this order
# (text, creator, slider value) and the result is rendered as audio.
app = gr.Interface(
    fn=generate_audio,
    inputs=[
        gr.Textbox(label='Text to Speech'),
        gr.Dropdown(
            ['Roomie', 'Xavy', 'Bella', 'Julia'],
            label="Choose your creator",
        ),
        # Shown to the user as "Emotion"; generate_audio receives this value
        # as its `top_k_input` argument.
        gr.Slider(
            0,
            100,
            value=50,
            label='Emotion',
            info='Valores mas altos para tonos mas emocionado, valores bajos para tonos mas aburrido ',
        ),
    ],
    outputs=['audio'],
)

# Start the server; access requires the configured username and password.
app.launch(auth=(SECRET_USER, SECRET_PASSWORD))