File size: 2,098 Bytes
914c7b4
ff92946
370f126
ff92946
b308ed7
 
86b63a1
914c7b4
 
b308ed7
86b63a1
b308ed7
17da063
e937e1b
 
914c7b4
 
ff92946
914c7b4
b308ed7
 
914c7b4
86b63a1
914c7b4
 
b308ed7
 
914c7b4
b308ed7
 
914c7b4
b308ed7
 
ff92946
b308ed7
 
 
ff92946
b308ed7
 
 
 
 
 
ff92946
b308ed7
ff92946
33c11bc
914c7b4
 
 
 
 
b308ed7
 
914c7b4
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import gradio as gr
import uuid
import os
from datetime import timedelta
from TTS.api import TTS
import locale
import torch


# Make `locale.getpreferredencoding()` always report UTF-8 for the rest of
# the process, regardless of the host's locale settings.
locale.getpreferredencoding = lambda: "UTF-8"
# Run inference on the GPU when CUDA is available, otherwise on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Set before the model is constructed so the Coqui terms-of-service prompt is
# answered non-interactively (there is no terminal to answer it on a server).
os.environ["COQUI_TOS_AGREED"] = "1"
# Load the model exactly once and place it on the selected device.
# "xtts_v2.0.2" is the model the script previously ended up using: an earlier
# load of "tts_models/multilingual/multi-dataset/xtts_v2" was moved to
# `device` and then immediately overwritten by this one, which was never
# moved, so `device` had no effect and one full model load was wasted.
tts = TTS("xtts_v2.0.2").to(device)


# Reference clips used to clone each creator's voice, keyed by the creator
# name offered in the UI dropdown. Tuples keep the table read-only; a fresh
# list is built from them on every call.
_REFERENCE_VOICES = {
    'Roomie': (
        "./assets/roomie/roomie_emocionado_base_1.wav",
        "./assets/roomie/ref_12.wav",
        "./assets/roomie/ref_11.wav",
        "./assets/roomie/ref_10.wav",
        "./assets/roomie/ref_1.wav",
        "./assets/roomie/ref_6.wav",
        "./assets/roomie/ref_7.wav",
        "./assets/roomie/ref_8.wav",
        "./assets/roomie/roomie_emocionado_base_2.wav",
    ),
    'Xavy': (
        "./assets/xavy/neutro_3.wav",
        "./assets/xavy/neutro_1.wav",
        "./assets/xavy/neutro_2.wav",
    ),
    'Bella': (
        "./assets/bella/neutro_2.wav",
        "./assets/bella/neutro_1.wav",
        "./assets/bella/neutro_3.wav",
    ),
    'Julia': (
        "assets/julia/neutro_4_Final_fast.wav",
        "assets/julia/enfadado_1_Final.wav",
        "assets/julia/enfadado_2_Final.wav",
        "assets/julia/enfadado_3_Final.wav",
        "assets/julia/emocionada_1.wav",
        "assets/julia/emocionada_2_Final.wav",
    ),
}


async def generate_audio(text_input: str, creator: str) -> "gr.Audio":
    """Synthesize *text_input* in the cloned voice of *creator*.

    Args:
        text_input: The text to be spoken.
        creator: Name of the voice to clone; must be one of the keys of
            ``_REFERENCE_VOICES`` ('Roomie', 'Xavy', 'Bella' or 'Julia').

    Returns:
        A ``gr.Audio`` component whose value is the path of the generated
        WAV file.

    Raises:
        ValueError: If *creator* is ``None`` or not a known creator name.
    """
    # Reject an unknown/unselected creator up front instead of handing an
    # empty speaker reference to the model.
    reference_clips = _REFERENCE_VOICES.get(creator)
    if reference_clips is None:
        known = ', '.join(_REFERENCE_VOICES)
        raise ValueError(
            f"Unknown creator {creator!r}; expected one of: {known}")

    # NOTE: every call writes to this same path, so a later call overwrites
    # the file produced by an earlier one.
    output_file = 'output.wav'

    tts.tts_to_file(
        text=text_input,
        file_path=output_file,
        speaker_wav=list(reference_clips),
        language="en",
        split_sentences=True,
    )

    return gr.Audio(value=output_file)


# Web form wired to generate_audio: a free-text box and a creator dropdown as
# inputs, an audio player as output. The dropdown choices must match the
# creator names that generate_audio accepts.
app = gr.Interface(
    fn=generate_audio,
    inputs=[
        # Labels are user-facing; spelling corrected from
        # 'Text to Speach' / "Coice your creator".
        gr.Textbox(label='Text to Speech'),
        gr.Dropdown(
            ['Roomie', 'Xavy', 'Bella', 'Julia'],
            label="Choose your creator",
        ),
    ],
    outputs=['audio'],
)

# Launched at module level, so running (or importing) this file starts the UI.
app.launch()