File size: 1,914 Bytes
0f20c1d
4110922
d33f32d
 
4110922
6afc51e
3a4510f
 
4110922
0f20c1d
3a4510f
 
 
 
 
 
 
0f20c1d
4110922
 
3a4510f
4af5dfa
 
 
3a4510f
4af5dfa
4110922
0a5aead
 
c5ee403
0a5aead
 
3a4510f
 
 
4049f7e
0a5aead
4af5dfa
0f20c1d
3a4510f
4af5dfa
 
c5ee403
 
 
 
 
 
 
 
 
 
4af5dfa
 
c5ee403
4af5dfa
 
0f20c1d
 
4af5dfa
3a4510f
 
 
 
 
4af5dfa
 
45fac78
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
###################################### imports ######################################
import torch
from TTS.api import TTS
import gradio as gr
import os
import spaces
import yaml


###################################### utilities ######################################
def get_config():
    """Load the application configuration from a YAML file.

    The file location is taken from the ``CONFIG_PATH`` environment variable.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file's contents.

    Raises:
        KeyError: If ``CONFIG_PATH`` is not set in the environment.
        OSError: If the file cannot be opened.
    """
    config_path = os.environ["CONFIG_PATH"]
    # Decode explicitly as UTF-8 so parsing does not depend on the
    # platform's locale encoding.
    with open(config_path, 'r', encoding='utf-8') as file:
        return yaml.safe_load(file)


def init_TTS(config):
    """Instantiate the TTS model named in the config and place it on a device.

    Args:
        config: Nested mapping; ``config['inference']['model']`` is passed
            to the ``TTS`` constructor.

    Returns:
        The result of ``TTS(...).to(device)``, where device is "cuda" when
        ``torch.cuda.is_available()`` is true and "cpu" otherwise.
    """
    # Pick the compute device first, then build the model on it.
    if torch.cuda.is_available():
        target_device = "cuda"
    else:
        target_device = "cpu"

    model_name = config['inference']['model']
    model = TTS(model_name)
    return model.to(target_device)


@spaces.GPU
def generate_speech(microphone, text):
    """Synthesize ``text`` to an audio file and return that file's path.

    Reads the module-level ``tts`` and ``config`` objects. The output path,
    reference speaker audio and language all come from
    ``config['inference']``.

    Args:
        microphone: Value of the audio input component. It is accepted to
            match the interface's inputs but is not used here.
        text: The text to synthesize.

    Returns:
        ``config['inference']['file_path']``, the path the audio was
        written to.
    """
    synthesize = tts.tts_to_file
    settings = config['inference']

    output_path = settings['file_path']
    reference_voice = settings['speaker_wav']
    spoken_language = settings['language']

    synthesize(
        text=text,
        file_path=output_path,
        speaker_wav=reference_voice,
        language=spoken_language,
    )
    return settings['file_path']


###################################### main ######################################
def main(config):
    """Build the Gradio interface around ``generate_speech`` and launch it.

    Args:
        config: Parsed configuration. It is accepted for the caller's
            convenience but is not read by this function.

    Returns:
        0, once ``demo.launch()`` has returned.
    """
    # Input components are listed in the same order as the parameters of
    # generate_speech(microphone, text). A list literal may only contain
    # expressions, so the components are created inline without binding
    # them to names.
    demo = gr.Interface(
        fn=generate_speech,
        inputs=[
            gr.Audio(label="Audio", sources=["microphone"], type="filepath", elem_id='audio'),
            gr.Textbox(label="Enter your text"),
        ],
        outputs="audio",
        title="Voice Synthesis with Coqui-XTTS",
        description="Synthesize speech using predefined target voice and language."
    )

    # Launch the interface
    demo.launch()
    return 0


###################################### Execute ######################################
if __name__ == "__main__":
    # Script entry point. `config` and `tts` are assigned at module level
    # here because generate_speech() reads both as globals, so they must
    # exist before the interface is launched.
    # Load the configuration.
    config = get_config()
    # Initialize the TTS model from that configuration.
    tts = init_TTS(config)
    # Build and launch the Gradio interface.
    main(config)