Spaces:

tykiww
/

TTS_Demo

Runtime error

File size: 2,378 Bytes

0f20c1d
4110922
d33f32d
 
4110922
6afc51e
3a4510f
 
4110922
0f20c1d
3a4510f
 
 
 
 
 
 
0f20c1d
4110922
 
3a4510f
4af5dfa
 
 
3a4510f
4af5dfa
4110922
0a5aead
 
486cbec
0a5aead
486cbec
 
 
 
 
 
d49cd75
0a5aead
3a4510f
dd46461
3a4510f
4049f7e
0a5aead
4af5dfa
0f20c1d
1b3c5cb
486cbec
5b06c99
 
486cbec
 
5b06c99
 
4af5dfa
 
c5ee403
 
 
486cbec
5b06c99
 
c5ee403
 
486cbec
 
c5ee403
4af5dfa
 
c5ee403
4af5dfa
 
0f20c1d
 
4af5dfa
3a4510f
 
1b3c5cb
3a4510f
 
1b3c5cb
 
 
4af5dfa
 
45fac78

###################################### imports ######################################
import torch
from TTS.api import TTS
import gradio as gr
import os
import spaces
import yaml


###################################### utilities ######################################
def get_config():
    """Load the application configuration from a YAML file.

    The file location is taken from the CONFIG_PATH environment variable.

    Returns:
        Whatever yaml.safe_load produces for the file's contents.

    Raises:
        KeyError: If the CONFIG_PATH environment variable is not set.
        OSError: If the file at CONFIG_PATH cannot be opened.
    """
    try:
        config_path = os.environ["CONFIG_PATH"]
    except KeyError:
        # Same exception type as a plain lookup, but say what needs fixing.
        raise KeyError(
            "CONFIG_PATH environment variable is not set; "
            "it must point to the YAML config file"
        ) from None

    # Decode explicitly as UTF-8 so parsing does not depend on the host locale.
    with open(config_path, encoding="utf-8") as file:
        return yaml.safe_load(file)


def init_TTS(config):
    """Create the TTS model named in the config and move it to a device.

    Args:
        config: Mapping providing config['inference']['model'], which is
            passed to the TTS constructor.

    Returns:
        The TTS instance after `.to(...)` has been applied.
    """
    # Use the GPU whenever torch reports one, otherwise stay on the CPU
    if torch.cuda.is_available():
        target_device = "cuda"
    else:
        target_device = "cpu"

    model = TTS(config['inference']['model'])
    return model.to(target_device)


@spaces.GPU
def generate_speech(voice_choice, microphone, text):
    """Synthesize `text` to an audio file and return that file's path.

    Reads the module-level `config` and `tts` objects, so both must be
    assigned before this function is called.

    Args:
        voice_choice: Selected radio option. "Record" asks for the microphone
            recording to be used as the reference voice; any other value
            selects the predefined speaker from the config.
        microphone: Reference recording to clone (the UI supplies a file
            path). A falsy value means no recording was supplied.
        text: Text to synthesize.

    Returns:
        config['inference']['file_path'], the path the audio was written to.
    """
    inference = config['inference']

    # The UI defaults to "Record" and invites the user to skip recording to
    # get the predefined voice. Without a recording there is nothing to clone,
    # so fall back to the predefined speaker instead of passing an empty
    # speaker_wav to the model.
    if voice_choice == "Record" and microphone:
        speaker = microphone
    else:
        speaker = inference['speaker_wav']

    output_path = inference['file_path']
    tts.tts_to_file(text=text,
                    file_path=output_path,
                    speaker_wav=speaker,
                    language=inference['language'])
    return output_path


###################################### main ######################################
def UI(config):
    """Assemble the Gradio interface for the demo and launch it.

    Args:
        config: Accepted for call compatibility; not read by this function.

    Returns:
        0 after demo.launch() returns.
    """
    # Input widgets, listed in the positional order generate_speech expects
    input_widgets = [
        gr.Radio(
            label="Record or skip to use predefined voice.",
            choices=["Record", "Predefined (Nancy)"],
            value="Record",
        ),
        gr.Audio(label="Audio", sources="microphone", type="filepath", elem_id='audio'),
        gr.Textbox(label="Enter your text"),
    ]

    # Interface settings gathered in one place before construction
    interface_options = {
        "fn": generate_speech,
        "inputs": input_widgets,
        "outputs": "audio",
        "title": "Voice cloning and Synthesis with Coqui-XTTS",
        "description": "Clone your voice and Synthesize speech using predefined target voice and language.",
    }
    app = gr.Interface(**interface_options)

    # Start serving
    app.launch()
    return 0


###################################### Execute ######################################
if __name__ == "__main__":
    # Load the YAML configuration. `config` is bound at module level on
    # purpose: generate_speech reads it as a global rather than a parameter.
    config = get_config()
    
    # Build the TTS model. `tts` is likewise read as a global by
    # generate_speech, so it has to be assigned before the UI is launched.
    tts = init_TTS(config)
    
    # Build the Gradio interface and launch it
    UI(config)