###################################### imports ######################################

import torch
from TTS.api import TTS
import gradio as gr
import os
import spaces
import yaml


###################################### utilities ######################################

def get_config():
    """Load the application configuration from a YAML file.

    The file location is read from the ``CONFIG_PATH`` environment variable.

    Returns:
        The parsed YAML document as returned by ``yaml.safe_load``.

    Raises:
        KeyError: If ``CONFIG_PATH`` is not set in the environment.
        OSError: If the file cannot be opened.
    """
    # Get config path
    config_path = os.environ["CONFIG_PATH"]

    # Parse the YAML file (explicit encoding so the result does not depend
    # on the platform's default locale)
    with open(config_path, 'r', encoding="utf-8") as file:
        config = yaml.safe_load(file)

    return config


def init_TTS(config):
    """Create the TTS model named in the config and move it to a device.

    Args:
        config: Mapping that provides ``config['inference']['model']``.

    Returns:
        The ``TTS`` instance, placed on ``"cuda"`` when
        ``torch.cuda.is_available()`` is true and on ``"cpu"`` otherwise.
    """
    # Get device
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Initialize the TTS model
    tts = TTS(config['inference']['model']).to(device)

    return tts


@spaces.GPU
def generate_speech(microphone, text):
    """Synthesize ``text`` to the configured output file and return its path.

    This function reads the module-level ``tts`` and ``config`` objects that
    are assigned in the ``__main__`` guard at the bottom of this file; they
    must exist before the function is called.

    Args:
        microphone: Value of the Gradio audio input. It is accepted because
            the interface declares two inputs, but it is not used here: the
            speaker voice comes from ``config['inference']['speaker_wav']``.
        text: The text to synthesize.

    Returns:
        ``config['inference']['file_path']``, the path the audio was written
        to.
    """
    inference_cfg = config['inference']

    # Generate speech using the provided text, speaker voice, and language
    tts.tts_to_file(
        text=text,
        file_path=inference_cfg['file_path'],
        speaker_wav=inference_cfg['speaker_wav'],
        language=inference_cfg['language'],
    )

    return inference_cfg['file_path']


###################################### main ######################################

def main(config):
    """Build the Gradio interface around ``generate_speech`` and launch it.

    Args:
        config: The loaded configuration. It is not read inside this
            function; ``generate_speech`` uses the module-level ``config``.

    Returns:
        ``0`` once ``demo.launch()`` returns.
    """
    # Create the Gradio interface.
    # NOTE: the original wrote ``audio_box = gr.Audio(...)`` inside the list,
    # which is a SyntaxError; the component is now passed directly.
    demo = gr.Interface(
        fn=generate_speech,
        inputs=[
            gr.Audio(
                label="Audio",
                sources="microphone",
                type="filepath",
                elem_id='audio',
            ),
            gr.Textbox(label="Enter your text"),
        ],
        outputs="audio",
        title="Voice Synthesis with Coqui-XTTS",
        description="Synthesize speech using predefined target voice and language.",
    )

    # Launch the interface
    demo.launch()

    return 0


###################################### Execute ######################################

if __name__ == "__main__":
    # Get config
    config = get_config()

    # Initialize TTS
    tts = init_TTS(config)

    main(config)