"""Gradio front end that synthesizes speech with a Coqui TTS model.

The model name, reference speaker audio, language and output file path are
read from a YAML file whose location is given by the ``CONFIG_PATH``
environment variable.
"""

###################################### imports ######################################

import os

import gradio as gr
import spaces
import torch
import yaml
from TTS.api import TTS


###################################### utilities ######################################

def get_config():
    """Load the YAML configuration referenced by ``CONFIG_PATH``.

    Returns:
        The parsed YAML document (whatever ``yaml.safe_load`` produces).

    Raises:
        KeyError: If the ``CONFIG_PATH`` environment variable is not set.
        OSError: If the file cannot be opened.
    """
    try:
        config_path = os.environ["CONFIG_PATH"]
    except KeyError:
        # Same exception type as a bare lookup, but with an actionable message.
        raise KeyError("CONFIG_PATH environment variable is not set") from None

    # Explicit encoding so parsing does not depend on the platform locale.
    with open(config_path, "r", encoding="utf-8") as file:
        return yaml.safe_load(file)


def init_TTS(config):
    """Create the TTS model named in the config and move it to a device.

    Args:
        config: Mapping that provides ``config['inference']['model']``.

    Returns:
        The ``TTS`` instance, placed on CUDA when available, otherwise CPU.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return TTS(config["inference"]["model"]).to(device)


@spaces.GPU
def generate_speech(text):
    """Synthesize ``text`` to the configured audio file and return its path.

    Reads the module-level ``tts`` and ``config`` objects, which are assigned
    in the ``__main__`` section before the interface is launched.

    Args:
        text: The text to be spoken.

    Returns:
        ``config['inference']['file_path']``, the path the audio was written to.

    Raises:
        gr.Error: If ``text`` is empty or only whitespace.
    """
    if not text or not text.strip():
        # Nothing to synthesize; report it in the UI instead of failing
        # somewhere inside the model call.
        raise gr.Error("Please enter some text to synthesize.")

    inference = config["inference"]
    output_path = inference["file_path"]
    tts.tts_to_file(
        text=text,
        file_path=output_path,
        speaker_wav=inference["speaker_wav"],
        language=inference["language"],
    )
    return output_path


###################################### main ######################################

def main(config):
    """Build the Gradio Blocks interface and launch it.

    Args:
        config: The loaded configuration. Not read here; kept so existing
            callers that pass it continue to work.

    Returns:
        ``0`` after ``demo.launch()`` returns.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# Voice Synthesis with Coqui-XTTS")
        gr.Markdown("Synthesize speech using predefined target voice and language")

        textbox = gr.Textbox(label="Enter your text")
        # NOTE(review): the label says "Fine Tuning" but the click handler
        # synthesizes speech -- confirm the intended wording.
        tune_btn = gr.Button("Start Fine Tuning")
        # Event listeners inside Blocks need component instances as outputs;
        # the "audio" string shortcut is only understood by gr.Interface.
        audio_output = gr.Audio(label="Synthesized speech")

        tune_btn.click(
            fn=generate_speech,
            inputs=[textbox],
            outputs=[audio_output],
        )

    demo.launch()
    return 0


###################################### Execute ######################################

if __name__ == "__main__":
    # These two names are module globals on purpose: generate_speech reads them.
    config = get_config()
    tts = init_TTS(config)
    main(config)