"""Gradio front-end that synthesizes speech with the Coqui XTTS v2 model."""

import os

import gradio as gr
import spaces
import torch
from TTS.api import TTS

# Agree to Terms of service
# os.environ["COQUI_TOS_AGREED"] = "1"

# Name of the Coqui model loaded by init_TTS().
MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"
# Where the synthesized audio is written (overwritten on every request).
OUTPUT_PATH = "output.wav"
# Reference recording passed to the model as the target speaker voice.
SPEAKER_WAV = "/content/speaker.wav"
# Language code passed to the model.
LANGUAGE = "en"

# Shared model handle. It lives at module scope because generate_speech()
# is invoked by Gradio with only the text argument, so it cannot receive the
# model as a parameter. Populated by main() (or lazily on first request).
tts = None


def init_TTS() -> TTS:
    """Load the XTTS v2 model and move it to the GPU when one is available.

    Returns:
        The initialized ``TTS`` object, placed on ``"cuda"`` if
        ``torch.cuda.is_available()`` is true and on ``"cpu"`` otherwise.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return TTS(MODEL_NAME).to(device)


@spaces.GPU
def generate_speech(text: str) -> str:
    """Synthesize ``text`` with the predefined speaker voice and language.

    Args:
        text: The text to convert to speech.

    Returns:
        Path of the WAV file that was written (``OUTPUT_PATH``).
    """
    global tts
    # Fallback for callers that import this module and call the function
    # directly without going through main(): load the model on first use
    # instead of failing on an uninitialized handle.
    if tts is None:
        tts = init_TTS()

    tts.tts_to_file(
        text=text,
        file_path=OUTPUT_PATH,
        speaker_wav=SPEAKER_WAV,
        language=LANGUAGE,
    )
    return OUTPUT_PATH


def main() -> int:
    """Load the model, build the Gradio interface, and launch it.

    Returns:
        ``0`` once ``interface.launch()`` returns.
    """
    # The model must be bound to the module-level name: generate_speech()
    # looks ``tts`` up as a global, so a local variable here would leave it
    # undefined and every request would raise NameError.
    global tts
    tts = init_TTS()

    # Create the Gradio interface
    interface = gr.Interface(
        fn=generate_speech,
        inputs=[
            gr.Textbox(label="Enter your text"),
            # gr.Textbox(label="Path to target speaker WAV file", value="/content/speaker.wav")
            # gr.Dropdown(label="Language", choices=["en"], value="en")
        ],
        outputs="audio",
        title="Voice Synthesis with Coqui-XTTS",
        description="Synthesize speech using predefined target voice and language.",
    )

    # Launch the interface
    interface.launch()
    return 0


if __name__ == "__main__":
    main()