"""Gradio demo for zero-shot voice cloning with Coqui XTTS.

Loads a multilingual TTS model once at startup, then serves a web UI where
the user supplies a text prompt, a target language, and a reference audio
clip (uploaded file or microphone recording) to synthesise cloned speech.
"""
import os
import sys

# By using XTTS you agree to CPML license https://coqui.ai/cpml
# (must be set before TTS is imported so the download prompt is skipped)
os.environ["COQUI_TOS_AGREED"] = "1"

import gradio as gr
from TTS.api import TTS

# Pick the second model from the registry and keep inference on CPU.
model_names = TTS().list_models()
m = model_names[1]
tts = TTS(m, gpu=False)
tts.to("cpu")
# tts.to("cuda")  # cuda only


def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, agree):
    """Synthesise `prompt` in `language` using the reference speaker audio.

    Parameters
    ----------
    prompt : str
        Text to synthesise (2..10000 characters).
    language : str
        Output language code selected in the UI (e.g. "en", "fr").
    audio_file_pth : str
        Path to the uploaded reference audio file.
    mic_file_path : str or None
        Path to the microphone recording, if any.
    use_mic : bool
        When True, the microphone recording is used as the speaker reference.
    agree : bool
        User's acceptance of the CPML terms; synthesis is refused otherwise.

    Returns
    -------
    tuple
        (waveform video, "output.wav") on success, (None, None) on any
        validation failure (a gr.Warning is shown in the UI).
    """
    if not agree:
        gr.Warning("Please accept the Terms & Condition!")
        return (
            None,
            None,
        )

    # Resolve the speaker reference: microphone recording or uploaded file.
    if use_mic:
        if mic_file_path is None:
            gr.Warning("Please record your voice with Microphone, or uncheck Use Microphone to use reference audios")
            return (
                None,
                None,
            )
        speaker_wav = mic_file_path
    else:
        speaker_wav = audio_file_pth

    if len(prompt) < 2:
        gr.Warning("Please give a longer prompt text")
        return (
            None,
            None,
        )
    if len(prompt) > 10000:
        gr.Warning("Text length limited to 10000 characters for this demo, please try shorter text")
        return (
            None,
            None,
        )

    # Some model families expect locale-specific French codes, or no
    # language argument at all for single-language French models.
    if language == "fr":
        if m.find("your") != -1:
            language = "fr-fr"
        if m.find("/fr/") != -1:
            language = None

    try:
        tts.tts_to_file(
            text=prompt,
            file_path="output.wav",
            speaker_wav=speaker_wav,
            language=language,
        )
    except RuntimeError as e:
        if "device-assert" in str(e):
            # cannot do anything on cuda device side error, need to restart
            gr.Warning("Unhandled Exception encounter, please retry in a minute")
            print("Cuda device-assert Runtime encountered need restart")
            sys.exit("Exit due to cuda device-assert")
        raise

    return (
        gr.make_waveform(
            audio="output.wav",
        ),
        "output.wav",
    )


title = "Voice Clone"

gr.Interface(
    fn=predict,
    inputs=[
        gr.Textbox(
            label="Text Prompt",
            info="One or two sentences at a time is better",
            value="Hello, World !, here is an example of light voice cloning. Try to upload your best audio samples quality",
        ),
        gr.Dropdown(
            label="Language",
            info="Select an output language for the synthesised speech",
            choices=[
                "en",
                "es",
                "fr",
                "de",
                "it",
                "pt",
                "pl",
                "tr",
                "ru",
                "nl",
                "cs",
                "ar",
                "zh-cn",
            ],
            max_choices=1,
            value="en",
        ),
        gr.Audio(
            label="Reference Audio",
            info="Click on the ✎ button to upload your own target speaker audio",
            type="filepath",
            value="examples/female.wav",
        ),
        gr.Audio(
            source="microphone",
            type="filepath",
            info="Use your microphone to record audio",
            label="Use Microphone for Reference",
        ),
        gr.Checkbox(
            label="Check to use Microphone as Reference",
            value=False,
            info="Notice: Microphone input may not work properly under traffic",
        ),
        gr.Checkbox(
            label="Agree",
            value=True,
            info="I agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml",
        ),
    ],
    outputs=[
        gr.Video(label="Waveform Visual"),
        gr.Audio(label="Synthesised Audio"),
    ],
    title=title,
).queue().launch(debug=True)