Spaces:
Runtime error
Runtime error
File size: 4,097 Bytes
964c670 fc1a544 964c670 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
import sys
import os
# By using XTTS you agree to CPML license https://coqui.ai/cpml
# The agreement flag is set before TTS is imported below.
# NOTE(review): presumably TTS reads this variable to skip an interactive
# licence prompt -- confirm against the installed TTS version.
os.environ["COQUI_TOS_AGREED"] = "1"
import gradio as gr
from TTS.api import TTS
# Load the first model returned by list_models(); its name is kept in `m`
# because predict() inspects it to adjust the language argument.
# NOTE(review): indexing with [0] assumes list_models() returns a sequence of
# model-name strings -- verify for the TTS release this app is pinned to.
model_names = TTS().list_models()
m = model_names[0]
# The model is created without GPU support and then explicitly moved to CPU.
tts = TTS(m, gpu=False)
tts.to("cpu") # no GPU, or an AMD GPU
# Alternative for CUDA-capable hosts (disabled):
#tts.to("cuda") # cuda only
def _language_for_model(language, model_name):
    """Return the language argument to pass to the model named ``model_name``.

    * When French is selected and the model name contains ``"your"``, the
      code ``"fr-fr"`` is used instead of ``"fr"``.
    * When the model name contains ``"/fr/"``, ``None`` is returned so that
      no language argument is passed.

    NOTE(review): the code this was recovered from had lost its indentation;
    the ``"/fr/"`` check is assumed to apply regardless of the selected
    language -- confirm against the original file.
    """
    if language == "fr" and "your" in model_name:
        language = "fr-fr"
    if "/fr/" in model_name:
        language = None
    return language


def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, agree):
    """Synthesise ``prompt`` to ``output.wav`` using a reference voice.

    Args:
        prompt: Text to speak; must be between 2 and 10000 characters.
        language: Language code selected in the UI.
        audio_file_pth: Path of the reference audio file.
        mic_file_path: Path of the microphone recording, or ``None``.
        use_mic: When true, ``mic_file_path`` is used as the reference
            instead of ``audio_file_pth``.
        agree: Whether the licence checkbox is ticked.

    Returns:
        ``(gr.make_waveform(audio="output.wav"), "output.wav")`` on success,
        or ``(None, None)`` after calling ``gr.Warning`` when an input check
        fails.

    Raises:
        RuntimeError: Re-raised from ``tts.tts_to_file`` unless its message
            contains ``"device-assert"``, in which case the process exits
            via ``sys.exit``.
    """
    no_output = (None, None)

    if not agree:
        gr.Warning("Please accept the Terms & Condition!")
        return no_output

    # Pick the reference recording.
    if use_mic:
        if mic_file_path is None:
            gr.Warning("Please record your voice with Microphone, or uncheck Use Microphone to use reference audios")
            return no_output
        speaker_wav = mic_file_path
    else:
        speaker_wav = audio_file_pth

    # Treat a missing prompt like an empty one so len() cannot fail.
    if prompt is None:
        prompt = ""
    if len(prompt) < 2:
        gr.Warning("Please give a longer prompt text")
        return no_output
    if len(prompt) > 10000:
        gr.Warning("Text length limited to 10000 characters for this demo, please try shorter text")
        return no_output

    language = _language_for_model(language, m)

    try:
        tts.tts_to_file(
            text=prompt,
            file_path="output.wav",
            speaker_wav=speaker_wav,
            language=language,
        )
    except RuntimeError as e:
        if "device-assert" in str(e):
            # Nothing can be done about a CUDA device-side error; a restart
            # is needed, so the process is terminated.
            gr.Warning("Unhandled Exception encounter, please retry in a minute")
            print("Cuda device-assert Runtime encountered need restart")
            sys.exit("Exit due to cuda device-assert")
        # Bare raise keeps the original traceback intact.
        raise

    return (
        gr.make_waveform(audio="output.wav"),
        "output.wav",
    )
title = "Voice Clone"

# Build the UI around predict(), enable the request queue and start the app.
# The input components are listed in the same order as predict()'s
# parameters: prompt, language, audio_file_pth, mic_file_path, use_mic, agree.
gr.Interface(
    fn=predict,
    inputs=[
        # prompt
        gr.Textbox(
            label="Text Prompt",
            info="One or two sentences at a time is better",
            value="Hello, World !, here is an example of light voice cloning. Try to upload your best audio samples quality",
        ),
        # language
        gr.Dropdown(
            label="Language",
            info="Select an output language for the synthesised speech",
            choices=[
                "en",
                "es",
                "fr",
                "de",
                "it",
                "pt",
                "pl",
                "tr",
                "ru",
                "nl",
                "cs",
                "ar",
                "zh-cn",
            ],
            max_choices=1,
            value="en",
        ),
        # audio_file_pth: reference audio, pre-filled with a bundled example
        gr.Audio(
            label="Reference Audio",
            info="Click on the ✎ button to upload your own target speaker audio",
            type="filepath",
            value="examples/female.wav",
        ),
        # mic_file_path: optional microphone recording
        gr.Audio(
            source="microphone",
            type="filepath",
            info="Use your microphone to record audio",
            label="Use Microphone for Reference",
        ),
        # use_mic
        gr.Checkbox(
            label="Check to use Microphone as Reference",
            value=False,
            info="Notice: Microphone input may not work properly under traffic",
        ),
        # agree
        gr.Checkbox(
            label="Agree",
            value=True,
            info="I agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml",
        ),
    ],
    # predict() returns a 2-tuple: (waveform video, path of the audio file).
    outputs=[
        gr.Video(label="Waveform Visual"),
        gr.Audio(label="Synthesised Audio"),
    ],
    title=title,
).queue().launch(debug=True)