Update app.py
app.py CHANGED
@@ -1,10 +1,12 @@
 import sys
-import os,stat
+import io, os, stat
 import subprocess
 import random
 from zipfile import ZipFile
 import uuid
 
+import torch
+import torchaudio
 # By using XTTS you agree to CPML license https://coqui.ai/cpml
 os.environ["COQUI_TOS_AGREED"] = "1"
 
@@ -13,9 +15,18 @@ os.environ["COQUI_TOS_AGREED"] = "1"
 import langid
 
 import gradio as gr
+from scipy.io.wavfile import write
+from pydub import AudioSegment
+
 from TTS.api import TTS
+from TTS.tts.configs.xtts_config import XttsConfig
+from TTS.tts.models.xtts import Xtts
+from TTS.utils.generic_utils import get_user_data_dir
+
 HF_TOKEN = os.environ.get("HF_TOKEN")
+
 from huggingface_hub import HfApi
+
 # will use api to restart space on a unrecoverable error
 api = HfApi(token=HF_TOKEN)
 repo_id = "coqui/xtts"
@@ -29,8 +40,19 @@ os.chmod('ffmpeg', st.st_mode | stat.S_IEXEC)
 
 # Load TTS
 tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1")
-tts.to("cuda")
 
+model_path = os.path.join(get_user_data_dir("tts"), "tts_models--multilingual--multi-dataset--xtts_v1")
+config = XttsConfig()
+config.load_json(os.path.join(model_path, "config.json"))
+model = Xtts.init_from_config(config)
+model.load_checkpoint(
+    config,
+    checkpoint_path=os.path.join(model_path, "model.pth"),
+    vocab_path=os.path.join(model_path, "vocab.json"),
+    eval=True,
+    use_deepspeed=True
+)
+model.cuda()
 
 # This is for debugging purposes only
 DEVICE_ASSERT_DETECTED=0
@@ -143,14 +165,24 @@ def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, voice_clea
             global DEVICE_ASSERT_LANG
             #It will likely never come here as we restart space on first unrecoverable error now
             print(f"Unrecoverable exception caused by language:{DEVICE_ASSERT_LANG} prompt:{DEVICE_ASSERT_PROMPT}")
-
-
-
-
-
-
-
-
+
+        gpt_cond_latent, _, speaker_embedding = model.get_conditioning_latents(audio_path=speaker_wav)
+        wav_chunks = []
+
+        chunks = model.inference_stream(
+            prompt,
+            language,
+            gpt_cond_latent,
+            speaker_embedding,)
+        try:
+
+            for i, chunk in enumerate(chunks):
+                print(f"Received chunk {i} of audio length {chunk.shape[-1]}")
+                out_file = f'{i}.wav'
+                write(out_file, 24000, chunk.detach().cpu().numpy().squeeze())
+                audio = AudioSegment.from_file(out_file)
+                audio.export(out_file, format='wav')
+                yield (gr.make_waveform(audio=out_file),out_file, speaker_wav)
         except RuntimeError as e :
             if "device-side assert" in str(e):
                 # cannot do anything on cuda device side error, need tor estart
@@ -168,13 +200,6 @@ def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, voice_clea
             else:
                 print("RuntimeError: non device-side assert error:", str(e))
                 raise e
-        return (
-            gr.make_waveform(
-                audio="output.wav",
-            ),
-            "output.wav",
-            speaker_wav,
-        )
     else:
         gr.Warning("Please accept the Terms & Condition!")
         return (
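The functional core of this change is the switch from returning a single finished output.wav to yielding audio incrementally from model.inference_stream. Below is a condensed sketch of that streaming loop under the same assumptions as the diff: model is the low-level Xtts instance loaded in the checkpoint block above, "speaker.wav" is a hypothetical reference clip used only for this sketch, and output is 24 kHz. The final concatenation is illustrative and is not part of the commit.

import torch
from scipy.io.wavfile import write

# Reference clip -> conditioning latents; computed once per speaker.
# "speaker.wav" is a hypothetical path used only for this sketch.
gpt_cond_latent, _, speaker_embedding = model.get_conditioning_latents(audio_path="speaker.wav")

# inference_stream yields partial waveforms while generation is still running.
chunks = model.inference_stream(
    "Hello, this is a streaming test.",  # prompt
    "en",                                # language code
    gpt_cond_latent,
    speaker_embedding,
)

wav_chunks = []
for i, chunk in enumerate(chunks):
    print(f"chunk {i}: {chunk.shape[-1]} samples")
    # Each chunk can be written (or yielded to the UI) as soon as it arrives ...
    write(f"{i}.wav", 24000, chunk.detach().cpu().numpy().squeeze())
    wav_chunks.append(chunk.detach().cpu().squeeze())

# ... and, optionally, concatenated into a single file at the end.
write("full.wav", 24000, torch.cat(wav_chunks).numpy())

Note that the diff declares wav_chunks but never appends to it; accumulating and concatenating the chunks, as above, is what that list would typically be used for.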
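A side effect worth noting: because predict now ends in yield rather than return, Gradio treats it as a generator and refreshes the outputs each time a chunk is produced. The following minimal, self-contained illustration of that mechanism uses sine-wave "chunks" instead of XTTS and a single audio output instead of the three outputs the Space yields; it only shows how the yielding pattern in the diff maps onto a streaming UI.

import numpy as np
import gradio as gr
from scipy.io.wavfile import write

def fake_predict(text):
    # Stand-in for the XTTS streaming loop: emit three one-second tone "chunks".
    sr = 24000
    for i in range(3):
        t = np.arange(sr) / sr
        tone = 0.2 * np.sin(2 * np.pi * (220 + 110 * i) * t).astype(np.float32)
        out_file = f"{i}.wav"
        write(out_file, sr, tone)
        # Each yield updates the audio output in place, like the yields in predict().
        yield out_file

demo = gr.Interface(fn=fake_predict, inputs=gr.Textbox(), outputs=gr.Audio())
demo.queue().launch()  # queuing is required for generator-based streaming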