Spaces: Running on Zero
local Edge TTS
Browse files
- app/models.py +2 -0
- app/synth.py +7 -1
- requirements.txt +1 -0
app/models.py
CHANGED
@@ -52,6 +52,7 @@ AVAILABLE_MODELS = {
|
|
52 |
|
53 |
# # Microsoft Edge TTS
|
54 |
# 'innoai/Edge-TTS-Text-to-Speech': 'innoai/Edge-TTS-Text-to-Speech', # API disabled
|
|
|
55 |
|
56 |
# IMS-Toucan
|
57 |
# 'Flux9665/MassivelyMultilingualTTS': 'Flux9665/MassivelyMultilingualTTS', # 5.1
|
@@ -310,6 +311,7 @@ HF_SPACES = {
|
|
310 |
'return_audio_index': 0,
|
311 |
'is_zero_gpu_space': False,
|
312 |
'series': 'Kokoro',
|
|
|
313 |
},
|
314 |
|
315 |
# StyleTTS Kokoro v0.23
|
|
|
52 |
|
53 |
# # Microsoft Edge TTS
|
54 |
# 'innoai/Edge-TTS-Text-to-Speech': 'innoai/Edge-TTS-Text-to-Speech', # API disabled
|
55 |
+
'innoai/Edge-TTS-Text-to-Speech': '/Edge-TTS', # using Edge API
|
56 |
|
57 |
# IMS-Toucan
|
58 |
# 'Flux9665/MassivelyMultilingualTTS': 'Flux9665/MassivelyMultilingualTTS', # 5.1
|
|
|
311 |
'return_audio_index': 0,
|
312 |
'is_zero_gpu_space': False,
|
313 |
'series': 'Kokoro',
|
314 |
+
'space_link': 'Remsky/Kokoro-TTS-Zero', # still supports v0.19
|
315 |
},
|
316 |
|
317 |
# StyleTTS Kokoro v0.23
|
app/synth.py
CHANGED
@@ -80,7 +80,12 @@ def synthandreturn(text, autoplay, request: gr.Request):
|
|
80 |
while attempt_count < max_attempts:
|
81 |
try:
|
82 |
if model in AVAILABLE_MODELS:
|
83 |
-
if '/' in model:
|
|
|
|
|
|
|
|
|
|
|
84 |
# Use public HF Space
|
85 |
# if (model not in hf_clients):
|
86 |
# #save client to local variable; can timeout
|
@@ -169,6 +174,7 @@ def synthandreturn(text, autoplay, request: gr.Request):
|
|
169 |
else:
|
170 |
print('Done with', model)
|
171 |
|
|
|
172 |
try:
|
173 |
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
|
174 |
audio = AudioSegment.from_file(result)
|
|
|
80 |
while attempt_count < max_attempts:
|
81 |
try:
|
82 |
if model in AVAILABLE_MODELS:
|
83 |
+
if '/' == AVAILABLE_MODELS[model][0]:
|
84 |
+
# local model
|
85 |
+
# just Edge TTS API
|
86 |
+
from .tts.edge import edge_text_to_speech
|
87 |
+
result = edge_text_to_speech(text, 'en-US-EmmaMultilingualNeural - en-US (Female)')
|
88 |
+
elif '/' in model:
|
89 |
# Use public HF Space
|
90 |
# if (model not in hf_clients):
|
91 |
# #save client to local variable; can timeout
|
|
|
174 |
else:
|
175 |
print('Done with', model)
|
176 |
|
177 |
+
# Resample to 24kHz
|
178 |
try:
|
179 |
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
|
180 |
audio = AudioSegment.from_file(result)
|
requirements.txt
CHANGED
@@ -6,3 +6,4 @@ git+https://github.com/unitaryai/detoxify
|
|
6 |
pyloudnorm
|
7 |
langdetect
|
8 |
pydub
|
|
|
|
6 |
pyloudnorm
|
7 |
langdetect
|
8 |
pydub
|
9 |
+
edge_tts==6.1.12
|