Hugging Face Spaces — TTS Arena (running on CPU Upgrade)
Pull request #30: "HF Spaces API support", opened by Pendrokar.
Changed file: app.py
@@ -36,7 +36,54 @@ AVAILABLE_MODELS = {
|
|
36 |
'ElevenLabs': 'eleven',
|
37 |
'OpenVoice': 'openvoice',
|
38 |
'Pheme': 'pheme',
|
39 |
-
'MetaVoice': 'metavoice'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
}
|
41 |
|
42 |
SPACE_ID = os.getenv('SPACE_ID')
|
@@ -118,6 +165,7 @@ if not os.path.isfile(DB_PATH):
|
|
118 |
# Create DB table (if doesn't exist)
|
119 |
create_db_if_missing()
|
120 |
|
|
|
121 |
# Sync local DB with remote repo every 5 minute (only if a change is detected)
|
122 |
scheduler = CommitScheduler(
|
123 |
repo_id=DB_DATASET_ID,
|
@@ -133,7 +181,7 @@ scheduler = CommitScheduler(
|
|
133 |
####################################
|
134 |
# Router API
|
135 |
####################################
|
136 |
-
router = Client("TTS-AGI/tts-router", hf_token=
|
137 |
####################################
|
138 |
# Gradio app
|
139 |
####################################
|
@@ -291,6 +339,9 @@ model_licenses = {
|
|
291 |
'metavoice': 'Apache 2.0',
|
292 |
'elevenlabs': 'Proprietary',
|
293 |
'whisperspeech': 'MIT',
|
|
|
|
|
|
|
294 |
}
|
295 |
model_links = {
|
296 |
'styletts2': 'https://github.com/yl4579/StyleTTS2',
|
@@ -561,7 +612,44 @@ def synthandreturn(text):
|
|
561 |
def predict_and_update_result(text, model, result_storage):
|
562 |
try:
|
563 |
if model in AVAILABLE_MODELS:
|
564 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
565 |
else:
|
566 |
result = router.predict(text, model.lower(), api_name="/synthesize")
|
567 |
except:
|
@@ -593,6 +681,40 @@ def synthandreturn(text):
|
|
593 |
# doloudnorm(result)
|
594 |
# except:
|
595 |
# pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
596 |
results = {}
|
597 |
thread1 = threading.Thread(target=predict_and_update_result, args=(text, mdl1, results))
|
598 |
thread2 = threading.Thread(target=predict_and_update_result, args=(text, mdl2, results))
|
@@ -709,4 +831,4 @@ with gr.Blocks(theme=theme, css="footer {visibility: hidden}textbox{resize:none}
|
|
709 |
gr.Markdown(f"If you use this data in your publication, please cite us!\n\nCopy the BibTeX citation to cite this source:\n\n```bibtext\n{CITATION_TEXT}\n```\n\nPlease remember that all generated audio clips should be assumed unsuitable for redistribution or commercial use.")
|
710 |
|
711 |
|
712 |
-
demo.queue(api_open=False, default_concurrency_limit=40).launch(show_api=False)
|
|
|
36 |
'ElevenLabs': 'eleven',
|
37 |
'OpenVoice': 'openvoice',
|
38 |
'Pheme': 'pheme',
|
39 |
+
'MetaVoice': 'metavoice',
|
40 |
+
|
41 |
+
# '<Space>': <function>#<return-index-of-audio-param>
|
42 |
+
# 'coqui/xtts': '1#1',
|
43 |
+
# 'collabora/WhisperSpeech': '/whisper_speech_demo#0',
|
44 |
+
# 'myshell-ai/OpenVoice': '1#1',
|
45 |
+
# 'PolyAI/pheme': '/predict#0', #sleepy HF Space
|
46 |
+
# 'mrfakename/MetaVoice-1B-v0.1': '/tts#0',
|
47 |
+
|
48 |
+
# xVASynth (CPU)
|
49 |
+
'Pendrokar/xVASynth': '/predict#0',
|
50 |
+
|
51 |
+
# MeloTTS
|
52 |
+
# 'mrfakename/MeloTTS': '0#0', #API disabled
|
53 |
+
|
54 |
+
# CoquiTTS (CPU)
|
55 |
+
'coqui/CoquiTTS': '0#0',
|
56 |
+
|
57 |
+
# 'pytorch/Tacotron2': '0#0', #old gradio
|
58 |
+
}
|
59 |
+
|
60 |
+
OVERRIDE_INPUTS = {
|
61 |
+
'coqui/xtts': {
|
62 |
+
1: 'en',
|
63 |
+
2: 'https://cdn-uploads.huggingface.co/production/uploads/641de0213239b631552713e4/iKHHqWxWy6Zfmp6QP6CZZ.wav', # voice sample - Scarlett Johanson
|
64 |
+
3: 'https://cdn-uploads.huggingface.co/production/uploads/641de0213239b631552713e4/iKHHqWxWy6Zfmp6QP6CZZ.wav', # voice sample - Scarlett Johanson
|
65 |
+
4: False, #use_mic
|
66 |
+
5: False, #cleanup_reference
|
67 |
+
6: False, #auto_detect
|
68 |
+
},
|
69 |
+
'collabora/WhisperSpeech': {
|
70 |
+
1: 'https://cdn-uploads.huggingface.co/production/uploads/641de0213239b631552713e4/iKHHqWxWy6Zfmp6QP6CZZ.wav', # voice sample - Scarlett Johanson
|
71 |
+
2: 'https://cdn-uploads.huggingface.co/production/uploads/641de0213239b631552713e4/iKHHqWxWy6Zfmp6QP6CZZ.wav', # voice sample - Scarlett Johanson
|
72 |
+
3: 14.0, #Tempo - Gradio Slider issue: takes min. rather than value
|
73 |
+
},
|
74 |
+
'myshell-ai/OpenVoice': {
|
75 |
+
1: 'default', # style
|
76 |
+
2: 'https://cdn-uploads.huggingface.co/production/uploads/641de0213239b631552713e4/iKHHqWxWy6Zfmp6QP6CZZ.wav', # voice sample - Scarlett Johanson
|
77 |
+
},
|
78 |
+
'PolyAI/pheme': {
|
79 |
+
1: 'YOU1000000044_S0000798', # voice
|
80 |
+
2: 210,
|
81 |
+
3: 0.7, #Tempo - Gradio Slider issue: takes min. rather than value
|
82 |
+
},
|
83 |
+
'Pendrokar/xVASynth': {
|
84 |
+
1: 'ccby_nvidia_hifi_92_F', #fine-tuned voice model name
|
85 |
+
3: 1.0, #pacing/duration - Gradio Slider issue: takes min. rather than value
|
86 |
+
},
|
87 |
}
|
88 |
|
89 |
SPACE_ID = os.getenv('SPACE_ID')
|
|
|
165 |
# Create DB table (if doesn't exist)
|
166 |
create_db_if_missing()
|
167 |
|
168 |
+
hf_token = os.getenv('HF_TOKEN')
|
169 |
# Sync local DB with remote repo every 5 minute (only if a change is detected)
|
170 |
scheduler = CommitScheduler(
|
171 |
repo_id=DB_DATASET_ID,
|
|
|
181 |
####################################
|
182 |
# Router API
|
183 |
####################################
|
184 |
+
router = Client("TTS-AGI/tts-router", hf_token=hf_token)
|
185 |
####################################
|
186 |
# Gradio app
|
187 |
####################################
|
|
|
339 |
'metavoice': 'Apache 2.0',
|
340 |
'elevenlabs': 'Proprietary',
|
341 |
'whisperspeech': 'MIT',
|
342 |
+
|
343 |
+
'Pendrokar/xVASynth': 'GPT3',
|
344 |
+
'Pendrokar/xVASynthStreaming': 'GPT3',
|
345 |
}
|
346 |
model_links = {
|
347 |
'styletts2': 'https://github.com/yl4579/StyleTTS2',
|
|
|
612 |
def predict_and_update_result(text, model, result_storage):
|
613 |
try:
|
614 |
if model in AVAILABLE_MODELS:
|
615 |
+
if '/' in model:
|
616 |
+
# Use public HF Space
|
617 |
+
mdl_space = Client(model, hf_token=hf_token)
|
618 |
+
# assume the index is one of the first 9 return params
|
619 |
+
return_audio_index = int(AVAILABLE_MODELS[model][-1])
|
620 |
+
endpoints = mdl_space.view_api(all_endpoints=True, print_info=False, return_format='dict')
|
621 |
+
|
622 |
+
api_name = None
|
623 |
+
fn_index = None
|
624 |
+
# has named endpoint
|
625 |
+
if '/' == AVAILABLE_MODELS[model][:1]:
|
626 |
+
# assume the index is one of the first 9 params
|
627 |
+
api_name = AVAILABLE_MODELS[model][:-2]
|
628 |
+
|
629 |
+
space_inputs = _get_param_examples(
|
630 |
+
endpoints['named_endpoints'][api_name]['parameters']
|
631 |
+
)
|
632 |
+
# has unnamed endpoint
|
633 |
+
else:
|
634 |
+
# endpoint index is the first character
|
635 |
+
fn_index = int(AVAILABLE_MODELS[model][0])
|
636 |
+
|
637 |
+
space_inputs = _get_param_examples(
|
638 |
+
endpoints['unnamed_endpoints'][str(fn_index)]['parameters']
|
639 |
+
)
|
640 |
+
|
641 |
+
space_inputs = _override_params(space_inputs, model)
|
642 |
+
|
643 |
+
# force text
|
644 |
+
space_inputs[0] = text
|
645 |
+
|
646 |
+
results = mdl_space.predict(*space_inputs, api_name=api_name, fn_index=fn_index)
|
647 |
+
|
648 |
+
# return path to audio
|
649 |
+
result = results[return_audio_index] if (not isinstance(results, str)) else results
|
650 |
+
else:
|
651 |
+
# Use the private HF Space
|
652 |
+
result = router.predict(text, AVAILABLE_MODELS[model].lower(), api_name="/synthesize")
|
653 |
else:
|
654 |
result = router.predict(text, model.lower(), api_name="/synthesize")
|
655 |
except:
|
|
|
681 |
# doloudnorm(result)
|
682 |
# except:
|
683 |
# pass
|
684 |
+
|
685 |
+
def _get_param_examples(parameters):
|
686 |
+
example_inputs = []
|
687 |
+
for param_info in parameters:
|
688 |
+
if (
|
689 |
+
param_info['component'] == 'Radio'
|
690 |
+
or param_info['component'] == 'Dropdown'
|
691 |
+
or param_info['component'] == 'Audio'
|
692 |
+
or param_info['python_type']['type'] == 'str'
|
693 |
+
):
|
694 |
+
example_inputs.append(str(param_info['example_input']))
|
695 |
+
continue
|
696 |
+
if param_info['python_type']['type'] == 'int':
|
697 |
+
example_inputs.append(int(param_info['example_input']))
|
698 |
+
continue
|
699 |
+
if param_info['python_type']['type'] == 'float':
|
700 |
+
example_inputs.append(float(param_info['example_input']))
|
701 |
+
continue
|
702 |
+
if param_info['python_type']['type'] == 'bool':
|
703 |
+
example_inputs.append(bool(param_info['example_input']))
|
704 |
+
continue
|
705 |
+
|
706 |
+
return example_inputs
|
707 |
+
|
708 |
+
def _override_params(inputs, modelname):
|
709 |
+
try:
|
710 |
+
for key,value in OVERRIDE_INPUTS[modelname].items():
|
711 |
+
inputs[key] = value
|
712 |
+
print(f"Default inputs overridden for {modelname}")
|
713 |
+
except:
|
714 |
+
pass
|
715 |
+
|
716 |
+
return inputs
|
717 |
+
|
718 |
results = {}
|
719 |
thread1 = threading.Thread(target=predict_and_update_result, args=(text, mdl1, results))
|
720 |
thread2 = threading.Thread(target=predict_and_update_result, args=(text, mdl2, results))
|
|
|
831 |
gr.Markdown(f"If you use this data in your publication, please cite us!\n\nCopy the BibTeX citation to cite this source:\n\n```bibtext\n{CITATION_TEXT}\n```\n\nPlease remember that all generated audio clips should be assumed unsuitable for redistribution or commercial use.")
|
832 |
|
833 |
|
834 |
+
demo.queue(api_open=False, default_concurrency_limit=40).launch(show_api=False)
|