Spaces:
Running
on
Zero
Running
on
Zero
Support cache and expose API
Browse files
app.py
CHANGED
@@ -4,35 +4,72 @@ import alias
|
|
4 |
import outetts
|
5 |
import json
|
6 |
import tempfile
|
|
|
|
|
|
|
|
|
7 |
|
8 |
# Available OuteTTS models based on the documentation
|
9 |
MODELS = {v.value: v for _, v in outetts.Models.__members__.items()}
|
10 |
|
11 |
-
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
model = MODELS[model_name]
|
14 |
|
15 |
# Configure the model
|
16 |
config = outetts.ModelConfig.auto_config(
|
17 |
model=model,
|
18 |
backend=outetts.Backend.LLAMACPP,
|
19 |
-
quantization=outetts.LlamaCppQuantization.
|
20 |
)
|
21 |
|
22 |
# Initialize the interface
|
23 |
interface = outetts.Interface(config=config)
|
24 |
return interface
|
25 |
|
26 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
"""Create speaker from audio and optionally generate test audio."""
|
28 |
if audio_file is None:
|
29 |
raise gr.Error("Please upload an audio file")
|
30 |
|
31 |
-
#
|
32 |
-
interface =
|
33 |
|
34 |
-
#
|
35 |
-
speaker = interface
|
36 |
|
37 |
# Convert speaker dict to formatted JSON
|
38 |
speaker_json = json.dumps(speaker, indent=2, ensure_ascii=False)
|
@@ -57,6 +94,8 @@ def create_speaker_and_generate(model_name, audio_file, test_text="", temperatur
|
|
57 |
|
58 |
return speaker_json, generated_audio
|
59 |
|
|
|
|
|
60 |
# Create the Gradio interface
|
61 |
demo = gr.Interface(
|
62 |
fn=create_speaker_and_generate,
|
@@ -76,7 +115,7 @@ demo = gr.Interface(
|
|
76 |
label="Test Text (Optional)",
|
77 |
placeholder="Enter text to generate speech (leave empty to only create speaker profile)...",
|
78 |
lines=3,
|
79 |
-
value=
|
80 |
),
|
81 |
gr.Slider(
|
82 |
minimum=0.1,
|
@@ -102,7 +141,18 @@ demo = gr.Interface(
|
|
102 |
title="🎙️ OuteTTS Speaker Creator",
|
103 |
description="Create and manage speaker profiles for OuteTTS text-to-speech synthesis. Upload audio to create a speaker profile, and optionally provide test text to generate sample audio.",
|
104 |
theme=gr.themes.Soft(),
|
105 |
-
examples=
|
|
|
|
|
|
|
106 |
)
|
107 |
|
108 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
import outetts
|
5 |
import json
|
6 |
import tempfile
|
7 |
+
import hashlib
|
8 |
+
import os
|
9 |
+
from functools import lru_cache
|
10 |
+
from typing import Optional
|
11 |
|
12 |
# Available OuteTTS models: lookup table from each member's `.value` to the
# corresponding `outetts.Models` member.
MODELS = {member.value: member for member in outetts.Models.__members__.values()}
|
14 |
|
15 |
+
# Cache for speaker profiles to avoid re-transcribing the same audio.
# Keys are "<model_name>_<md5 hex digest of the audio file>" strings and values
# are the speaker objects returned by `interface.create_speaker` (both are
# written in `get_or_create_speaker`).
speaker_cache = {}
|
17 |
+
|
18 |
+
def get_file_hash(file_path):
    """Return the hexadecimal MD5 digest of the file at *file_path*.

    The file is opened in binary mode and consumed 4096 bytes at a time, so
    the whole file is never held in memory at once. The digest is used as a
    cache key for caching purposes.
    """
    digest = hashlib.md5()
    with open(file_path, "rb") as stream:
        while True:
            block = stream.read(4096)
            if not block:
                # An empty read means end of file.
                break
            digest.update(block)
    return digest.hexdigest()
|
25 |
+
|
26 |
+
@lru_cache(maxsize=5)
def get_cached_interface(model_name: str):
    """Build the OuteTTS interface for *model_name*, memoised per name.

    ``lru_cache(maxsize=5)`` keeps the five most recently used results, so a
    repeated call with the same model name returns the already-built
    interface instead of constructing a new one.

    Raises:
        KeyError: if *model_name* is not a key of ``MODELS``.
    """
    selected_model = MODELS[model_name]

    # Model configuration: llama.cpp backend with Q6_K quantization.
    model_config = outetts.ModelConfig.auto_config(
        model=selected_model,
        backend=outetts.Backend.LLAMACPP,
        quantization=outetts.LlamaCppQuantization.Q6_K,
    )

    return outetts.Interface(config=model_config)
|
41 |
|
42 |
+
def get_or_create_speaker(interface, audio_file, model_name, *, max_cache_entries=32):
    """Return the speaker profile for *audio_file*, creating it on a cache miss.

    Profiles are stored in the module-level ``speaker_cache`` under the key
    ``"<model_name>_<md5 of the audio file>"``, so the same audio content is
    only processed once per model.

    Args:
        interface: Object exposing ``create_speaker(path, whisper_model=...)``.
        audio_file: Path to the audio file to build the speaker from.
        model_name: Model name; part of the cache key.
        max_cache_entries: Upper bound on the number of cached profiles.
            Before a new profile is stored, the oldest entries are evicted
            until there is room, so the cache cannot grow without limit in a
            long-running server. Values below 1 behave like 1.

    Returns:
        The cached or newly created speaker profile.
    """
    cache_key = f"{model_name}_{get_file_hash(audio_file)}"
    audio_name = os.path.basename(audio_file)

    # Cache hit: reuse the stored profile.
    if cache_key in speaker_cache:
        print(f"✅ Using cached speaker profile for {audio_name}")
        return speaker_cache[cache_key]

    # Cache miss: build a new profile from the audio.
    print(f"🔄 Creating new speaker profile for {audio_name}")
    speaker = interface.create_speaker(audio_file, whisper_model="large-v3-turbo")

    # Evict only after creation succeeded, so a failed creation never drops
    # an existing entry. Dicts preserve insertion order, so the first key is
    # the oldest one.
    limit = max(1, max_cache_entries)
    while len(speaker_cache) >= limit:
        speaker_cache.pop(next(iter(speaker_cache)))

    speaker_cache[cache_key] = speaker
    print(f"💾 Cached speaker profile ({len(speaker_cache)} total cached)")

    return speaker
|
62 |
+
|
63 |
+
def create_speaker_and_generate(model_name, audio_file, test_text: Optional[str] = None, temperature: float = 0.4):
|
64 |
"""Create speaker from audio and optionally generate test audio."""
|
65 |
if audio_file is None:
|
66 |
raise gr.Error("Please upload an audio file")
|
67 |
|
68 |
+
# Get cached interface
|
69 |
+
interface = get_cached_interface(model_name)
|
70 |
|
71 |
+
# Get or create speaker profile (with caching)
|
72 |
+
speaker = get_or_create_speaker(interface, audio_file, model_name)
|
73 |
|
74 |
# Convert speaker dict to formatted JSON
|
75 |
speaker_json = json.dumps(speaker, indent=2, ensure_ascii=False)
|
|
|
94 |
|
95 |
return speaker_json, generated_audio
|
96 |
|
97 |
+
# Sample sentence used as the test text in the `examples` rows of the Gradio interface.
example_text = "Hello, this is a test of the OuteTTS speaker profile."
|
98 |
+
|
99 |
# Create the Gradio interface
|
100 |
demo = gr.Interface(
|
101 |
fn=create_speaker_and_generate,
|
|
|
115 |
label="Test Text (Optional)",
|
116 |
placeholder="Enter text to generate speech (leave empty to only create speaker profile)...",
|
117 |
lines=3,
|
118 |
+
value=None
|
119 |
),
|
120 |
gr.Slider(
|
121 |
minimum=0.1,
|
|
|
141 |
title="🎙️ OuteTTS Speaker Creator",
|
142 |
description="Create and manage speaker profiles for OuteTTS text-to-speech synthesis. Upload audio to create a speaker profile, and optionally provide test text to generate sample audio.",
|
143 |
theme=gr.themes.Soft(),
|
144 |
+
examples=[
|
145 |
+
["OuteTTS-1.0-0.6B", None, example_text, 0.2],
|
146 |
+
["OuteTTS-0.3-500M", None, example_text, 0.2],
|
147 |
+
]
|
148 |
)
|
149 |
|
150 |
+
if __name__ == "__main__":
    # Launch with API enabled; options are collected here and passed in one call.
    launch_options = {
        "server_name": "0.0.0.0",  # Allow external connections
        "server_port": 7860,
        "share": False,  # Set to True if you want a public link
        "show_api": True,  # Show API documentation
        "show_error": True,  # Show detailed error messages
    }
    demo.launch(**launch_options)
|