hans00 committed on
Commit
92715cf
·
unverified ·
1 Parent(s): 48a33d2

Support cache and expose API

Browse files
Files changed (1) hide show
  1. app.py +61 -11
app.py CHANGED
@@ -4,35 +4,72 @@ import alias
4
  import outetts
5
  import json
6
  import tempfile
 
 
 
 
7
 
8
  # Available OuteTTS models based on the documentation
9
  MODELS = {v.value: v for _, v in outetts.Models.__members__.items()}
10
 
11
- def initialize_interface(model_name: str):
12
- """Initialize the OuteTTS interface with selected model."""
 
 
 
 
 
 
 
 
 
 
 
 
13
  model = MODELS[model_name]
14
 
15
  # Configure the model
16
  config = outetts.ModelConfig.auto_config(
17
  model=model,
18
  backend=outetts.Backend.LLAMACPP,
19
- quantization=outetts.LlamaCppQuantization.Q5_0,
20
  )
21
 
22
  # Initialize the interface
23
  interface = outetts.Interface(config=config)
24
  return interface
25
 
26
- def create_speaker_and_generate(model_name, audio_file, test_text="", temperature=0.4):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  """Create speaker from audio and optionally generate test audio."""
28
  if audio_file is None:
29
  raise gr.Error("Please upload an audio file")
30
 
31
- # Initialize model
32
- interface = initialize_interface(model_name)
33
 
34
- # Create speaker profile from audio
35
- speaker = interface.create_speaker(audio_file, whisper_model="large-v3-turbo")
36
 
37
  # Convert speaker dict to formatted JSON
38
  speaker_json = json.dumps(speaker, indent=2, ensure_ascii=False)
@@ -57,6 +94,8 @@ def create_speaker_and_generate(model_name, audio_file, test_text="", temperatur
57
 
58
  return speaker_json, generated_audio
59
 
 
 
60
  # Create the Gradio interface
61
  demo = gr.Interface(
62
  fn=create_speaker_and_generate,
@@ -76,7 +115,7 @@ demo = gr.Interface(
76
  label="Test Text (Optional)",
77
  placeholder="Enter text to generate speech (leave empty to only create speaker profile)...",
78
  lines=3,
79
- value="Hello, this is a test of the OuteTTS speaker profile."
80
  ),
81
  gr.Slider(
82
  minimum=0.1,
@@ -102,7 +141,18 @@ demo = gr.Interface(
102
  title="🎙️ OuteTTS Speaker Creator",
103
  description="Create and manage speaker profiles for OuteTTS text-to-speech synthesis. Upload audio to create a speaker profile, and optionally provide test text to generate sample audio.",
104
  theme=gr.themes.Soft(),
105
- examples=None
 
 
 
106
  )
107
 
108
- demo.launch()
 
 
 
 
 
 
 
 
 
4
  import outetts
5
  import json
6
  import tempfile
7
+ import hashlib
8
+ import os
9
+ from functools import lru_cache
10
+ from typing import Optional
11
 
12
  # Available OuteTTS models based on the documentation
13
  MODELS = {v.value: v for _, v in outetts.Models.__members__.items()}
14
 
15
+ # Cache for speaker profiles to avoid re-transcribing the same audio
16
+ speaker_cache = {}
17
+
18
def get_file_hash(file_path, chunk_size: int = 65536) -> str:
    """Return the hexadecimal MD5 digest of a file's contents.

    The digest is only used as a cache key for speaker profiles; it is not
    a security measure.

    Args:
        file_path: Path of the file to hash. The file is opened in binary
            mode and read incrementally.
        chunk_size: Number of bytes read per iteration, so large audio
            files are never loaded into memory in one piece.

    Returns:
        The 32-character lowercase hex digest of the file contents.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
        OSError: If the file cannot be opened or read.
    """
    # A zero-sized read returns b"" immediately, which would silently yield
    # the digest of an empty file for every input.
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    try:
        # Flag the digest as non-security use so that FIPS-restricted
        # Python builds still allow MD5 for cache keys.
        digest = hashlib.md5(usedforsecurity=False)
    except TypeError:
        # Interpreters older than 3.9 do not accept the keyword.
        digest = hashlib.md5()

    with open(file_path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()
25
+
26
@lru_cache(maxsize=5)
def get_cached_interface(model_name: str):
    """Build the OuteTTS interface for ``model_name``, memoized per name.

    ``lru_cache`` keeps up to five built interfaces, so repeated requests
    for the same model name reuse the instance created on the first call.

    Args:
        model_name: Key into the module-level ``MODELS`` mapping.

    Returns:
        The ``outetts.Interface`` created from an auto-generated llama.cpp
        configuration using Q6_K quantization.

    Raises:
        KeyError: If ``model_name`` is not a key of ``MODELS``.
    """
    selected_model = MODELS[model_name]

    # Let outetts derive the remaining settings for the chosen model.
    model_config = outetts.ModelConfig.auto_config(
        model=selected_model,
        backend=outetts.Backend.LLAMACPP,
        quantization=outetts.LlamaCppQuantization.Q6_K,
    )

    return outetts.Interface(config=model_config)
41
 
42
def get_or_create_speaker(interface, audio_file, model_name, max_cached: int = 32):
    """Return the speaker profile for an audio file, creating it on a cache miss.

    Profiles are stored in the module-level ``speaker_cache`` under a key
    built from the model name and the hash of the audio file, so the same
    audio uploaded again for the same model is not processed twice.

    Args:
        interface: Object exposing ``create_speaker(path, whisper_model=...)``.
        audio_file: Path of the reference audio file.
        model_name: Name of the model the profile belongs to; part of the
            cache key.
        max_cached: Upper bound on the number of profiles kept in
            ``speaker_cache``. The oldest entries are dropped first so a
            long-running server does not grow without limit.

    Returns:
        The cached or newly created speaker profile.
    """
    cache_key = f"{model_name}_{get_file_hash(audio_file)}"
    display_name = os.path.basename(audio_file)

    # Reuse the stored profile when this exact audio was seen before.
    if cache_key in speaker_cache:
        print(f"✅ Using cached speaker profile for {display_name}")
        return speaker_cache[cache_key]

    print(f"🔄 Creating new speaker profile for {display_name}")
    speaker = interface.create_speaker(audio_file, whisper_model="large-v3-turbo")

    # Dicts preserve insertion order, so the first key is the oldest entry.
    # Always leave room for at least the profile that was just created.
    capacity = max(max_cached, 1)
    while len(speaker_cache) >= capacity:
        oldest_key = next(iter(speaker_cache), None)
        if oldest_key is None:
            break
        speaker_cache.pop(oldest_key, None)

    speaker_cache[cache_key] = speaker
    print(f"💾 Cached speaker profile ({len(speaker_cache)} total cached)")

    return speaker
62
+
63
+ def create_speaker_and_generate(model_name, audio_file, test_text: Optional[str] = None, temperature: float = 0.4):
64
  """Create speaker from audio and optionally generate test audio."""
65
  if audio_file is None:
66
  raise gr.Error("Please upload an audio file")
67
 
68
+ # Get cached interface
69
+ interface = get_cached_interface(model_name)
70
 
71
+ # Get or create speaker profile (with caching)
72
+ speaker = get_or_create_speaker(interface, audio_file, model_name)
73
 
74
  # Convert speaker dict to formatted JSON
75
  speaker_json = json.dumps(speaker, indent=2, ensure_ascii=False)
 
94
 
95
  return speaker_json, generated_audio
96
 
97
+ example_text = "Hello, this is a test of the OuteTTS speaker profile."
98
+
99
  # Create the Gradio interface
100
  demo = gr.Interface(
101
  fn=create_speaker_and_generate,
 
115
  label="Test Text (Optional)",
116
  placeholder="Enter text to generate speech (leave empty to only create speaker profile)...",
117
  lines=3,
118
+ value=None
119
  ),
120
  gr.Slider(
121
  minimum=0.1,
 
141
  title="🎙️ OuteTTS Speaker Creator",
142
  description="Create and manage speaker profiles for OuteTTS text-to-speech synthesis. Upload audio to create a speaker profile, and optionally provide test text to generate sample audio.",
143
  theme=gr.themes.Soft(),
144
+ examples=[
145
+ ["OuteTTS-1.0-0.6B", None, example_text, 0.2],
146
+ ["OuteTTS-0.3-500M", None, example_text, 0.2],
147
+ ]
148
  )
149
 
150
if __name__ == "__main__":
    # Start the Gradio server with the API documentation page enabled.
    launch_options = {
        "server_name": "0.0.0.0",  # Allow external connections
        "server_port": 7860,
        "share": False,  # Set to True if you want a public link
        "show_api": True,  # Show API documentation
        "show_error": True,  # Show detailed error messages
    }
    demo.launch(**launch_options)