Spaces:

BricksDisplay
/

OuteTTS-Speaker-Creator

Running on Zero

App Files Files Community

hans00 committed 17 days ago

Commit

92715cf

unverified ·

1 Parent(s): 48a33d2

Support cache and expose API

Browse files

Files changed (1) hide show

app.py +61 -11

app.py CHANGED Viewed

@@ -4,35 +4,72 @@ import alias
 import outetts
 import json
 import tempfile
 # Available OuteTTS models based on the documentation
 MODELS = {v.value: v for _, v in outetts.Models.__members__.items()}
-def initialize_interface(model_name: str):
-    """Initialize the OuteTTS interface with selected model."""
     model = MODELS[model_name]
     # Configure the model
     config = outetts.ModelConfig.auto_config(
         model=model,
         backend=outetts.Backend.LLAMACPP,
-        quantization=outetts.LlamaCppQuantization.Q5_0,
     )
     # Initialize the interface
     interface = outetts.Interface(config=config)
     return interface
-def create_speaker_and_generate(model_name, audio_file, test_text="", temperature=0.4):
     """Create speaker from audio and optionally generate test audio."""
     if audio_file is None:
         raise gr.Error("Please upload an audio file")
-    # Initialize model
-    interface = initialize_interface(model_name)
-    # Create speaker profile from audio
-    speaker = interface.create_speaker(audio_file, whisper_model="large-v3-turbo")
     # Convert speaker dict to formatted JSON
     speaker_json = json.dumps(speaker, indent=2, ensure_ascii=False)
@@ -57,6 +94,8 @@ def create_speaker_and_generate(model_name, audio_file, test_text="", temperatur
     return speaker_json, generated_audio
 # Create the Gradio interface
 demo = gr.Interface(
     fn=create_speaker_and_generate,
@@ -76,7 +115,7 @@ demo = gr.Interface(
             label="Test Text (Optional)",
             placeholder="Enter text to generate speech (leave empty to only create speaker profile)...",
             lines=3,
-            value="Hello, this is a test of the OuteTTS speaker profile."
         ),
         gr.Slider(
             minimum=0.1,
@@ -102,7 +141,18 @@ demo = gr.Interface(
     title="🎙️ OuteTTS Speaker Creator",
     description="Create and manage speaker profiles for OuteTTS text-to-speech synthesis. Upload audio to create a speaker profile, and optionally provide test text to generate sample audio.",
     theme=gr.themes.Soft(),
-    examples=None
 )
-demo.launch()

 import outetts
 import json
 import tempfile
+import hashlib
+import os
+from functools import lru_cache
+from typing import Optional
 # Available OuteTTS models based on the documentation
 MODELS = {v.value: v for _, v in outetts.Models.__members__.items()}
+# Cache for speaker profiles to avoid re-transcribing the same audio
+speaker_cache = {}
def get_file_hash(file_path) -> str:
    """Return the hexadecimal MD5 digest of a file's contents.

    The digest identifies file contents for caching; it is not a
    security measure.

    Args:
        file_path: Path of the file to hash; opened in binary mode.

    Returns:
        The 32-character lowercase hex MD5 digest of the file's bytes.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    digest = hashlib.md5()
    with open(file_path, "rb") as f:
        # Feed the hash in 4 KiB chunks so the whole file is never held in
        # memory; iter() stops when read() returns b"" at end of file.
        for chunk in iter(lambda: f.read(4096), b""):
            digest.update(chunk)
    return digest.hexdigest()
@lru_cache(maxsize=5)
def get_cached_interface(model_name: str):
    """Build the OuteTTS interface for ``model_name``, memoized per name.

    ``lru_cache`` keeps at most five interface instances keyed by
    ``model_name``, so repeated calls with the same name return the same
    object instead of constructing a new interface.

    Args:
        model_name: A key of the module-level ``MODELS`` mapping.

    Returns:
        The ``outetts.Interface`` constructed for the selected model.

    Raises:
        KeyError: If ``model_name`` is not a key of ``MODELS``.
    """
    model = MODELS[model_name]

    # Configure the model for the llama.cpp backend with Q6_K quantization
    config = outetts.ModelConfig.auto_config(
        model=model,
        backend=outetts.Backend.LLAMACPP,
        quantization=outetts.LlamaCppQuantization.Q6_K,
    )

    # Initialize the interface
    interface = outetts.Interface(config=config)
    return interface
def get_or_create_speaker(interface, audio_file, model_name):
    """Return the speaker profile for an audio file, creating it on a cache miss.

    Profiles are stored in the module-level ``speaker_cache`` dict under the
    key ``"{model_name}_{file_hash}"``, where ``file_hash`` is the result of
    ``get_file_hash(audio_file)``. The same file contents with the same model
    name therefore reuse the stored profile, whatever the file is called.

    Args:
        interface: Object exposing ``create_speaker(path, whisper_model=...)``.
        audio_file: Path of the audio file to build the profile from.
        model_name: Model identifier; part of the cache key.

    Returns:
        The cached or newly created speaker profile. On a hit the stored
        object itself is returned, not a copy.
    """
    # Calculate file hash for caching
    file_hash = get_file_hash(audio_file)
    cache_key = f"{model_name}_{file_hash}"

    # Check if speaker profile is already cached
    if cache_key in speaker_cache:
        print(f"✅ Using cached speaker profile for {os.path.basename(audio_file)}")
        return speaker_cache[cache_key]

    # Create new speaker profile
    print(f"🔄 Creating new speaker profile for {os.path.basename(audio_file)}")
    speaker = interface.create_speaker(audio_file, whisper_model="large-v3-turbo")

    # Cache the speaker profile; speaker_cache is mutated in place
    speaker_cache[cache_key] = speaker
    print(f"💾 Cached speaker profile ({len(speaker_cache)} total cached)")
    return speaker
+def create_speaker_and_generate(model_name, audio_file, test_text: Optional[str] = None, temperature: float = 0.4):
     """Create speaker from audio and optionally generate test audio."""
     if audio_file is None:
         raise gr.Error("Please upload an audio file")
+    # Get cached interface
+    interface = get_cached_interface(model_name)
+    # Get or create speaker profile (with caching)
+    speaker = get_or_create_speaker(interface, audio_file, model_name)
     # Convert speaker dict to formatted JSON
     speaker_json = json.dumps(speaker, indent=2, ensure_ascii=False)
     return speaker_json, generated_audio
+example_text = "Hello, this is a test of the OuteTTS speaker profile."
 # Create the Gradio interface
 demo = gr.Interface(
     fn=create_speaker_and_generate,
             label="Test Text (Optional)",
             placeholder="Enter text to generate speech (leave empty to only create speaker profile)...",
             lines=3,
+            value=None
         ),
         gr.Slider(
             minimum=0.1,
     title="🎙️ OuteTTS Speaker Creator",
     description="Create and manage speaker profiles for OuteTTS text-to-speech synthesis. Upload audio to create a speaker profile, and optionally provide test text to generate sample audio.",
     theme=gr.themes.Soft(),
+    examples=[
+        ["OuteTTS-1.0-0.6B", None, example_text, 0.2],
+        ["OuteTTS-0.3-500M", None, example_text, 0.2],
+    ]
 )
if __name__ == "__main__":
    # Start the Gradio server only when this file is run as a script,
    # so importing the module does not launch it.
    demo.launch(
        server_name="0.0.0.0",  # Bind to all interfaces to allow external connections
        server_port=7860,
        share=False,            # Set to True if you want a public link
        show_api=True,          # Show API documentation
        show_error=True         # Show detailed error messages
    )