Spaces:

BricksDisplay
/

OuteTTS-Speaker-Creator

Running on Zero

App Files Community

hans00 committed 13 days ago

Commit

f5220fd

unverified ·

1 Parent(s): a6b9820

Better quantize selection

Browse files

Files changed (1) hide show

app.py +11 -4

app.py CHANGED Viewed

@@ -15,6 +15,12 @@ from huggingface_hub import hf_hub_download
 # Available OuteTTS models based on the documentation
 MODELS = {v.value: v for _, v in outetts.Models.__members__.items()}
 # Cache for speaker profiles to avoid re-transcribing the same audio
 speaker_cache = {}
@@ -58,7 +64,8 @@ def get_cached_interface(model_name: str):
     """Get cached interface instance for the model."""
     model = MODELS[model_name]
-    config = try_auto_model_config(model, outetts.Backend.LLAMACPP, outetts.LlamaCppQuantization.Q6_K)
     if not config:
         # Fallback to HF model
         model_config = MODEL_INFO[model]
@@ -73,11 +80,11 @@ def get_cached_interface(model_name: str):
     interface = outetts.Interface(config=config)
     return interface
-def get_or_create_speaker(interface, audio_file, model_name):
     """Get speaker from cache or create new one if not cached."""
     # Calculate file hash for caching
     file_hash = get_file_hash(audio_file)
-    cache_key = f"{model_name}_{file_hash}"
     # Check if speaker profile is already cached
     if cache_key in speaker_cache:
@@ -104,7 +111,7 @@ def create_speaker_and_generate(model_name, audio_file, test_text: Optional[str]
     interface = get_cached_interface(model_name)
     # Get or create speaker profile (with caching)
-    speaker = get_or_create_speaker(interface, audio_file, model_name)
     # Convert speaker dict to formatted JSON
     speaker_json = json.dumps(speaker, indent=2, ensure_ascii=False)

 # Available OuteTTS models based on the documentation
 MODELS = {v.value: v for _, v in outetts.Models.__members__.items()}
+MODEL_QUANTIZATION = {
+    outetts.Models.VERSION_0_1_SIZE_350M: outetts.LlamaCppQuantization.Q8_0,
+    outetts.Models.VERSION_0_2_SIZE_500M: outetts.LlamaCppQuantization.Q8_0,
+    outetts.Models.VERSION_0_3_SIZE_500M: outetts.LlamaCppQuantization.Q8_0,
+}
 # Cache for speaker profiles to avoid re-transcribing the same audio
 speaker_cache = {}
     """Get cached interface instance for the model."""
     model = MODELS[model_name]
+    quantization = MODEL_QUANTIZATION.get(model, outetts.LlamaCppQuantization.Q6_K)
+    config = try_auto_model_config(model, outetts.Backend.LLAMACPP, quantization)
     if not config:
         # Fallback to HF model
         model_config = MODEL_INFO[model]
     interface = outetts.Interface(config=config)
     return interface
+def get_or_create_speaker(interface, audio_file):
     """Get speaker from cache or create new one if not cached."""
     # Calculate file hash for caching
     file_hash = get_file_hash(audio_file)
+    cache_key = f"{interface.config.interface_version}_{file_hash}"
     # Check if speaker profile is already cached
     if cache_key in speaker_cache:
     interface = get_cached_interface(model_name)
     # Get or create speaker profile (with caching)
+    speaker = get_or_create_speaker(interface, audio_file)
     # Convert speaker dict to formatted JSON
     speaker_json = json.dumps(speaker, indent=2, ensure_ascii=False)