hans00 committed
Commit 476f08d (unverified)
1 Parent(s): bc05181

Use the HF backend when CUDA is available; load models per request instead of keeping them cached

Files changed (1)
  1. app.py +31 -41
app.py CHANGED
@@ -34,48 +34,35 @@ def get_file_hash(file_path):
             hash_md5.update(chunk)
     return hash_md5.hexdigest()
 
-def try_auto_model_config(model: outetts.Models, backend: outetts.Backend, quantization: outetts.LlamaCppQuantization):
+def try_ggml_model(model: outetts.Models, backend: outetts.Backend, quantization: outetts.LlamaCppQuantization):
     model_config = MODEL_INFO[model]
-    try:
-        repo = f"OuteAI/{model.value}-GGUF"
-        filename = f"{model.value}-{quantization.value}.gguf"
-        model_path = hf_hub_download(
-            repo_id=repo,
-            filename=filename,
-            local_dir=os.path.join(helpers.get_cache_dir(), "gguf"),
-            local_files_only=False
-        )
-        return outetts.ModelConfig(
-            model_path=model_path,
-            tokenizer_path=f"OuteAI/{model.value}",
-            backend=backend,
-            n_gpu_layers=99,
-            verbose=False,
-            device=None,
-            dtype=None,
-            additional_model_config={},
-            audio_codec_path=None,
-            **model_config
-        )
-    except Exception as e:
-        print(f"Error: {e}")
-        return None
+    repo = f"OuteAI/{model.value}-GGUF"
+    filename = f"{model.value}-{quantization.value}.gguf"
+    model_path = hf_hub_download(
+        repo_id=repo,
+        filename=filename,
+        local_dir=os.path.join(helpers.get_cache_dir(), "gguf"),
+        local_files_only=False
+    )
+    return outetts.ModelConfig(
+        model_path=model_path,
+        tokenizer_path=f"OuteAI/{model.value}",
+        backend=backend,
+        n_gpu_layers=99,
+        verbose=False,
+        device=None,
+        dtype=None,
+        additional_model_config={},
+        audio_codec_path=None,
+        **model_config
+    )
 
-@lru_cache(maxsize=5)
-def get_cached_interface(model_name: str):
-    """Get cached interface instance for the model."""
+def get_interface(model_name: str):
+    """Get interface instance for the model (no caching to avoid CUDA memory issues)."""
     model = MODELS[model_name]
 
-    quantization = MODEL_QUANTIZATION.get(model, outetts.LlamaCppQuantization.Q6_K)
-    config = try_auto_model_config(model, outetts.Backend.LLAMACPP, quantization)
-    # self.model = AutoModelForCausalLM.from_pretrained(
-    #     model_path,
-    #     torch_dtype=dtype,
-    #     **additional_model_config
-    # ).to(self.device)
-    if not config:
-        # Fallback to HF model
-        has_cuda = torch.cuda.is_available()
+    has_cuda = torch.cuda.is_available()
+    if has_cuda:
         model_config = MODEL_INFO[model]
         config = outetts.ModelConfig(
             model_path=f"OuteAI/{model_name}",
@@ -83,13 +70,16 @@ def get_cached_interface(model_name: str):
             backend=outetts.Backend.HF,
             additional_model_config={
                 "device_map": "auto" if has_cuda else "cpu",
-                "attn_implementation": "flash_attention_2",
+                "attn_implementation": "flash_attention_2" if has_cuda else "eager",
                 "quantization_config": BitsAndBytesConfig(
                     load_in_8bit=True
                 ) if has_cuda else None,
             },
             **model_config
         )
+    else:
+        quantization = MODEL_QUANTIZATION.get(model, outetts.LlamaCppQuantization.Q6_K)
+        config = try_ggml_model(model, outetts.Backend.LLAMACPP, quantization)
 
     # Initialize the interface
     interface = outetts.Interface(config=config)
@@ -122,8 +112,8 @@ def create_speaker_and_generate(model_name, audio_file, test_text: Optional[str]
         # Return default values for startup/caching purposes
         return "Please upload an audio file to create a speaker profile.", None
 
-    # Get cached interface
-    interface = get_cached_interface(model_name)
+    # Get interface (no caching to avoid CUDA memory issues)
+    interface = get_interface(model_name)
 
     # Get or create speaker profile (with caching)
     speaker = get_or_create_speaker(interface, audio_file)
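
A side note on error handling, with a minimal sketch that is not part of this commit: the old try/except/return None was deleted along with try_auto_model_config, so a failed GGUF download inside try_ggml_model (for example a network error raised by hf_hub_download) now propagates to the caller, and CPU-only hosts no longer fall back to the HF backend. A caller could restore that behaviour along the following lines; build_hf_config is a hypothetical stand-in for the config that get_interface builds inline in its has_cuda branch, while MODELS, MODEL_QUANTIZATION, try_ggml_model, and outetts all come from app.py above.

import outetts

def get_interface_with_fallback(model_name: str):
    # Sketch only: recreate the pre-commit GGUF-then-HF fallback.
    model = MODELS[model_name]
    quantization = MODEL_QUANTIZATION.get(model, outetts.LlamaCppQuantization.Q6_K)
    try:
        # Since this commit, download/load errors raise instead of returning None.
        config = try_ggml_model(model, outetts.Backend.LLAMACPP, quantization)
    except Exception as e:
        print(f"GGUF load failed ({e}); falling back to the HF backend")
        config = build_hf_config(model_name)  # hypothetical helper, not in app.py
    return outetts.Interface(config=config)

As committed, the app instead picks the backend once from torch.cuda.is_available(): GPU hosts go straight to HF with 8-bit BitsAndBytes quantization, CPU hosts go straight to llama.cpp, and nothing recovers from a failed GGUF download.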