hans00 commited on
Commit
03fd9ee
·
unverified ·
1 Parent(s): cf59503

Try using GGML to improve inference speed

Browse files
Files changed (1) hide show
  1. app.py +8 -8
app.py CHANGED
@@ -18,9 +18,9 @@ import spaces
18
  MODELS = {v.value: v for _, v in outetts.Models.__members__.items()}
19
 
20
  MODEL_QUANTIZATION = {
21
- outetts.Models.VERSION_0_1_SIZE_350M: outetts.LlamaCppQuantization.Q8_0,
22
- outetts.Models.VERSION_0_2_SIZE_500M: outetts.LlamaCppQuantization.Q8_0,
23
- outetts.Models.VERSION_0_3_SIZE_500M: outetts.LlamaCppQuantization.Q8_0,
24
  }
25
 
26
  # Cache for speaker profiles to avoid re-transcribing the same audio
@@ -61,8 +61,11 @@ def get_interface(model_name: str):
61
  """Get interface instance for the model (no caching to avoid CUDA memory issues)."""
62
  model = MODELS[model_name]
63
 
64
- has_cuda = torch.cuda.is_available()
65
- if has_cuda:
 
 
 
66
  model_config = MODEL_INFO[model]
67
  config = outetts.ModelConfig(
68
  model_path=f"OuteAI/{model_name}",
@@ -77,9 +80,6 @@ def get_interface(model_name: str):
77
  },
78
  **model_config
79
  )
80
- else:
81
- quantization = MODEL_QUANTIZATION.get(model, outetts.LlamaCppQuantization.Q6_K)
82
- config = try_ggml_model(model, outetts.Backend.LLAMACPP, quantization)
83
 
84
  # Initialize the interface
85
  interface = outetts.Interface(config=config)
 
18
  MODELS = {v.value: v for _, v in outetts.Models.__members__.items()}
19
 
20
  MODEL_QUANTIZATION = {
21
+ outetts.Models.VERSION_0_1_SIZE_350M: outetts.LlamaCppQuantization.FP16,
22
+ outetts.Models.VERSION_0_2_SIZE_500M: outetts.LlamaCppQuantization.FP16,
23
+ outetts.Models.VERSION_0_3_SIZE_500M: outetts.LlamaCppQuantization.FP16,
24
  }
25
 
26
  # Cache for speaker profiles to avoid re-transcribing the same audio
 
61
  """Get interface instance for the model (no caching to avoid CUDA memory issues)."""
62
  model = MODELS[model_name]
63
 
64
+ try:
65
+ quantization = MODEL_QUANTIZATION.get(model, outetts.LlamaCppQuantization.Q8_0)
66
+ config = try_ggml_model(model, outetts.Backend.LLAMACPP, quantization)
67
+ except:
68
+ has_cuda = torch.cuda.is_available()
69
  model_config = MODEL_INFO[model]
70
  config = outetts.ModelConfig(
71
  model_path=f"OuteAI/{model_name}",
 
80
  },
81
  **model_config
82
  )
 
 
 
83
 
84
  # Initialize the interface
85
  interface = outetts.Interface(config=config)