Spaces: Running on Zero
Try using GGML to improve inference speed
app.py
CHANGED
@@ -18,9 +18,9 @@ import spaces
 MODELS = {v.value: v for _, v in outetts.Models.__members__.items()}
 
 MODEL_QUANTIZATION = {
-    outetts.Models.VERSION_0_1_SIZE_350M: outetts.LlamaCppQuantization.
-    outetts.Models.VERSION_0_2_SIZE_500M: outetts.LlamaCppQuantization.
-    outetts.Models.VERSION_0_3_SIZE_500M: outetts.LlamaCppQuantization.
+    outetts.Models.VERSION_0_1_SIZE_350M: outetts.LlamaCppQuantization.FP16,
+    outetts.Models.VERSION_0_2_SIZE_500M: outetts.LlamaCppQuantization.FP16,
+    outetts.Models.VERSION_0_3_SIZE_500M: outetts.LlamaCppQuantization.FP16,
 }
 
 # Cache for speaker profiles to avoid re-transcribing the same audio
@@ -61,8 +61,11 @@ def get_interface(model_name: str):
     """Get interface instance for the model (no caching to avoid CUDA memory issues)."""
     model = MODELS[model_name]
 
-    has_cuda = torch.cuda.is_available()
-    if has_cuda:
+    try:
+        quantization = MODEL_QUANTIZATION.get(model, outetts.LlamaCppQuantization.Q8_0)
+        config = try_ggml_model(model, outetts.Backend.LLAMACPP, quantization)
+    except:
+        has_cuda = torch.cuda.is_available()
         model_config = MODEL_INFO[model]
         config = outetts.ModelConfig(
             model_path=f"OuteAI/{model_name}",
@@ -77,9 +80,6 @@ def get_interface(model_name: str):
         },
         **model_config
     )
-    else:
-        quantization = MODEL_QUANTIZATION.get(model, outetts.LlamaCppQuantization.Q6_K)
-        config = try_ggml_model(model, outetts.Backend.LLAMACPP, quantization)
 
     # Initialize the interface
     interface = outetts.Interface(config=config)
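For context, the two hunks in get_interface combine into a single load path. The sketch below is a condensed reconstruction of how the function reads after this commit, not a verbatim copy of app.py: the ModelConfig arguments elided from the diff are replaced by a placeholder comment, the bare except is written as except Exception, and MODELS, MODEL_INFO, MODEL_QUANTIZATION and try_ggml_model are the helpers defined elsewhere in app.py.

import torch
import outetts

def get_interface(model_name: str):
    """Prefer a GGUF/GGML build served by llama.cpp; fall back to the original HF weights."""
    model = MODELS[model_name]
    try:
        # FP16 GGUF for the mapped models, Q8_0 for anything not in MODEL_QUANTIZATION.
        quantization = MODEL_QUANTIZATION.get(model, outetts.LlamaCppQuantization.Q8_0)
        config = try_ggml_model(model, outetts.Backend.LLAMACPP, quantization)
    except Exception:  # the diff uses a bare `except:`
        # Fall back to the Hugging Face checkpoint under OuteAI/<model_name>.
        has_cuda = torch.cuda.is_available()
        model_config = MODEL_INFO[model]
        config = outetts.ModelConfig(
            model_path=f"OuteAI/{model_name}",
            # ... device/dtype options elided from the diff, presumably keyed on has_cuda ...
            **model_config,
        )
    return outetts.Interface(config=config)

Pinning the three models to FP16 keeps full-precision GGUF weights while still routing inference through llama.cpp, so any speedup comes from the backend rather than from lossy quantization; the Q8_0 default only applies to models missing from the map.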
|