hans00 commited on
Commit
03fd9ee
·
unverified ·
1 Parent(s): cf59503

Try using GGML to improve inference speed

Browse files
Files changed (1) hide show
  1. app.py +8 -8
app.py CHANGED
@@ -18,9 +18,9 @@ import spaces
18
  MODELS = {v.value: v for _, v in outetts.Models.__members__.items()}
19
 
20
  MODEL_QUANTIZATION = {
21
- outetts.Models.VERSION_0_1_SIZE_350M: outetts.LlamaCppQuantization.Q8_0,
22
- outetts.Models.VERSION_0_2_SIZE_500M: outetts.LlamaCppQuantization.Q8_0,
23
- outetts.Models.VERSION_0_3_SIZE_500M: outetts.LlamaCppQuantization.Q8_0,
24
  }
25
 
26
  # Cache for speaker profiles to avoid re-transcribing the same audio
@@ -61,8 +61,11 @@ def get_interface(model_name: str):
61
  """Get interface instance for the model (no caching to avoid CUDA memory issues)."""
62
  model = MODELS[model_name]
63
 
64
- has_cuda = torch.cuda.is_available()
65
- if has_cuda:
 
 
 
66
  model_config = MODEL_INFO[model]
67
  config = outetts.ModelConfig(
68
  model_path=f"OuteAI/{model_name}",
@@ -77,9 +80,6 @@ def get_interface(model_name: str):
77
  },
78
  **model_config
79
  )
80
- else:
81
- quantization = MODEL_QUANTIZATION.get(model, outetts.LlamaCppQuantization.Q6_K)
82
- config = try_ggml_model(model, outetts.Backend.LLAMACPP, quantization)
83
 
84
  # Initialize the interface
85
  interface = outetts.Interface(config=config)
 
18
  MODELS = {v.value: v for _, v in outetts.Models.__members__.items()}
19
 
20
  MODEL_QUANTIZATION = {
21
+ outetts.Models.VERSION_0_1_SIZE_350M: outetts.LlamaCppQuantization.FP16,
22
+ outetts.Models.VERSION_0_2_SIZE_500M: outetts.LlamaCppQuantization.FP16,
23
+ outetts.Models.VERSION_0_3_SIZE_500M: outetts.LlamaCppQuantization.FP16,
24
  }
25
 
26
  # Cache for speaker profiles to avoid re-transcribing the same audio
 
61
  """Get interface instance for the model (no caching to avoid CUDA memory issues)."""
62
  model = MODELS[model_name]
63
 
64
+ try:
65
+ quantization = MODEL_QUANTIZATION.get(model, outetts.LlamaCppQuantization.Q8_0)
66
+ config = try_ggml_model(model, outetts.Backend.LLAMACPP, quantization)
67
+ except:
68
+ has_cuda = torch.cuda.is_available()
69
  model_config = MODEL_INFO[model]
70
  config = outetts.ModelConfig(
71
  model_path=f"OuteAI/{model_name}",
 
80
  },
81
  **model_config
82
  )
 
 
 
83
 
84
  # Initialize the interface
85
  interface = outetts.Interface(config=config)