hans00 committed on
Commit
f5220fd
·
unverified ·
1 Parent(s): a6b9820

Better quantize selection

Browse files
Files changed (1) hide show
  1. app.py +11 -4
app.py CHANGED
@@ -15,6 +15,12 @@ from huggingface_hub import hf_hub_download
15
  # Available OuteTTS models based on the documentation
16
  MODELS = {v.value: v for _, v in outetts.Models.__members__.items()}
17
 
 
 
 
 
 
 
18
  # Cache for speaker profiles to avoid re-transcribing the same audio
19
  speaker_cache = {}
20
 
@@ -58,7 +64,8 @@ def get_cached_interface(model_name: str):
58
  """Get cached interface instance for the model."""
59
  model = MODELS[model_name]
60
 
61
- config = try_auto_model_config(model, outetts.Backend.LLAMACPP, outetts.LlamaCppQuantization.Q6_K)
 
62
  if not config:
63
  # Fallback to HF model
64
  model_config = MODEL_INFO[model]
@@ -73,11 +80,11 @@ def get_cached_interface(model_name: str):
73
  interface = outetts.Interface(config=config)
74
  return interface
75
 
76
- def get_or_create_speaker(interface, audio_file, model_name):
77
  """Get speaker from cache or create new one if not cached."""
78
  # Calculate file hash for caching
79
  file_hash = get_file_hash(audio_file)
80
- cache_key = f"{model_name}_{file_hash}"
81
 
82
  # Check if speaker profile is already cached
83
  if cache_key in speaker_cache:
@@ -104,7 +111,7 @@ def create_speaker_and_generate(model_name, audio_file, test_text: Optional[str]
104
  interface = get_cached_interface(model_name)
105
 
106
  # Get or create speaker profile (with caching)
107
- speaker = get_or_create_speaker(interface, audio_file, model_name)
108
 
109
  # Convert speaker dict to formatted JSON
110
  speaker_json = json.dumps(speaker, indent=2, ensure_ascii=False)
 
15
  # Available OuteTTS models based on the documentation
16
  MODELS = {v.value: v for _, v in outetts.Models.__members__.items()}
17
 
18
+ MODEL_QUANTIZATION = {
19
+ outetts.Models.VERSION_0_1_SIZE_350M: outetts.LlamaCppQuantization.Q8_0,
20
+ outetts.Models.VERSION_0_2_SIZE_500M: outetts.LlamaCppQuantization.Q8_0,
21
+ outetts.Models.VERSION_0_3_SIZE_500M: outetts.LlamaCppQuantization.Q8_0,
22
+ }
23
+
24
  # Cache for speaker profiles to avoid re-transcribing the same audio
25
  speaker_cache = {}
26
 
 
64
  """Get cached interface instance for the model."""
65
  model = MODELS[model_name]
66
 
67
+ quantization = MODEL_QUANTIZATION.get(model, outetts.LlamaCppQuantization.Q6_K)
68
+ config = try_auto_model_config(model, outetts.Backend.LLAMACPP, quantization)
69
  if not config:
70
  # Fallback to HF model
71
  model_config = MODEL_INFO[model]
 
80
  interface = outetts.Interface(config=config)
81
  return interface
82
 
83
+ def get_or_create_speaker(interface, audio_file):
84
  """Get speaker from cache or create new one if not cached."""
85
  # Calculate file hash for caching
86
  file_hash = get_file_hash(audio_file)
87
+ cache_key = f"{interface.config.interface_version}_{file_hash}"
88
 
89
  # Check if speaker profile is already cached
90
  if cache_key in speaker_cache:
 
111
  interface = get_cached_interface(model_name)
112
 
113
  # Get or create speaker profile (with caching)
114
+ speaker = get_or_create_speaker(interface, audio_file)
115
 
116
  # Convert speaker dict to formatted JSON
117
  speaker_json = json.dumps(speaker, indent=2, ensure_ascii=False)