llm

Sleeping

Chris4K committed on Jan 12

Commit

86179ff

verified ·

1 Parent(s): b572bdb

Update services/model_service.py

Files changed (1) hide show

services/model_service.py CHANGED Viewed

@@ -25,7 +25,8 @@ class ModelService:
             # Load tokenizer
             self.tokenizer = AutoTokenizer.from_pretrained(settings.MODEL_NAME)
             # Check quantization type and adjust accordingly
             if config.get('quantization_config', {}).get('type', '') == 'compressed-tensors':
@@ -34,10 +35,10 @@ class ModelService:
             # Load model with the updated configuration
             self.model = AutoModelForCausalLM.from_pretrained(
-                model_type == "llama" ,
                 settings.MODEL_NAME,
                 torch_dtype=torch.float16 if settings.DEVICE == "cuda" else torch.float32,
-                device_map="auto" if settings.DEVICE == "cuda" else None
             )
             # Load sentence embedder
@@ -48,4 +49,4 @@ class ModelService:
             raise
     def get_models(self):
-        return self.tokenizer, self.model, self.embedder

             # Load tokenizer
             self.tokenizer = AutoTokenizer.from_pretrained(settings.MODEL_NAME)
+            # Load model configuration
+            config = LlamaConfig.from_pretrained(settings.MODEL_NAME)
             # Check quantization type and adjust accordingly
             if config.get('quantization_config', {}).get('type', '') == 'compressed-tensors':
             # Load model with the updated configuration
             self.model = AutoModelForCausalLM.from_pretrained(
                 settings.MODEL_NAME,
+                config=config,
                 torch_dtype=torch.float16 if settings.DEVICE == "cuda" else torch.float32,
+                device_map="auto" if settings.DEVICE == "cuda" else None
             )
             # Load sentence embedder
             raise
     def get_models(self):
+        return self.tokenizer, self.model, self.embedder