llm

Sleeping

App Files Files Community

Chris4K committed on Jan 12

Commit

477aabb

verified ·

1 Parent(s): c0216a8

Update services/model_service.py

Browse files

Files changed (1) hide show

services/model_service.py +24 -16

services/model_service.py CHANGED Viewed

@@ -25,22 +25,30 @@ class ModelService:
             # Load tokenizer
             self.tokenizer = AutoTokenizer.from_pretrained(settings.MODEL_NAME)
-            # Load model configuration
-            config = LlamaConfig.from_pretrained(settings.MODEL_NAME)
-            # Check quantization type and adjust accordingly
-            if config.get('quantization_config', {}).get('type', '') == 'compressed-tensors':
-                logger.warning("Quantization type 'compressed-tensors' is not supported. Switching to 'bitsandbytes_8bit'.")
-                config.quantization_config['type'] = 'bitsandbytes_8bit'
-            # Load model with the updated configuration
-            self.model = AutoModelForCausalLM.from_pretrained(
-                settings.MODEL_NAME,
-                config=config,
-                torch_dtype=torch.float16 if settings.DEVICE == "cuda" else torch.float32,
-                device_map="auto" if settings.DEVICE == "cuda" else None
-            )
             # Load sentence embedder
             self.embedder = SentenceTransformer(settings.EMBEDDER_MODEL)

             # Load tokenizer
             self.tokenizer = AutoTokenizer.from_pretrained(settings.MODEL_NAME)
+            ## Load model configuration
+            #config = LlamaConfig.from_pretrained(settings.MODEL_NAME)
+            ## Check quantization type and adjust accordingly
+            #if config.get('quantization_config', {}).get('type', '') == 'compressed-tensors':
+            #    logger.warning("Quantization type 'compressed-tensors' is not supported. Switching to 'bitsandbytes_8bit'.")
+            #    config.quantization_config['type'] = 'bitsandbytes_8bit'
+            ## Load model with the updated configuration
+            #self.model = AutoModelForCausalLM.from_pretrained(
+            #    settings.MODEL_NAME,
+            #    config=config,
+            #    torch_dtype=torch.float16 if settings.DEVICE == "cuda" else torch.float32,
+            #    device_map="auto" if settings.DEVICE == "cuda" else None
+            #)
+#-----
+            # Load Llama 3.2 model
+            model_name = settings.MODEL_NAME #"meta-llama/Llama-3.2-3B-Instruct"  # Replace with the exact model path
+            tokenizer = AutoTokenizer.from_pretrained(model_name)
+            #model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.float16)
+            self.model = AutoModelForCausalLM.from_pretrained(model_name, device_map=None, torch_dtype=torch.float32)
             # Load sentence embedder
             self.embedder = SentenceTransformer(settings.EMBEDDER_MODEL)