llm

Sleeping

Chris4K committed on Jan 12

Commit

16f48ef

verified ·

1 Parent(s): 8c9754a

Update services/model_service.py

Files changed (1) hide show

services/model_service.py CHANGED Viewed

@@ -25,16 +25,7 @@ class ModelService:
             # Load tokenizer
             self.tokenizer = AutoTokenizer.from_pretrained(settings.MODEL_NAME)
-            # Load model configuration
-            config = LlamaConfig.from_pretrained(settings.MODEL_NAME)
-            # Check and update rope_scaling if necessary
-            if hasattr(config, "rope_scaling") and config.rope_scaling is not None:
-                logger.info("Updating rope_scaling in configuration...")
-                config.rope_scaling = {
-                    "type": "linear",  # Ensure this matches the expected type
-                    "factor": config.rope_scaling.get('factor', 1.0)  # Use existing factor or default to 1.0
-                }
             # Check quantization type and adjust accordingly
             if config.get('quantization_config', {}).get('type', '') == 'compressed-tensors':
@@ -43,11 +34,9 @@ class ModelService:
             # Load model with the updated configuration
             self.model = AutoModelForCausalLM.from_pretrained(
-                settings.MODEL_NAME,
-                model_type = "llama",
                 torch_dtype=torch.float16 if settings.DEVICE == "cuda" else torch.float32,
-                device_map="auto" if settings.DEVICE == "cuda" else None,
-                config=config
             )
             # Load sentence embedder

             # Load tokenizer
             self.tokenizer = AutoTokenizer.from_pretrained(settings.MODEL_NAME)
             # Check quantization type and adjust accordingly
             if config.get('quantization_config', {}).get('type', '') == 'compressed-tensors':
             # Load model with the updated configuration
             self.model = AutoModelForCausalLM.from_pretrained(
+                settings.MODEL_NAME,
                 torch_dtype=torch.float16 if settings.DEVICE == "cuda" else torch.float32,
+                device_map="auto" if settings.DEVICE == "cuda" else None
             )
             # Load sentence embedder