Chris4K committed
Commit 477aabb · verified · Parent: c0216a8

Update services/model_service.py

Files changed (1): services/model_service.py (+24 -16)
services/model_service.py CHANGED
@@ -25,22 +25,30 @@ class ModelService:
         # Load tokenizer
         self.tokenizer = AutoTokenizer.from_pretrained(settings.MODEL_NAME)
 
-        # Load model configuration
-        config = LlamaConfig.from_pretrained(settings.MODEL_NAME)
-
-        # Check quantization type and adjust accordingly
-        if config.get('quantization_config', {}).get('type', '') == 'compressed-tensors':
-            logger.warning("Quantization type 'compressed-tensors' is not supported. Switching to 'bitsandbytes_8bit'.")
-            config.quantization_config['type'] = 'bitsandbytes_8bit'
-
-        # Load model with the updated configuration
-        self.model = AutoModelForCausalLM.from_pretrained(
-            settings.MODEL_NAME,
-            config=config,
-            torch_dtype=torch.float16 if settings.DEVICE == "cuda" else torch.float32,
-            device_map="auto" if settings.DEVICE == "cuda" else None
-        )
-
+        ## Load model configuration
+        #config = LlamaConfig.from_pretrained(settings.MODEL_NAME)
+
+        ## Check quantization type and adjust accordingly
+        #if config.get('quantization_config', {}).get('type', '') == 'compressed-tensors':
+        #    logger.warning("Quantization type 'compressed-tensors' is not supported. Switching to 'bitsandbytes_8bit'.")
+        #    config.quantization_config['type'] = 'bitsandbytes_8bit'
+
+        ## Load model with the updated configuration
+        #self.model = AutoModelForCausalLM.from_pretrained(
+        #    settings.MODEL_NAME,
+        #    config=config,
+        #    torch_dtype=torch.float16 if settings.DEVICE == "cuda" else torch.float32,
+        #    device_map="auto" if settings.DEVICE == "cuda" else None
+        #)
+
+        #-----
+        # Load Llama 3.2 model
+        model_name = settings.MODEL_NAME  # "meta-llama/Llama-3.2-3B-Instruct"  # replace with the exact model path
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        #model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.float16)
+        self.model = AutoModelForCausalLM.from_pretrained(model_name, device_map=None, torch_dtype=torch.float32)
+
+
         # Load sentence embedder
         self.embedder = SentenceTransformer(settings.EMBEDDER_MODEL)
 
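
For context, a minimal sketch of the loader as it stands after this commit. The import path core.config and the __init__ placement are assumptions from the surrounding context, not part of the diff; the diff itself only shows settings.MODEL_NAME, settings.EMBEDDER_MODEL, and the method body.

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer

from core.config import settings  # assumed location of the settings object


class ModelService:
    def __init__(self):
        # Load tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(settings.MODEL_NAME)

        # Load Llama 3.2 model on the default device (CPU): device_map=None
        # skips accelerator placement, and float32 avoids half-precision ops
        # that are poorly supported on CPU
        self.model = AutoModelForCausalLM.from_pretrained(
            settings.MODEL_NAME,
            device_map=None,
            torch_dtype=torch.float32,
        )

        # Load sentence embedder
        self.embedder = SentenceTransformer(settings.EMBEDDER_MODEL)

Note that the added line tokenizer = AutoTokenizer.from_pretrained(model_name) in the diff loads a second tokenizer into a local variable, duplicating the self.tokenizer created two lines earlier; the sketch above drops it.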