Chris4K committed on
Commit
fff699d
·
verified ·
1 Parent(s): bdb5f54

Update services/model_service.py

Browse files
Files changed (1) hide show
  1. services/model_service.py +16 -1
services/model_service.py CHANGED
@@ -26,10 +26,25 @@ class ModelService:
26
  def _load_models(self):
27
  try:
28
  self.tokenizer = AutoTokenizer.from_pretrained(settings.MODEL_NAME)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  self.model = AutoModelForCausalLM.from_pretrained(
30
  settings.MODEL_NAME,
31
  torch_dtype=torch.float16 if settings.DEVICE == "cuda" else torch.float32,
32
- device_map="auto" if settings.DEVICE == "cuda" else None
 
33
  )
34
  self.embedder = SentenceTransformer(settings.EMBEDDER_MODEL)
35
  except Exception as e:
 
26
  def _load_models(self):
27
  try:
28
  self.tokenizer = AutoTokenizer.from_pretrained(settings.MODEL_NAME)
29
+
30
+ # Modify the model configuration to use a valid rope_scaling format
31
+ config = LlamaConfig.from_pretrained(model_name)
32
+ if hasattr(config, "rope_scaling") and isinstance(config.rope_scaling, dict):
33
+ config.rope_scaling = {
34
+ "type": "linear",
35
+ "factor": config.rope_scaling.get("factor", 32.0)
36
+ }
37
+
38
+ # Load model with updated configuration
39
+ #self.model = AutoModelForCausalLM.from_pretrained(model_name, config=config).to(device)
40
+
41
+
42
+
43
  self.model = AutoModelForCausalLM.from_pretrained(
44
  settings.MODEL_NAME,
45
  torch_dtype=torch.float16 if settings.DEVICE == "cuda" else torch.float32,
46
+ device_map="auto" if settings.DEVICE == "cuda" else None,
47
+ config=config
48
  )
49
  self.embedder = SentenceTransformer(settings.EMBEDDER_MODEL)
50
  except Exception as e: