ciyidogan committed on
Commit
edc8519
·
verified ·
1 Parent(s): 12da146

Update llm_model.py

Browse files
Files changed (1) hide show
  1. llm_model.py +5 -36
llm_model.py CHANGED
@@ -11,14 +11,10 @@ class Message(BaseModel):
11
 
12
  class LLMModel:
13
  def __init__(self):
14
- self.model = None # ana model
15
  self.tokenizer = None
16
  self.eos_token_id = None
17
 
18
- self.intent_model = None # intent modeli
19
- self.intent_tokenizer = None
20
- self.intent_label2id = None
21
-
22
  def setup(self, s_config, project_config, project_path):
23
  try:
24
  log("🧠 LLMModel setup() başladı")
@@ -32,12 +28,10 @@ class LLMModel:
32
  log(f"📦 Hugging Face cloud modeli yükleniyor: {model_base}")
33
  self.tokenizer = AutoTokenizer.from_pretrained(model_base, token=token, use_fast=False)
34
  self.model = AutoModelForCausalLM.from_pretrained(model_base, token=token, torch_dtype=torch.float32).to(device)
35
- elif s_config.work_mode in ["cloud", "on-prem"]:
36
  log(f"📦 Model indiriliyor veya yükleniyor: {model_base}")
37
  self.tokenizer = AutoTokenizer.from_pretrained(model_base, use_fast=False)
38
  self.model = AutoModelForCausalLM.from_pretrained(model_base, torch_dtype=torch.float32).to(device)
39
- else:
40
- raise Exception(f"Bilinmeyen work_mode: {s_config.work_mode}")
41
 
42
  self.tokenizer.pad_token = self.tokenizer.pad_token or self.tokenizer.eos_token
43
  self.model.config.pad_token_id = self.tokenizer.pad_token_id
@@ -49,20 +43,7 @@ class LLMModel:
49
  log(f"❌ LLMModel setup() hatası: {e}")
50
  traceback.print_exc()
51
 
52
- def load_intent_model(self, model_path):
53
- try:
54
- log(f"🔧 Intent modeli yükleniyor: {model_path}")
55
- self.intent_tokenizer = AutoTokenizer.from_pretrained(model_path)
56
- self.intent_model = AutoModelForSequenceClassification.from_pretrained(model_path)
57
- with open(os.path.join(model_path, "label2id.json")) as f:
58
- self.intent_label2id = json.load(f)
59
- log("✅ Intent modeli yüklemesi tamamlandı.")
60
- except Exception as e:
61
- log(f"❌ Intent modeli yükleme hatası: {e}")
62
- traceback.print_exc()
63
-
64
- async def generate_response(self, text, project_config):
65
- messages = [{"role": "user", "content": text}]
66
  encodeds = self.tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)
67
  input_ids = encodeds.to(self.model.device)
68
  attention_mask = (input_ids != self.tokenizer.pad_token_id).long()
@@ -71,7 +52,7 @@ class LLMModel:
71
  output = self.model.generate(
72
  input_ids=input_ids,
73
  attention_mask=attention_mask,
74
- max_new_tokens=128,
75
  do_sample=project_config["use_sampling"],
76
  eos_token_id=self.eos_token_id,
77
  pad_token_id=self.tokenizer.pad_token_id,
@@ -79,17 +60,5 @@ class LLMModel:
79
  output_scores=True
80
  )
81
 
82
- if not project_config["use_sampling"]:
83
- scores = torch.stack(output.scores, dim=1)
84
- probs = torch.nn.functional.softmax(scores[0], dim=-1)
85
- top_conf = probs.max().item()
86
- else:
87
- top_conf = None
88
-
89
  decoded = self.tokenizer.decode(output.sequences[0], skip_special_tokens=True).strip()
90
- for tag in ["assistant", "<|im_start|>assistant"]:
91
- start = decoded.find(tag)
92
- if start != -1:
93
- decoded = decoded[start + len(tag):].strip()
94
- break
95
- return decoded, top_conf
 
11
 
12
  class LLMModel:
13
  def __init__(self):
14
+ self.model = None
15
  self.tokenizer = None
16
  self.eos_token_id = None
17
 
 
 
 
 
18
  def setup(self, s_config, project_config, project_path):
19
  try:
20
  log("🧠 LLMModel setup() başladı")
 
28
  log(f"📦 Hugging Face cloud modeli yükleniyor: {model_base}")
29
  self.tokenizer = AutoTokenizer.from_pretrained(model_base, token=token, use_fast=False)
30
  self.model = AutoModelForCausalLM.from_pretrained(model_base, token=token, torch_dtype=torch.float32).to(device)
31
+ else:
32
  log(f"📦 Model indiriliyor veya yükleniyor: {model_base}")
33
  self.tokenizer = AutoTokenizer.from_pretrained(model_base, use_fast=False)
34
  self.model = AutoModelForCausalLM.from_pretrained(model_base, torch_dtype=torch.float32).to(device)
 
 
35
 
36
  self.tokenizer.pad_token = self.tokenizer.pad_token or self.tokenizer.eos_token
37
  self.model.config.pad_token_id = self.tokenizer.pad_token_id
 
43
  log(f"❌ LLMModel setup() hatası: {e}")
44
  traceback.print_exc()
45
 
46
+ async def generate_response_with_messages(self, messages, project_config):
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  encodeds = self.tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)
48
  input_ids = encodeds.to(self.model.device)
49
  attention_mask = (input_ids != self.tokenizer.pad_token_id).long()
 
52
  output = self.model.generate(
53
  input_ids=input_ids,
54
  attention_mask=attention_mask,
55
+ max_new_tokens=256,
56
  do_sample=project_config["use_sampling"],
57
  eos_token_id=self.eos_token_id,
58
  pad_token_id=self.tokenizer.pad_token_id,
 
60
  output_scores=True
61
  )
62
 
 
 
 
 
 
 
 
63
  decoded = self.tokenizer.decode(output.sequences[0], skip_special_tokens=True).strip()
64
+ return decoded