Update llm_model.py
llm_model.py  CHANGED  (+5, -36)
@@ -11,14 +11,10 @@ class Message(BaseModel):
 
 class LLMModel:
     def __init__(self):
-        self.model = None
+        self.model = None
         self.tokenizer = None
         self.eos_token_id = None
 
-        self.intent_model = None  # intent model
-        self.intent_tokenizer = None
-        self.intent_label2id = None
-
     def setup(self, s_config, project_config, project_path):
         try:
             log("🧠 LLMModel setup() started")
@@ -32,12 +28,10 @@ class LLMModel:
                 log(f"📦 Loading Hugging Face cloud model: {model_base}")
                 self.tokenizer = AutoTokenizer.from_pretrained(model_base, token=token, use_fast=False)
                 self.model = AutoModelForCausalLM.from_pretrained(model_base, token=token, torch_dtype=torch.float32).to(device)
-
+            else:
                 log(f"📦 Downloading or loading model: {model_base}")
                 self.tokenizer = AutoTokenizer.from_pretrained(model_base, use_fast=False)
                 self.model = AutoModelForCausalLM.from_pretrained(model_base, torch_dtype=torch.float32).to(device)
-            else:
-                raise Exception(f"Unknown work_mode: {s_config.work_mode}")
 
             self.tokenizer.pad_token = self.tokenizer.pad_token or self.tokenizer.eos_token
             self.model.config.pad_token_id = self.tokenizer.pad_token_id
@@ -49,20 +43,7 @@ class LLMModel:
             log(f"❌ LLMModel setup() error: {e}")
             traceback.print_exc()
 
-    def
-        try:
-            log(f"🔧 Loading intent model: {model_path}")
-            self.intent_tokenizer = AutoTokenizer.from_pretrained(model_path)
-            self.intent_model = AutoModelForSequenceClassification.from_pretrained(model_path)
-            with open(os.path.join(model_path, "label2id.json")) as f:
-                self.intent_label2id = json.load(f)
-            log("✅ Intent model loading completed.")
-        except Exception as e:
-            log(f"❌ Intent model loading error: {e}")
-            traceback.print_exc()
-
-    async def generate_response(self, text, project_config):
-        messages = [{"role": "user", "content": text}]
+    async def generate_response_with_messages(self, messages, project_config):
         encodeds = self.tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)
         input_ids = encodeds.to(self.model.device)
         attention_mask = (input_ids != self.tokenizer.pad_token_id).long()
@@ -71,7 +52,7 @@ class LLMModel:
         output = self.model.generate(
             input_ids=input_ids,
             attention_mask=attention_mask,
-            max_new_tokens=
+            max_new_tokens=256,
             do_sample=project_config["use_sampling"],
             eos_token_id=self.eos_token_id,
             pad_token_id=self.tokenizer.pad_token_id,
@@ -79,17 +60,5 @@ class LLMModel:
             output_scores=True
         )
 
-        if not project_config["use_sampling"]:
-            scores = torch.stack(output.scores, dim=1)
-            probs = torch.nn.functional.softmax(scores[0], dim=-1)
-            top_conf = probs.max().item()
-        else:
-            top_conf = None
-
         decoded = self.tokenizer.decode(output.sequences[0], skip_special_tokens=True).strip()
-
-        start = decoded.find(tag)
-        if start != -1:
-            decoded = decoded[start + len(tag):].strip()
-            break
-        return decoded, top_conf
+        return decoded
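The new generate_response_with_messages entry point takes a pre-built chat messages list instead of a bare user string, so callers now control the full conversation history themselves. A minimal usage sketch, assuming an LLMModel instance on which setup() has already run; the demo wrapper, the message contents, and the one-key project_config are hypothetical, since "use_sampling" is the only key visible in this diff:

    import asyncio

    async def demo(llm):
        # Chat-format history; apply_chat_template renders it with the model's
        # own prompt template and appends the generation prompt.
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Hello!"},
        ]
        project_config = {"use_sampling": False}  # assumed minimal shape
        reply = await llm.generate_response_with_messages(messages, project_config)
        print(reply)

    # asyncio.run(demo(llm))  # llm: an LLMModel whose setup(...) has completed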
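The deletions also drop the greedy-decoding confidence that the old generate_response returned alongside the text. For reference, the same idea as a standalone helper; this mirrors the removed lines, and the helper name is hypothetical. It assumes generate() was called with output_scores=True and return_dict_in_generate=True, which the output.sequences / output.scores accesses in the surrounding code imply:

    import torch

    def greedy_confidence(output):
        # output.scores holds one [batch, vocab] logits tensor per generated token.
        scores = torch.stack(output.scores, dim=1)              # [batch, steps, vocab]
        probs = torch.nn.functional.softmax(scores[0], dim=-1)  # first sequence
        # Highest single-token probability across all steps, as the removed code computed.
        return probs.max().item()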