Ahmet Kaan Sever committed
Commit d3c5563 · 2 parents: 5912286 79a1b57

Merge branch 'main' into aysu

src/deepeval/base_task.py CHANGED
@@ -1,171 +1,176 @@
- from abc import ABC, abstractmethod
- from datasets import load_dataset
- import os
- from dotenv import load_dotenv
- from transformers import AutoModelForCausalLM, AutoTokenizer, LogitsProcessorList, LogitsProcessor
- import torch
- from typing import List
- load_dotenv()
- HF_TOKEN=os.getenv("HF_TOKEN")
-
- class BaseTask(ABC):
-     _model_cache = {} # Class-level cache for models and tokenizers
-
-     def __init__(self, dataset_repo, model_name):
-         self.dataset_repo = dataset_repo
-         self.dataset = self.load_dataset_from_hf()
-         self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
-         self.model, self.tokenizer = self.get_cached_model(model_name, self.device)
-
-
-     @classmethod
-     def get_cached_model(cls, model_name, device):
-         """Ensures the same model and tokenizer are used for every instance of subclasses."""
-         if model_name not in cls._model_cache:
-             cls._model_cache[model_name] = cls.load_model(model_name, device)
-         return cls._model_cache[model_name]
-
-     @staticmethod
-     def load_model(model_name: str, device):
-         """Loads model and tokenizer once and caches it."""
-         model = AutoModelForCausalLM.from_pretrained(
-             model_name,
-             torch_dtype=torch.float16,
-             device_map=device,
-             token=HF_TOKEN, # Replace with actual token
-         )
-         tokenizer = AutoTokenizer.from_pretrained(model_name)
-         return model, tokenizer
-
-
-     def generate_response_mcqa(self, msg, max_new_tokens=1, choices: List[str]=[]):
-         # Ensure the tokenizer has a padding token
-         if self.tokenizer.pad_token is None:
-             self.tokenizer.pad_token = self.tokenizer.eos_token # Use EOS token as PAD token
-
-         inputs = self.tokenizer(msg, return_tensors="pt", padding=True, truncation=True)
-         input_ids = inputs.input_ids.to(self.model.device)
-         attention_mask = inputs.attention_mask.to(self.model.device)
-
-         if self.model.config.pad_token_id is None:
-             self.model.config.pad_token_id = self.tokenizer.eos_token_id
-
-         # Get token IDs for answer choices
-         valid_answers = choices
-         valid_token_ids = [self.tokenizer.convert_tokens_to_ids(ans) for ans in valid_answers]
-
-         class MultipleChoiceLogitsProcessor:
-             def __call__(self, input_ids, scores):
-                 mask = torch.full_like(scores, float("-inf"))
-                 mask[:, valid_token_ids] = scores[:, valid_token_ids] # Allow only valid tokens
-                 return mask
-
-         logits_processor = LogitsProcessorList([MultipleChoiceLogitsProcessor()])
-
-         output = self.model.generate(
-             input_ids,
-             attention_mask=attention_mask, # Fix: Pass attention_mask to avoid warning
-             max_new_tokens=max_new_tokens,
-             logits_processor=logits_processor
-         )
-         answer = self.tokenizer.decode(output[0][-1])
-
-         return answer
-
-     def generate_response_mcqa_multi_token(self, msg, max_new_tokens=5, choices: list = []):
-         """
-         Handles multiple-choice questions where answers might have multiple tokens.
-         """
-         # Ensure tokenizer has proper special tokens set
-         if self.tokenizer.pad_token is None:
-             self.tokenizer.pad_token = self.tokenizer.eos_token
-
-         if self.model.config.pad_token_id is None:
-             self.model.config.pad_token_id = self.tokenizer.pad_token_id
-
-         chat = [
-             {"role": "user", "content": "You are a multiple choice question-answering chatbot. Do not give an answer that is not included in the choices. Only answer with letters like A, B, C, D..."},
-             {"role": "assistant", "content": "I am ready to answer your questions. Feel free to ask anything.\n"},
-             {"role": "user", "content": f"{msg}"},
-         ]
-         formatted_chat = self.tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
-         print(formatted_chat)
-         inputs = self.tokenizer(formatted_chat, return_tensors="pt", padding=True, truncation=True)
-         input_ids = inputs.input_ids.to(self.model.device)
-         attention_mask = inputs.attention_mask.to(self.model.device)
-
-         # Generate the sequence of letters starting from 'A'
-         letters = [chr(ord('A') + i) for i in range(len(choices))] # Create option letters A, B, C, D, E, ...
-         encoded_choices = [self.tokenizer.encode(letter, add_special_tokens=False) for letter in letters]
-         flattened_encoded_choices = [item for sublist in encoded_choices for item in sublist] # Flatten the list
-         print(flattened_encoded_choices)
-
-         allowed_tokens = flattened_encoded_choices
-         allowed_tokens += self.get_chat_template_tokens() # Get the special chat tokens
-         allowed_token_ids = set(allowed_tokens) # Ensure uniqueness
-
-         # Custom LogitsProcessor to restrict generation
-         class RestrictToABCDLogitsProcessor(LogitsProcessor):
-             def __call__(self, input_ids, scores):
-                 mask = torch.full_like(scores, float("-inf")) # Block all tokens
-                 mask[:, list(allowed_token_ids)] = scores[:, list(allowed_token_ids)] # Allow only A, B, C, D tokens
-                 return mask
-         logits_processor = LogitsProcessorList([RestrictToABCDLogitsProcessor()])
-
-         # Generate response
-         output = self.model.generate(
-             input_ids,
-             do_sample=True,
-             attention_mask=attention_mask,
-             max_new_tokens=max_new_tokens,
-             eos_token_id=self.tokenizer.eos_token_id,
-             pad_token_id=self.tokenizer.pad_token_id,
-             temperature=0.4,
-             logits_processor=logits_processor,
-         )
-         generated_ids = output[0] # The generated sequence including the prompt
-         generated_tokens = generated_ids[len(input_ids[0]):] # Exclude the input_ids part
-         generated_text = self.tokenizer.decode(generated_tokens, skip_special_tokens=True)
-         return generated_text
-
-     def generate_response(self, prompt: str, max_new_tokens: int = 100) -> str:
-
-         if self.tokenizer.pad_token is None:
-             self.tokenizer.pad_token = self.tokenizer.eos_token
-
-         inputs = self.tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)
-         input_ids = inputs.input_ids.to(self.model.device)
-         attention_mask = inputs.attention_mask.to(self.model.device)
-
-         if self.model.config.pad_token_id is None:
-             self.model.config.pad_token_id = self.tokenizer.eos_token_id
-
-         output = self.model.generate(
-             input_ids,
-             attention_mask=attention_mask,
-             max_new_tokens=max_new_tokens,
-             do_sample=True,
-             temperature=0.7,
-         )
-         result = self.tokenizer.decode(output[0], skip_special_tokens=True)
-         return result
-
-     def get_chat_template_tokens(self):
-         allowed_token_chat = [
-             {"role": "user", "content": ""},
-             {"role": "assistant", "content": ""}
-         ]
-         allowed_special_tokens = self.tokenizer.apply_chat_template(allowed_token_chat, tokenize=True)
-         return allowed_special_tokens
-
-     @abstractmethod
-     def load_dataset_from_hf(self):
-         """
-         Define your own loading method if needed.
-         :return: Dataset
-         """
-         return load_dataset(self.dataset_repo, token=HF_TOKEN, split="train")
-
-     @abstractmethod
-     def evaluate(self):
          pass
 
+ from abc import ABC, abstractmethod
+ from datasets import load_dataset
+ import os
+ from dotenv import load_dotenv
+ from transformers import AutoModelForCausalLM, AutoTokenizer, LogitsProcessorList, LogitsProcessor
+ import torch
+ from typing import List
+ load_dotenv()
+ HF_TOKEN=os.getenv("HF_TOKEN")
+
+ class BaseTask(ABC):
+     _model_cache = {} # Class-level cache for models and tokenizers
+
+     def __init__(self, dataset_repo, model_name):
+         self.dataset_repo = dataset_repo
+         self.dataset = self.load_dataset_from_hf()
+         self.device = "cuda" if torch.cuda.is_available() else "cpu"
+         self.model, self.tokenizer = self.get_cached_model(model_name, self.device)
+
+
+     @classmethod
+     def get_cached_model(cls, model_name, device):
+         """Ensures the same model and tokenizer are used for every instance of subclasses."""
+         if model_name not in cls._model_cache:
+             cls._model_cache[model_name] = cls.load_model(model_name, device)
+         return cls._model_cache[model_name]
+
+     @staticmethod
+     def load_model(model_name: str, device):
+         """Loads model and tokenizer once and caches it."""
+         print(f"Loading model: {model_name}")
+         model = AutoModelForCausalLM.from_pretrained(
+             model_name,
+             torch_dtype=torch.float16,
+             device_map=device,
+             token=HF_TOKEN, # Replace with actual token
+         )
+         print("Model loaded.")
+         tokenizer = AutoTokenizer.from_pretrained(model_name)
+         return model, tokenizer
+
+
+     def generate_response_mcqa(self, msg, max_new_tokens=1, choices: List[str]=[]):
+         # Ensure the tokenizer has a padding token
+         if self.tokenizer.pad_token is None:
+             self.tokenizer.pad_token = self.tokenizer.eos_token # Use EOS token as PAD token
+
+         inputs = self.tokenizer(msg, return_tensors="pt", padding=True, truncation=True)
+         input_ids = inputs.input_ids.to(self.model.device)
+         attention_mask = inputs.attention_mask.to(self.model.device)
+
+         if self.model.config.pad_token_id is None:
+             self.model.config.pad_token_id = self.tokenizer.eos_token_id
+
+         # Get token IDs for answer choices
+         valid_answers = choices
+         valid_token_ids = [self.tokenizer.convert_tokens_to_ids(ans) for ans in valid_answers]
+
+         class MultipleChoiceLogitsProcessor:
+             def __call__(self, input_ids, scores):
+                 mask = torch.full_like(scores, float("-inf"))
+                 mask[:, valid_token_ids] = scores[:, valid_token_ids] # Allow only valid tokens
+                 return mask
+
+         logits_processor = LogitsProcessorList([MultipleChoiceLogitsProcessor()])
+
+         output = self.model.generate(
+             input_ids,
+             attention_mask=attention_mask, # Fix: Pass attention_mask to avoid warning
+             max_new_tokens=max_new_tokens,
+             logits_processor=logits_processor
+         )
+         answer = self.tokenizer.decode(output[0][-1])
+
+         return answer
+
+     def generate_response_mcqa_multi_token(self, msg, max_new_tokens=5, choices: list = []):
+         """
+         Handles multiple-choice questions where answers might have multiple tokens.
+         """
+         # Ensure tokenizer has proper special tokens set
+         if self.tokenizer.pad_token is None:
+             self.tokenizer.pad_token = self.tokenizer.eos_token
+
+         if self.model.config.pad_token_id is None:
+             self.model.config.pad_token_id = self.tokenizer.pad_token_id
+
+         chat = [
+             {"role": "user", "content": "You are a multiple choice question-answering chatbot. Do not give an answer that is not included in the choices. Only answer with letters like A, B, C, D..."},
+             {"role": "assistant", "content": "I am ready to answer your questions. Feel free to ask anything.\n"},
+             {"role": "user", "content": f"{msg}"},
+         ]
+         formatted_chat = self.tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
+         print(formatted_chat)
+         inputs = self.tokenizer(formatted_chat, return_tensors="pt", padding=True, truncation=True)
+         input_ids = inputs.input_ids.to(self.model.device)
+         attention_mask = inputs.attention_mask.to(self.model.device)
+
+         # Generate the sequence of letters starting from 'A'
+         letters = [chr(ord('A') + i) for i in range(len(choices))] # Create option letters A, B, C, D, E, ...
+         encoded_choices = [self.tokenizer.encode(letter, add_special_tokens=False) for letter in letters]
+         flattened_encoded_choices = [item for sublist in encoded_choices for item in sublist] # Flatten the list
+         print(flattened_encoded_choices)
+
+         allowed_tokens = flattened_encoded_choices
+         allowed_tokens += self.get_chat_template_tokens() # Get the special chat tokens
+         allowed_token_ids = set(allowed_tokens) # Ensure uniqueness
+
+         # Custom LogitsProcessor to restrict generation
+         class RestrictToABCDLogitsProcessor(LogitsProcessor):
+             def __call__(self, input_ids, scores):
+                 mask = torch.full_like(scores, float("-inf")) # Block all tokens
+                 mask[:, list(allowed_token_ids)] = scores[:, list(allowed_token_ids)] # Allow only A, B, C, D tokens
+                 return mask
+         logits_processor = LogitsProcessorList([RestrictToABCDLogitsProcessor()])
+
+         # Generate response
+         output = self.model.generate(
+             input_ids,
+             do_sample=True,
+             attention_mask=attention_mask,
+             max_new_tokens=max_new_tokens,
+             eos_token_id=self.tokenizer.eos_token_id,
+             pad_token_id=self.tokenizer.pad_token_id,
+             temperature=0.4,
+             logits_processor=logits_processor,
+         )
+         generated_ids = output[0] # The generated sequence including the prompt
+         generated_tokens = generated_ids[len(input_ids[0]):] # Exclude the input_ids part
+         generated_text = self.tokenizer.decode(generated_tokens, skip_special_tokens=True)
+         return generated_text
+
+     def generate_response(self, prompt: str, max_new_tokens: int = 100) -> str:
+
+         if self.tokenizer.pad_token is None:
+             self.tokenizer.pad_token = self.tokenizer.eos_token
+
+         inputs = self.tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)
+         input_ids = inputs.input_ids.to(self.model.device)
+         attention_mask = inputs.attention_mask.to(self.model.device)
+
+         if self.model.config.pad_token_id is None:
+             self.model.config.pad_token_id = self.tokenizer.eos_token_id
+
+         output = self.model.generate(
+             input_ids,
+             attention_mask=attention_mask,
+             max_new_tokens=max_new_tokens,
+             do_sample=True,
+             temperature=0.7,
+         )
+         result = self.tokenizer.decode(output[0], skip_special_tokens=True)
+         return result
+
+     def get_chat_template_tokens(self):
+         allowed_token_chat = [
+             {"role": "user", "content": ""},
+             {"role": "assistant", "content": ""}
+         ]
+         allowed_special_tokens = self.tokenizer.apply_chat_template(allowed_token_chat, tokenize=True)
+         return allowed_special_tokens
+
+     @abstractmethod
+     def load_dataset_from_hf(self):
+         """
+         Define your own loading method if needed.
+         :return: Dataset
+         """
+         print("Loading dataset from Hugging Face.")
+         dataset= load_dataset(self.dataset_repo, token=HF_TOKEN, split="train")
+         print("Dataset loaded.")
+         return dataset
+
+     @abstractmethod
+     def evaluate(self):
          pass
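
For orientation, a minimal sketch of how a concrete task is expected to plug into the BaseTask API shown above. The class name, dataset repo, and column names ("question", "choices", "answer_index") are illustrative assumptions, not part of this commit.

# Hypothetical example only: a minimal BaseTask subclass.
from src.deepeval.base_task import BaseTask

class ExampleMCQATask(BaseTask):
    def __init__(self, model_name: str):
        # BaseTask.__init__ loads the dataset and the cached model/tokenizer.
        super().__init__(dataset_repo="example-org/example-mcqa", model_name=model_name)

    def load_dataset_from_hf(self):
        # Reuse the default loader defined on the abstract method.
        return super().load_dataset_from_hf()

    def evaluate(self):
        correct = 0
        for row in self.dataset:
            choices = row["choices"]  # assumed column name
            letters = [chr(ord("A") + i) for i in range(len(choices))]
            question = row["question"] + "\n" + "\n".join(
                f"{letter}. {choice}" for letter, choice in zip(letters, choices)
            )
            answer = self.generate_response_mcqa_multi_token(question, choices=choices, max_new_tokens=2)
            if answer.strip().startswith(letters[row["answer_index"]]):
                correct += 1
        return {"accuracy": correct / len(self.dataset)}

Because get_cached_model is a classmethod backed by the class-level _model_cache, several such tasks instantiated with the same model_name share a single model and tokenizer.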
src/deepeval/turkish_general_knowledge_task.py CHANGED
@@ -42,7 +42,7 @@ class TurkishGeneralKnowledgeTask(BaseTask):
 
              #"""Wrap the result between final_answer tags. For example: <final_answer/> letter <final_answer>.
              #"""
-             model_answer = self.generate_response_mcqa_multi_token(message, choices=choices, max_new_tokens=30)
+             model_answer = self.generate_response_mcqa_multi_token(message, choices=choices, max_new_tokens=2)
              responses.append(model_answer)
              print(f"Correct Answer: {choices[answer_index]}")
              print(f"Model Answer: {model_answer}")
svc/router.py CHANGED
@@ -10,6 +10,7 @@ import os
  import json
  from src.deepeval.deepeval_task_manager import DeepEvalTaskManager
  import torch
+ import gc
  from time import time
  from huggingface_hub import HfApi, ModelInfo
 
@@ -111,9 +112,15 @@ async def deep_eval_suite(request: DeepEvalSuiteRequest):
          "end_time": end_time
      }
 
-
      json_results = json.dumps(tbr_dict)
 
+     #Free up VRAM
+     torch.cuda.empty_cache()
+
+     #Free up RAM
+     des = None
+     gc.collect()
+
      return TaskResponse(results=json_results)
 
 
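
Note that torch.cuda.empty_cache() only returns memory that is no longer referenced, and des = None merely drops the route's local reference before gc.collect() runs; any model still held in BaseTask._model_cache stays resident on the GPU. A minimal sketch, assuming one also wanted to release that cache between suites (not something this commit does):

# Assumption / illustration only: a fuller release between evaluation suites.
import gc
import torch
from src.deepeval.base_task import BaseTask

def release_eval_memory():
    BaseTask._model_cache.clear()  # drop cached model/tokenizer references
    gc.collect()                   # reclaim host RAM for the dropped objects
    if torch.cuda.is_available():
        torch.cuda.empty_cache()   # return now-unreferenced CUDA blocks to the driver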