ctoraman committed
Commit 043565e · verified · 1 Parent(s): 2094c7b

Update src/deepeval/base_task.py

Files changed (1)
  1. src/deepeval/base_task.py +275 -273
src/deepeval/base_task.py CHANGED
@@ -1,274 +1,276 @@
- from abc import ABC, abstractmethod
- from datasets import load_dataset
- import os
- from dotenv import load_dotenv
- import openai
- from peft import PeftModel
- from transformers import AutoModelForCausalLM, AutoTokenizer, LogitsProcessorList, LogitsProcessor
- import torch
- from typing import List
- from datetime import datetime
- load_dotenv()
- HF_TOKEN=os.getenv("HF_TOKEN")
- OPENAI_KEY = os.getenv("OPENAI_API_KEY")
-
- class BaseTask(ABC):
-     _model_cache = {} # Class-level cache for models and tokenizers
-
-     def __init__(self, dataset_repo, model_name):
-         self.dataset_repo = dataset_repo
-         self.dataset = self.load_dataset_from_hf()
-
-         device_count = torch.cuda.device_count()
-         if device_count > 1:
-             self.device = "auto"
-             print(f"Using {device_count} GPUs with auto config.")
-         elif device_count == 1:
-             self.device = "cuda"
-             print(f"Using {device_count} GPU with cuda config.")
-         else:
-             self.device = "cpu"
-             print("No GPU found. Using CPU.")
-
-         self.model, self.tokenizer = self.get_cached_model(model_name, self.device)
-         openai.api_key = OPENAI_KEY
-
-
-     @classmethod
-     def get_cached_model(cls, model_name, device):
-         """Ensures the same model and tokenizer are used for every instance of subclasses."""
-         if model_name not in cls._model_cache:
-             cls._model_cache[model_name] = cls.load_model(model_name, device)
-         return cls._model_cache[model_name]
-
-     @staticmethod
-     def load_model(model_name: str, device):
-         """Loads model and tokenizer once and caches it."""
-         print(f"Loading model: {model_name}")
-         start_time = datetime.now()
-         model = AutoModelForCausalLM.from_pretrained(
-             model_name,
-             torch_dtype=torch.float16,
-             device_map=device,
-             token=HF_TOKEN, # Replace with actual token
-         )
-         end_time = datetime.now()
-         print(f"Model loaded in {(end_time - start_time).seconds} seconds.")
-         print("Model loaded.")
-         tokenizer = AutoTokenizer.from_pretrained(model_name)
-         return model, tokenizer
-
-     # @staticmethod
-     # def load_model(model_name: str, device, weight, dtype, base_model):
-     #     """Loads model and tokenizer once and caches it."""
-     #     print(f"Loading model: {model_name}")
-     #     start_time = datetime.now()
-     #     if weight == "Adapter":
-     #         base_model_1 = AutoModelForCausalLM.from_pretrained(
-     #             base_model,
-     #             torch_dtype=dtype,
-     #             device_map=device,
-     #             token=HF_TOKEN, # Replace with actual token
-     #         )
-     #         model = PeftModel.from_pretrained(base_model_1, base_model)
-     #         tokenizer = AutoTokenizer.from_pretrained(base_model)
-     #         end_time = datetime.now()
-     #     else:
-     #         model = AutoModelForCausalLM.from_pretrained(
-     #             model_name,
-     #             torch_dtype=dtype,
-     #             device_map=device,
-     #             token=HF_TOKEN, # Replace with actual token
-     #         )
-     #         tokenizer = AutoTokenizer.from_pretrained(model_name)
-     #         end_time = datetime.now()
-     #     print(f"Model loaded in {(end_time - start_time).seconds} seconds.")
-     #     print("Model loaded.")
-
-     #     return model, tokenizer
-
-
-     def generate_response_mcqa(self, msg, max_new_tokens=1, choices: List[str]=[]):
-         # Ensure the tokenizer has a padding token
-         if self.tokenizer.pad_token is None:
-             self.tokenizer.pad_token = self.tokenizer.eos_token # Use EOS token as PAD token
-
-         inputs = self.tokenizer(msg, return_tensors="pt", padding=True, truncation=True)
-         input_ids = inputs.input_ids
-         attention_mask = inputs.attention_mask
-
-         if self.model.config.pad_token_id is None:
-             self.model.config.pad_token_id = self.tokenizer.eos_token_id
-
-         # Get token IDs for answer choices
-         valid_answers = choices
-         valid_token_ids = [self.tokenizer.convert_tokens_to_ids(ans) for ans in valid_answers]
-
-         class MultipleChoiceLogitsProcessor:
-             def __call__(self, input_ids, scores):
-                 mask = torch.full_like(scores, float("-inf"))
-                 mask[:, valid_token_ids] = scores[:, valid_token_ids] # Allow only valid tokens
-                 return mask
-
-         logits_processor = LogitsProcessorList([MultipleChoiceLogitsProcessor()])
-
-         output = self.model.generate(
-             input_ids,
-             attention_mask=attention_mask, # Fix: Pass attention_mask to avoid warning
-             max_new_tokens=max_new_tokens,
-             logits_processor=logits_processor
-         )
-         answer = self.tokenizer.decode(output[0][-1])
-
-         return answer
-
-     def generate_response_mcqa_multi_token(self, msg, max_new_tokens=2, choices: list = []):
-         """
-         Handles multiple-choice questions where answers might have multiple tokens.
-         """
-         # Ensure tokenizer has proper special tokens set
-         if self.tokenizer.pad_token is None:
-             self.tokenizer.pad_token = self.tokenizer.eos_token
-
-         if self.model.config.pad_token_id is None:
-             self.model.config.pad_token_id = self.tokenizer.pad_token_id
-
-         chat = [
-             {"role": "user", "content": "You are a multiple choice question-answering chatbot. Do not give an answer that is not included in the choices. Only answer with letters like A, B, C, D..."},
-             {"role": "assistant", "content": "I am ready to answer your questions. Feel free to ask anything.\n"},
-             {"role": "user", "content": f"{msg}"},
-         ]
-         formatted_chat = self.tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
-         #print(formatted_chat)
-         inputs = self.tokenizer(formatted_chat, return_tensors="pt", padding=True, truncation=True)
-
-         if self.device == "auto":
-             input_ids = inputs.input_ids
-             attention_mask = inputs.attention_mask
-         else:
-             input_ids = inputs.input_ids.to(self.model.device)
-             attention_mask = inputs.attention_mask.to(self.model.device)
-
-         # Generate the sequence of letters starting from 'A'
-         letters = [chr(ord('A') + i) for i in range(len(choices))] # Create option letters A, B, C, D, E, ...
-         encoded_choices = [self.tokenizer.encode(letter, add_special_tokens=False) for letter in letters]
-         flattened_encoded_choices = [item for sublist in encoded_choices for item in sublist] # Flatten the list
-         #print(flattened_encoded_choices)
-
-         allowed_tokens = flattened_encoded_choices
-         allowed_tokens += self.get_chat_template_tokens() # Get the special chat tokens
-         allowed_token_ids = set(allowed_tokens) # Ensure uniqueness
-
-         # Custom LogitsProcessor to restrict generation
-         class RestrictToABCDLogitsProcessor(LogitsProcessor):
-             def __call__(self, input_ids, scores):
-                 mask = torch.full_like(scores, float("-inf")) # Block all tokens
-                 mask[:, list(allowed_token_ids)] = scores[:, list(allowed_token_ids)] # Allow only A, B, C, D tokens
-                 return mask
-         logits_processor = LogitsProcessorList([RestrictToABCDLogitsProcessor()])
-
-         # Generate response
-         output = self.model.generate(
-             input_ids,
-             do_sample=True,
-             attention_mask=attention_mask,
-             max_new_tokens=max_new_tokens,
-             eos_token_id=self.tokenizer.eos_token_id,
-             pad_token_id=self.tokenizer.pad_token_id,
-             temperature=0.4,
-             logits_processor=logits_processor,
-         )
-         generated_ids = output[0] # The generated sequence including the prompt
-         generated_tokens = generated_ids[len(input_ids[0]):] # Exclude the input_ids part
-         generated_text = self.tokenizer.decode(generated_tokens, skip_special_tokens=True)
-         return generated_text
-
-     def generate_response(self, prompt: str, max_new_tokens: int = 100) -> str:
-
-         if self.tokenizer.pad_token is None:
-             self.tokenizer.pad_token = self.tokenizer.eos_token
-
-         if self.model.config.pad_token_id is None:
-             self.model.config.pad_token_id = self.tokenizer.eos_token_id
-
-         chat = [
-             {"role": "user", "content": "You are a helpful AI assistant."},
-             {"role": "assistant", "content": "I am here to help you with any questions you may have."},
-             {"role": "user", "content": prompt},
-         ]
-
-         formatted_chat = self.tokenizer.apply_chat_template(
-             chat,
-             tokenize=False,
-             add_generation_prompt=True
-         )
-
-         inputs = self.tokenizer(formatted_chat, return_tensors="pt", padding=True, truncation=True)
-
-         if self.device == "auto":
-             input_ids = inputs.input_ids
-             attention_mask = inputs.attention_mask
-         else:
-             input_ids = inputs.input_ids.to(self.model.device)
-             attention_mask = inputs.attention_mask.to(self.model.device)
-
-         output = self.model.generate(
-             input_ids,
-             attention_mask=attention_mask,
-             max_new_tokens=max_new_tokens,
-             do_sample=True,
-             temperature=0.7,
-         )
-
-         generated_ids = output[0]
-         prompt_len = input_ids.shape[1]
-         generated_tokens = generated_ids[prompt_len:]
-         result = self.tokenizer.decode(generated_tokens, skip_special_tokens=True)
-         return result
-
-     def get_chat_template_tokens(self):
-         allowed_token_chat = [
-             {"role": "user", "content": ""},
-             {"role": "assistant", "content": ""}
-         ]
-         allowed_special_tokens = self.tokenizer.apply_chat_template(allowed_token_chat, tokenize=True)
-         return allowed_special_tokens
-
-     @abstractmethod
-     def load_dataset_from_hf(self):
-         """
-         Define your own loading method if needed.
-         :return: Dataset
-         """
-         print("Loading dataset from Hugging Face.")
-         start_time = datetime.now()
-         dataset= load_dataset(self.dataset_repo, token=HF_TOKEN, split="train")
-         print("Dataset loaded.")
-
-         # Load 50 from each dataset
-         # if len(dataset) > 50:
-         #     dataset = dataset.shuffle(seed=42).select(range(50))
-         end_time = datetime.now()
-         print(f"Dataset loaded in {(end_time - start_time).seconds} seconds.")
-         return dataset
-
-     def load_dataset_lmjudge_from_hf(self):
-         """
-         Define your own loading method if needed.
-         :return: Dataset
-         """
-         print("Loading dataset from Hugging Face.")
-         start_time = datetime.now()
-         dataset= load_dataset(self.dataset_repo, token=HF_TOKEN, split="train")
-         print("Dataset loaded.")
-
-         #Load 100 from each dataset
-         if len(dataset) > 100:
-             dataset = dataset.shuffle(seed=42).select(range(100))
-         end_time = datetime.now()
-         print(f"Dataset loaded in {(end_time - start_time).seconds} seconds.")
-         return dataset
-
-     @abstractmethod
-     def evaluate(self):
+ from abc import ABC, abstractmethod
+ from datasets import load_dataset
+ import os
+ from dotenv import load_dotenv
+ import openai
+ from peft import PeftModel
+ from transformers import AutoModelForCausalLM, AutoTokenizer, LogitsProcessorList, LogitsProcessor
+ import torch
+ from typing import List
+ from datetime import datetime
+ load_dotenv()
+ HF_TOKEN=os.getenv("HF_TOKEN")
+ OPENAI_KEY = os.getenv("OPENAI_API_KEY")
+
+ class BaseTask(ABC):
+     _model_cache = {} # Class-level cache for models and tokenizers
+
+     def __init__(self, dataset_repo, model_name):
+         self.dataset_repo = dataset_repo
+         self.dataset = self.load_dataset_from_hf()
+
+         device_count = torch.cuda.device_count()
+         if device_count > 1:
+             self.device = "auto"
+             print(f"Using {device_count} GPUs with auto config.")
+         elif device_count == 1:
+             self.device = "cuda"
+             print(f"Using {device_count} GPU with cuda config.")
+         else:
+             self.device = "cpu"
+             print("No GPU found. Using CPU.")
+
+         self.model, self.tokenizer = self.get_cached_model(model_name, self.device)
+         openai.api_key = OPENAI_KEY
+
+
+     @classmethod
+     def get_cached_model(cls, model_name, device):
+         """Ensures the same model and tokenizer are used for every instance of subclasses."""
+         if model_name not in cls._model_cache:
+             cls._model_cache[model_name] = cls.load_model(model_name, device)
+         return cls._model_cache[model_name]
+
+     @staticmethod
+     def load_model(model_name: str, device):
+         """Loads model and tokenizer once and caches it."""
+         print(f"Loading model: {model_name}")
+         start_time = datetime.now()
+         model = AutoModelForCausalLM.from_pretrained(
+             model_name,
+             torch_dtype=torch.float16,
+             device_map=device,
+             token=HF_TOKEN, # Replace with actual token
+         )
+         end_time = datetime.now()
+         print(f"Model loaded in {(end_time - start_time).seconds} seconds.")
+         print("Model loaded.")
+         tokenizer = AutoTokenizer.from_pretrained(model_name)
+         return model, tokenizer
+
+     # @staticmethod
+     # def load_model(model_name: str, device, weight, dtype, base_model):
+     #     """Loads model and tokenizer once and caches it."""
+     #     print(f"Loading model: {model_name}")
+     #     start_time = datetime.now()
+     #     if weight == "Adapter":
+     #         base_model_1 = AutoModelForCausalLM.from_pretrained(
+     #             base_model,
+     #             torch_dtype=dtype,
+     #             device_map=device,
+     #             token=HF_TOKEN, # Replace with actual token
+     #         )
+     #         model = PeftModel.from_pretrained(base_model_1, base_model)
+     #         tokenizer = AutoTokenizer.from_pretrained(base_model)
+     #         end_time = datetime.now()
+     #     else:
+     #         model = AutoModelForCausalLM.from_pretrained(
+     #             model_name,
+     #             torch_dtype=dtype,
+     #             device_map=device,
+     #             token=HF_TOKEN, # Replace with actual token
+     #         )
+     #         tokenizer = AutoTokenizer.from_pretrained(model_name)
+     #         end_time = datetime.now()
+     #     print(f"Model loaded in {(end_time - start_time).seconds} seconds.")
+     #     print("Model loaded.")
+
+     #     return model, tokenizer
+
+
+     def generate_response_mcqa(self, msg, max_new_tokens=1, choices: List[str]=[]):
+         # Ensure the tokenizer has a padding token
+         if self.tokenizer.pad_token is None:
+             self.tokenizer.pad_token = self.tokenizer.eos_token # Use EOS token as PAD token
+
+         inputs = self.tokenizer(msg, return_tensors="pt", padding=True, truncation=True)
+         input_ids = inputs.input_ids
+         attention_mask = inputs.attention_mask
+
+         if self.model.config.pad_token_id is None:
+             self.model.config.pad_token_id = self.tokenizer.eos_token_id
+
+         # Get token IDs for answer choices
+         valid_answers = choices
+         valid_token_ids = [self.tokenizer.convert_tokens_to_ids(ans) for ans in valid_answers]
+
+         class MultipleChoiceLogitsProcessor:
+             def __call__(self, input_ids, scores):
+                 mask = torch.full_like(scores, float("-inf"))
+                 mask[:, valid_token_ids] = scores[:, valid_token_ids] # Allow only valid tokens
+                 return mask
+
+         logits_processor = LogitsProcessorList([MultipleChoiceLogitsProcessor()])
+
+         output = self.model.generate(
+             input_ids,
+             attention_mask=attention_mask, # Fix: Pass attention_mask to avoid warning
+             max_new_tokens=max_new_tokens,
+             logits_processor=logits_processor
+         )
+         answer = self.tokenizer.decode(output[0][-1])
+
+         return answer
+
+     def generate_response_mcqa_multi_token(self, msg, max_new_tokens=2, choices: list = []):
+         """
+         Handles multiple-choice questions where answers might have multiple tokens.
+         """
+         # Ensure tokenizer has proper special tokens set
+         if self.tokenizer.pad_token is None:
+             self.tokenizer.pad_token = self.tokenizer.eos_token
+
+         if self.model.config.pad_token_id is None:
+             self.model.config.pad_token_id = self.tokenizer.pad_token_id
+
+         chat = [
+             {"role": "user", "content": "You are a multiple choice question-answering chatbot. Do not give an answer that is not included in the choices. Only answer with letters like A, B, C, D..."},
+             {"role": "assistant", "content": "I am ready to answer your questions. Feel free to ask anything.\n"},
+             {"role": "user", "content": f"{msg}"},
+         ]
+         formatted_chat = self.tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
+         #print(formatted_chat)
+         inputs = self.tokenizer(formatted_chat, return_tensors="pt", padding=True, truncation=True)
+
+         if self.device == "auto":
+             input_ids = inputs.input_ids
+             attention_mask = inputs.attention_mask
+         else:
+             input_ids = inputs.input_ids.to(self.model.device)
+             attention_mask = inputs.attention_mask.to(self.model.device)
+
+         # Generate the sequence of letters starting from 'A'
+         letters = [chr(ord('A') + i) for i in range(len(choices))] # Create option letters A, B, C, D, E, ...
+         encoded_choices = [self.tokenizer.encode(letter, add_special_tokens=False) for letter in letters]
+         flattened_encoded_choices = [item for sublist in encoded_choices for item in sublist] # Flatten the list
+         #print(flattened_encoded_choices)
+
+         allowed_tokens = flattened_encoded_choices
+         allowed_tokens += self.get_chat_template_tokens() # Get the special chat tokens
+         allowed_token_ids = set(allowed_tokens) # Ensure uniqueness
+
+         # Custom LogitsProcessor to restrict generation
+         class RestrictToABCDLogitsProcessor(LogitsProcessor):
+             def __call__(self, input_ids, scores):
+                 mask = torch.full_like(scores, float("-inf")) # Block all tokens
+                 mask[:, list(allowed_token_ids)] = scores[:, list(allowed_token_ids)] # Allow only A, B, C, D tokens
+                 return mask
+         logits_processor = LogitsProcessorList([RestrictToABCDLogitsProcessor()])
+
+         # Generate response
+         output = self.model.generate(
+             input_ids,
+             do_sample=True,
+             attention_mask=attention_mask,
+             max_new_tokens=max_new_tokens,
+             eos_token_id=self.tokenizer.eos_token_id,
+             pad_token_id=self.tokenizer.pad_token_id,
+             temperature=0.4,
+             logits_processor=logits_processor,
+         )
+         generated_ids = output[0] # The generated sequence including the prompt
+         generated_tokens = generated_ids[len(input_ids[0]):] # Exclude the input_ids part
+         generated_text = self.tokenizer.decode(generated_tokens, skip_special_tokens=True)
+         return generated_text
+
+     def generate_response(self, prompt: str, max_new_tokens: int = 100) -> str:
+
+         if self.tokenizer.pad_token is None:
+             self.tokenizer.pad_token = self.tokenizer.eos_token
+
+         if self.model.config.pad_token_id is None:
+             self.model.config.pad_token_id = self.tokenizer.eos_token_id
+
+         chat = [
+             {"role": "user", "content": "You are a helpful AI assistant."},
+             {"role": "assistant", "content": "I am here to help you with any questions you may have."},
+             {"role": "user", "content": prompt},
+         ]
+
+         formatted_chat = self.tokenizer.apply_chat_template(
+             chat,
+             tokenize=False,
+             add_generation_prompt=True
+         )
+
+         inputs = self.tokenizer(formatted_chat, return_tensors="pt", padding=True, truncation=True)
+
+         if self.device == "auto":
+             input_ids = inputs.input_ids
+             attention_mask = inputs.attention_mask
+         else:
+             input_ids = inputs.input_ids.to(self.model.device)
+             attention_mask = inputs.attention_mask.to(self.model.device)
+
+         output = self.model.generate(
+             input_ids,
+             attention_mask=attention_mask,
+             max_new_tokens=max_new_tokens,
+             do_sample=True,
+             temperature=0.7,
+         )
+
+         generated_ids = output[0]
+         prompt_len = input_ids.shape[1]
+         generated_tokens = generated_ids[prompt_len:]
+         result = self.tokenizer.decode(generated_tokens, skip_special_tokens=True)
+         return result
+
+     def get_chat_template_tokens(self):
+         allowed_token_chat = [
+             {"role": "user", "content": ""},
+             {"role": "assistant", "content": ""}
+         ]
+         allowed_special_tokens = self.tokenizer.apply_chat_template(allowed_token_chat, tokenize=True)
+         return allowed_special_tokens
+
+     @abstractmethod
+     def load_dataset_from_hf(self):
+         """
+         Define your own loading method if needed.
+         :return: Dataset
+         """
+         print("Loading dataset from Hugging Face.")
+         start_time = datetime.now()
+         dataset= load_dataset(self.dataset_repo, token=HF_TOKEN, split="train")
+         print("Dataset loaded.")
+
+         # Load 50 from each dataset
+         mcqa_sample_size = 3
+         if len(dataset) > mcqa_sample_size:
+             dataset = dataset.shuffle(seed=42).select(range(mcqa_sample_size))
+         end_time = datetime.now()
+         print(f"Dataset loaded in {(end_time - start_time).seconds} seconds.")
+         return dataset
+
+     def load_dataset_lmjudge_from_hf(self):
+         """
+         Define your own loading method if needed.
+         :return: Dataset
+         """
+         print("Loading dataset from Hugging Face.")
+         start_time = datetime.now()
+         dataset= load_dataset(self.dataset_repo, token=HF_TOKEN, split="train")
+         print("Dataset loaded.")
+
+         #Load 100 from each dataset
+         llmjudge_sample_size = 3
+         if len(dataset) > llmjudge_sample_size:
+             dataset = dataset.shuffle(seed=42).select(range(llmjudge_sample_size))
+         end_time = datetime.now()
+         print(f"Dataset loaded in {(end_time - start_time).seconds} seconds.")
+         return dataset
+
+     @abstractmethod
+     def evaluate(self):
          pass
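
For orientation, below is a minimal, hypothetical sketch of how a concrete task might subclass BaseTask after this change. It is not part of the commit: the class name, dataset repo, row fields ("question", "choices", "answer"), and the import path (assumed from the file location src/deepeval/base_task.py) are illustrative assumptions.

# Hypothetical usage sketch (not from this commit): a concrete BaseTask subclass.
from src.deepeval.base_task import BaseTask  # import path assumed from the file location

class ExampleMCQATask(BaseTask):  # placeholder name
    def __init__(self, model_name: str):
        # "example-org/example-mcqa" is a placeholder dataset repo, not a real one
        super().__init__(dataset_repo="example-org/example-mcqa", model_name=model_name)

    def load_dataset_from_hf(self):
        # Reuse the base loader, which after this commit subsamples to mcqa_sample_size = 3 rows
        return super().load_dataset_from_hf()

    def evaluate(self):
        correct = 0
        for row in self.dataset:
            # Assumes each row exposes "question", "choices", and a letter "answer" field
            letters = [chr(ord("A") + i) for i in range(len(row["choices"]))]
            prompt = row["question"] + "\n" + "\n".join(
                f"{letter}. {choice}" for letter, choice in zip(letters, row["choices"])
            )
            prediction = self.generate_response_mcqa_multi_token(prompt, choices=row["choices"])
            correct += int(prediction.strip().startswith(row["answer"]))
        return {"accuracy": correct / len(self.dataset)}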