aacengiz committed on
Commit
57be006
·
verified ·
1 Parent(s): ca54ffd

Update src/deepeval/deepeval_task_manager.py

Browse files
src/deepeval/deepeval_task_manager.py CHANGED
@@ -14,6 +14,13 @@ from src.deepeval.reading_comp_mc import ReadingComprehensionMCTask
14
  from src.deepeval.complex_reasoning import ComplexReasoningTask
15
  from src.deepeval.truthfulness_task import TruthfulnessTask
16
  from src.deepeval.nli import NLITask
 
 
 
 
 
 
 
17
  from typing import List
18
  from datetime import datetime
19
  load_dotenv()
@@ -34,6 +41,14 @@ class Task(Enum):
34
  COMPLEX_REASONING = "complex_reasoning"
35
  TRUTHFULNESS = "sosyoloji_truthfulness"
36
  NLI = "nli"
 
 
 
 
 
 
 
 
37
 
38
 
39
  class DeepEvalTaskManager:
@@ -143,6 +158,42 @@ class DeepEvalTaskManager:
143
  res = nli_task.evaluate()
144
  return res
145
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  if __name__ == "__main__":
147
  des = DeepEvalTaskManager("google/gemma-2-2b-it", ["TOXICITY", "BIAS"])
148
  res = des.run_tasks()
 
14
  from src.deepeval.complex_reasoning import ComplexReasoningTask
15
  from src.deepeval.truthfulness_task import TruthfulnessTask
16
  from src.deepeval.nli import NLITask
17
+ from src.deepeval.math import MathTask
18
+ from src.deepeval.turkish_vocabulary import TurkishVocabularyTask
19
+ from src.deepeval.metaphors_and_idioms import MetaphorsAndIdiomsTask
20
+ from src.deepeval.topic_detection import TopicDetectionTask
21
+ from src.deepeval.sts import STSTask
22
+ from src.deepeval.mmlu import MMLUTask
23
+ from src.deepeval.bias import BiasTask
24
  from typing import List
25
  from datetime import datetime
26
  load_dotenv()
 
41
  COMPLEX_REASONING = "complex_reasoning"
42
  TRUTHFULNESS = "sosyoloji_truthfulness"
43
  NLI = "nli"
44
+ MATH = "math"
45
+ TURKISH_VOCABULARY = "turkish_vocabulary"
46
+ METAPHORS_AND_IDIOMS = "metaphors_and_idioms"
47
+ TOPIC_DETECTION = "topic_detection"
48
+ STS = "sts"
49
+ MMLU = "mmlu"
50
+ BIAS_MC = "bias"
51
+
52
 
53
 
54
  class DeepEvalTaskManager:
 
158
  res = nli_task.evaluate()
159
  return res
160
 
161
+ def math(self):
162
+ math_task = MathTask(self.model_name)
163
+ res = math_task.evaluate()
164
+ return res
165
+
166
+ def turkish_vocabulary(self):
167
+ turkish_vocabulary_task = TurkishVocabularyTask(self.model_name)
168
+ res = turkish_vocabulary_task.evaluate()
169
+ return res
170
+
171
+ def metaphors_and_idioms(self):
172
+ metaphors_and_idioms_task = MetaphorsAndIdiomsTask(self.model_name)
173
+ res = metaphors_and_idioms_task.evaluate()
174
+ return res
175
+
176
+ def topic_detection(self):
177
+ topic_detection_task = TopicDetectionTask(self.model_name)
178
+ res = topic_detection_task.evaluate()
179
+ return res
180
+
181
+ def sts(self):
182
+ sts_task = STSTask(self.model_name)
183
+ res = sts_task.evaluate()
184
+ return res
185
+
186
+ def mmlu(self):
187
+ mmlu_task = MMLUTask(self.model_name)
188
+ res = mmlu_task.evaluate()
189
+ return res
190
+
191
+ def bias(self):
192
+ bias_task = BiasTask(self.model_name)
193
+ res = bias_task.evaluate()
194
+ return res
195
+
196
+
197
  if __name__ == "__main__":
198
  des = DeepEvalTaskManager("google/gemma-2-2b-it", ["TOXICITY", "BIAS"])
199
  res = des.run_tasks()