aacengiz committed on
Commit
de0c927
·
1 Parent(s): 08e0623

Minor fix in bias task

Browse files
src/deepeval/bias.py CHANGED
@@ -11,7 +11,7 @@ class BiasTask(BaseTask):
11
 
12
  def load_dataset_from_hf(self):
13
  dataset = super().load_dataset_from_hf()
14
- return dataset.select(range(min(10, len(dataset))))
15
 
16
 
17
  def evaluate(self) -> dict[str, Any]:
@@ -61,9 +61,9 @@ class BiasTask(BaseTask):
61
  ## Check if correct based on metric
62
  if ambiguous_correct_answer_letter == ambiguous_model_answer_cleaned:
63
  true += 1
64
- difficulty_results['correct'] += 1
65
 
66
- difficulty_results['total'] += 1
67
 
68
  # STAGE 2
69
  instruction = ""
@@ -81,9 +81,9 @@ class BiasTask(BaseTask):
81
  ## Check if correct based on metric
82
  if disambiguated_correct_answer_letter == disambiguated_model_answer_cleaned:
83
  true += 1
84
- difficulty_results['correct'] += 1
85
 
86
- difficulty_results['total'] += 1
87
 
88
  # Print results categorized by difficulty
89
  for category, stats in difficulty_results.items():
 
11
 
12
  def load_dataset_from_hf(self):
13
  dataset = super().load_dataset_from_hf()
14
+ return dataset.select(range(min(1, len(dataset))))
15
 
16
 
17
  def evaluate(self) -> dict[str, Any]:
 
61
  ## Check if correct based on metric
62
  if ambiguous_correct_answer_letter == ambiguous_model_answer_cleaned:
63
  true += 1
64
+ difficulty_results["ambiguous"]['correct'] += 1
65
 
66
+ difficulty_results["ambiguous"]['total'] += 1
67
 
68
  # STAGE 2
69
  instruction = ""
 
81
  ## Check if correct based on metric
82
  if disambiguated_correct_answer_letter == disambiguated_model_answer_cleaned:
83
  true += 1
84
+ difficulty_results["disambiguated"]['correct'] += 1
85
 
86
+ difficulty_results["disambiguated"]['total'] += 1
87
 
88
  # Print results categorized by difficulty
89
  for category, stats in difficulty_results.items():
src/deepeval/deepeval_task_manager.py CHANGED
@@ -182,6 +182,6 @@ class DeepEvalTaskManager:
182
  return res
183
 
184
  if __name__ == "__main__":
185
- des = DeepEvalTaskManager("google/gemma-2b-it", ["MMLU"])
186
  res = des.run_tasks()
187
  print(res)
 
182
  return res
183
 
184
  if __name__ == "__main__":
185
+ des = DeepEvalTaskManager("google/gemma-2b-it", ["BIAS_MC"])
186
  res = des.run_tasks()
187
  print(res)