Spaces:
Paused
Paused
Minor fix in bias task
Browse files
src/deepeval/bias.py
CHANGED
@@ -11,7 +11,7 @@ class BiasTask(BaseTask):
|
|
11 |
|
12 |
def load_dataset_from_hf(self):
|
13 |
dataset = super().load_dataset_from_hf()
|
14 |
-
return dataset.select(range(min(
|
15 |
|
16 |
|
17 |
def evaluate(self) -> dict[str, Any]:
|
@@ -61,9 +61,9 @@ class BiasTask(BaseTask):
|
|
61 |
## Check if correct based on metric
|
62 |
if ambiguous_correct_answer_letter == ambiguous_model_answer_cleaned:
|
63 |
true += 1
|
64 |
-
difficulty_results['correct'] += 1
|
65 |
|
66 |
-
difficulty_results['total'] += 1
|
67 |
|
68 |
# STAGE 2
|
69 |
instruction = ""
|
@@ -81,9 +81,9 @@ class BiasTask(BaseTask):
|
|
81 |
## Check if correct based on metric
|
82 |
if disambiguated_correct_answer_letter == disambiguated_model_answer_cleaned:
|
83 |
true += 1
|
84 |
-
difficulty_results['correct'] += 1
|
85 |
|
86 |
-
difficulty_results['total'] += 1
|
87 |
|
88 |
# Print results categorized by difficulty
|
89 |
for category, stats in difficulty_results.items():
|
|
|
11 |
|
12 |
def load_dataset_from_hf(self):
|
13 |
dataset = super().load_dataset_from_hf()
|
14 |
+
return dataset.select(range(min(1, len(dataset))))
|
15 |
|
16 |
|
17 |
def evaluate(self) -> dict[str, Any]:
|
|
|
61 |
## Check if correct based on metric
|
62 |
if ambiguous_correct_answer_letter == ambiguous_model_answer_cleaned:
|
63 |
true += 1
|
64 |
+
difficulty_results["ambiguous"]['correct'] += 1
|
65 |
|
66 |
+
difficulty_results["ambiguous"]['total'] += 1
|
67 |
|
68 |
# STAGE 2
|
69 |
instruction = ""
|
|
|
81 |
## Check if correct based on metric
|
82 |
if disambiguated_correct_answer_letter == disambiguated_model_answer_cleaned:
|
83 |
true += 1
|
84 |
+
difficulty_results["disambiguated"]['correct'] += 1
|
85 |
|
86 |
+
difficulty_results["disambiguated"]['total'] += 1
|
87 |
|
88 |
# Print results categorized by difficulty
|
89 |
for category, stats in difficulty_results.items():
|
src/deepeval/deepeval_task_manager.py
CHANGED
@@ -182,6 +182,6 @@ class DeepEvalTaskManager:
|
|
182 |
return res
|
183 |
|
184 |
if __name__ == "__main__":
|
185 |
-
des = DeepEvalTaskManager("google/gemma-2b-it", ["
|
186 |
res = des.run_tasks()
|
187 |
print(res)
|
|
|
182 |
return res
|
183 |
|
184 |
if __name__ == "__main__":
|
185 |
+
des = DeepEvalTaskManager("google/gemma-2b-it", ["BIAS_MC"])
|
186 |
res = des.run_tasks()
|
187 |
print(res)
|