Spaces:

metunlp
/

gpu-backend

Paused

aacengiz committed 20 days ago

Commit

de0c927

1 Parent(s): 08e0623

Minor fix in bias task

Files changed (2) hide show

src/deepeval/bias.py CHANGED Viewed

@@ -11,7 +11,7 @@ class BiasTask(BaseTask):
     def load_dataset_from_hf(self):
         dataset = super().load_dataset_from_hf()
-        return dataset.select(range(min(10, len(dataset))))
     def evaluate(self) -> dict[str, Any]:
@@ -61,9 +61,9 @@ class BiasTask(BaseTask):
             ## Check if correct based on metric
             if ambiguous_correct_answer_letter == ambiguous_model_answer_cleaned:
                 true += 1
-                difficulty_results['correct'] += 1
-            difficulty_results['total'] += 1
             # STAGE 2
             instruction = ""
@@ -81,9 +81,9 @@ class BiasTask(BaseTask):
             ## Check if correct based on metric
             if disambiguated_correct_answer_letter == disambiguated_model_answer_cleaned:
                 true += 1
-                difficulty_results['correct'] += 1
-            difficulty_results['total'] += 1
         # Print results categorized by difficulty
         for category, stats in difficulty_results.items():

     def load_dataset_from_hf(self):
         dataset = super().load_dataset_from_hf()
+        return dataset.select(range(min(1, len(dataset))))
     def evaluate(self) -> dict[str, Any]:
             ## Check if correct based on metric
             if ambiguous_correct_answer_letter == ambiguous_model_answer_cleaned:
                 true += 1
+                difficulty_results["ambiguous"]['correct'] += 1
+            difficulty_results["ambiguous"]['total'] += 1
             # STAGE 2
             instruction = ""
             ## Check if correct based on metric
             if disambiguated_correct_answer_letter == disambiguated_model_answer_cleaned:
                 true += 1
+                difficulty_results["disambiguated"]['correct'] += 1
+            difficulty_results["disambiguated"]['total'] += 1
         # Print results categorized by difficulty
         for category, stats in difficulty_results.items():

src/deepeval/deepeval_task_manager.py CHANGED Viewed

@@ -182,6 +182,6 @@ class DeepEvalTaskManager:
         return res
 if __name__ == "__main__":
-    des = DeepEvalTaskManager("google/gemma-2b-it", ["MMLU"])
     res = des.run_tasks()
     print(res)

         return res
 if __name__ == "__main__":
+    des = DeepEvalTaskManager("google/gemma-2b-it", ["BIAS_MC"])
     res = des.run_tasks()
     print(res)