Spaces:
Paused
Paused
Ahmet Kaan Sever
committed on
Commit
·
9c25ebd
1
Parent(s):
771dabb
Distinguished bias mc and bias oe
Browse files
src/deepeval/bias_task.py
CHANGED
@@ -4,7 +4,7 @@ from deepeval.metrics import BiasMetric
|
|
4 |
from deepeval.test_case import LLMTestCase
|
5 |
from typing import Any
|
6 |
|
7 |
-
class BiasTask(BaseTask):
|
8 |
|
9 |
def __init__(self, model_name: str):
|
10 |
super().__init__("metunlp/sosyoloji_bias", model_name=model_name)
|
@@ -56,7 +56,7 @@ class BiasTask(BaseTask):
|
|
56 |
})
|
57 |
#Sum all scores in results and divide to nubmer of results
|
58 |
overallScore = (sum([result["score"] for result in results]) / len(results)) * 100
|
59 |
-
|
60 |
print(f"Total model time: {total_model_time} seconds")
|
61 |
print(f"Total judge time: {total_judge_time} seconds")
|
62 |
return {"results": overallScore}
|
|
|
4 |
from deepeval.test_case import LLMTestCase
|
5 |
from typing import Any
|
6 |
|
7 |
+
class BiasTaskOE(BaseTask):
|
8 |
|
9 |
def __init__(self, model_name: str):
|
10 |
super().__init__("metunlp/sosyoloji_bias", model_name=model_name)
|
|
|
56 |
})
|
57 |
#Sum all scores in results and divide to nubmer of results
|
58 |
overallScore = (sum([result["score"] for result in results]) / len(results)) * 100
|
59 |
+
|
60 |
print(f"Total model time: {total_model_time} seconds")
|
61 |
print(f"Total judge time: {total_judge_time} seconds")
|
62 |
return {"results": overallScore}
|
src/deepeval/deepeval_task_manager.py
CHANGED
@@ -7,7 +7,7 @@ from src.deepeval.commonsense_reasoning_task import CommonsenseReasoningTask
|
|
7 |
from src.deepeval.summarization_task import SummarizationTask
|
8 |
from src.deepeval.faithfulness_task import FaithfulnessTask
|
9 |
from src.deepeval.toxicity_task import ToxicityTask
|
10 |
-
from src.deepeval.bias_task import BiasTask
|
11 |
from src.deepeval.instruction_following_task import InstructionFollowingTask
|
12 |
from src.deepeval.reading_comprehension_task import ReadingComprehensionTask
|
13 |
from src.deepeval.reading_comp_mc import ReadingComprehensionMCTask
|
@@ -119,7 +119,7 @@ class DeepEvalTaskManager:
|
|
119 |
return res
|
120 |
|
121 |
def sosyoloji_bias(self):
|
122 |
-
bias_task = BiasTask(self.model_name)
|
123 |
res = bias_task.evaluate()
|
124 |
return res
|
125 |
|
|
|
7 |
from src.deepeval.summarization_task import SummarizationTask
|
8 |
from src.deepeval.faithfulness_task import FaithfulnessTask
|
9 |
from src.deepeval.toxicity_task import ToxicityTask
|
10 |
+
from src.deepeval.bias_task import BiasTaskOE
|
11 |
from src.deepeval.instruction_following_task import InstructionFollowingTask
|
12 |
from src.deepeval.reading_comprehension_task import ReadingComprehensionTask
|
13 |
from src.deepeval.reading_comp_mc import ReadingComprehensionMCTask
|
|
|
119 |
return res
|
120 |
|
121 |
def sosyoloji_bias(self):
|
122 |
+
bias_task = BiasTaskOE(self.model_name)
|
123 |
res = bias_task.evaluate()
|
124 |
return res
|
125 |
|