Spaces:
Sleeping
Sleeping
customize the data count
Browse files- src/deepeval/base_task.py +20 -2
- src/deepeval/bias.py +6 -0
- src/deepeval/faithfulness_task.py +2 -2
- src/deepeval/instruction_following_task.py +2 -2
- src/deepeval/reading_comprehension_task.py +2 -2
- src/deepeval/summarization_task.py +2 -2
- src/deepeval/toxicity_task.py +2 -2
- src/deepeval/truthfulness_task.py +2 -2
src/deepeval/base_task.py
CHANGED
@@ -246,8 +246,26 @@ class BaseTask(ABC):
|
|
246 |
print("Dataset loaded.")
|
247 |
|
248 |
# Load 50 from each dataset
|
249 |
-
if len(dataset) >
|
250 |
-
dataset = dataset.shuffle(seed=42).select(range(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
251 |
end_time = datetime.now()
|
252 |
print(f"Dataset loaded in {(end_time - start_time).seconds} seconds.")
|
253 |
return dataset
|
|
|
246 |
print("Dataset loaded.")
|
247 |
|
248 |
# Load 50 from each dataset
|
249 |
+
if len(dataset) > 100:
|
250 |
+
dataset = dataset.shuffle(seed=42).select(range(100))
|
251 |
+
end_time = datetime.now()
|
252 |
+
print(f"Dataset loaded in {(end_time - start_time).seconds} seconds.")
|
253 |
+
return dataset
|
254 |
+
|
255 |
+
@abstractmethod
|
256 |
+
def load_dataset_lmjudge_from_hf(self):
|
257 |
+
"""
|
258 |
+
Define your own loading method if needed.
|
259 |
+
:return: Dataset
|
260 |
+
"""
|
261 |
+
print("Loading dataset from Hugging Face.")
|
262 |
+
start_time = datetime.now()
|
263 |
+
dataset= load_dataset(self.dataset_repo, token=HF_TOKEN, split="train")
|
264 |
+
print("Dataset loaded.")
|
265 |
+
|
266 |
+
# Keep at most 10 randomly sampled examples (fixed shuffle seed) from the dataset
|
267 |
+
if len(dataset) > 10:
|
268 |
+
dataset = dataset.shuffle(seed=42).select(range(10))
|
269 |
end_time = datetime.now()
|
270 |
print(f"Dataset loaded in {(end_time - start_time).seconds} seconds.")
|
271 |
return dataset
|
src/deepeval/bias.py
CHANGED
@@ -9,9 +9,15 @@ class BiasTask(BaseTask):
|
|
9 |
def __init__(self, model_name):
|
10 |
super().__init__("metunlp/sosyoloji_bias", model_name=model_name)
|
11 |
|
|
|
12 |
def load_dataset_from_hf(self):
|
13 |
dataset = super().load_dataset_from_hf()
|
14 |
return dataset
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
|
17 |
def evaluate(self) -> dict[str, Any]:
|
|
|
9 |
def __init__(self, model_name):
|
10 |
super().__init__("metunlp/sosyoloji_bias", model_name=model_name)
|
11 |
|
12 |
+
<<<<<<< HEAD
|
13 |
def load_dataset_from_hf(self):
|
14 |
dataset = super().load_dataset_from_hf()
|
15 |
return dataset
|
16 |
+
=======
|
17 |
+
def load_dataset_lmjudge_from_hf(self):
|
18 |
+
dataset = super().load_dataset_lmjudge_from_hf()
|
19 |
+
return dataset.select(range(min(1, len(dataset))))
|
20 |
+
>>>>>>> 2dca79c (customize the data count)
|
21 |
|
22 |
|
23 |
def evaluate(self) -> dict[str, Any]:
|
src/deepeval/faithfulness_task.py
CHANGED
@@ -8,8 +8,8 @@ class FaithfulnessTask(BaseTask):
|
|
8 |
def __init__(self, model_name: str):
|
9 |
super().__init__("metunlp/sosyoloji_faithfulness", model_name=model_name)
|
10 |
|
11 |
-
def
|
12 |
-
dataset = super().
|
13 |
return dataset
|
14 |
|
15 |
def evaluate(self) -> dict[str, Any]:
|
|
|
8 |
def __init__(self, model_name: str):
|
9 |
super().__init__("metunlp/sosyoloji_faithfulness", model_name=model_name)
|
10 |
|
11 |
+
def load_dataset_lmjudge_from_hf(self):
|
12 |
+
dataset = super().load_dataset_lmjudge_from_hf()
|
13 |
return dataset
|
14 |
|
15 |
def evaluate(self) -> dict[str, Any]:
|
src/deepeval/instruction_following_task.py
CHANGED
@@ -9,8 +9,8 @@ class InstructionFollowingTask(BaseTask):
|
|
9 |
def __init__(self, model_name: str):
|
10 |
super().__init__("metunlp/instruction_following_tr", model_name=model_name)
|
11 |
|
12 |
-
def
|
13 |
-
dataset = super().
|
14 |
return dataset
|
15 |
|
16 |
def evaluate(self) -> dict[str, Any]:
|
|
|
9 |
def __init__(self, model_name: str):
|
10 |
super().__init__("metunlp/instruction_following_tr", model_name=model_name)
|
11 |
|
12 |
+
def load_dataset_lmjudge_from_hf(self):
|
13 |
+
dataset = super().load_dataset_lmjudge_from_hf()
|
14 |
return dataset
|
15 |
|
16 |
def evaluate(self) -> dict[str, Any]:
|
src/deepeval/reading_comprehension_task.py
CHANGED
@@ -27,8 +27,8 @@ class ReadingComprehensionTask(BaseTask):
|
|
27 |
],
|
28 |
)
|
29 |
|
30 |
-
def
|
31 |
-
dataset = super().
|
32 |
return dataset
|
33 |
|
34 |
def evaluate(self) -> dict[str, Any]:
|
|
|
27 |
],
|
28 |
)
|
29 |
|
30 |
+
def load_dataset_lmjudge_from_hf(self):
|
31 |
+
dataset = super().load_dataset_lmjudge_from_hf()
|
32 |
return dataset
|
33 |
|
34 |
def evaluate(self) -> dict[str, Any]:
|
src/deepeval/summarization_task.py
CHANGED
@@ -8,8 +8,8 @@ class SummarizationTask(BaseTask):
|
|
8 |
def __init__(self, model_name: str):
|
9 |
super().__init__("metunlp/summarization_tr", model_name=model_name)
|
10 |
|
11 |
-
def
|
12 |
-
dataset = super().
|
13 |
return dataset
|
14 |
|
15 |
def evaluate(self) -> dict[str, Any]:
|
|
|
8 |
def __init__(self, model_name: str):
|
9 |
super().__init__("metunlp/summarization_tr", model_name=model_name)
|
10 |
|
11 |
+
def load_dataset_lmjudge_from_hf(self):
|
12 |
+
dataset = super().load_dataset_lmjudge_from_hf()
|
13 |
return dataset
|
14 |
|
15 |
def evaluate(self) -> dict[str, Any]:
|
src/deepeval/toxicity_task.py
CHANGED
@@ -8,8 +8,8 @@ class ToxicityTask(BaseTask):
|
|
8 |
def __init__(self, model_name: str):
|
9 |
super().__init__("metunlp/sosyoloji_toxicity", model_name=model_name)
|
10 |
|
11 |
-
def
|
12 |
-
dataset = super().
|
13 |
return dataset
|
14 |
|
15 |
|
|
|
8 |
def __init__(self, model_name: str):
|
9 |
super().__init__("metunlp/sosyoloji_toxicity", model_name=model_name)
|
10 |
|
11 |
+
def load_dataset_lmjudge_from_hf(self):
|
12 |
+
dataset = super().load_dataset_lmjudge_from_hf()
|
13 |
return dataset
|
14 |
|
15 |
|
src/deepeval/truthfulness_task.py
CHANGED
@@ -25,8 +25,8 @@ class TruthfulnessTask(BaseTask):
|
|
25 |
],
|
26 |
)
|
27 |
|
28 |
-
def
|
29 |
-
dataset = super().
|
30 |
return dataset
|
31 |
|
32 |
def evaluate(self) -> dict[str, Any]:
|
|
|
25 |
],
|
26 |
)
|
27 |
|
28 |
+
def load_dataset_lmjudge_from_hf(self):
|
29 |
+
dataset = super().load_dataset_lmjudge_from_hf()
|
30 |
return dataset
|
31 |
|
32 |
def evaluate(self) -> dict[str, Any]:
|