Spaces:
Sleeping
Sleeping
Ahmet Kaan Sever
committed on
Commit
·
dbf76bc
1
Parent(s):
41affa9
Adjusted load dataset to get 25% of each dataset
Browse files
- src/deepeval/base_task.py +6 -1
- src/deepeval/bias_task.py +1 -1
- src/deepeval/commonsense_reasoning_task.py +1 -1
- src/deepeval/complex_reasoning.py +1 -1
- src/deepeval/faithfulness_task.py +1 -1
- src/deepeval/instruction_following_task.py +1 -1
- src/deepeval/nli.py +1 -1
- src/deepeval/reading_comp_mc.py +1 -1
- src/deepeval/reading_comprehension_task.py +1 -1
- src/deepeval/sentiment_analysis_task.py +1 -1
- src/deepeval/summarization_task.py +1 -1
- src/deepeval/toxicity_task.py +1 -1
- src/deepeval/truthfulness_task.py +1 -1
- src/deepeval/turkish_general_knowledge_task.py +1 -1
src/deepeval/base_task.py
CHANGED
@@ -188,7 +188,12 @@ class BaseTask(ABC):
|
|
188 |
print("Loading dataset from Hugging Face.")
|
189 |
dataset= load_dataset(self.dataset_repo, token=HF_TOKEN, split="train")
|
190 |
print("Dataset loaded.")
|
191 |
-
|
|
|
|
|
|
|
|
|
|
|
192 |
|
193 |
@abstractmethod
|
194 |
def evaluate(self):
|
|
|
188 |
print("Loading dataset from Hugging Face.")
|
189 |
dataset= load_dataset(self.dataset_repo, token=HF_TOKEN, split="train")
|
190 |
print("Dataset loaded.")
|
191 |
+
|
192 |
+
# Load 25% of each dataset
|
193 |
+
print("Original dataset size: ", len(dataset))
|
194 |
+
dataset = dataset.shuffle(seed=42).select(range(int(len(dataset) * 0.25)))
|
195 |
+
print("Reduced dataset size: ", len(dataset))
|
196 |
+
return dataset
|
197 |
|
198 |
@abstractmethod
|
199 |
def evaluate(self):
|
src/deepeval/bias_task.py
CHANGED
@@ -10,7 +10,7 @@ class BiasTask(BaseTask):
|
|
10 |
|
11 |
def load_dataset_from_hf(self):
|
12 |
dataset = super().load_dataset_from_hf()
|
13 |
-
return dataset
|
14 |
|
15 |
def evaluate(self) -> dict[str, Any]:
|
16 |
|
|
|
10 |
|
11 |
def load_dataset_from_hf(self):
|
12 |
dataset = super().load_dataset_from_hf()
|
13 |
+
return dataset
|
14 |
|
15 |
def evaluate(self) -> dict[str, Any]:
|
16 |
|
src/deepeval/commonsense_reasoning_task.py
CHANGED
@@ -10,7 +10,7 @@ class CommonsenseReasoningTask(BaseTask):
|
|
10 |
|
11 |
def load_dataset_from_hf(self):
|
12 |
dataset = super().load_dataset_from_hf()
|
13 |
-
return dataset
|
14 |
|
15 |
|
16 |
def evaluate(self) -> dict[str, Any]:
|
|
|
10 |
|
11 |
def load_dataset_from_hf(self):
|
12 |
dataset = super().load_dataset_from_hf()
|
13 |
+
return dataset
|
14 |
|
15 |
|
16 |
def evaluate(self) -> dict[str, Any]:
|
src/deepeval/complex_reasoning.py
CHANGED
@@ -11,7 +11,7 @@ class ComplexReasoningTask(BaseTask):
|
|
11 |
|
12 |
def load_dataset_from_hf(self):
|
13 |
dataset = super().load_dataset_from_hf()
|
14 |
-
return dataset
|
15 |
|
16 |
|
17 |
def evaluate(self) -> dict[str, Any]:
|
|
|
11 |
|
12 |
def load_dataset_from_hf(self):
|
13 |
dataset = super().load_dataset_from_hf()
|
14 |
+
return dataset
|
15 |
|
16 |
|
17 |
def evaluate(self) -> dict[str, Any]:
|
src/deepeval/faithfulness_task.py
CHANGED
@@ -9,7 +9,7 @@ class FaithfulnessTask(BaseTask):
|
|
9 |
|
10 |
def load_dataset_from_hf(self):
|
11 |
dataset = super().load_dataset_from_hf()
|
12 |
-
return dataset
|
13 |
|
14 |
def evaluate(self) -> dict[str, Any]:
|
15 |
|
|
|
9 |
|
10 |
def load_dataset_from_hf(self):
|
11 |
dataset = super().load_dataset_from_hf()
|
12 |
+
return dataset
|
13 |
|
14 |
def evaluate(self) -> dict[str, Any]:
|
15 |
|
src/deepeval/instruction_following_task.py
CHANGED
@@ -10,7 +10,7 @@ class InstructionFollowingTask(BaseTask):
|
|
10 |
|
11 |
def load_dataset_from_hf(self):
|
12 |
dataset = super().load_dataset_from_hf()
|
13 |
-
return dataset
|
14 |
|
15 |
def evaluate(self) -> dict[str, Any]:
|
16 |
results = []
|
|
|
10 |
|
11 |
def load_dataset_from_hf(self):
|
12 |
dataset = super().load_dataset_from_hf()
|
13 |
+
return dataset
|
14 |
|
15 |
def evaluate(self) -> dict[str, Any]:
|
16 |
results = []
|
src/deepeval/nli.py
CHANGED
@@ -10,7 +10,7 @@ class NLITask(BaseTask):
|
|
10 |
|
11 |
def load_dataset_from_hf(self):
|
12 |
dataset = super().load_dataset_from_hf()
|
13 |
-
return dataset
|
14 |
|
15 |
|
16 |
def evaluate(self) -> dict[str, Any]:
|
|
|
10 |
|
11 |
def load_dataset_from_hf(self):
|
12 |
dataset = super().load_dataset_from_hf()
|
13 |
+
return dataset
|
14 |
|
15 |
|
16 |
def evaluate(self) -> dict[str, Any]:
|
src/deepeval/reading_comp_mc.py
CHANGED
@@ -11,7 +11,7 @@ class ReadingComprehensionMCTask(BaseTask):
|
|
11 |
|
12 |
def load_dataset_from_hf(self):
|
13 |
dataset = super().load_dataset_from_hf()
|
14 |
-
return dataset
|
15 |
|
16 |
|
17 |
def evaluate(self) -> dict[str, Any]:
|
|
|
11 |
|
12 |
def load_dataset_from_hf(self):
|
13 |
dataset = super().load_dataset_from_hf()
|
14 |
+
return dataset
|
15 |
|
16 |
|
17 |
def evaluate(self) -> dict[str, Any]:
|
src/deepeval/reading_comprehension_task.py
CHANGED
@@ -28,7 +28,7 @@ class ReadingComprehensionTask(BaseTask):
|
|
28 |
|
29 |
def load_dataset_from_hf(self):
|
30 |
dataset = super().load_dataset_from_hf()
|
31 |
-
return dataset
|
32 |
|
33 |
def evaluate(self) -> dict[str, Any]:
|
34 |
results = []
|
|
|
28 |
|
29 |
def load_dataset_from_hf(self):
|
30 |
dataset = super().load_dataset_from_hf()
|
31 |
+
return dataset
|
32 |
|
33 |
def evaluate(self) -> dict[str, Any]:
|
34 |
results = []
|
src/deepeval/sentiment_analysis_task.py
CHANGED
@@ -9,7 +9,7 @@ class SentimentAnalysisTask(BaseTask):
|
|
9 |
def load_dataset_from_hf(self):
|
10 |
print("Loading the dataset")
|
11 |
dataset = super().load_dataset_from_hf()
|
12 |
-
return dataset
|
13 |
|
14 |
|
15 |
def evaluate(self) -> dict[str, Any]:
|
|
|
9 |
def load_dataset_from_hf(self):
|
10 |
print("Loading the dataset")
|
11 |
dataset = super().load_dataset_from_hf()
|
12 |
+
return dataset
|
13 |
|
14 |
|
15 |
def evaluate(self) -> dict[str, Any]:
|
src/deepeval/summarization_task.py
CHANGED
@@ -9,7 +9,7 @@ class SummarizationTask(BaseTask):
|
|
9 |
|
10 |
def load_dataset_from_hf(self):
|
11 |
dataset = super().load_dataset_from_hf()
|
12 |
-
return dataset
|
13 |
|
14 |
def evaluate(self) -> dict[str, Any]:
|
15 |
results = []
|
|
|
9 |
|
10 |
def load_dataset_from_hf(self):
|
11 |
dataset = super().load_dataset_from_hf()
|
12 |
+
return dataset
|
13 |
|
14 |
def evaluate(self) -> dict[str, Any]:
|
15 |
results = []
|
src/deepeval/toxicity_task.py
CHANGED
@@ -9,7 +9,7 @@ class ToxicityTask(BaseTask):
|
|
9 |
|
10 |
def load_dataset_from_hf(self):
|
11 |
dataset = super().load_dataset_from_hf()
|
12 |
-
return dataset
|
13 |
|
14 |
|
15 |
def evaluate(self) -> dict[str, Any]:
|
|
|
9 |
|
10 |
def load_dataset_from_hf(self):
|
11 |
dataset = super().load_dataset_from_hf()
|
12 |
+
return dataset
|
13 |
|
14 |
|
15 |
def evaluate(self) -> dict[str, Any]:
|
src/deepeval/truthfulness_task.py
CHANGED
@@ -26,7 +26,7 @@ class TruthfulnessTask(BaseTask):
|
|
26 |
|
27 |
def load_dataset_from_hf(self):
|
28 |
dataset = super().load_dataset_from_hf()
|
29 |
-
return dataset
|
30 |
|
31 |
def evaluate(self) -> dict[str, Any]:
|
32 |
results = []
|
|
|
26 |
|
27 |
def load_dataset_from_hf(self):
|
28 |
dataset = super().load_dataset_from_hf()
|
29 |
+
return dataset
|
30 |
|
31 |
def evaluate(self) -> dict[str, Any]:
|
32 |
results = []
|
src/deepeval/turkish_general_knowledge_task.py
CHANGED
@@ -9,7 +9,7 @@ class TurkishGeneralKnowledgeTask(BaseTask):
|
|
9 |
|
10 |
def load_dataset_from_hf(self):
|
11 |
dataset = super().load_dataset_from_hf()
|
12 |
-
return dataset
|
13 |
|
14 |
def evaluate(self):
|
15 |
responses = []
|
|
|
9 |
|
10 |
def load_dataset_from_hf(self):
|
11 |
dataset = super().load_dataset_from_hf()
|
12 |
+
return dataset
|
13 |
|
14 |
def evaluate(self):
|
15 |
responses = []
|