Ahmet Kaan Sever committed on
Commit
dbf76bc
·
1 Parent(s): 41affa9

Adjusted load dataset to get 25% of each dataset

Browse files
src/deepeval/base_task.py CHANGED
@@ -188,7 +188,12 @@ class BaseTask(ABC):
188
  print("Loading dataset from Hugging Face.")
189
  dataset= load_dataset(self.dataset_repo, token=HF_TOKEN, split="train")
190
  print("Dataset loaded.")
191
- return dataset.select(range(min(3, len(dataset))))
 
 
 
 
 
192
 
193
  @abstractmethod
194
  def evaluate(self):
 
188
  print("Loading dataset from Hugging Face.")
189
  dataset= load_dataset(self.dataset_repo, token=HF_TOKEN, split="train")
190
  print("Dataset loaded.")
191
+
192
+ # Load 25% of each dataset
193
+ print("Original dataset size: ", len(dataset))
194
+ dataset = dataset.shuffle(seed=42).select(range(int(len(dataset) * 0.25)))
195
+ print("Reduced dataset size: ", len(dataset))
196
+ return dataset
197
 
198
  @abstractmethod
199
  def evaluate(self):
src/deepeval/bias_task.py CHANGED
@@ -10,7 +10,7 @@ class BiasTask(BaseTask):
10
 
11
  def load_dataset_from_hf(self):
12
  dataset = super().load_dataset_from_hf()
13
- return dataset.select(range(min(3, len(dataset))))
14
 
15
  def evaluate(self) -> dict[str, Any]:
16
 
 
10
 
11
  def load_dataset_from_hf(self):
12
  dataset = super().load_dataset_from_hf()
13
+ return dataset
14
 
15
  def evaluate(self) -> dict[str, Any]:
16
 
src/deepeval/commonsense_reasoning_task.py CHANGED
@@ -10,7 +10,7 @@ class CommonsenseReasoningTask(BaseTask):
10
 
11
  def load_dataset_from_hf(self):
12
  dataset = super().load_dataset_from_hf()
13
- return dataset.select(range(min(10, len(dataset))))
14
 
15
 
16
  def evaluate(self) -> dict[str, Any]:
 
10
 
11
  def load_dataset_from_hf(self):
12
  dataset = super().load_dataset_from_hf()
13
+ return dataset
14
 
15
 
16
  def evaluate(self) -> dict[str, Any]:
src/deepeval/complex_reasoning.py CHANGED
@@ -11,7 +11,7 @@ class ComplexReasoningTask(BaseTask):
11
 
12
  def load_dataset_from_hf(self):
13
  dataset = super().load_dataset_from_hf()
14
- return dataset.select(range(min(10, len(dataset))))
15
 
16
 
17
  def evaluate(self) -> dict[str, Any]:
 
11
 
12
  def load_dataset_from_hf(self):
13
  dataset = super().load_dataset_from_hf()
14
+ return dataset
15
 
16
 
17
  def evaluate(self) -> dict[str, Any]:
src/deepeval/faithfulness_task.py CHANGED
@@ -9,7 +9,7 @@ class FaithfulnessTask(BaseTask):
9
 
10
  def load_dataset_from_hf(self):
11
  dataset = super().load_dataset_from_hf()
12
- return dataset.select(range(min(3, len(dataset))))
13
 
14
  def evaluate(self) -> dict[str, Any]:
15
 
 
9
 
10
  def load_dataset_from_hf(self):
11
  dataset = super().load_dataset_from_hf()
12
+ return dataset
13
 
14
  def evaluate(self) -> dict[str, Any]:
15
 
src/deepeval/instruction_following_task.py CHANGED
@@ -10,7 +10,7 @@ class InstructionFollowingTask(BaseTask):
10
 
11
  def load_dataset_from_hf(self):
12
  dataset = super().load_dataset_from_hf()
13
- return dataset.select(range(min(3, len(dataset))))
14
 
15
  def evaluate(self) -> dict[str, Any]:
16
  results = []
 
10
 
11
  def load_dataset_from_hf(self):
12
  dataset = super().load_dataset_from_hf()
13
+ return dataset
14
 
15
  def evaluate(self) -> dict[str, Any]:
16
  results = []
src/deepeval/nli.py CHANGED
@@ -10,7 +10,7 @@ class NLITask(BaseTask):
10
 
11
  def load_dataset_from_hf(self):
12
  dataset = super().load_dataset_from_hf()
13
- return dataset.select(range(min(10, len(dataset))))
14
 
15
 
16
  def evaluate(self) -> dict[str, Any]:
 
10
 
11
  def load_dataset_from_hf(self):
12
  dataset = super().load_dataset_from_hf()
13
+ return dataset
14
 
15
 
16
  def evaluate(self) -> dict[str, Any]:
src/deepeval/reading_comp_mc.py CHANGED
@@ -11,7 +11,7 @@ class ReadingComprehensionMCTask(BaseTask):
11
 
12
  def load_dataset_from_hf(self):
13
  dataset = super().load_dataset_from_hf()
14
- return dataset.select(range(min(10, len(dataset))))
15
 
16
 
17
  def evaluate(self) -> dict[str, Any]:
 
11
 
12
  def load_dataset_from_hf(self):
13
  dataset = super().load_dataset_from_hf()
14
+ return dataset
15
 
16
 
17
  def evaluate(self) -> dict[str, Any]:
src/deepeval/reading_comprehension_task.py CHANGED
@@ -28,7 +28,7 @@ class ReadingComprehensionTask(BaseTask):
28
 
29
  def load_dataset_from_hf(self):
30
  dataset = super().load_dataset_from_hf()
31
- return dataset.select(range(min(3, len(dataset))))
32
 
33
  def evaluate(self) -> dict[str, Any]:
34
  results = []
 
28
 
29
  def load_dataset_from_hf(self):
30
  dataset = super().load_dataset_from_hf()
31
+ return dataset
32
 
33
  def evaluate(self) -> dict[str, Any]:
34
  results = []
src/deepeval/sentiment_analysis_task.py CHANGED
@@ -9,7 +9,7 @@ class SentimentAnalysisTask(BaseTask):
9
  def load_dataset_from_hf(self):
10
  print("Loading the dataset")
11
  dataset = super().load_dataset_from_hf()
12
- return dataset.select(range(min(10, len(dataset))))
13
 
14
 
15
  def evaluate(self) -> dict[str, Any]:
 
9
  def load_dataset_from_hf(self):
10
  print("Loading the dataset")
11
  dataset = super().load_dataset_from_hf()
12
+ return dataset
13
 
14
 
15
  def evaluate(self) -> dict[str, Any]:
src/deepeval/summarization_task.py CHANGED
@@ -9,7 +9,7 @@ class SummarizationTask(BaseTask):
9
 
10
  def load_dataset_from_hf(self):
11
  dataset = super().load_dataset_from_hf()
12
- return dataset.select(range(min(3, len(dataset))))
13
 
14
  def evaluate(self) -> dict[str, Any]:
15
  results = []
 
9
 
10
  def load_dataset_from_hf(self):
11
  dataset = super().load_dataset_from_hf()
12
+ return dataset
13
 
14
  def evaluate(self) -> dict[str, Any]:
15
  results = []
src/deepeval/toxicity_task.py CHANGED
@@ -9,7 +9,7 @@ class ToxicityTask(BaseTask):
9
 
10
  def load_dataset_from_hf(self):
11
  dataset = super().load_dataset_from_hf()
12
- return dataset.select(range(min(3, len(dataset))))
13
 
14
 
15
  def evaluate(self) -> dict[str, Any]:
 
9
 
10
  def load_dataset_from_hf(self):
11
  dataset = super().load_dataset_from_hf()
12
+ return dataset
13
 
14
 
15
  def evaluate(self) -> dict[str, Any]:
src/deepeval/truthfulness_task.py CHANGED
@@ -26,7 +26,7 @@ class TruthfulnessTask(BaseTask):
26
 
27
  def load_dataset_from_hf(self):
28
  dataset = super().load_dataset_from_hf()
29
- return dataset.select(range(min(3, len(dataset))))
30
 
31
  def evaluate(self) -> dict[str, Any]:
32
  results = []
 
26
 
27
  def load_dataset_from_hf(self):
28
  dataset = super().load_dataset_from_hf()
29
+ return dataset
30
 
31
  def evaluate(self) -> dict[str, Any]:
32
  results = []
src/deepeval/turkish_general_knowledge_task.py CHANGED
@@ -9,7 +9,7 @@ class TurkishGeneralKnowledgeTask(BaseTask):
9
 
10
  def load_dataset_from_hf(self):
11
  dataset = super().load_dataset_from_hf()
12
- return dataset.select(range(min(10, len(dataset))))
13
 
14
  def evaluate(self):
15
  responses = []
 
9
 
10
  def load_dataset_from_hf(self):
11
  dataset = super().load_dataset_from_hf()
12
+ return dataset
13
 
14
  def evaluate(self):
15
  responses = []