Ahmet Kaan Sever committed on
Commit
f6890a5
·
1 Parent(s): 211b909

Changed dataset size to default and fixed imports

Browse files
src/deepeval/bias.py CHANGED
@@ -11,7 +11,7 @@ class BiasTask(BaseTask):
11
 
12
  def load_dataset_from_hf(self):
13
  dataset = super().load_dataset_from_hf()
14
- return dataset.select(range(min(1, len(dataset))))
15
 
16
 
17
  def evaluate(self) -> dict[str, Any]:
 
11
 
12
  def load_dataset_from_hf(self):
13
  dataset = super().load_dataset_from_hf()
14
+ return dataset
15
 
16
 
17
  def evaluate(self) -> dict[str, Any]:
src/deepeval/instruction_following_task.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from src.deepeval.base_task import BaseTask
2
  from deepeval.metrics import PromptAlignmentMetric
3
  from deepeval.test_case import LLMTestCase
 
1
+ import datetime
2
  from src.deepeval.base_task import BaseTask
3
  from deepeval.metrics import PromptAlignmentMetric
4
  from deepeval.test_case import LLMTestCase
src/deepeval/math.py CHANGED
@@ -10,7 +10,7 @@ class MathTask(BaseTask):
10
 
11
  def load_dataset_from_hf(self):
12
  dataset = super().load_dataset_from_hf()
13
- return dataset.select(range(min(1, len(dataset))))
14
 
15
  def generate_response_oeqa_multi_token(self, msg,max_new_tokens: int = 128):
16
  """
 
10
 
11
  def load_dataset_from_hf(self):
12
  dataset = super().load_dataset_from_hf()
13
+ return dataset
14
 
15
  def generate_response_oeqa_multi_token(self, msg,max_new_tokens: int = 128):
16
  """
src/deepeval/mmlu.py CHANGED
@@ -15,7 +15,7 @@ class MMLUTask(BaseTask):
15
  super().__init__("metunlp/mmlu_tr", model_name=model_name)
16
 
17
  def load_dataset_from_hf(self):
18
- evaluate_count = 1
19
  print("Loading dataset from Hugging Face.")
20
  dataset_dict = {}
21
  for subset in self.subsets:
 
15
  super().__init__("metunlp/mmlu_tr", model_name=model_name)
16
 
17
  def load_dataset_from_hf(self):
18
+ evaluate_count = 50
19
  print("Loading dataset from Hugging Face.")
20
  dataset_dict = {}
21
  for subset in self.subsets:
src/deepeval/ner.py CHANGED
@@ -10,7 +10,7 @@ class NERTask(BaseTask):
10
 
11
  def load_dataset_from_hf(self):
12
  dataset = super().load_dataset_from_hf()
13
- return dataset.select(range(min(1, len(dataset))))
14
 
15
  def generate_response_oeqa_multi_token(self, msg,max_new_tokens: int = 128):
16
  """
 
10
 
11
  def load_dataset_from_hf(self):
12
  dataset = super().load_dataset_from_hf()
13
+ return dataset
14
 
15
  def generate_response_oeqa_multi_token(self, msg,max_new_tokens: int = 128):
16
  """
src/deepeval/pos.py CHANGED
@@ -10,7 +10,7 @@ class POSTask(BaseTask):
10
 
11
  def load_dataset_from_hf(self):
12
  dataset = super().load_dataset_from_hf()
13
- return dataset.select(range(min(1, len(dataset))))
14
 
15
  def generate_response_oeqa_multi_token(self, msg,max_new_tokens: int = 128):
16
  """
 
10
 
11
  def load_dataset_from_hf(self):
12
  dataset = super().load_dataset_from_hf()
13
+ return dataset
14
 
15
  def generate_response_oeqa_multi_token(self, msg,max_new_tokens: int = 128):
16
  """
src/deepeval/sts.py CHANGED
@@ -17,7 +17,7 @@ class STSTask(BaseTask):
17
 
18
  def load_dataset_from_hf(self):
19
  dataset = super().load_dataset_from_hf()
20
- return dataset.select(range(min(1, len(dataset))))
21
 
22
  def generate_response_sts_multi_token(self, msg, max_new_tokens=5, choices: list = []):
23
  """
 
17
 
18
  def load_dataset_from_hf(self):
19
  dataset = super().load_dataset_from_hf()
20
+ return dataset
21
 
22
  def generate_response_sts_multi_token(self, msg, max_new_tokens=5, choices: list = []):
23
  """
src/deepeval/summarization_task.py CHANGED
@@ -1,4 +1,4 @@
1
- import datetime
2
  from src.deepeval.base_task import BaseTask
3
  from deepeval.metrics import SummarizationMetric
4
  from deepeval.test_case import LLMTestCase
 
1
+ from datetime import datetime
2
  from src.deepeval.base_task import BaseTask
3
  from deepeval.metrics import SummarizationMetric
4
  from deepeval.test_case import LLMTestCase
src/deepeval/topic_detection.py CHANGED
@@ -11,7 +11,7 @@ class TopicDetectionTask(BaseTask):
11
 
12
  def load_dataset_from_hf(self):
13
  dataset = super().load_dataset_from_hf()
14
- return dataset.select(range(min(10, len(dataset))))
15
 
16
 
17
  def evaluate(self) -> dict[str, Any]:
 
11
 
12
  def load_dataset_from_hf(self):
13
  dataset = super().load_dataset_from_hf()
14
+ return dataset
15
 
16
 
17
  def evaluate(self) -> dict[str, Any]:
src/deepeval/toxicity_task.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from src.deepeval.base_task import BaseTask
2
  from deepeval.metrics import ToxicityMetric
3
  from deepeval.test_case import LLMTestCase
 
1
+ import datetime
2
  from src.deepeval.base_task import BaseTask
3
  from deepeval.metrics import ToxicityMetric
4
  from deepeval.test_case import LLMTestCase
src/deepeval/truthfulness_task.py CHANGED
@@ -1,4 +1,4 @@
1
- import datetime
2
  from src.deepeval.base_task import BaseTask
3
  from deepeval.test_case import LLMTestCase
4
  from typing import Any
 
1
+ from datetime import datetime
2
  from src.deepeval.base_task import BaseTask
3
  from deepeval.test_case import LLMTestCase
4
  from typing import Any
src/deepeval/turkish_vocabulary.py CHANGED
@@ -14,7 +14,7 @@ class TurkishVocabularyTask(BaseTask):
14
  super().__init__("metunlp/turkish_vocabulary", model_name=model_name)
15
 
16
  def load_dataset_from_hf(self):
17
- evaluate_count = 1
18
  print("Loading dataset from Hugging Face.")
19
  dataset_dict = {}
20
  for subset in self.subsets:
 
14
  super().__init__("metunlp/turkish_vocabulary", model_name=model_name)
15
 
16
  def load_dataset_from_hf(self):
17
+ evaluate_count = 50
18
  print("Loading dataset from Hugging Face.")
19
  dataset_dict = {}
20
  for subset in self.subsets: