Spaces:
Running
on
L4
Running
on
L4
Ahmet Kaan Sever
committed on
Commit
·
f6890a5
1
Parent(s):
211b909
Changed dataset size to default and fixed imports
Browse files
- src/deepeval/bias.py +1 -1
- src/deepeval/instruction_following_task.py +1 -0
- src/deepeval/math.py +1 -1
- src/deepeval/mmlu.py +1 -1
- src/deepeval/ner.py +1 -1
- src/deepeval/pos.py +1 -1
- src/deepeval/sts.py +1 -1
- src/deepeval/summarization_task.py +1 -1
- src/deepeval/topic_detection.py +1 -1
- src/deepeval/toxicity_task.py +1 -0
- src/deepeval/truthfulness_task.py +1 -1
- src/deepeval/turkish_vocabulary.py +1 -1
src/deepeval/bias.py
CHANGED
@@ -11,7 +11,7 @@ class BiasTask(BaseTask):
|
|
11 |
|
12 |
def load_dataset_from_hf(self):
|
13 |
dataset = super().load_dataset_from_hf()
|
14 |
-
return dataset
|
15 |
|
16 |
|
17 |
def evaluate(self) -> dict[str, Any]:
|
|
|
11 |
|
12 |
def load_dataset_from_hf(self):
|
13 |
dataset = super().load_dataset_from_hf()
|
14 |
+
return dataset
|
15 |
|
16 |
|
17 |
def evaluate(self) -> dict[str, Any]:
|
src/deepeval/instruction_following_task.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
from src.deepeval.base_task import BaseTask
|
2 |
from deepeval.metrics import PromptAlignmentMetric
|
3 |
from deepeval.test_case import LLMTestCase
|
|
|
1 |
+
import datetime
|
2 |
from src.deepeval.base_task import BaseTask
|
3 |
from deepeval.metrics import PromptAlignmentMetric
|
4 |
from deepeval.test_case import LLMTestCase
|
src/deepeval/math.py
CHANGED
@@ -10,7 +10,7 @@ class MathTask(BaseTask):
|
|
10 |
|
11 |
def load_dataset_from_hf(self):
|
12 |
dataset = super().load_dataset_from_hf()
|
13 |
-
return dataset
|
14 |
|
15 |
def generate_response_oeqa_multi_token(self, msg,max_new_tokens: int = 128):
|
16 |
"""
|
|
|
10 |
|
11 |
def load_dataset_from_hf(self):
|
12 |
dataset = super().load_dataset_from_hf()
|
13 |
+
return dataset
|
14 |
|
15 |
def generate_response_oeqa_multi_token(self, msg,max_new_tokens: int = 128):
|
16 |
"""
|
src/deepeval/mmlu.py
CHANGED
@@ -15,7 +15,7 @@ class MMLUTask(BaseTask):
|
|
15 |
super().__init__("metunlp/mmlu_tr", model_name=model_name)
|
16 |
|
17 |
def load_dataset_from_hf(self):
|
18 |
-
evaluate_count =
|
19 |
print("Loading dataset from Hugging Face.")
|
20 |
dataset_dict = {}
|
21 |
for subset in self.subsets:
|
|
|
15 |
super().__init__("metunlp/mmlu_tr", model_name=model_name)
|
16 |
|
17 |
def load_dataset_from_hf(self):
|
18 |
+
evaluate_count = 50
|
19 |
print("Loading dataset from Hugging Face.")
|
20 |
dataset_dict = {}
|
21 |
for subset in self.subsets:
|
src/deepeval/ner.py
CHANGED
@@ -10,7 +10,7 @@ class NERTask(BaseTask):
|
|
10 |
|
11 |
def load_dataset_from_hf(self):
|
12 |
dataset = super().load_dataset_from_hf()
|
13 |
-
return dataset
|
14 |
|
15 |
def generate_response_oeqa_multi_token(self, msg,max_new_tokens: int = 128):
|
16 |
"""
|
|
|
10 |
|
11 |
def load_dataset_from_hf(self):
|
12 |
dataset = super().load_dataset_from_hf()
|
13 |
+
return dataset
|
14 |
|
15 |
def generate_response_oeqa_multi_token(self, msg,max_new_tokens: int = 128):
|
16 |
"""
|
src/deepeval/pos.py
CHANGED
@@ -10,7 +10,7 @@ class POSTask(BaseTask):
|
|
10 |
|
11 |
def load_dataset_from_hf(self):
|
12 |
dataset = super().load_dataset_from_hf()
|
13 |
-
return dataset
|
14 |
|
15 |
def generate_response_oeqa_multi_token(self, msg,max_new_tokens: int = 128):
|
16 |
"""
|
|
|
10 |
|
11 |
def load_dataset_from_hf(self):
|
12 |
dataset = super().load_dataset_from_hf()
|
13 |
+
return dataset
|
14 |
|
15 |
def generate_response_oeqa_multi_token(self, msg,max_new_tokens: int = 128):
|
16 |
"""
|
src/deepeval/sts.py
CHANGED
@@ -17,7 +17,7 @@ class STSTask(BaseTask):
|
|
17 |
|
18 |
def load_dataset_from_hf(self):
|
19 |
dataset = super().load_dataset_from_hf()
|
20 |
-
return dataset
|
21 |
|
22 |
def generate_response_sts_multi_token(self, msg, max_new_tokens=5, choices: list = []):
|
23 |
"""
|
|
|
17 |
|
18 |
def load_dataset_from_hf(self):
|
19 |
dataset = super().load_dataset_from_hf()
|
20 |
+
return dataset
|
21 |
|
22 |
def generate_response_sts_multi_token(self, msg, max_new_tokens=5, choices: list = []):
|
23 |
"""
|
src/deepeval/summarization_task.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
import datetime
|
2 |
from src.deepeval.base_task import BaseTask
|
3 |
from deepeval.metrics import SummarizationMetric
|
4 |
from deepeval.test_case import LLMTestCase
|
|
|
1 |
+
from datetime import datetime
|
2 |
from src.deepeval.base_task import BaseTask
|
3 |
from deepeval.metrics import SummarizationMetric
|
4 |
from deepeval.test_case import LLMTestCase
|
src/deepeval/topic_detection.py
CHANGED
@@ -11,7 +11,7 @@ class TopicDetectionTask(BaseTask):
|
|
11 |
|
12 |
def load_dataset_from_hf(self):
|
13 |
dataset = super().load_dataset_from_hf()
|
14 |
-
return dataset
|
15 |
|
16 |
|
17 |
def evaluate(self) -> dict[str, Any]:
|
|
|
11 |
|
12 |
def load_dataset_from_hf(self):
|
13 |
dataset = super().load_dataset_from_hf()
|
14 |
+
return dataset
|
15 |
|
16 |
|
17 |
def evaluate(self) -> dict[str, Any]:
|
src/deepeval/toxicity_task.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
from src.deepeval.base_task import BaseTask
|
2 |
from deepeval.metrics import ToxicityMetric
|
3 |
from deepeval.test_case import LLMTestCase
|
|
|
1 |
+
import datetime
|
2 |
from src.deepeval.base_task import BaseTask
|
3 |
from deepeval.metrics import ToxicityMetric
|
4 |
from deepeval.test_case import LLMTestCase
|
src/deepeval/truthfulness_task.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
import datetime
|
2 |
from src.deepeval.base_task import BaseTask
|
3 |
from deepeval.test_case import LLMTestCase
|
4 |
from typing import Any
|
|
|
1 |
+
from datetime import datetime
|
2 |
from src.deepeval.base_task import BaseTask
|
3 |
from deepeval.test_case import LLMTestCase
|
4 |
from typing import Any
|
src/deepeval/turkish_vocabulary.py
CHANGED
@@ -14,7 +14,7 @@ class TurkishVocabularyTask(BaseTask):
|
|
14 |
super().__init__("metunlp/turkish_vocabulary", model_name=model_name)
|
15 |
|
16 |
def load_dataset_from_hf(self):
|
17 |
-
evaluate_count =
|
18 |
print("Loading dataset from Hugging Face.")
|
19 |
dataset_dict = {}
|
20 |
for subset in self.subsets:
|
|
|
14 |
super().__init__("metunlp/turkish_vocabulary", model_name=model_name)
|
15 |
|
16 |
def load_dataset_from_hf(self):
|
17 |
+
evaluate_count = 50
|
18 |
print("Loading dataset from Hugging Face.")
|
19 |
dataset_dict = {}
|
20 |
for subset in self.subsets:
|