Spaces:
Paused
Paused
Commit
·
d1b4b00
1
Parent(s):
c12a803
Add acc_std_err
Browse files
src/deepeval/sentiment_analysis_task.py
CHANGED
@@ -1,4 +1,6 @@
|
|
1 |
from src.deepeval.base_task import BaseTask
|
|
|
|
|
2 |
|
3 |
class SentimentAnalysisTask(BaseTask):
|
4 |
def __init__(self, model_name):
|
@@ -8,10 +10,10 @@ class SentimentAnalysisTask(BaseTask):
|
|
8 |
return super().load_dataset_from_hf()
|
9 |
|
10 |
|
11 |
-
def evaluate(self):
|
12 |
responses = []
|
13 |
total_count = len(self.dataset)
|
14 |
-
|
15 |
for row in self.dataset:
|
16 |
sentence = row["sentence"]
|
17 |
prompt = f"Verilen metin hangi duyguyu ifade ediyor? {sentence}"
|
@@ -19,8 +21,9 @@ class SentimentAnalysisTask(BaseTask):
|
|
19 |
answer = self.generate_response_mcqa(messages, choices=["positive", "negative", "neutral"])
|
20 |
responses.append(answer)
|
21 |
if row["sentiment"] == answer:
|
22 |
-
|
23 |
|
24 |
-
|
25 |
-
|
|
|
26 |
|
|
|
1 |
from src.deepeval.base_task import BaseTask
|
2 |
+
from src.deepeval.utils import accuracy, accuracy_standard_error
|
3 |
+
from typing import Any
|
4 |
|
5 |
class SentimentAnalysisTask(BaseTask):
|
6 |
def __init__(self, model_name):
|
|
|
10 |
return super().load_dataset_from_hf()
|
11 |
|
12 |
|
13 |
+
def evaluate(self) -> dict[str, Any]:
|
14 |
responses = []
|
15 |
total_count = len(self.dataset)
|
16 |
+
n_correct = 0
|
17 |
for row in self.dataset:
|
18 |
sentence = row["sentence"]
|
19 |
prompt = f"Verilen metin hangi duyguyu ifade ediyor? {sentence}"
|
|
|
21 |
answer = self.generate_response_mcqa(messages, choices=["positive", "negative", "neutral"])
|
22 |
responses.append(answer)
|
23 |
if row["sentiment"] == answer:
|
24 |
+
n_correct += 1
|
25 |
|
26 |
+
acc = accuracy(n_correct, total_count)
|
27 |
+
acc_stderr = accuracy_standard_error(n_correct, total_count)
|
28 |
+
return {"acc": acc, "acc_stderr": acc_stderr}
|
29 |
|