ecemumutlu committed on
Commit
d1b4b00
·
1 Parent(s): c12a803

Add acc_std_err

Browse files
src/deepeval/sentiment_analysis_task.py CHANGED
@@ -1,4 +1,6 @@
1
  from src.deepeval.base_task import BaseTask
 
 
2
 
3
  class SentimentAnalysisTask(BaseTask):
4
  def __init__(self, model_name):
@@ -8,10 +10,10 @@ class SentimentAnalysisTask(BaseTask):
8
  return super().load_dataset_from_hf()
9
 
10
 
11
- def evaluate(self):
12
  responses = []
13
  total_count = len(self.dataset)
14
- true = 0
15
  for row in self.dataset:
16
  sentence = row["sentence"]
17
  prompt = f"Verilen metin hangi duyguyu ifade ediyor? {sentence}"
@@ -19,8 +21,9 @@ class SentimentAnalysisTask(BaseTask):
19
  answer = self.generate_response_mcqa(messages, choices=["positive", "negative", "neutral"])
20
  responses.append(answer)
21
  if row["sentiment"] == answer:
22
- true += 1
23
 
24
- print(responses)
25
- return true/total_count
 
26
 
 
1
  from src.deepeval.base_task import BaseTask
2
+ from src.deepeval.utils import accuracy, accuracy_standard_error
3
+ from typing import Any
4
 
5
  class SentimentAnalysisTask(BaseTask):
6
  def __init__(self, model_name):
 
10
  return super().load_dataset_from_hf()
11
 
12
 
13
+ def evaluate(self) -> dict[str, Any]:
14
  responses = []
15
  total_count = len(self.dataset)
16
+ n_correct = 0
17
  for row in self.dataset:
18
  sentence = row["sentence"]
19
  prompt = f"Verilen metin hangi duyguyu ifade ediyor? {sentence}"
 
21
  answer = self.generate_response_mcqa(messages, choices=["positive", "negative", "neutral"])
22
  responses.append(answer)
23
  if row["sentiment"] == answer:
24
+ n_correct += 1
25
 
26
+ acc = accuracy(n_correct, total_count)
27
+ acc_stderr = accuracy_standard_error(n_correct, total_count)
28
+ return {"acc": acc, "acc_stderr": acc_stderr}
29