# model-eval-be / src/deepeval/sentiment_analysis_task.py
# Author: Ahmet Kaan Sever
# Commit 8a3d32e: Removed unnecessary debug prints and timestamps now return seconds.
from src.deepeval.base_task import BaseTask
from src.deepeval.utils import accuracy, accuracy_standard_error
from typing import Any
class SentimentAnalysisTask(BaseTask):
    """Three-way sentiment MCQA evaluation task (positive / negative / neutral).

    Wraps the ``metunlp/sentiment_analysis_tr`` Hugging Face dataset and scores
    a model by asking a Turkish multiple-choice question per sentence, then
    comparing the model's answer letter (A/B/C) against the gold sentiment.
    """

    # Fixed answer options; the letter A/B/C is derived from list position.
    CHOICES = ["positive", "negative", "neutral"]

    def __init__(self, model_name):
        """Initialize the task with the sentiment dataset and the given model.

        :param model_name: identifier of the model under evaluation,
            forwarded to ``BaseTask``.
        """
        super().__init__("metunlp/sentiment_analysis_tr", model_name=model_name)

    def load_dataset_from_hf(self):
        """Load the task dataset via the base-class loader and return it."""
        print("Loading the dataset")
        return super().load_dataset_from_hf()

    def evaluate(self) -> dict[str, Any]:
        """Run MCQA evaluation over every dataset row.

        :returns: dict with ``acc`` (accuracy in [0, 1]) and ``acc_stderr``
            (its standard error).
        """
        responses = []
        n_correct = 0
        total_count = len(self.dataset)

        # Loop-invariant work hoisted out of the per-row loop:
        # dict lookup replaces the original chained ternaries (unknown gold
        # labels map to None, which can never match a cleaned answer string).
        label_to_letter = {
            choice: chr(65 + i) for i, choice in enumerate(self.CHOICES)
        }
        formatted_choices = "\n".join(
            f"{chr(65 + i)}: {choice}" for i, choice in enumerate(self.CHOICES)
        )

        for row in self.dataset:
            sentence = row["sentence"]
            prompt = f"Verilen metin hangi duyguyu ifade ediyor? {sentence}\n {formatted_choices}"
            answer = self.generate_response_mcqa_multi_token(
                prompt, choices=self.CHOICES
            )
            responses.append(answer)

            correct_answer_letter = label_to_letter.get(row["sentiment"])
            # Normalize the model output: drop whitespace/newlines, uppercase.
            # Guard with `or ""` so a None answer counts as wrong instead of
            # raising AttributeError.
            model_answer_cleaned = (
                (answer or "").strip().replace("\n", "").replace(" ", "").upper()
            )
            if correct_answer_letter == model_answer_cleaned:
                n_correct += 1

        acc = accuracy(n_correct, total_count)
        acc_stderr = accuracy_standard_error(acc, total_count)
        return {"acc": acc, "acc_stderr": acc_stderr}