Spaces:
Paused
Paused
from src.deepeval.base_task import BaseTask | |
from src.deepeval.utils import accuracy, accuracy_standard_error | |
from typing import Any | |
class SentimentAnalysisTask(BaseTask):
    """Multiple-choice sentiment evaluation on ``metunlp/sentiment_analysis_tr``.

    Each dataset row's ``sentence`` is placed in a prompt together with three
    lettered options (A: positive, B: negative, C: neutral). The model's
    answer is compared with the letter that corresponds to the row's
    ``sentiment`` label.
    """

    def __init__(self, model_name):
        """Create the task for ``model_name``.

        Args:
            model_name: Forwarded to ``BaseTask.__init__`` as ``model_name``.
        """
        super().__init__("metunlp/sentiment_analysis_tr", model_name=model_name)

    def load_dataset_from_hf(self):
        """Log a progress message and return the dataset loaded by the parent."""
        print("Loading the dataset")
        return super().load_dataset_from_hf()

    def evaluate(self) -> dict[str, Any]:
        """Run the model over ``self.dataset`` and score its answers.

        Returns:
            A dict with ``"acc"`` (result of ``accuracy``) and ``"acc_stderr"``
            (result of ``accuracy_standard_error``).
        """
        # The options and their rendering are identical for every row, so they
        # are built once instead of once per iteration.
        choices = ["positive", "negative", "neutral"]
        formatted_choices = "\n".join(
            f"{chr(65 + i)}: {choice}" for i, choice in enumerate(choices)
        )
        # Gold letters are derived from the same list that produces the prompt
        # so that the two can never disagree.
        letter_by_sentiment = {
            choice: chr(65 + i) for i, choice in enumerate(choices)
        }

        total_count = len(self.dataset)
        n_correct = 0
        for row in self.dataset:
            sentence = row["sentence"]
            prompt = f"Verilen metin hangi duyguyu ifade ediyor? {sentence}\n {formatted_choices}"
            # A fresh list is passed on each call, matching the original
            # per-iteration list, in case the helper mutates its argument.
            answer = self.generate_response_mcqa_multi_token(
                prompt, choices=list(choices)
            )

            # Unknown labels map to None and therefore never count as correct.
            correct_answer_letter = letter_by_sentiment.get(row["sentiment"])
            model_answer_cleaned = (
                answer.strip().replace("\n", "").replace(" ", "").upper()
            )
            if correct_answer_letter == model_answer_cleaned:
                n_correct += 1

        acc = accuracy(n_correct, total_count)
        acc_stderr = accuracy_standard_error(acc, total_count)
        return {"acc": acc, "acc_stderr": acc_stderr}