Spaces:
Sleeping
Sleeping
Ahmet Kaan Sever
committed on
Commit
·
74312c4
1
Parent(s):
7a6ddbf
Now returning the correct dict format.
Browse files
src/deepeval/turkish_general_knowledge_task.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
from src.deepeval.base_task import BaseTask
|
2 |
from collections import defaultdict
|
|
|
3 |
import ast
|
4 |
|
5 |
class TurkishGeneralKnowledgeTask(BaseTask):
|
@@ -61,9 +62,11 @@ class TurkishGeneralKnowledgeTask(BaseTask):
|
|
61 |
|
62 |
# Print results categorized by difficulty
|
63 |
for category, stats in difficulty_results.items():
|
64 |
-
|
65 |
-
print(f"{category.capitalize()} Accuracy: {
|
66 |
|
67 |
print("Results:", responses)
|
68 |
print("Overall Accuracy:", true / total_count)
|
69 |
-
|
|
|
|
|
|
1 |
from src.deepeval.base_task import BaseTask
|
2 |
from collections import defaultdict
|
3 |
+
from src.deepeval.utils import accuracy, accuracy_standard_error
|
4 |
import ast
|
5 |
|
6 |
class TurkishGeneralKnowledgeTask(BaseTask):
|
|
|
62 |
|
63 |
# Print results categorized by difficulty
|
64 |
for category, stats in difficulty_results.items():
|
65 |
+
calculatedAccuracy = stats['correct'] / stats['total'] if stats['total'] > 0 else 0
|
66 |
+
print(f"{category.capitalize()} Accuracy: {calculatedAccuracy:.2%} ({stats['correct']}/{stats['total']})")
|
67 |
|
68 |
print("Results:", responses)
|
69 |
print("Overall Accuracy:", true / total_count)
|
70 |
+
acc = accuracy(true, total_count)
|
71 |
+
acc_stderr = accuracy_standard_error(acc, total_count)
|
72 |
+
return {"acc": acc, "acc_stderr": acc_stderr}
|