import evaluate
from evaluate.evaluation_suite import SubTask
# This is odd because the first dataset is multi-class and
# the second dataset is binary. The model I'm using has 4 labels
# and is fine-tuned on the first dataset.
# So what does it mean for this model to be evaluated on the second
# dataset?
metric = evaluate.combine(["accuracy"])
class Suite(evaluate.EvaluationSuite):

    def __init__(self, name):
        super().__init__(name)
        # Lowercase the input text before it is passed to the pipeline.
        self.preprocessor = lambda x: {"text": x["text"].lower()}
        self.suite = [
            SubTask(
                task_type="text-classification",
                data="hate_speech18",
                split="train[:1000]",
                args_for_task={
                    "metric": metric,
                    "input_column": "text",
                    "label_column": "label",
                    # Map the pipeline's output label strings onto the
                    # dataset's label values.
                    "label_mapping": {
                        "NO_HATE": 0.0,
                        "HATE": 1.0,
                        "RELATION": 1.0,
                        "IDK": 1.0
                    }
                }
            ),
            SubTask(
                task_type="text-classification",
                data="mteb/toxic_conversations_50k",
                split="test[:1000]",
                args_for_task={
                    "metric": metric,
                    "input_column": "text",
                    "label_column": "label",
                    "label_mapping": {
                        "NO_HATE": 0.0,
                        "HATE": 1.0
                    }
                }
            )
        ]
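

# Usage sketch (an assumption, not part of the original suite definition):
# instantiate the suite and run it against a text-classification checkpoint.
# The model id below is a placeholder; the 4-label model mentioned in the
# comment at the top of this file would go here.
if __name__ == "__main__":
    suite = Suite("hate-speech-suite")
    # run() evaluates every SubTask above and returns one result per task,
    # each containing the combined metric (here, accuracy).
    results = suite.run("your-username/your-4-label-hate-speech-model")  # placeholder model id
    print(results)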