from sklearn.metrics import roc_auc_score, f1_score, recall_score, precision_score
from KMVE_RG.modules.pycocoevalcap.bleu.bleu import Bleu
from KMVE_RG.modules.pycocoevalcap.meteor import Meteor
from KMVE_RG.modules.pycocoevalcap.rouge import Rouge


def compute_scores(gts, res):
    """
    Performs the MS COCO caption evaluation using the Python 3
    implementation (https://github.com/salaniz/pycocoevalcap).

    :param gts: Dictionary mapping image ids to their gold captions.
    :param res: Dictionary mapping image ids to their generated captions.
    :return: Dictionary with the evaluation score (the mean of the scores
        over all instances) for each measure.
    """
    # Set up scorers
    scorers = [
        (Bleu(4), ["BLEU_1", "BLEU_2", "BLEU_3", "BLEU_4"]),
        # (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L")
    ]
    eval_res = {}
    # Compute the score for each metric
    for scorer, method in scorers:
        try:
            score, scores = scorer.compute_score(gts, res, verbose=0)
        except TypeError:
            # Not every scorer accepts the `verbose` keyword
            score, scores = scorer.compute_score(gts, res)
        if isinstance(method, list):
            # Bleu returns one score per n-gram order
            for sc, m in zip(score, method):
                eval_res[m] = sc
        else:
            eval_res[method] = score
    return eval_res
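
# A minimal usage sketch for compute_scores (the ids and captions below are
# illustrative, not from the repository): the scorers expect each dictionary
# to map an id to a list of caption strings.
#
#     gts = {'img_1': ['the heart size is normal'],
#            'img_2': ['there is no focal consolidation']}
#     res = {'img_1': ['the heart is normal in size'],
#            'img_2': ['no focal consolidation is seen']}
#     compute_scores(gts, res)
#     # -> {'BLEU_1': ..., 'BLEU_2': ..., 'BLEU_3': ..., 'BLEU_4': ..., 'ROUGE_L': ...}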


def compute_mlc(gt, pred, label_set):
    """
    Computes multi-label classification metrics (per-label AUC-ROC plus
    macro/micro F1, recall, and precision) for binary label matrices of
    shape (num_samples, num_labels).
    """
    res_mlc = {}
    avg_aucroc = 0
    for i, label in enumerate(label_set):
        res_mlc['AUCROC_' + label] = roc_auc_score(gt[:, i], pred[:, i])
        avg_aucroc += res_mlc['AUCROC_' + label]
    res_mlc['AVG_AUCROC'] = avg_aucroc / len(label_set)
    res_mlc['F1_MACRO'] = f1_score(gt, pred, average="macro")
    res_mlc['F1_MICRO'] = f1_score(gt, pred, average="micro")
    res_mlc['RECALL_MACRO'] = recall_score(gt, pred, average="macro")
    res_mlc['RECALL_MICRO'] = recall_score(gt, pred, average="micro")
    res_mlc['PRECISION_MACRO'] = precision_score(gt, pred, average="macro")
    res_mlc['PRECISION_MICRO'] = precision_score(gt, pred, average="micro")
    return res_mlc
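
# A minimal usage sketch for compute_mlc (illustrative data): `gt` and `pred`
# are binary arrays of shape (num_samples, num_labels), one column per entry
# in `label_set`. Note that roc_auc_score raises a ValueError if a
# ground-truth column contains only one class.
#
#     import numpy as np
#     gt = np.array([[1, 0], [0, 1], [1, 1]])
#     pred = np.array([[1, 0], [0, 0], [1, 1]])
#     compute_mlc(gt, pred, ['cardiomegaly', 'edema'])
#     # -> {'AUCROC_cardiomegaly': 1.0, 'AUCROC_edema': 0.75, ...}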


class MetricWrapper(object):
    def __init__(self, label_set):
        self.label_set = label_set

    def __call__(self, gts, res, gts_mlc, res_mlc):
        # Combine the caption metrics and the multi-label
        # classification metrics into a single result dictionary
        eval_res = compute_scores(gts, res)
        eval_res_mlc = compute_mlc(gts_mlc, res_mlc, self.label_set)
        eval_res.update(**eval_res_mlc)
        return eval_res
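

# A minimal end-to-end sketch; the two-label tag set and the example reports
# below are assumptions for illustration, not part of the repository.
if __name__ == "__main__":
    import numpy as np

    metric_fn = MetricWrapper(label_set=['cardiomegaly', 'edema'])
    gts = {'img_1': ['the heart size is normal'],
           'img_2': ['there is no focal consolidation']}
    res = {'img_1': ['the heart is normal in size'],
           'img_2': ['no focal consolidation is seen']}
    gts_mlc = np.array([[1, 0], [0, 1]])
    res_mlc = np.array([[1, 0], [0, 1]])
    print(metric_fn(gts, res, gts_mlc, res_mlc))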