from refTools.evaluation.tokenizer.ptbtokenizer import PTBTokenizer
from refTools.evaluation.bleu.bleu import Bleu
from refTools.evaluation.meteor.meteor import Meteor
from refTools.evaluation.rouge.rouge import Rouge
from refTools.evaluation.cider.cider import Cider

"""
Input: refer and Res = [{ref_id, sent}]

Things of interest
evalRefs  - list of ['ref_id', 'CIDEr', 'Bleu_1', 'Bleu_2', 'Bleu_3', 'Bleu_4', 'ROUGE_L', 'METEOR']
eval      - dict of {metric: score}
refToEval - dict of {ref_id: ['ref_id', 'CIDEr', 'Bleu_1', 'Bleu_2', 'Bleu_3', 'Bleu_4', 'ROUGE_L', 'METEOR']}
"""

class RefEvaluation:
    def __init__(self, refer, Res):
        """
        :param refer: refer class of current dataset
        :param Res: [{'ref_id', 'sent'}]
        """
        self.evalRefs = []
        self.eval = {}
        self.refToEval = {}
        self.refer = refer
        self.Res = Res

    def evaluate(self):
        evalRefIds = [ann['ref_id'] for ann in self.Res]

        refToGts = {}
        for ref_id in evalRefIds:
            ref = self.refer.Refs[ref_id]
            gt_sents = [sent['sent'].encode('ascii', 'ignore').decode('ascii') for sent in ref['sentences']]  # up to 3 expressions
            refToGts[ref_id] = gt_sents
        refToRes = {ann['ref_id']: [ann['sent']] for ann in self.Res}
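        # both mappings are {ref_id: [sentence, ...]}; each prediction is wrapped in a
        # single-element list so ground truths and results share the format the scorers expect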

        print('tokenization...')
        tokenizer = PTBTokenizer()
        self.refToRes = tokenizer.tokenize(refToRes)
        self.refToGts = tokenizer.tokenize(refToGts)
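        # PTBTokenizer (as in the coco-caption toolkit) lowercases and strips punctuation,
        # returning {ref_id: [tokenized sentence, ...]} for both predictions and ground truths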

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(), "METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr")
        ]
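        # each scorer returns an overall corpus-level score plus a list of per-ref scores;
        # Bleu(4) returns four of each (one per n-gram order), hence its list of metric names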

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score...' % (scorer.method()))
            score, scores = scorer.compute_score(self.refToGts, self.refToRes)
            if isinstance(method, list):
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setRefToEvalRefs(scs, self.refToGts.keys(), m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setRefToEvalRefs(scores, self.refToGts.keys(), method)
                print("%s: %0.3f" % (method, score))
        self.setEvalRefs()

    def setEval(self, score, method):
        self.eval[method] = score

    def setRefToEvalRefs(self, scores, refIds, method):
        for refId, score in zip(refIds, scores):
            if refId not in self.refToEval:
                self.refToEval[refId] = {}
                self.refToEval[refId]["ref_id"] = refId
            self.refToEval[refId][method] = score

    def setEvalRefs(self):
        self.evalRefs = [evalRef for refId, evalRef in self.refToEval.items()]


if __name__ == '__main__':

    import os.path as osp
    import sys
    ROOT_DIR = osp.abspath(osp.join(osp.dirname(__file__), '..', '..'))
    sys.path.insert(0, osp.join(ROOT_DIR, 'lib', 'datasets'))
    from refer import REFER

    # load refer of dataset
    dataset = 'refcoco'
    refer = REFER(dataset, splitBy='google')

    # mimic some Res
    val_refIds = refer.getRefIds(split='test')
    ref_id = 49767
    print("GD: %s" % refer.Refs[ref_id]['sentences'])
    Res = [{'ref_id': ref_id, 'sent': 'left bottle'}]

    # evaluate some refer expressions
    refEval = RefEvaluation(refer, Res)
    refEval.evaluate()

    # print output evaluation scores
    for metric, score in refEval.eval.items():
        print('%s: %.3f' % (metric, score))
    # demo how to use evalRefs to retrieve a low-score result
    # evals = [eva for eva in refEval.evalRefs if eva['CIDEr'] < 30]
    # print('ground truth sents')
    # refId = evals[0]['ref_id']
    # print('refId: %s' % refId)
    # print([sent['sent'] for sent in refer.Refs[refId]['sentences']])
    #
    # print('generated sent (CIDEr score %0.1f)' % (evals[0]['CIDEr']))
    # print(refEval.refToEval[8])