|
import os |
|
import sys |
|
import copy |
|
import pickle |
|
|
|
import numpy as np |
|
import pandas as pd |
|
import fire |
|
|
|
# Add the current working directory to the module search path.
sys.path.append(os.getcwd())
|
|
|
|
|
def coco_score(refs, pred, scorer):
    """Return the all-reference score minus the mean leave-one-out score.

    For every round one reference per key is held out, ``pred`` is scored
    against the remaining references, and the per-round scores are averaged.
    That average is subtracted from the score obtained with every reference.

    Args:
        refs: Mapping from key to a sequence of references. The number of
            rounds is the number of references of the first key. ``refs``
            itself is never modified.
        pred: Predictions, passed through to ``scorer.compute_score``
            unchanged.
        scorer: Object providing ``method()`` and
            ``compute_score(refs, pred)``; the latter must return a 2-tuple
            whose first item is the score.

    Returns:
        ``score_allref - mean_leave_one_out_score``. When
        ``scorer.method()`` is ``"Bleu"`` the scores are accumulated in a
        4-element array, so the result is an array as well.

    Raises:
        ValueError: If ``refs`` is empty or its first key has no references.
    """
    if not refs:
        raise ValueError("refs must contain at least one key")

    num_cap_per_audio = len(refs[next(iter(refs))])
    if num_cap_per_audio == 0:
        raise ValueError("the first key in refs has no reference captions")

    is_bleu = scorer.method() == "Bleu"
    scores = np.zeros(4) if is_bleu else 0

    # Rotate shallow copies instead of the caller's lists, so ``refs`` stays
    # intact even if the scorer raises in the middle of a round.
    rotating = {key: list(captions) for key, captions in refs.items()}

    for _round in range(num_cap_per_audio):
        # Hold out the last reference of every key for this round ...
        held_out = {key: captions.pop() for key, captions in rotating.items()}
        score, _ = scorer.compute_score(rotating, pred)
        scores += np.array(score) if is_bleu else score
        # ... and put it back at the front, so the next round holds out a
        # different reference.
        for key, captions in rotating.items():
            captions.insert(0, held_out[key])

    score = scores / num_cap_per_audio

    score_allref, _ = scorer.compute_score(rotating, pred)
    return score_allref - score
|
|
|
def embedding_score(refs, pred, scorer):
    """Return the all-reference score minus the mean leave-one-out score.

    In round ``i`` the ``i``-th reference of every eligible key is dropped and
    ``pred`` is scored against the rest; the round scores are averaged and
    subtracted from the score obtained with the complete ``refs``.

    Args:
        refs: Mapping from key to an indexable, sliceable collection of
            references that ``np.concatenate`` can join (presumably a NumPy
            array with one row per reference -- confirm against the caller).
            ``refs`` is not modified.
        pred: Predictions, passed through to ``scorer.compute_score``
            unchanged.
        scorer: Object whose ``compute_score(refs, pred)`` returns a 2-tuple
            with the score as first item.

    Returns:
        ``score_allref - mean_leave_one_out_score``.

    Raises:
        ValueError: If ``refs`` is empty or its first key has no references.
    """
    if not refs:
        raise ValueError("refs must contain at least one key")

    num_cap_per_audio = len(refs[next(iter(refs))])
    if num_cap_per_audio == 0:
        raise ValueError("the first key in refs has no references")

    # Only keys holding exactly ``num_cap_per_audio`` references take part in
    # the leave-one-out rounds; the all-reference score below still uses
    # every key.
    complete_keys = [key for key in refs if len(refs[key]) == num_cap_per_audio]

    scores = 0
    for i in range(num_cap_per_audio):
        refs_i = {
            key: np.concatenate([refs[key][:i], refs[key][i + 1:]])
            for key in complete_keys
        }
        score, _ = scorer.compute_score(refs_i, pred)
        scores += score

    score = scores / num_cap_per_audio

    score_allref, _ = scorer.compute_score(refs, pred)
    return score_allref - score
|
|
|
def main(output_file, eval_caption_file, eval_embedding_file, output, zh=False):
    """Score predictions with ``coco_score`` and write the differences to a file.

    Args:
        output_file: JSON file read with ``pd.read_json``; its ``filename``
            and ``tokens`` columns provide the predictions, grouped by the
            file name without directory and extension.
        eval_caption_file: JSON file read with ``pd.read_json``; references
            are grouped by its ``key`` column and taken from ``tokens`` when
            ``zh`` is true, otherwise from ``caption``.
        eval_embedding_file: Accepted but not used by this function.
        output: Path of the text report to write.
        zh: Forwarded to the Bleu/Cider/Rouge scorers; when true, Meteor and
            SPICE are skipped.
    """

    def file_stem(path):
        return os.path.splitext(os.path.basename(path))[0]

    prediction_df = pd.read_json(output_file)
    prediction_df["key"] = prediction_df["filename"].apply(file_stem)
    pred = prediction_df.groupby("key")["tokens"].apply(list).to_dict()

    reference_column = "tokens" if zh else "caption"
    reference_df = pd.read_json(eval_caption_file)
    refs = reference_df.groupby("key")[reference_column].apply(list).to_dict()

    from pycocoevalcap.bleu.bleu import Bleu
    from pycocoevalcap.cider.cider import Cider
    from pycocoevalcap.rouge.rouge import Rouge

    def score_with(metric):
        # Every metric gets its own deep copy of the references.
        return coco_score(copy.deepcopy(refs), pred, metric)

    bleu_scores = score_with(Bleu(zh=zh))

    # (format string, value) pairs for the single-valued metrics, in the
    # order they are written to the report.
    single_valued = [("CIDEr: {:6.3f}\n", score_with(Cider(zh=zh)))]
    single_valued.append(("ROUGE: {:6.3f}\n", score_with(Rouge(zh=zh))))

    if not zh:
        from pycocoevalcap.meteor.meteor import Meteor
        single_valued.append(("Meteor: {:6.3f}\n", score_with(Meteor())))

        from pycocoevalcap.spice.spice import Spice
        single_valued.append(("SPICE: {:6.3f}\n", score_with(Spice())))

    with open(output, "w") as report:
        report.write("Diff:\n")
        for order in (1, 2, 3, 4):
            report.write("BLEU-{}: {:6.3f}\n".format(order, bleu_scores[order - 1]))
        for template, value in single_valued:
            report.write(template.format(value))
|
|
|
|
|
|
|
|
|
# When run as a script, hand main() to python-fire.
if __name__ == "__main__":
    fire.Fire(main)
|
|