# Copyright (c) Facebook, Inc. and its affiliates.
# --------------------------------------------------------
# X-Decoder -- Generalized Decoding for Pixel, Image, and Language
# Copyright (c) 2022 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Modified by Xueyan Zou ([email protected])
# --------------------------------------------------------
import os
import json
import logging
import itertools

import detectron2.utils.comm as comm
from detectron2.evaluation.evaluator import DatasetEvaluator

from caption_pycocotools.coco import COCO
from pycocoevalcap.eval import COCOEvalCap


class CaptioningEvaluator(DatasetEvaluator):
    """
    Evaluate image captioning predictions against COCO-style caption annotations.

    Generated captions are collected per image in :meth:`process`, optionally
    gathered across processes, dumped in COCO result format, and scored with
    pycocoevalcap's ``COCOEvalCap`` (BLEU, METEOR, ROUGE-L, CIDEr, ...).
    """

    def __init__(
        self,
        distributed=True,
        output_dir=None,
        gt_json=None,
    ):
""" | |
Args: | |
dataset_name (str): name of the dataset to be evaluated. | |
It must have either the following corresponding metadata: | |
"json_file": the path to the COCO format annotation | |
Or it must be in detectron2's standard dataset format | |
so it can be converted to COCO format automatically. | |
tasks (tuple[str]): tasks that can be evaluated under the given | |
configuration. A task is one of "bbox", "segm", "keypoints". | |
By default, will infer this automatically from predictions. | |
distributed (True): if True, will collect results from all ranks and run evaluation | |
in the main process. | |
Otherwise, will only evaluate the results in the current process. | |
output_dir (str): optional, an output directory to dump all | |
results predicted on the dataset. The dump contains two files: | |
1. "instances_predictions.pth" a file that can be loaded with `torch.load` and | |
contains all the results in the format they are produced by the model. | |
2. "coco_instances_results.json" a json file in COCO's result format. | |
max_dets_per_image (int): limit on the maximum number of detections per image. | |
By default in COCO, this limit is to 100, but this can be customized | |
to be greater, as is needed in evaluation metrics AP fixed and AP pool | |
(see https://arxiv.org/pdf/2102.01066.pdf) | |
This doesn't affect keypoint evaluation. | |
use_fast_impl (bool): use a fast but **unofficial** implementation to compute AP. | |
Although the results should be very close to the official implementation in COCO | |
API, it is still recommended to compute results with the official API for use in | |
papers. The faster implementation also uses more RAM. | |
kpt_oks_sigmas (list[float]): The sigmas used to calculate keypoint OKS. | |
See http://cocodataset.org/#keypoints-eval | |
When empty, it will use the defaults in COCO. | |
Otherwise it should be the same length as ROI_KEYPOINT_HEAD.NUM_KEYPOINTS. | |
allow_cached_coco (bool): Whether to use cached coco json from previous validation | |
runs. You should set this to False if you need to use different validation data. | |
Defaults to True. | |
""" | |
        self._logger = logging.getLogger(__name__)
        self._distributed = distributed
        self._output_dir = output_dir
        self._gt_json = COCO(gt_json)

    def reset(self):
        self._gen_captions = []
        self._image_ids = []

    def process(self, inputs, outputs):
        """
        Args:
            inputs: the inputs to the model. It is a list of dicts; each dict
                corresponds to an image and contains keys like "height", "width",
                "file_name", "image_id".
            outputs: the outputs of the model. It is a list of dicts, each carrying
                an "image_id" and a "captioning_text" entry with the generated caption.
        """
        for output in outputs:
            self._image_ids.append(output['image_id'])
            self._gen_captions.append(output['captioning_text'])

    def evaluate(self, img_ids=None):
        """
        Args:
            img_ids: a list of image IDs to evaluate on. Defaults to None for the whole dataset.
        """
        if self._distributed:
            comm.synchronize()

            def gather(x, move=False):
                x = comm.gather(x)
                x = list(itertools.chain(*x))
                if move:
                    x = [xx.to(self._gen_captions[0].device) for xx in x]
                return x

            gen_captions = gather(self._gen_captions)
            image_ids = gather(self._image_ids)
            if not comm.is_main_process():
                return {}
        else:
            gen_captions = self._gen_captions
            image_ids = self._image_ids
        assert len(gen_captions) == len(image_ids)

        # Dump predictions in COCO caption result format, then score them with
        # pycocoevalcap against the ground-truth annotations.
        pred_captions = [
            {"image_id": image_id, "caption": gen_caption}
            for image_id, gen_caption in zip(image_ids, gen_captions)
        ]
        pred_pth = os.path.join(self._output_dir, 'results.json')
        with open(pred_pth, "w") as f:
            json.dump(pred_captions, f)

        gt_captions = self._gt_json
        pred_captions = gt_captions.loadRes(pred_pth)

        cocoEval = COCOEvalCap(gt_captions, pred_captions)
        cocoEval.params['image_id'] = pred_captions.getImgIds()
        cocoEval.evaluate()
        return cocoEval.eval
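

# A minimal usage sketch (illustrative only, not part of the library): it assumes a
# COCO-format caption annotation file on disk and model outputs carrying the
# "image_id" and "captioning_text" keys consumed by `process` above. The paths
# and the sample prediction below are hypothetical placeholders.
if __name__ == "__main__":
    os.makedirs("./caption_eval", exist_ok=True)
    evaluator = CaptioningEvaluator(
        distributed=False,
        output_dir="./caption_eval",                   # hypothetical output directory
        gt_json="annotations/captions_val2014.json",   # hypothetical COCO caption GT file
    )
    evaluator.reset()
    # A real run would loop over a dataloader and feed the model's generated captions.
    evaluator.process(
        inputs=[{"image_id": 42, "file_name": "000000000042.jpg", "height": 480, "width": 640}],
        outputs=[{"image_id": 42, "captioning_text": "a cat sitting on a couch"}],
    )
    metrics = evaluator.evaluate()  # e.g. {"Bleu_4": ..., "CIDEr": ...}
    print(metrics)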