# Copyright (c) Meta Platforms, Inc. and affiliates
import contextlib
import copy
import datetime
import io
import itertools
import json
import logging
import os
import time
from collections import OrderedDict, abc, defaultdict
from contextlib import ExitStack, contextmanager
from typing import List, Tuple, Union

import numpy as np
import pycocotools.mask as maskUtils
import torch
import torch.nn.functional as F
from torch import nn
from tabulate import tabulate
from pycocotools.cocoeval import COCOeval
from pytorch3d import _C
from pytorch3d.ops.iou_box3d import _box_planes, _box_triangles

import detectron2.utils.comm as comm
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.evaluation import (
    DatasetEvaluator, DatasetEvaluators, inference_context
)
from detectron2.evaluation.coco_evaluation import COCOEvaluator
from detectron2.structures import BoxMode
from detectron2.utils.comm import get_world_size, is_main_process
from detectron2.utils.file_io import PathManager
from detectron2.utils.logger import create_small_table, log_every_n_seconds
from detectron2.utils.memory import retry_if_cuda_oom

import cubercnn.vis.logperf as utils_logperf
from cubercnn.data import Omni3D, get_omni3d_categories, simple_register
""" | |
This file contains | |
* Omni3DEvaluationHelper: a helper object to accumulate and summarize evaluation results | |
* Omni3DEval: a wrapper around COCOeval to perform 3D bounding evaluation in the detection setting | |
* Omni3DEvaluator: a wrapper around COCOEvaluator to collect results on each dataset | |
* Omni3DParams: parameters for the evaluation API | |
""" | |
logger = logging.getLogger(__name__)

# Defines the maximum value of len(dts) * len(gts) for which we will
# attempt to compute 3D IoU on a GPU. The fallback is safer (but slower)
# computation on a CPU. A value of 0 disables GPU computation entirely.
MAX_DTS_CROSS_GTS_FOR_IOU3D = 0
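# Illustrative (hypothetical) setting only: a positive threshold such as
# MAX_DTS_CROSS_GTS_FOR_IOU3D = 10_000 would route image/category pairs with
# fewer than 10,000 detection x ground-truth combinations to the GPU and fall
# back to the CPU for larger ones; the default of 0 keeps everything on the CPU.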
def _check_coplanar(boxes: torch.Tensor, eps: float = 1e-4) -> torch.BoolTensor: | |
""" | |
Checks that plane vertices are coplanar. | |
Returns a bool tensor of size B, where True indicates a box is coplanar. | |
""" | |
faces = torch.tensor(_box_planes, dtype=torch.int64, device=boxes.device) | |
verts = boxes.index_select(index=faces.view(-1), dim=1) | |
B = boxes.shape[0] | |
P, V = faces.shape | |
# (B, P, 4, 3) -> (B, P, 3) | |
v0, v1, v2, v3 = verts.reshape(B, P, V, 3).unbind(2) | |
# Compute the normal | |
e0 = F.normalize(v1 - v0, dim=-1) | |
e1 = F.normalize(v2 - v0, dim=-1) | |
normal = F.normalize(torch.cross(e0, e1, dim=-1), dim=-1) | |
# Check the fourth vertex is also on the same plane | |
mat1 = (v3 - v0).view(B, 1, -1) # (B, 1, P*3) | |
mat2 = normal.view(B, -1, 1) # (B, P*3, 1) | |
return (mat1.bmm(mat2).abs() < eps).view(B) | |
def _check_nonzero(boxes: torch.Tensor, eps: float = 1e-8) -> torch.BoolTensor: | |
""" | |
Checks that the sides of the box have a non zero area. | |
Returns a bool tensor of size B, where True indicates a box is nonzero. | |
""" | |
faces = torch.tensor(_box_triangles, dtype=torch.int64, device=boxes.device) | |
verts = boxes.index_select(index=faces.view(-1), dim=1) | |
B = boxes.shape[0] | |
T, V = faces.shape | |
# (B, T, 3, 3) -> (B, T, 3) | |
v0, v1, v2 = verts.reshape(B, T, V, 3).unbind(2) | |
normals = torch.cross(v1 - v0, v2 - v0, dim=-1) # (B, T, 3) | |
face_areas = normals.norm(dim=-1) / 2 | |
return (face_areas > eps).all(1).view(B) | |
def box3d_overlap( | |
boxes_dt: torch.Tensor, boxes_gt: torch.Tensor, | |
eps_coplanar: float = 1e-4, eps_nonzero: float = 1e-8 | |
) -> torch.Tensor: | |
""" | |
Computes the intersection of 3D boxes_dt and boxes_gt. | |
Inputs boxes_dt, boxes_gt are tensors of shape (B, 8, 3) | |
(where B doesn't have to be the same for boxes_dt and boxes_gt), | |
containing the 8 corners of the boxes, as follows: | |
(4) +---------+. (5)
    | ` .     |  ` .
    | (0) +---+-----+ (1)
    |     |   |     |
(7) +-----+---+.    (6)|
    ` .   |     ` . |
    (3) ` +---------+ (2)
NOTE: Throughout this implementation, we assume that boxes | |
are defined by their 8 corners exactly in the order specified in the | |
diagram above for the function to give correct results. In addition | |
the vertices on each plane must be coplanar. | |
As an alternative to the diagram, this is a unit bounding | |
box which has the correct vertex ordering: | |
box_corner_vertices = [ | |
[0, 0, 0], | |
[1, 0, 0], | |
[1, 1, 0], | |
[0, 1, 0], | |
[0, 0, 1], | |
[1, 0, 1], | |
[1, 1, 1], | |
[0, 1, 1], | |
] | |
Args: | |
boxes_dt: tensor of shape (N, 8, 3) of the coordinates of the 1st boxes | |
boxes_gt: tensor of shape (M, 8, 3) of the coordinates of the 2nd boxes | |
Returns: | |
iou: (N, M) tensor of the intersection over union which is | |
defined as: `iou = vol / (vol1 + vol2 - vol)` | |
""" | |
# Make sure predictions are coplanar and nonzero | |
invalid_coplanar = ~_check_coplanar(boxes_dt, eps=eps_coplanar) | |
invalid_nonzero = ~_check_nonzero(boxes_dt, eps=eps_nonzero) | |
ious = _C.iou_box3d(boxes_dt, boxes_gt)[1] | |
# Offending boxes are set to zero IoU | |
if invalid_coplanar.any(): | |
ious[invalid_coplanar] = 0 | |
print('Warning: skipping {:d} non-coplanar boxes at eval.'.format(int(invalid_coplanar.float().sum()))) | |
if invalid_nonzero.any(): | |
ious[invalid_nonzero] = 0 | |
print('Warning: skipping {:d} zero volume boxes at eval.'.format(int(invalid_nonzero.float().sum()))) | |
return ious | |
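# Illustrative sketch (not part of the original evaluation pipeline): how
# box3d_overlap expects its (N, 8, 3) corner tensors, following the unit-cube
# vertex ordering documented above. The box values here are hypothetical.
def _example_box3d_overlap():
    unit_box = torch.tensor(
        [[
            [0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [1.0, 1.0, 0.0], [0.0, 1.0, 0.0],
            [0.0, 0.0, 1.0], [1.0, 0.0, 1.0], [1.0, 1.0, 1.0], [0.0, 1.0, 1.0],
        ]],
        dtype=torch.float32,
    )
    # A copy shifted by 0.5 along x overlaps the unit cube by half its volume,
    # so IoU = 0.5 / (1.0 + 1.0 - 0.5) = 1/3.
    shifted_box = unit_box.clone()
    shifted_box[..., 0] += 0.5
    return box3d_overlap(unit_box, shifted_box)  # (1, 1) tensor, approx. 0.333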
class Omni3DEvaluationHelper: | |
def __init__(self, | |
dataset_names, | |
filter_settings, | |
output_folder, | |
iter_label='-', | |
only_2d=False, | |
): | |
""" | |
A helper class to initialize, evaluate and summarize Omni3D metrics. | |
The evaluator relies on the detectron2 MetadataCatalog for keeping track | |
of category names and contiguous IDs. Hence, it is important to set | |
these variables appropriately. | |
# (list[str]) the category names in their contiguous order | |
MetadataCatalog.get('omni3d_model').thing_classes = ... | |
# (dict[int: int]) the mapping from Omni3D category IDs to the contiguous order | |
MetadataCatalog.get('omni3d_model').thing_dataset_id_to_contiguous_id | |
Args: | |
dataset_names (list[str]): the individual dataset splits for evaluation | |
filter_settings (dict): the filter settings used for evaluation, see | |
cubercnn/data/datasets.py get_filter_settings_from_cfg | |
output_folder (str): the output folder where results can be stored to disk. | |
iter_label (str): an optional iteration/label used within the summary | |
only_2d (bool): whether the evaluation mode should be 2D or 2D and 3D. | |
""" | |
self.dataset_names = dataset_names | |
self.filter_settings = filter_settings | |
self.output_folder = output_folder | |
self.iter_label = iter_label | |
self.only_2d = only_2d | |
# Each dataset evaluator is stored here | |
self.evaluators = OrderedDict() | |
# These are the main evaluation results | |
self.results = OrderedDict() | |
# These store per-dataset results to be printed
self.results_analysis = OrderedDict() | |
self.results_omni3d = OrderedDict() | |
self.overall_imgIds = set() | |
self.overall_catIds = set() | |
# These store the evaluations for each category and area, | |
# concatenated from ALL evaluated datasets. Doing so avoids | |
# the need to re-compute them when accumulating results. | |
self.evals_per_cat_area2D = {} | |
self.evals_per_cat_area3D = {} | |
self.output_folders = { | |
dataset_name: os.path.join(self.output_folder, dataset_name) | |
for dataset_name in dataset_names | |
} | |
for dataset_name in self.dataset_names: | |
# register any datasets that need it | |
if MetadataCatalog.get(dataset_name).get('json_file') is None: | |
simple_register(dataset_name, filter_settings, filter_empty=False) | |
# create an individual dataset evaluator | |
self.evaluators[dataset_name] = Omni3DEvaluator( | |
dataset_name, output_dir=self.output_folders[dataset_name], | |
filter_settings=self.filter_settings, only_2d=self.only_2d, | |
eval_prox=('Objectron' in dataset_name or 'SUNRGBD' in dataset_name), | |
distributed=False, # actual evaluation should be single process | |
) | |
self.evaluators[dataset_name].reset() | |
self.overall_imgIds.update(set(self.evaluators[dataset_name]._omni_api.getImgIds())) | |
self.overall_catIds.update(set(self.evaluators[dataset_name]._omni_api.getCatIds())) | |
def add_predictions(self, dataset_name, predictions): | |
""" | |
Adds predictions to the evaluator for dataset_name. Predictions may be
added in any number of calls, either all at once or in batches.
Args: | |
dataset_name (str): the dataset split name which the predictions belong to | |
predictions (list[dict]): each item in the list is a dict as follows: | |
{ | |
"image_id": <int> the unique image identifier from Omni3D, | |
"K": <np.array> 3x3 intrinsics matrix for the image, | |
"width": <int> image width, | |
"height": <int> image height, | |
"instances": [ | |
{ | |
"image_id": <int> the unique image identifier from Omni3D, | |
"category_id": <int> the contiguous category prediction IDs, | |
which can be mapped from Omni3D's category ID's using | |
MetadataCatalog.get('omni3d_model').thing_dataset_id_to_contiguous_id | |
"bbox": [float] 2D box as [x1, y1, x2, y2] used for IoU2D, | |
"score": <float> the confidence score for the object, | |
"depth": <float> the depth of the center of the object, | |
"bbox3D": list[list[float]] 8x3 corner vertices used for IoU3D, | |
} | |
... | |
] | |
} | |
""" | |
# concatenate incoming predictions | |
self.evaluators[dataset_name]._predictions += predictions | |
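# Illustrative sketch of a single prediction entry in the format described
# above (all values are hypothetical and for demonstration only):
#
#   {
#       "image_id": 42,
#       "K": np.array([[532., 0., 320.], [0., 532., 240.], [0., 0., 1.]]),
#       "width": 640, "height": 480,
#       "instances": [{
#           "image_id": 42,
#           "category_id": 3,                      # contiguous category ID
#           "bbox": [100.0, 150.0, 220.0, 300.0],  # [x1, y1, x2, y2]
#           "score": 0.87,
#           "depth": 2.4,
#           "bbox3D": [[...], ...],                # 8x3 corner vertices
#       }],
#   }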
def save_predictions(self, dataset_name): | |
""" | |
Saves the predictions from dataset_name to disk, under self.output_folder.
Args: | |
dataset_name (str): the dataset split name which should be saved. | |
""" | |
# save predictions to disk | |
output_folder_dataset = self.output_folders[dataset_name] | |
PathManager.mkdirs(output_folder_dataset) | |
file_path = os.path.join(output_folder_dataset, "instances_predictions.pth") | |
with PathManager.open(file_path, "wb") as f: | |
torch.save(self.evaluators[dataset_name]._predictions, f) | |
def evaluate(self, dataset_name): | |
""" | |
Runs the evaluation for an individual dataset split, assuming all | |
predictions have been passed in. | |
Args: | |
dataset_name (str): the dataset split name which should be evaluated.
""" | |
if not dataset_name in self.results: | |
# run evaluation and cache | |
self.results[dataset_name] = self.evaluators[dataset_name].evaluate() | |
results = self.results[dataset_name] | |
logger.info('\n'+results['log_str_2D'].replace('mode=2D', '{} iter={} mode=2D'.format(dataset_name, self.iter_label))) | |
# store the partially accumulated evaluations per category per area | |
for key, item in results['bbox_2D_evals_per_cat_area'].items(): | |
if not key in self.evals_per_cat_area2D: | |
self.evals_per_cat_area2D[key] = [] | |
self.evals_per_cat_area2D[key] += item | |
if not self.only_2d: | |
# store the partially accumulated evaluations per category per area | |
for key, item in results['bbox_3D_evals_per_cat_area'].items(): | |
if not key in self.evals_per_cat_area3D: | |
self.evals_per_cat_area3D[key] = [] | |
self.evals_per_cat_area3D[key] += item | |
logger.info('\n'+results['log_str_3D'].replace('mode=3D', '{} iter={} mode=3D'.format(dataset_name, self.iter_label))) | |
# full model category names | |
category_names = self.filter_settings['category_names'] | |
# The set of categories present in the dataset; there should be no duplicates | |
categories = {cat for cat in category_names if 'AP-{}'.format(cat) in results['bbox_2D']} | |
assert len(categories) == len(set(categories)) | |
# default are all NaN | |
general_2D, general_3D, omni_2D, omni_3D = (np.nan,) * 4 | |
# 2D and 3D performance for categories in dataset; and log | |
general_2D = np.mean([results['bbox_2D']['AP-{}'.format(cat)] for cat in categories]) | |
if not self.only_2d: | |
general_3D = np.mean([results['bbox_3D']['AP-{}'.format(cat)] for cat in categories]) | |
# 2D and 3D performance on Omni3D categories | |
omni3d_dataset_categories = get_omni3d_categories(dataset_name) # dataset-specific categories | |
if len(omni3d_dataset_categories - categories) == 0: # omni3d_dataset_categories is a subset of categories | |
omni_2D = np.mean([results['bbox_2D']['AP-{}'.format(cat)] for cat in omni3d_dataset_categories]) | |
if not self.only_2d: | |
omni_3D = np.mean([results['bbox_3D']['AP-{}'.format(cat)] for cat in omni3d_dataset_categories]) | |
self.results_omni3d[dataset_name] = {"iters": self.iter_label, "AP2D": omni_2D, "AP3D": omni_3D} | |
# Performance analysis | |
extras_AP15, extras_AP25, extras_AP50, extras_APn, extras_APm, extras_APf = (np.nan,)*6 | |
if not self.only_2d: | |
extras_AP15 = results['bbox_3D']['AP15'] | |
extras_AP25 = results['bbox_3D']['AP25'] | |
extras_AP50 = results['bbox_3D']['AP50'] | |
extras_APn = results['bbox_3D']['APn'] | |
extras_APm = results['bbox_3D']['APm'] | |
extras_APf = results['bbox_3D']['APf'] | |
self.results_analysis[dataset_name] = { | |
"iters": self.iter_label, | |
"AP2D": general_2D, "AP3D": general_3D, | |
"AP3D@15": extras_AP15, "AP3D@25": extras_AP25, "AP3D@50": extras_AP50, | |
"AP3D-N": extras_APn, "AP3D-M": extras_APm, "AP3D-F": extras_APf | |
} | |
# Performance per category | |
results_cat = OrderedDict() | |
for cat in category_names: | |
cat_2D, cat_3D = (np.nan,) * 2 | |
if 'AP-{}'.format(cat) in results['bbox_2D']: | |
cat_2D = results['bbox_2D']['AP-{}'.format(cat)] | |
if not self.only_2d: | |
cat_3D = results['bbox_3D']['AP-{}'.format(cat)] | |
if not np.isnan(cat_2D) or not np.isnan(cat_3D): | |
results_cat[cat] = {"AP2D": cat_2D, "AP3D": cat_3D} | |
utils_logperf.print_ap_category_histogram(dataset_name, results_cat) | |
def summarize_all(self,): | |
''' | |
Report collective metrics when possible for the Omni3D dataset.
This uses pre-computed evaluation results from each dataset,
which were aggregated and cached while evaluating individually.
This process simply re-accumulates and summarizes them.
''' | |
# First, double check that we have all the evaluations | |
for dataset_name in self.dataset_names: | |
if not dataset_name in self.results: | |
self.evaluate(dataset_name) | |
thing_classes = MetadataCatalog.get('omni3d_model').thing_classes | |
catId2contiguous = MetadataCatalog.get('omni3d_model').thing_dataset_id_to_contiguous_id | |
ordered_things = [thing_classes[catId2contiguous[cid]] for cid in self.overall_catIds] | |
categories = set(ordered_things) | |
evaluator2D = Omni3Deval(mode='2D') | |
evaluator2D.params.catIds = list(self.overall_catIds) | |
evaluator2D.params.imgIds = list(self.overall_imgIds) | |
evaluator2D.evalImgs = True | |
evaluator2D.evals_per_cat_area = self.evals_per_cat_area2D | |
evaluator2D._paramsEval = copy.deepcopy(evaluator2D.params) | |
evaluator2D.accumulate() | |
summarize_str2D = evaluator2D.summarize() | |
precisions = evaluator2D.eval['precision'] | |
metrics = ["AP", "AP50", "AP75", "AP95", "APs", "APm", "APl"] | |
results2D = { | |
metric: float( | |
evaluator2D.stats[idx] * 100 if evaluator2D.stats[idx] >= 0 else "nan" | |
) | |
for idx, metric in enumerate(metrics) | |
} | |
for idx, name in enumerate(ordered_things): | |
precision = precisions[:, :, idx, 0, -1] | |
precision = precision[precision > -1] | |
ap = np.mean(precision) if precision.size else float("nan") | |
results2D.update({"AP-" + "{}".format(name): float(ap * 100)}) | |
evaluator3D = Omni3Deval(mode='3D') | |
evaluator3D.params.catIds = list(self.overall_catIds) | |
evaluator3D.params.imgIds = list(self.overall_imgIds) | |
evaluator3D.evalImgs = True | |
evaluator3D.evals_per_cat_area = self.evals_per_cat_area3D | |
evaluator3D._paramsEval = copy.deepcopy(evaluator3D.params) | |
evaluator3D.accumulate() | |
summarize_str3D = evaluator3D.summarize() | |
precisions = evaluator3D.eval['precision'] | |
metrics = ["AP", "AP15", "AP25", "AP50", "APn", "APm", "APf"] | |
results3D = { | |
metric: float( | |
evaluator3D.stats[idx] * 100 if evaluator3D.stats[idx] >= 0 else "nan" | |
) | |
for idx, metric in enumerate(metrics) | |
} | |
for idx, name in enumerate(ordered_things): | |
precision = precisions[:, :, idx, 0, -1] | |
precision = precision[precision > -1] | |
ap = np.mean(precision) if precision.size else float("nan") | |
results3D.update({"AP-" + "{}".format(name): float(ap * 100)}) | |
# All concat categories | |
general_2D, general_3D = (np.nan,) * 2 | |
general_2D = np.mean([results2D['AP-{}'.format(cat)] for cat in categories]) | |
if not self.only_2d: | |
general_3D = np.mean([results3D['AP-{}'.format(cat)] for cat in categories]) | |
# Analysis performance | |
extras_AP15, extras_AP25, extras_AP50, extras_APn, extras_APm, extras_APf = (np.nan,) * 6 | |
if not self.only_2d: | |
extras_AP15 = results3D['AP15'] | |
extras_AP25 = results3D['AP25'] | |
extras_AP50 = results3D['AP50'] | |
extras_APn = results3D['APn'] | |
extras_APm = results3D['APm'] | |
extras_APf = results3D['APf'] | |
self.results_analysis["<Concat>"] = { | |
"iters": self.iter_label, | |
"AP2D": general_2D, "AP3D": general_3D, | |
"AP3D@15": extras_AP15, "AP3D@25": extras_AP25, "AP3D@50": extras_AP50, | |
"AP3D-N": extras_APn, "AP3D-M": extras_APm, "AP3D-F": extras_APf | |
} | |
# Omni3D Outdoor performance | |
omni_2D, omni_3D = (np.nan,) * 2 | |
omni3d_outdoor_categories = get_omni3d_categories("omni3d_out") | |
if len(omni3d_outdoor_categories - categories) == 0: | |
omni_2D = np.mean([results2D['AP-{}'.format(cat)] for cat in omni3d_outdoor_categories]) | |
if not self.only_2d: | |
omni_3D = np.mean([results3D['AP-{}'.format(cat)] for cat in omni3d_outdoor_categories]) | |
self.results_omni3d["Omni3D_Out"] = {"iters": self.iter_label, "AP2D": omni_2D, "AP3D": omni_3D} | |
# Omni3D Indoor performance | |
omni_2D, omni_3D = (np.nan,) * 2 | |
omni3d_indoor_categories = get_omni3d_categories("omni3d_in") | |
if len(omni3d_indoor_categories - categories) == 0: | |
omni_2D = np.mean([results2D['AP-{}'.format(cat)] for cat in omni3d_indoor_categories]) | |
if not self.only_2d: | |
omni_3D = np.mean([results3D['AP-{}'.format(cat)] for cat in omni3d_indoor_categories]) | |
self.results_omni3d["Omni3D_In"] = {"iters": self.iter_label, "AP2D": omni_2D, "AP3D": omni_3D} | |
# Omni3D performance | |
omni_2D, omni_3D = (np.nan,) * 2 | |
omni3d_categories = get_omni3d_categories("omni3d") | |
if len(omni3d_categories - categories) == 0: | |
omni_2D = np.mean([results2D['AP-{}'.format(cat)] for cat in omni3d_categories]) | |
if not self.only_2d: | |
omni_3D = np.mean([results3D['AP-{}'.format(cat)] for cat in omni3d_categories]) | |
self.results_omni3d["Omni3D"] = {"iters": self.iter_label, "AP2D": omni_2D, "AP3D": omni_3D} | |
# Per-category performance for the cumulative datasets | |
results_cat = OrderedDict() | |
for cat in self.filter_settings['category_names']: | |
cat_2D, cat_3D = (np.nan,) * 2 | |
if 'AP-{}'.format(cat) in results2D: | |
cat_2D = results2D['AP-{}'.format(cat)] | |
if not self.only_2d: | |
cat_3D = results3D['AP-{}'.format(cat)] | |
if not np.isnan(cat_2D) or not np.isnan(cat_3D): | |
results_cat[cat] = {"AP2D": cat_2D, "AP3D": cat_3D} | |
utils_logperf.print_ap_category_histogram("<Concat>", results_cat) | |
utils_logperf.print_ap_analysis_histogram(self.results_analysis) | |
utils_logperf.print_ap_omni_histogram(self.results_omni3d) | |
def inference_on_dataset(model, data_loader): | |
""" | |
Run model on the data_loader. | |
Also benchmark the inference speed of `model.__call__` accurately. | |
The model will be used in eval mode. | |
Args: | |
model (callable): a callable which takes an object from | |
`data_loader` and returns some outputs. | |
If it's an nn.Module, it will be temporarily set to `eval` mode. | |
If you wish to evaluate a model in `training` mode instead, you can | |
wrap the given model and override its behavior of `.eval()` and `.train()`. | |
data_loader: an iterable object with a length. | |
The elements it generates will be the inputs to the model. | |
Returns: | |
The return value of `evaluator.evaluate()` | |
""" | |
num_devices = get_world_size() | |
distributed = num_devices > 1 | |
logger.info("Start inference on {} batches".format(len(data_loader))) | |
total = len(data_loader) # inference data loader must have a fixed length | |
num_warmup = min(5, total - 1) | |
start_time = time.perf_counter() | |
total_data_time = 0 | |
total_compute_time = 0 | |
total_eval_time = 0 | |
inference_json = [] | |
with ExitStack() as stack: | |
if isinstance(model, nn.Module): | |
stack.enter_context(inference_context(model)) | |
stack.enter_context(torch.no_grad()) | |
start_data_time = time.perf_counter() | |
for idx, inputs in enumerate(data_loader): | |
total_data_time += time.perf_counter() - start_data_time | |
if idx == num_warmup: | |
start_time = time.perf_counter() | |
total_data_time = 0 | |
total_compute_time = 0 | |
total_eval_time = 0 | |
start_compute_time = time.perf_counter() | |
outputs = model(inputs) | |
if torch.cuda.is_available(): | |
torch.cuda.synchronize() | |
total_compute_time += time.perf_counter() - start_compute_time | |
start_eval_time = time.perf_counter() | |
for input, output in zip(inputs, outputs): | |
prediction = { | |
"image_id": input["image_id"], | |
"K": input["K"], | |
"width": input["width"], | |
"height": input["height"], | |
} | |
# convert to json format | |
instances = output["instances"].to('cpu') | |
prediction["instances"] = instances_to_coco_json(instances, input["image_id"]) | |
# store in overall predictions | |
inference_json.append(prediction) | |
total_eval_time += time.perf_counter() - start_eval_time | |
iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup) | |
data_seconds_per_iter = total_data_time / iters_after_start | |
compute_seconds_per_iter = total_compute_time / iters_after_start | |
eval_seconds_per_iter = total_eval_time / iters_after_start | |
total_seconds_per_iter = (time.perf_counter() - start_time) / iters_after_start | |
if idx >= num_warmup * 2 or compute_seconds_per_iter > 5: | |
eta = datetime.timedelta(seconds=int(total_seconds_per_iter * (total - idx - 1))) | |
log_every_n_seconds( | |
logging.INFO, | |
( | |
f"Inference done {idx + 1}/{total}. " | |
f"Dataloading: {data_seconds_per_iter:.4f} s/iter. " | |
f"Inference: {compute_seconds_per_iter:.4f} s/iter. " | |
f"Eval: {eval_seconds_per_iter:.4f} s/iter. " | |
f"Total: {total_seconds_per_iter:.4f} s/iter. " | |
f"ETA={eta}" | |
), | |
n=5, | |
) | |
start_data_time = time.perf_counter() | |
# Measure the time only for this worker (before the synchronization barrier) | |
total_time = time.perf_counter() - start_time | |
total_time_str = str(datetime.timedelta(seconds=total_time)) | |
# NOTE this format is parsed by grep | |
logger.info( | |
"Total inference time: {} ({:.6f} s / iter per device, on {} devices)".format( | |
total_time_str, total_time / (total - num_warmup), num_devices | |
) | |
) | |
total_compute_time_str = str(datetime.timedelta(seconds=int(total_compute_time))) | |
logger.info( | |
"Total inference pure compute time: {} ({:.6f} s / iter per device, on {} devices)".format( | |
total_compute_time_str, total_compute_time / (total - num_warmup), num_devices | |
) | |
) | |
if distributed: | |
comm.synchronize() | |
inference_json = comm.gather(inference_json, dst=0) | |
inference_json = list(itertools.chain(*inference_json)) | |
if not comm.is_main_process(): | |
return [] | |
return inference_json | |
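# Illustrative sketch (assumed names, not part of the original API surface):
# how inference_on_dataset and Omni3DEvaluationHelper are typically combined.
# `model`, `data_loaders`, `dataset_names`, `filter_settings` and `output_dir`
# are placeholders that the caller is assumed to provide.
def _example_evaluation_loop(model, data_loaders, dataset_names, filter_settings, output_dir):
    helper = Omni3DEvaluationHelper(dataset_names, filter_settings, output_dir, only_2d=False)
    for dataset_name, data_loader in zip(dataset_names, data_loaders):
        # run inference and collect COCO-format predictions for this split
        predictions = inference_on_dataset(model, data_loader)
        helper.add_predictions(dataset_name, predictions)
        helper.save_predictions(dataset_name)
        helper.evaluate(dataset_name)
    # aggregate the cached per-dataset evaluations into Omni3D-wide metrics
    helper.summarize_all()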
class Omni3DEvaluator(COCOEvaluator): | |
def __init__( | |
self, | |
dataset_name, | |
tasks=None, | |
distributed=True, | |
output_dir=None, | |
*, | |
max_dets_per_image=None, | |
use_fast_impl=False, | |
eval_prox=False, | |
only_2d=False, | |
filter_settings={}, | |
): | |
""" | |
Args: | |
dataset_name (str): name of the dataset to be evaluated. | |
It must have either the following corresponding metadata: | |
"json_file": the path to the COCO format annotation | |
Or it must be in detectron2's standard dataset format | |
so it can be converted to COCO format automatically. | |
tasks (tuple[str]): tasks that can be evaluated under the given | |
configuration. For now, support only for "bbox". | |
distributed (True): if True, will collect results from all ranks and run evaluation | |
in the main process. | |
Otherwise, will only evaluate the results in the current process. | |
output_dir (str): optional, an output directory to dump all | |
results predicted on the dataset. The dump contains two files: | |
1. "instances_predictions.pth" a file that can be loaded with `torch.load` and | |
contains all the results in the format they are produced by the model. | |
2. "coco_instances_results.json" a json file in COCO's result format. | |
max_dets_per_image (int): limit on the maximum number of detections per image. | |
By default in COCO, this limit is 100, but it can be customized
to be greater, as is needed in evaluation metrics AP fixed and AP pool | |
(see https://arxiv.org/pdf/2102.01066.pdf) | |
This doesn't affect keypoint evaluation. | |
use_fast_impl (bool): use a fast but **unofficial** implementation to compute AP. | |
Although the results should be very close to the official implementation in COCO | |
API, it is still recommended to compute results with the official API for use in | |
papers. The faster implementation also uses more RAM. | |
eval_prox (bool): whether to perform proximity evaluation, for datasets that are not
exhaustively annotated.
only_2d (bool): evaluates only 2D performance if set to True
filter_settings (dict): settings for the dataset loader. TBD
""" | |
self._logger = logging.getLogger(__name__) | |
self._distributed = distributed | |
self._output_dir = output_dir | |
self._use_fast_impl = use_fast_impl | |
self._eval_prox = eval_prox | |
self._only_2d = only_2d | |
self._filter_settings = filter_settings | |
# COCOeval requires the limit on the number of detections per image (maxDets) to be a list | |
# with at least 3 elements. The default maxDets in COCOeval is [1, 10, 100], in which the | |
# 3rd element (100) is used as the limit on the number of detections per image when | |
# evaluating AP. COCOEvaluator expects an integer for max_dets_per_image, so for COCOeval, | |
# we reformat max_dets_per_image into [1, 10, max_dets_per_image], based on the defaults. | |
if max_dets_per_image is None: | |
max_dets_per_image = [1, 10, 100] | |
else: | |
max_dets_per_image = [1, 10, max_dets_per_image] | |
self._max_dets_per_image = max_dets_per_image | |
self._tasks = tasks | |
self._cpu_device = torch.device("cpu") | |
self._metadata = MetadataCatalog.get(dataset_name) | |
json_file = PathManager.get_local_path(self._metadata.json_file) | |
with contextlib.redirect_stdout(io.StringIO()): | |
self._omni_api = Omni3D([json_file], filter_settings) | |
# Test set json files do not contain annotations (evaluation must be | |
# performed using the COCO evaluation server). | |
self._do_evaluation = "annotations" in self._omni_api.dataset | |
def process(self, inputs, outputs): | |
""" | |
Args: | |
inputs: the inputs to a model (e.g., GeneralizedRCNN). | |
It is a list of dict. Each dict corresponds to an image and | |
contains keys like "height", "width", "file_name", "image_id". | |
outputs: the outputs of a model. It is a list of dicts with key | |
"instances" that contains :class:`Instances`. | |
""" | |
# Optional image keys to keep when available | |
img_keys_optional = ["p2"] | |
for input, output in zip(inputs, outputs): | |
prediction = { | |
"image_id": input["image_id"], | |
"K": input["K"], | |
"width": input["width"], | |
"height": input["height"], | |
} | |
# store optional keys when available | |
for img_key in img_keys_optional: | |
if img_key in input: | |
prediction.update({img_key: input[img_key]}) | |
# already in COCO format | |
if type(output["instances"]) == list: | |
prediction["instances"] = output["instances"] | |
# tensor instances format | |
else: | |
instances = output["instances"].to(self._cpu_device) | |
prediction["instances"] = instances_to_coco_json( | |
instances, input["image_id"] | |
) | |
if len(prediction) > 1: | |
self._predictions.append(prediction) | |
def _derive_omni_results(self, omni_eval, iou_type, mode, class_names=None): | |
""" | |
Derive the desired score numbers from summarized COCOeval. | |
Args: | |
omni_eval (None or Omni3Deval): None represents no predictions from model. | |
iou_type (str): | |
mode (str): either "2D" or "3D" | |
class_names (None or list[str]): if provided, will use it to predict | |
per-category AP. | |
Returns: | |
a dict of {metric name: score} | |
""" | |
assert mode in ["2D", "3D"] | |
metrics = { | |
"2D": ["AP", "AP50", "AP75", "AP95", "APs", "APm", "APl"], | |
"3D": ["AP", "AP15", "AP25", "AP50", "APn", "APm", "APf"], | |
}[mode] | |
if iou_type != "bbox": | |
raise ValueError("Support only for bbox evaluation.") | |
if omni_eval is None: | |
self._logger.warning("No predictions from the model!")
return {metric: float("nan") for metric in metrics} | |
# the standard metrics | |
results = { | |
metric: float( | |
omni_eval.stats[idx] * 100 if omni_eval.stats[idx] >= 0 else "nan" | |
) | |
for idx, metric in enumerate(metrics) | |
} | |
self._logger.info( | |
"Evaluation results for {} in {} mode: \n".format(iou_type, mode) | |
+ create_small_table(results) | |
) | |
if not np.isfinite(sum(results.values())): | |
self._logger.info("Some metrics cannot be computed and is shown as NaN.") | |
if class_names is None or len(class_names) <= 1: | |
return results | |
# Compute per-category AP | |
# from https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L222-L252 # noqa | |
precisions = omni_eval.eval["precision"] | |
# precision has dims (iou, recall, cls, area range, max dets) | |
assert len(class_names) == precisions.shape[2] | |
results_per_category = [] | |
for idx, name in enumerate(class_names): | |
# area range index 0: all area ranges | |
# max dets index -1: typically 100 per image | |
precision = precisions[:, :, idx, 0, -1] | |
precision = precision[precision > -1] | |
ap = np.mean(precision) if precision.size else float("nan") | |
results_per_category.append(("{}".format(name), float(ap * 100))) | |
# tabulate it | |
N_COLS = min(6, len(results_per_category) * 2) | |
results_flatten = list(itertools.chain(*results_per_category)) | |
results_table = itertools.zip_longest( | |
*[results_flatten[i::N_COLS] for i in range(N_COLS)] | |
) | |
table = tabulate( | |
results_table, | |
tablefmt="pipe", | |
floatfmt=".3f", | |
headers=["category", "AP"] * (N_COLS // 2), | |
numalign="left", | |
) | |
self._logger.info( | |
"Per-category {} AP in {} mode: \n".format(iou_type, mode) + table | |
) | |
results.update({"AP-" + name: ap for name, ap in results_per_category}) | |
return results | |
def _eval_predictions(self, predictions, img_ids=None): | |
""" | |
Evaluate predictions. Fill self._results with the metrics of the tasks. | |
""" | |
self._logger.info("Preparing results for COCO format ...") | |
omni_results = list(itertools.chain(*[x["instances"] for x in predictions])) | |
tasks = self._tasks or self._tasks_from_predictions(omni_results) | |
omni3d_global_categories = MetadataCatalog.get('omni3d_model').thing_classes | |
# the dataset results will store only the categories that are present | |
# in the corresponding dataset, all others will be dropped. | |
dataset_results = [] | |
# unmap the category ids for COCO | |
if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"): | |
dataset_id_to_contiguous_id = ( | |
self._metadata.thing_dataset_id_to_contiguous_id | |
) | |
all_contiguous_ids = list(dataset_id_to_contiguous_id.values()) | |
num_classes = len(all_contiguous_ids) | |
assert ( | |
min(all_contiguous_ids) == 0 | |
and max(all_contiguous_ids) == num_classes - 1 | |
) | |
reverse_id_mapping = {v: k for k, v in dataset_id_to_contiguous_id.items()} | |
for result in omni_results: | |
category_id = result["category_id"] | |
assert category_id < num_classes, ( | |
f"A prediction has class={category_id}, " | |
f"but the dataset only has {num_classes} classes and " | |
f"predicted class id should be in [0, {num_classes - 1}]." | |
) | |
result["category_id"] = reverse_id_mapping[category_id] | |
cat_name = omni3d_global_categories[category_id] | |
if cat_name in self._metadata.thing_classes: | |
dataset_results.append(result) | |
# replace the results with the filtered | |
# instances that are in vocabulary. | |
omni_results = dataset_results | |
if self._output_dir: | |
file_path = os.path.join(self._output_dir, "omni_instances_results.json") | |
self._logger.info("Saving results to {}".format(file_path)) | |
with PathManager.open(file_path, "w") as f: | |
f.write(json.dumps(omni_results)) | |
f.flush() | |
if not self._do_evaluation: | |
self._logger.info("Annotations are not available for evaluation.") | |
return | |
self._logger.info( | |
"Evaluating predictions with {} COCO API...".format( | |
"unofficial" if self._use_fast_impl else "official" | |
) | |
) | |
for task in sorted(tasks): | |
assert task in {"bbox"}, f"Got unknown task: {task}!" | |
evals, log_strs = ( | |
_evaluate_predictions_on_omni( | |
self._omni_api, | |
omni_results, | |
task, | |
img_ids=img_ids, | |
only_2d=self._only_2d, | |
eval_prox=self._eval_prox, | |
) | |
if len(omni_results) > 0 | |
else None # cocoapi does not handle empty results very well | |
) | |
modes = evals.keys() | |
for mode in modes: | |
res = self._derive_omni_results( | |
evals[mode], | |
task, | |
mode, | |
class_names=self._metadata.get("thing_classes"), | |
) | |
self._results[task + "_" + format(mode)] = res | |
self._results[task + "_" + format(mode) + '_evalImgs'] = evals[mode].evalImgs | |
self._results[task + "_" + format(mode) + '_evals_per_cat_area'] = evals[mode].evals_per_cat_area | |
self._results["log_str_2D"] = log_strs["2D"] | |
if "3D" in log_strs: | |
self._results["log_str_3D"] = log_strs["3D"] | |
def _evaluate_predictions_on_omni( | |
omni_gt, | |
omni_results, | |
iou_type, | |
img_ids=None, | |
only_2d=False, | |
eval_prox=False, | |
): | |
""" | |
Evaluate the coco results using COCOEval API. | |
""" | |
assert len(omni_results) > 0 | |
log_strs, evals = {}, {} | |
omni_dt = omni_gt.loadRes(omni_results) | |
modes = ["2D"] if only_2d else ["2D", "3D"] | |
for mode in modes: | |
omni_eval = Omni3Deval( | |
omni_gt, omni_dt, iouType=iou_type, mode=mode, eval_prox=eval_prox | |
) | |
if img_ids is not None: | |
omni_eval.params.imgIds = img_ids | |
omni_eval.evaluate() | |
omni_eval.accumulate() | |
log_str = omni_eval.summarize() | |
log_strs[mode] = log_str | |
evals[mode] = omni_eval | |
return evals, log_strs | |
def instances_to_coco_json(instances, img_id): | |
num_instances = len(instances) | |
if num_instances == 0: | |
return [] | |
boxes = BoxMode.convert( | |
instances.pred_boxes.tensor.numpy(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS | |
).tolist() | |
scores = instances.scores.tolist() | |
classes = instances.pred_classes.tolist() | |
if hasattr(instances, "pred_bbox3D"): | |
bbox3D = instances.pred_bbox3D.tolist() | |
center_cam = instances.pred_center_cam.tolist() | |
center_2D = instances.pred_center_2D.tolist() | |
dimensions = instances.pred_dimensions.tolist() | |
pose = instances.pred_pose.tolist() | |
else: | |
# dummy | |
bbox3D = np.ones([num_instances, 8, 3]).tolist() | |
center_cam = np.ones([num_instances, 3]).tolist() | |
center_2D = np.ones([num_instances, 2]).tolist() | |
dimensions = np.ones([num_instances, 3]).tolist() | |
pose = np.ones([num_instances, 3, 3]).tolist() | |
results = [] | |
for k in range(num_instances): | |
result = { | |
"image_id": img_id, | |
"category_id": classes[k], | |
"bbox": boxes[k], | |
"score": scores[k], | |
"depth": np.array(bbox3D[k])[:, 2].mean(), | |
"bbox3D": bbox3D[k], | |
"center_cam": center_cam[k], | |
"center_2D": center_2D[k], | |
"dimensions": dimensions[k], | |
"pose": pose[k], | |
} | |
results.append(result) | |
return results | |
# --------------------------------------------------------------------- | |
# Omni3DParams | |
# --------------------------------------------------------------------- | |
class Omni3DParams: | |
""" | |
Params for the Omni evaluation API | |
""" | |
def setDet2DParams(self): | |
self.imgIds = [] | |
self.catIds = [] | |
# np.arange causes trouble: the data points can be slightly larger than the true value
self.iouThrs = np.linspace( | |
0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True | |
) | |
self.recThrs = np.linspace( | |
0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True | |
) | |
self.maxDets = [1, 10, 100] | |
self.areaRng = [ | |
[0 ** 2, 1e5 ** 2], | |
[0 ** 2, 32 ** 2], | |
[32 ** 2, 96 ** 2], | |
[96 ** 2, 1e5 ** 2], | |
] | |
self.areaRngLbl = ["all", "small", "medium", "large"] | |
self.useCats = 1 | |
def setDet3DParams(self): | |
self.imgIds = [] | |
self.catIds = [] | |
# np.arange causes trouble: the data points can be slightly larger than the true value
self.iouThrs = np.linspace( | |
0.05, 0.5, int(np.round((0.5 - 0.05) / 0.05)) + 1, endpoint=True | |
) | |
self.recThrs = np.linspace( | |
0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True | |
) | |
self.maxDets = [1, 10, 100] | |
self.areaRng = [[0, 1e5], [0, 10], [10, 35], [35, 1e5]] | |
self.areaRngLbl = ["all", "near", "medium", "far"] | |
self.useCats = 1 | |
def __init__(self, mode="2D"): | |
""" | |
Args: | |
iouType (str): defines 2D or 3D evaluation parameters. | |
One of {"2D", "3D"} | |
""" | |
if mode == "2D": | |
self.setDet2DParams() | |
elif mode == "3D": | |
self.setDet3DParams() | |
else: | |
raise Exception("mode %s not supported" % (mode)) | |
self.iouType = "bbox" | |
self.mode = mode | |
# the proximity threshold defines the neighborhood | |
# when evaluating on non-exhaustively annotated datasets | |
self.proximity_thresh = 0.3 | |
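# Illustrative sketch (hypothetical usage): the two modes use different
# parameter sets. In "2D" mode the IoU thresholds span 0.50:0.95 and the area
# ranges are in pixels^2; in "3D" mode the thresholds span 0.05:0.50 and the
# ranges compare object depth (all/near/medium/far).
def _example_omni3d_params():
    params_2d = Omni3DParams(mode="2D")  # iouThrs = [0.50, 0.55, ..., 0.95]
    params_3d = Omni3DParams(mode="3D")  # iouThrs = [0.05, 0.10, ..., 0.50]
    return params_2d.iouThrs, params_3d.iouThrs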
# --------------------------------------------------------------------- | |
# Omni3Deval | |
# --------------------------------------------------------------------- | |
class Omni3Deval(COCOeval): | |
""" | |
Wraps COCOeval for 2D or 3D box evaluation depending on mode | |
""" | |
def __init__( | |
self, cocoGt=None, cocoDt=None, iouType="bbox", mode="2D", eval_prox=False | |
): | |
""" | |
Initialize COCOeval using coco APIs for Gt and Dt | |
Args: | |
cocoGt: COCO object with ground truth annotations | |
cocoDt: COCO object with detection results | |
iouType: (str) defines the evaluation type. Supports only "bbox" now. | |
mode: (str) defines whether to evaluate 2D or 3D performance. | |
One of {"2D", "3D"} | |
eval_prox: (bool) if True, performs "Proximity Evaluation", i.e. | |
evaluates detections in the proximity of the ground truth 2D boxes.
This is used for datasets which are not exhaustively annotated. | |
""" | |
if not iouType: | |
print("iouType not specified. use default iouType bbox") | |
elif iouType != "bbox": | |
print("no support for %s iouType" % (iouType)) | |
self.mode = mode | |
if mode not in ["2D", "3D"]: | |
raise Exception("mode %s not supported" % (mode)) | |
self.eval_prox = eval_prox | |
self.cocoGt = cocoGt # ground truth COCO API | |
self.cocoDt = cocoDt # detections COCO API | |
# per-image per-category evaluation results [KxAxI] elements | |
self.evalImgs = defaultdict(list) | |
self.eval = {} # accumulated evaluation results | |
self._gts = defaultdict(list) # gt for evaluation | |
self._dts = defaultdict(list) # dt for evaluation | |
self.params = Omni3DParams(mode) # parameters | |
self._paramsEval = {} # parameters for evaluation | |
self.stats = [] # result summarization | |
self.ious = {} # ious between all gts and dts | |
if cocoGt is not None: | |
self.params.imgIds = sorted(cocoGt.getImgIds()) | |
self.params.catIds = sorted(cocoGt.getCatIds()) | |
self.evals_per_cat_area = None | |
def _prepare(self): | |
""" | |
Prepare ._gts and ._dts for evaluation based on params | |
""" | |
p = self.params | |
if p.useCats: | |
gts = self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds)) | |
dts = self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds)) | |
else: | |
gts = self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds)) | |
dts = self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds)) | |
# set ignore flag | |
ignore_flag = "ignore2D" if self.mode == "2D" else "ignore3D" | |
for gt in gts: | |
gt[ignore_flag] = gt[ignore_flag] if ignore_flag in gt else 0 | |
self._gts = defaultdict(list) # gt for evaluation | |
self._dts = defaultdict(list) # dt for evaluation | |
for gt in gts: | |
self._gts[gt["image_id"], gt["category_id"]].append(gt) | |
for dt in dts: | |
self._dts[dt["image_id"], dt["category_id"]].append(dt) | |
self.evalImgs = defaultdict(list) # per-image per-category evaluation results | |
self.eval = {} # accumulated evaluation results | |
def accumulate(self, p = None): | |
''' | |
Accumulate per image evaluation results and store the result in self.eval | |
:param p: input params for evaluation | |
:return: None | |
''' | |
print('Accumulating evaluation results...') | |
assert self.evalImgs, 'Please run evaluate() first' | |
tic = time.time() | |
# allows input customized parameters | |
if p is None: | |
p = self.params | |
p.catIds = p.catIds if p.useCats == 1 else [-1] | |
T = len(p.iouThrs) | |
R = len(p.recThrs) | |
K = len(p.catIds) if p.useCats else 1 | |
A = len(p.areaRng) | |
M = len(p.maxDets) | |
precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories | |
recall = -np.ones((T,K,A,M)) | |
scores = -np.ones((T,R,K,A,M)) | |
# create dictionary for future indexing | |
_pe = self._paramsEval | |
catIds = _pe.catIds if _pe.useCats else [-1] | |
setK = set(catIds) | |
setA = set(map(tuple, _pe.areaRng)) | |
setM = set(_pe.maxDets) | |
setI = set(_pe.imgIds) | |
# get inds to evaluate | |
catid_list = [k for n, k in enumerate(p.catIds) if k in setK] | |
k_list = [n for n, k in enumerate(p.catIds) if k in setK] | |
m_list = [m for n, m in enumerate(p.maxDets) if m in setM] | |
a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] | |
i_list = [n for n, i in enumerate(p.imgIds) if i in setI] | |
I0 = len(_pe.imgIds) | |
A0 = len(_pe.areaRng) | |
has_precomputed_evals = not (self.evals_per_cat_area is None) | |
if has_precomputed_evals: | |
evals_per_cat_area = self.evals_per_cat_area | |
else: | |
evals_per_cat_area = {} | |
# retrieve E at each category, area range, and max number of detections | |
for k, (k0, catId) in enumerate(zip(k_list, catid_list)): | |
Nk = k0*A0*I0 | |
for a, a0 in enumerate(a_list): | |
Na = a0*I0 | |
if has_precomputed_evals: | |
E = evals_per_cat_area[(catId, a)] | |
else: | |
E = [self.evalImgs[Nk + Na + i] for i in i_list] | |
E = [e for e in E if not e is None] | |
evals_per_cat_area[(catId, a)] = E | |
if len(E) == 0: | |
continue | |
for m, maxDet in enumerate(m_list): | |
dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) | |
# different sorting methods generate slightly different results;
# mergesort is used to be consistent with the Matlab implementation.
inds = np.argsort(-dtScores, kind='mergesort') | |
dtScoresSorted = dtScores[inds] | |
dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] | |
dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] | |
gtIg = np.concatenate([e['gtIgnore'] for e in E]) | |
npig = np.count_nonzero(gtIg==0) | |
if npig == 0: | |
continue | |
tps = np.logical_and( dtm, np.logical_not(dtIg) ) | |
fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) ) | |
tp_sum = np.cumsum(tps, axis=1).astype(dtype=float) | |
fp_sum = np.cumsum(fps, axis=1).astype(dtype=float) | |
for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): | |
tp = np.array(tp) | |
fp = np.array(fp) | |
nd = len(tp) | |
rc = tp / npig | |
pr = tp / (fp+tp+np.spacing(1)) | |
q = np.zeros((R,)) | |
ss = np.zeros((R,)) | |
if nd: | |
recall[t,k,a,m] = rc[-1] | |
else: | |
recall[t,k,a,m] = 0 | |
# numpy is slow without cython optimization for accessing elements;
# using python lists gives a significant speed improvement
pr = pr.tolist(); q = q.tolist() | |
for i in range(nd-1, 0, -1): | |
if pr[i] > pr[i-1]: | |
pr[i-1] = pr[i] | |
inds = np.searchsorted(rc, p.recThrs, side='left') | |
try: | |
for ri, pi in enumerate(inds): | |
q[ri] = pr[pi] | |
ss[ri] = dtScoresSorted[pi] | |
except: | |
pass | |
precision[t,:,k,a,m] = np.array(q) | |
scores[t,:,k,a,m] = np.array(ss) | |
self.evals_per_cat_area = evals_per_cat_area | |
self.eval = { | |
'params': p, | |
'counts': [T, R, K, A, M], | |
'date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), | |
'precision': precision, | |
'recall': recall, | |
'scores': scores, | |
} | |
toc = time.time() | |
print('DONE (t={:0.2f}s).'.format( toc-tic)) | |
def evaluate(self): | |
""" | |
Run per image evaluation on given images and store results (a list of dict) in self.evalImgs | |
""" | |
print("Running per image evaluation...") | |
p = self.params | |
print("Evaluate annotation type *{}*".format(p.iouType)) | |
tic = time.time() | |
p.imgIds = list(np.unique(p.imgIds)) | |
if p.useCats: | |
p.catIds = list(np.unique(p.catIds)) | |
p.maxDets = sorted(p.maxDets) | |
self.params = p | |
self._prepare() | |
catIds = p.catIds if p.useCats else [-1] | |
# loop through images, area range, max detection number | |
self.ious = { | |
(imgId, catId): self.computeIoU(imgId, catId) | |
for imgId in p.imgIds | |
for catId in catIds | |
} | |
maxDet = p.maxDets[-1] | |
self.evalImgs = [ | |
self.evaluateImg(imgId, catId, areaRng, maxDet) | |
for catId in catIds | |
for areaRng in p.areaRng | |
for imgId in p.imgIds | |
] | |
self._paramsEval = copy.deepcopy(self.params) | |
toc = time.time() | |
print("DONE (t={:0.2f}s).".format(toc - tic)) | |
def computeIoU(self, imgId, catId): | |
""" | |
Computes the IoUs between detections (sorted by descending score) and
ground truths, for either 2D boxes (in 2D mode) or 3D boxes (in 3D mode).
""" | |
device = (torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")) | |
p = self.params | |
if p.useCats: | |
gt = self._gts[imgId, catId] | |
dt = self._dts[imgId, catId] | |
else: | |
gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]] | |
dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]] | |
if len(gt) == 0 and len(dt) == 0: | |
return [] | |
inds = np.argsort([-d["score"] for d in dt], kind="mergesort") | |
dt = [dt[i] for i in inds] | |
if len(dt) > p.maxDets[-1]: | |
dt = dt[0 : p.maxDets[-1]] | |
if p.iouType == "bbox": | |
if self.mode == "2D": | |
g = [g["bbox"] for g in gt] | |
d = [d["bbox"] for d in dt] | |
elif self.mode == "3D": | |
g = [g["bbox3D"] for g in gt] | |
d = [d["bbox3D"] for d in dt] | |
else: | |
raise Exception("unknown iouType for iou computation") | |
# compute iou between each dt and gt region | |
# iscrowd is required in builtin maskUtils so we | |
# use a dummy buffer for it | |
iscrowd = [0 for o in gt] | |
if self.mode == "2D": | |
ious = maskUtils.iou(d, g, iscrowd) | |
elif len(d) > 0 and len(g) > 0: | |
# For 3D eval, we want to run IoU in CUDA if available | |
if torch.cuda.is_available() and len(d) * len(g) < MAX_DTS_CROSS_GTS_FOR_IOU3D: | |
device = torch.device("cuda:0") | |
else: | |
device = torch.device("cpu") | |
dd = torch.tensor(d, device=device, dtype=torch.float32) | |
gg = torch.tensor(g, device=device, dtype=torch.float32) | |
ious = box3d_overlap(dd, gg).cpu().numpy() | |
else: | |
ious = [] | |
in_prox = None | |
if self.eval_prox: | |
g = [g["bbox"] for g in gt] | |
d = [d["bbox"] for d in dt] | |
iscrowd = [0 for o in gt] | |
ious2d = maskUtils.iou(d, g, iscrowd) | |
if type(ious2d) == list: | |
in_prox = [] | |
else: | |
in_prox = ious2d > p.proximity_thresh | |
return ious, in_prox | |
def evaluateImg(self, imgId, catId, aRng, maxDet): | |
""" | |
Perform evaluation for single category and image | |
Returns: | |
dict (single image results) | |
""" | |
p = self.params | |
if p.useCats: | |
gt = self._gts[imgId, catId] | |
dt = self._dts[imgId, catId] | |
else: | |
gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]] | |
dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]] | |
if len(gt) == 0 and len(dt) == 0: | |
return None | |
flag_range = "area" if self.mode == "2D" else "depth" | |
flag_ignore = "ignore2D" if self.mode == "2D" else "ignore3D" | |
for g in gt: | |
if g[flag_ignore] or (g[flag_range] < aRng[0] or g[flag_range] > aRng[1]): | |
g["_ignore"] = 1 | |
else: | |
g["_ignore"] = 0 | |
# sort dt highest score first, sort gt ignore last | |
gtind = np.argsort([g["_ignore"] for g in gt], kind="mergesort") | |
gt = [gt[i] for i in gtind] | |
dtind = np.argsort([-d["score"] for d in dt], kind="mergesort") | |
dt = [dt[i] for i in dtind[0:maxDet]] | |
# load computed ious | |
ious = ( | |
self.ious[imgId, catId][0][:, gtind] | |
if len(self.ious[imgId, catId][0]) > 0 | |
else self.ious[imgId, catId][0] | |
) | |
if self.eval_prox: | |
in_prox = ( | |
self.ious[imgId, catId][1][:, gtind] | |
if len(self.ious[imgId, catId][1]) > 0 | |
else self.ious[imgId, catId][1] | |
) | |
T = len(p.iouThrs) | |
G = len(gt) | |
D = len(dt) | |
gtm = np.zeros((T, G)) | |
dtm = np.zeros((T, D)) | |
gtIg = np.array([g["_ignore"] for g in gt]) | |
dtIg = np.zeros((T, D)) | |
if not len(ious) == 0: | |
for tind, t in enumerate(p.iouThrs): | |
for dind, d in enumerate(dt): | |
# information about best match so far (m=-1 -> unmatched) | |
iou = min([t, 1 - 1e-10]) | |
m = -1 | |
for gind, g in enumerate(gt): | |
# in case of proximity evaluation, if not in proximity continue | |
if self.eval_prox and not in_prox[dind, gind]: | |
continue | |
# if this gt already matched, continue | |
if gtm[tind, gind] > 0: | |
continue | |
# if this dt already matched a regular gt and we have reached the ignore gts, stop
if m > -1 and gtIg[m] == 0 and gtIg[gind] == 1: | |
break | |
# continue to next gt unless better match made | |
if ious[dind, gind] < iou: | |
continue | |
# if match successful and best so far, store appropriately | |
iou = ious[dind, gind] | |
m = gind | |
# if match made store id of match for both dt and gt | |
if m == -1: | |
continue | |
dtIg[tind, dind] = gtIg[m] | |
dtm[tind, dind] = gt[m]["id"] | |
gtm[tind, m] = d["id"] | |
# set unmatched detections outside of area range to ignore | |
a = np.array( | |
[d[flag_range] < aRng[0] or d[flag_range] > aRng[1] for d in dt] | |
).reshape((1, len(dt))) | |
dtIg = np.logical_or(dtIg, np.logical_and(dtm == 0, np.repeat(a, T, 0))) | |
# in case of proximity evaluation, ignore detections which are far from gt regions | |
if self.eval_prox and len(in_prox) > 0: | |
dt_far = in_prox.any(1) == 0 | |
dtIg = np.logical_or(dtIg, np.repeat(dt_far.reshape((1, len(dt))), T, 0)) | |
# store results for given image and category | |
return { | |
"image_id": imgId, | |
"category_id": catId, | |
"aRng": aRng, | |
"maxDet": maxDet, | |
"dtIds": [d["id"] for d in dt], | |
"gtIds": [g["id"] for g in gt], | |
"dtMatches": dtm, | |
"gtMatches": gtm, | |
"dtScores": [d["score"] for d in dt], | |
"gtIgnore": gtIg, | |
"dtIgnore": dtIg, | |
} | |
def summarize(self): | |
""" | |
Compute and display summary metrics for evaluation results. | |
Note this function can *only* be applied on the default parameter setting
""" | |
def _summarize(mode, ap=1, iouThr=None, areaRng="all", maxDets=100, log_str=""): | |
p = self.params | |
eval = self.eval | |
if mode == "2D": | |
iStr = (" {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}") | |
elif mode == "3D": | |
iStr = " {:<18} {} @[ IoU={:<9} | depth={:>6s} | maxDets={:>3d} ] = {:0.3f}" | |
titleStr = "Average Precision" if ap == 1 else "Average Recall" | |
typeStr = "(AP)" if ap == 1 else "(AR)" | |
iouStr = ( | |
"{:0.2f}:{:0.2f}".format(p.iouThrs[0], p.iouThrs[-1]) | |
if iouThr is None | |
else "{:0.2f}".format(iouThr) | |
) | |
aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng] | |
mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets] | |
if ap == 1: | |
# dimension of precision: [TxRxKxAxM] | |
s = eval["precision"] | |
# IoU | |
if iouThr is not None: | |
t = np.where(np.isclose(iouThr, p.iouThrs.astype(float)))[0] | |
s = s[t] | |
s = s[:, :, :, aind, mind] | |
else: | |
# dimension of recall: [TxKxAxM] | |
s = eval["recall"] | |
if iouThr is not None: | |
t = np.where(iouThr == p.iouThrs)[0] | |
s = s[t] | |
s = s[:, :, aind, mind] | |
if len(s[s > -1]) == 0: | |
mean_s = -1 | |
else: | |
mean_s = np.mean(s[s > -1]) | |
if log_str != "": | |
log_str += "\n" | |
log_str += "mode={} ".format(mode) + \ | |
iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s) | |
return mean_s, log_str | |
def _summarizeDets(mode): | |
params = self.params | |
# the thresholds here define the thresholds printed in `_derive_omni_results`
thres = [0.5, 0.75, 0.95] if mode == "2D" else [0.15, 0.25, 0.50] | |
stats = np.zeros((13,)) | |
stats[0], log_str = _summarize(mode, 1) | |
stats[1], log_str = _summarize( | |
mode, 1, iouThr=thres[0], maxDets=params.maxDets[2], log_str=log_str | |
) | |
stats[2], log_str = _summarize( | |
mode, 1, iouThr=thres[1], maxDets=params.maxDets[2], log_str=log_str | |
) | |
stats[3], log_str = _summarize( | |
mode, 1, iouThr=thres[2], maxDets=params.maxDets[2], log_str=log_str | |
) | |
stats[4], log_str = _summarize( | |
mode, | |
1, | |
areaRng=params.areaRngLbl[1], | |
maxDets=params.maxDets[2], | |
log_str=log_str, | |
) | |
stats[5], log_str = _summarize( | |
mode, | |
1, | |
areaRng=params.areaRngLbl[2], | |
maxDets=params.maxDets[2], | |
log_str=log_str, | |
) | |
stats[6], log_str = _summarize( | |
mode, | |
1, | |
areaRng=params.areaRngLbl[3], | |
maxDets=params.maxDets[2], | |
log_str=log_str, | |
) | |
stats[7], log_str = _summarize( | |
mode, 0, maxDets=params.maxDets[0], log_str=log_str | |
) | |
stats[8], log_str = _summarize( | |
mode, 0, maxDets=params.maxDets[1], log_str=log_str | |
) | |
stats[9], log_str = _summarize( | |
mode, 0, maxDets=params.maxDets[2], log_str=log_str | |
) | |
stats[10], log_str = _summarize( | |
mode, | |
0, | |
areaRng=params.areaRngLbl[1], | |
maxDets=params.maxDets[2], | |
log_str=log_str, | |
) | |
stats[11], log_str = _summarize( | |
mode, | |
0, | |
areaRng=params.areaRngLbl[2], | |
maxDets=params.maxDets[2], | |
log_str=log_str, | |
) | |
stats[12], log_str = _summarize( | |
mode, | |
0, | |
areaRng=params.areaRngLbl[3], | |
maxDets=params.maxDets[2], | |
log_str=log_str, | |
) | |
return stats, log_str | |
if not self.eval: | |
raise Exception("Please run accumulate() first") | |
stats, log_str = _summarizeDets(self.mode) | |
self.stats = stats | |
return log_str | |