# Provenance: JohanDL — initial commit f1dd031
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import torch
from mmdet.datasets.transforms.loading import LoadAnnotations
from mmdet.registry import TRANSFORMS
from mmdet.structures.bbox import get_box_type
@TRANSFORMS.register_module()
class LoadMatchAnnotations(LoadAnnotations):
    """Load and process the ``instances`` and ``seg_map`` annotation provided
    by dataset.

    It must load ``instances_ids`` which is only used in the tracking tasks.
    The annotation format is as the following:

    .. code-block:: python

        {
            'instances':
            [
                {
                    # List of 4 numbers representing the bounding box of the
                    # instance, in (x1, y1, x2, y2) order.
                    'bbox': [x1, y1, x2, y2],
                    # Label of image classification.
                    'bbox_label': 1,
                    # Used in tracking.
                    # Id of instances.
                    'instance_id': 100,
                    # Used in instance/panoptic segmentation. The segmentation
                    # mask of the instance or the information of segments.
                    # 1. If list[list[float]], it represents a list of polygons,
                    # one for each connected component of the object. Each
                    # list[float] is one simple polygon in the format of
                    # [x1, y1, ..., xn, yn] (n >= 3). The Xs and Ys are absolute
                    # coordinates in unit of pixels.
                    # 2. If dict, it represents the per-pixel segmentation mask
                    # in COCO's compressed RLE format. The dict should have keys
                    # "size" and "counts". Can be loaded by pycocotools.
                    'mask': list[list[float]] or dict,
                }
            ]
            # Filename of semantic or panoptic segmentation ground truth file.
            'seg_map_path': 'a/b/c'
        }

    After this module, the annotation has been changed to the format below:

    .. code-block:: python

        {
            # In (x1, y1, x2, y2) order, float type. N is the number of bboxes
            # in an image.
            'gt_bboxes': np.ndarray(N, 4)
            # In int type.
            'gt_bboxes_labels': np.ndarray(N, )
            # In built-in class.
            'gt_masks': PolygonMasks (H, W) or BitmapMasks (H, W)
            # In uint8 type.
            'gt_seg_map': np.ndarray (H, W)
        }

    Required Keys:

    - height (optional)
    - width (optional)
    - instances

      - bbox (optional)
      - bbox_label
      - instance_id (optional)
      - mask (optional)
      - ignore_flag (optional)

    - seg_map_path (optional)

    Added Keys:

    - gt_bboxes (np.float32)
    - gt_bboxes_labels (np.int32)
    - gt_instances_ids (np.int32)
    - gt_masks (BitmapMasks | PolygonMasks)
    - gt_seg_map (np.uint8)
    - gt_ignore_flags (bool)
    """

    # NOTE(review): the original defined a pass-through ``__init__`` that only
    # forwarded **kwargs to the parent; it is omitted here as it had no effect.

    def _load_bboxes(self, results: dict) -> None:
        """Private function to load bounding box annotations.

        Args:
            results (dict): Result dict from :obj:``mmcv.BaseDataset``.

        Returns:
            dict: The dict contains loaded bounding box annotations.
        """
        gt_bboxes = []
        gt_ignore_flags = []
        # TODO: use bbox_type
        for instance in results["instances"]:
            # The datasets which are only format in evaluation don't have
            # groundtruth boxes.
            if "bbox" in instance:
                gt_bboxes.append(instance["bbox"])
            if "ignore_flag" in instance:
                gt_ignore_flags.append(instance["ignore_flag"])
        # TODO: check this case
        if len(gt_bboxes) != len(gt_ignore_flags):
            # There may be no ``gt_ignore_flags`` in some cases, we treat them
            # as all False in order to keep the length of ``gt_bboxes`` and
            # ``gt_ignore_flags`` the same
            gt_ignore_flags = [False] * len(gt_bboxes)

        if self.box_type is None:
            results["gt_bboxes"] = np.array(gt_bboxes, dtype=np.float32).reshape(
                (-1, 4)
            )
        else:
            _, box_type_cls = get_box_type(self.box_type)
            results["gt_bboxes"] = box_type_cls(gt_bboxes, dtype=torch.float32)
        results["gt_ignore_flags"] = np.array(gt_ignore_flags, dtype=bool)

    def _load_instances_ids(self, results: dict) -> None:
        """Private function to load instances id annotations.

        Args:
            results (dict): Result dict from :obj:``mmcv.BaseDataset``.

        Returns:
            dict: The dict containing instances id annotations.
        """
        gt_instances_ids = []
        for instance in results["instances"]:
            gt_instances_ids.append(instance["instance_id"])
        results["gt_instances_ids"] = np.array(gt_instances_ids, dtype=np.int32)

    def transform(self, results: dict) -> dict:
        """Function to load multiple types annotations.

        Args:
            results (dict): Result dict from :obj:``mmcv.BaseDataset``.

        Returns:
            dict: The dict contains loaded bounding box, label, instances id
            and semantic segmentation and keypoints annotations.
        """
        results = super().transform(results)
        self._load_instances_ids(results)
        return results

    def __repr__(self) -> str:
        repr_str = self.__class__.__name__
        repr_str += f"(with_bbox={self.with_bbox}, "
        repr_str += f"with_label={self.with_label}, "
        repr_str += f"with_mask={self.with_mask}, "
        repr_str += f"with_seg={self.with_seg}, "
        repr_str += f"poly2mask={self.poly2mask}, "
        repr_str += f"imdecode_backend='{self.imdecode_backend}', "
        # NOTE(review): newer mmdet versions renamed ``file_client_args`` to
        # ``backend_args`` — confirm the parent class still exposes this attr.
        repr_str += f"file_client_args={self.file_client_args})"
        return repr_str