import math
from typing import Dict

import torch
import torch.nn.functional as F

from detectron2.layers import ShapeSpec, cat
from detectron2.layers.roi_align_rotated import ROIAlignRotated
from detectron2.modeling import poolers
from detectron2.modeling.proposal_generator import rpn
from detectron2.modeling.roi_heads.mask_head import mask_rcnn_inference
from detectron2.structures import Boxes, ImageList, Instances, Keypoints, RotatedBoxes

from .shared import alias, to_device

"""
This file contains caffe2-compatible implementations of several detectron2 components.
"""


class Caffe2Boxes(Boxes):
    """
    Represents a list of detectron2.structures.Boxes from a minibatch. Each box
    is represented by a 5d vector (batch index + 4 coordinates), or a 6d vector
    (batch index + 5 coordinates) for RotatedBoxes.
    """

    def __init__(self, tensor):
        assert isinstance(tensor, torch.Tensor)
        assert tensor.dim() == 2 and tensor.size(-1) in [4, 5, 6], tensor.size()
        self.tensor = tensor
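
# A minimal illustration (not part of the library; values are made up) of the
# pooler-format layout Caffe2Boxes wraps -- each row is
# (batch_index, x1, y1, x2, y2):
#
#   rois = torch.tensor(
#       [
#           [0.0, 10.0, 10.0, 50.0, 60.0],  # a box in image 0
#           [1.0, 20.0, 15.0, 40.0, 45.0],  # a box in image 1
#       ]
#   )
#   boxes = Caffe2Boxes(rois)  # boxes.tensor[:, 0] holds the batch indices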


class InstancesList:
    """
    Tensor representation of a list of Instances objects for a batch of images.

    When dealing with a batch of images with Caffe2 ops, a list of bboxes
    (instances) is usually represented by a single Tensor of size
    (sigma(Ni), 5) or (sigma(Ni), 4), plus a batch split Tensor. This class
    provides common functions to convert between these two representations.
    """

    def __init__(self, im_info, indices, extra_fields=None):
        # im_info: (N, 3) tensor of (height, width, scale) per image
        # indices: (sum(Ni),) tensor holding the batch index of each row
        self.im_info = im_info
        self.indices = indices
        self.batch_extra_fields = extra_fields or {}

        self.image_size = self.im_info

    def get_fields(self):
        """like `get_fields` in the Instances object,
        but return each field in tensor representations"""
        ret = {}
        for k, v in self.batch_extra_fields.items():
            ret[k] = v
        return ret

    def has(self, name):
        return name in self.batch_extra_fields

    def set(self, name, value):
        # read lengths from tensor shapes rather than len() where possible,
        # to keep the check friendly to tracing
        if isinstance(value, Boxes):
            data_len = value.tensor.shape[0]
        elif isinstance(value, torch.Tensor):
            data_len = value.shape[0]
        else:
            data_len = len(value)
        if len(self.batch_extra_fields):
            assert (
                len(self) == data_len
            ), "Adding a field of length {} to an Instances of length {}".format(
                data_len, len(self)
            )
        self.batch_extra_fields[name] = value

    def __getattr__(self, name):
        if name not in self.batch_extra_fields:
            raise AttributeError("Cannot find field '{}' in the given Instances!".format(name))
        return self.batch_extra_fields[name]

    def __len__(self):
        return len(self.indices)

    def flatten(self):
        ret = []
        for _, v in self.batch_extra_fields.items():
            if isinstance(v, (Boxes, Keypoints)):
                ret.append(v.tensor)
            else:
                ret.append(v)
        return ret

    @staticmethod
    def to_d2_instances_list(instances_list):
        """
        Convert InstancesList to List[Instances]. The input `instances_list` can
        also be a List[Instances], in which case this method is a no-op.
        """
        if not isinstance(instances_list, InstancesList):
            assert all(isinstance(x, Instances) for x in instances_list)
            return instances_list

        ret = []
        for i, info in enumerate(instances_list.im_info):
            instances = Instances(torch.Size([int(info[0].item()), int(info[1].item())]))

            ids = instances_list.indices == i
            for k, v in instances_list.batch_extra_fields.items():
                if isinstance(v, torch.Tensor):
                    instances.set(k, v[ids])
                    continue
                elif isinstance(v, Boxes):
                    instances.set(k, v[ids, -4:])
                    continue

                target_type, tensor_source = v
                assert isinstance(tensor_source, torch.Tensor)
                assert tensor_source.shape[0] == instances_list.indices.shape[0]
                tensor_source = tensor_source[ids]

                if issubclass(target_type, Boxes):
                    instances.set(k, Boxes(tensor_source[:, -4:]))
                elif issubclass(target_type, Keypoints):
                    instances.set(k, Keypoints(tensor_source))
                elif issubclass(target_type, torch.Tensor):
                    instances.set(k, tensor_source)
                else:
                    raise ValueError("Can't handle target type: {}".format(target_type))

            ret.append(instances)
        return ret
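
# A minimal sketch (illustrative only; values are made up) of the two
# representations InstancesList bridges -- three boxes from a 2-image batch
# stored as one pooler-format tensor plus per-row batch indices:
#
#   rois = torch.tensor(
#       [
#           [0.0, 1.0, 1.0, 9.0, 9.0],  # image 0
#           [0.0, 2.0, 2.0, 8.0, 8.0],  # image 0
#           [1.0, 3.0, 3.0, 7.0, 7.0],  # image 1
#       ]
#   )
#   im_info = torch.tensor([[480.0, 640.0, 1.0], [480.0, 640.0, 1.0]])
#   lst = InstancesList(
#       im_info=im_info,
#       indices=rois[:, 0],
#       extra_fields={"proposal_boxes": Caffe2Boxes(rois)},
#   )
#   instances = InstancesList.to_d2_instances_list(lst)  # List[Instances], len 2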


class Caffe2Compatible:
    """
    A model can inherit this class to indicate that it can be traced and deployed with caffe2.
    """

    def _get_tensor_mode(self):
        return self._tensor_mode

    def _set_tensor_mode(self, v):
        self._tensor_mode = v

    tensor_mode = property(_get_tensor_mode, _set_tensor_mode)
    """
    If True, the model expects C2-style tensor-only inputs/outputs format.
    """


class Caffe2RPN(Caffe2Compatible, rpn.RPN):
    @classmethod
    def from_config(cls, cfg, input_shape: Dict[str, ShapeSpec]):
        ret = super(Caffe2Compatible, cls).from_config(cfg, input_shape)
        assert tuple(cfg.MODEL.RPN.BBOX_REG_WEIGHTS) in [
            (1.0, 1.0, 1.0, 1.0),
            (1.0, 1.0, 1.0, 1.0, 1.0),
        ]
        return ret

    def _generate_proposals(
        self, images, objectness_logits_pred, anchor_deltas_pred, gt_instances=None
    ):
        assert isinstance(images, ImageList)
        if self.tensor_mode:
            im_info = images.image_sizes
        else:
            im_info = torch.tensor(
                [[im_sz[0], im_sz[1], 1.0] for im_sz in images.image_sizes]
            ).to(images.tensor.device)
        assert isinstance(im_info, torch.Tensor)

        rpn_rois_list = []
        rpn_roi_probs_list = []
        for scores, bbox_deltas, cell_anchors_tensor, feat_stride in zip(
            objectness_logits_pred,
            anchor_deltas_pred,
            [b for (n, b) in self.anchor_generator.cell_anchors.named_buffers()],
            self.anchor_generator.strides,
        ):
            scores = scores.detach()
            bbox_deltas = bbox_deltas.detach()

            rpn_rois, rpn_roi_probs = torch.ops._caffe2.GenerateProposals(
                scores,
                bbox_deltas,
                im_info,
                cell_anchors_tensor,
                spatial_scale=1.0 / feat_stride,
                pre_nms_topN=self.pre_nms_topk[self.training],
                post_nms_topN=self.post_nms_topk[self.training],
                nms_thresh=self.nms_thresh,
                min_size=self.min_box_size,
                # the angle_* arguments are only relevant for rotated boxes
                angle_bound_on=True,
                angle_bound_lo=-180,
                angle_bound_hi=180,
                clip_angle_thresh=1.0,
                legacy_plus_one=False,
            )
            rpn_rois_list.append(rpn_rois)
            rpn_roi_probs_list.append(rpn_roi_probs)

        # Single-level features (e.g. a C4 backbone) use the proposals from the
        # only level directly; with FPN, proposals from all levels are merged
        # and re-ranked by CollectRpnProposals.
        if len(objectness_logits_pred) == 1:
            rpn_rois = rpn_rois_list[0]
            rpn_roi_probs = rpn_roi_probs_list[0]
        else:
            assert len(rpn_rois_list) == len(rpn_roi_probs_list)
            rpn_post_nms_topN = self.post_nms_topk[self.training]

            device = rpn_rois_list[0].device
            input_list = [to_device(x, "cpu") for x in (rpn_rois_list + rpn_roi_probs_list)]

            # CollectRpnProposals expects proposals from contiguous FPN levels;
            # infer the level range from the feature strides to check this.
            feature_strides = list(self.anchor_generator.strides)
            rpn_min_level = int(math.log2(feature_strides[0]))
            rpn_max_level = int(math.log2(feature_strides[-1]))
            assert (rpn_max_level - rpn_min_level + 1) == len(
                rpn_rois_list
            ), "CollectRpnProposals requires continuous levels"

            rpn_rois = torch.ops._caffe2.CollectRpnProposals(
                input_list,
                # rpn_min_level/rpn_max_level only matter through their
                # difference, which must equal the number of input levels
                rpn_max_level=2 + len(rpn_rois_list) - 1,
                rpn_min_level=2,
                rpn_post_nms_topN=rpn_post_nms_topN,
            )
            rpn_rois = to_device(rpn_rois, device)
            rpn_roi_probs = []

        proposals = self.c2_postprocess(im_info, rpn_rois, rpn_roi_probs, self.tensor_mode)
        return proposals, {}

    def forward(self, images, features, gt_instances=None):
        assert not self.training
        features = [features[f] for f in self.in_features]
        objectness_logits_pred, anchor_deltas_pred = self.rpn_head(features)
        return self._generate_proposals(
            images,
            objectness_logits_pred,
            anchor_deltas_pred,
            gt_instances,
        )

    @staticmethod
    def c2_postprocess(im_info, rpn_rois, rpn_roi_probs, tensor_mode):
        proposals = InstancesList(
            im_info=im_info,
            indices=rpn_rois[:, 0],
            extra_fields={
                "proposal_boxes": Caffe2Boxes(rpn_rois),
                "objectness_logits": (torch.Tensor, rpn_roi_probs),
            },
        )
        if not tensor_mode:
            proposals = InstancesList.to_d2_instances_list(proposals)
        else:
            proposals = [proposals]
        return proposals
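
# Illustrative sketch (values made up) of the tensors flowing through
# Caffe2RPN.c2_postprocess: rpn_rois rows are (batch_index, x1, y1, x2, y2)
# and im_info rows are (height, width, scale), matching the construction in
# _generate_proposals above:
#
#   im_info = torch.tensor([[800.0, 1216.0, 1.0]])           # one image
#   rpn_rois = torch.tensor([[0.0, 5.0, 5.0, 100.0, 80.0]])  # one proposal
#   rpn_roi_probs = torch.tensor([0.9])
#   proposals = Caffe2RPN.c2_postprocess(
#       im_info, rpn_rois, rpn_roi_probs, tensor_mode=False
#   )  # -> List[Instances] of length 1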


class Caffe2ROIPooler(Caffe2Compatible, poolers.ROIPooler):
    @staticmethod
    def c2_preprocess(box_lists):
        assert all(isinstance(x, Boxes) for x in box_lists)
        if all(isinstance(x, Caffe2Boxes) for x in box_lists):
            # pure tensor-based input: a single Caffe2Boxes already carries
            # the batch indices, so its tensor is used as-is
            assert len(box_lists) == 1
            pooler_fmt_boxes = box_lists[0].tensor
        else:
            pooler_fmt_boxes = poolers.convert_boxes_to_pooler_format(box_lists)
        return pooler_fmt_boxes
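
    # Hedged sketch of the two input styles c2_preprocess accepts (values made
    # up): a single Caffe2Boxes already carrying batch indices passes through
    # unchanged, while plain per-image Boxes are converted:
    #
    #   b0 = Boxes(torch.tensor([[0.0, 0.0, 10.0, 10.0]]))  # image 0
    #   b1 = Boxes(torch.tensor([[5.0, 5.0, 20.0, 20.0]]))  # image 1
    #   fmt = Caffe2ROIPooler.c2_preprocess([b0, b1])
    #   # fmt rows: (batch_index, x1, y1, x2, y2)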

    def forward(self, x, box_lists):
        assert not self.training

        pooler_fmt_boxes = self.c2_preprocess(box_lists)
        num_level_assignments = len(self.level_poolers)

        if num_level_assignments == 1:
            if isinstance(self.level_poolers[0], ROIAlignRotated):
                c2_roi_align = torch.ops._caffe2.RoIAlignRotated
                aligned = True
            else:
                c2_roi_align = torch.ops._caffe2.RoIAlign
                aligned = self.level_poolers[0].aligned

            x0 = x[0]
            if x0.is_quantized:
                # the Caffe2 RoIAlign ops expect dense float features
                x0 = x0.dequantize()

            out = c2_roi_align(
                x0,
                pooler_fmt_boxes,
                order="NCHW",
                spatial_scale=float(self.level_poolers[0].spatial_scale),
                pooled_h=int(self.output_size[0]),
                pooled_w=int(self.output_size[1]),
                sampling_ratio=int(self.level_poolers[0].sampling_ratio),
                aligned=aligned,
            )
            return out

        device = pooler_fmt_boxes.device
        assert (
            self.max_level - self.min_level + 1 == 4
        ), "Currently DistributeFpnProposals only supports 4 levels"
        fpn_outputs = torch.ops._caffe2.DistributeFpnProposals(
            to_device(pooler_fmt_boxes, "cpu"),
            roi_canonical_scale=self.canonical_box_size,
            roi_canonical_level=self.canonical_level,
            roi_max_level=self.max_level,
            roi_min_level=self.min_level,
            legacy_plus_one=False,
        )
        fpn_outputs = [to_device(x, device) for x in fpn_outputs]

        rois_fpn_list = fpn_outputs[:-1]
        rois_idx_restore_int32 = fpn_outputs[-1]

        roi_feat_fpn_list = []
        for roi_fpn, x_level, pooler in zip(rois_fpn_list, x, self.level_poolers):
            if isinstance(pooler, ROIAlignRotated):
                c2_roi_align = torch.ops._caffe2.RoIAlignRotated
                aligned = True
            else:
                c2_roi_align = torch.ops._caffe2.RoIAlign
                aligned = bool(pooler.aligned)

            if x_level.is_quantized:
                x_level = x_level.dequantize()

            roi_feat_fpn = c2_roi_align(
                x_level,
                roi_fpn,
                order="NCHW",
                spatial_scale=float(pooler.spatial_scale),
                pooled_h=int(self.output_size[0]),
                pooled_w=int(self.output_size[1]),
                sampling_ratio=int(pooler.sampling_ratio),
                aligned=aligned,
            )
            roi_feat_fpn_list.append(roi_feat_fpn)

        roi_feat_shuffled = cat(roi_feat_fpn_list, dim=0)
        assert roi_feat_shuffled.numel() > 0 and rois_idx_restore_int32.numel() > 0, (
            "Caffe2 export requires tracing with a model checkpoint + input that can produce valid"
            " detections. But no detections were obtained with the given checkpoint and input!"
        )
        roi_feat = torch.ops._caffe2.BatchPermutation(roi_feat_shuffled, rois_idx_restore_int32)
        return roi_feat
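
# Note on the FPN path above (semantics assumed from the op names):
# DistributeFpnProposals groups the rois by target level, so the pooled
# features come out level-ordered; BatchPermutation then restores the original
# roi order, conceptually
#
#   roi_feat = roi_feat_shuffled[rois_idx_restore_int32.long()]
#
# so downstream heads receive features aligned with the input boxes.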


def caffe2_fast_rcnn_outputs_inference(tensor_mode, box_predictor, predictions, proposals):
    """equivalent to FastRCNNOutputLayers.inference"""
    num_classes = box_predictor.num_classes
    score_thresh = box_predictor.test_score_thresh
    nms_thresh = box_predictor.test_nms_thresh
    topk_per_image = box_predictor.test_topk_per_image
    is_rotated = len(box_predictor.box2box_transform.weights) == 5

    if is_rotated:
        box_dim = 5
        assert box_predictor.box2box_transform.weights[4] == 1, (
            "The weights for Rotated BBoxTransform in C2 have only 4 dimensions,"
            + " thus enforcing the angle weight to be 1 for now"
        )
        box2box_transform_weights = box_predictor.box2box_transform.weights[:4]
    else:
        box_dim = 4
        box2box_transform_weights = box_predictor.box2box_transform.weights

    class_logits, box_regression = predictions
    if num_classes + 1 == class_logits.shape[1]:
        class_prob = F.softmax(class_logits, -1)
    else:
        assert num_classes == class_logits.shape[1]
        class_prob = torch.sigmoid(class_logits)
        # BoxWithNMSLimit infers the number of classes from class_prob's shape,
        # so append a zero column as a placeholder for the background class
        class_prob = torch.cat(
            (class_prob, torch.zeros(class_prob.shape[0], 1, device=class_prob.device)), dim=1
        )

    assert box_regression.shape[1] % box_dim == 0
    cls_agnostic_bbox_reg = box_regression.shape[1] // box_dim == 1

    input_tensor_mode = proposals[0].proposal_boxes.tensor.shape[1] == box_dim + 1

    proposal_boxes = proposals[0].proposal_boxes
    if isinstance(proposal_boxes, Caffe2Boxes):
        rois = Caffe2Boxes.cat([p.proposal_boxes for p in proposals])
    elif isinstance(proposal_boxes, RotatedBoxes):
        rois = RotatedBoxes.cat([p.proposal_boxes for p in proposals])
    elif isinstance(proposal_boxes, Boxes):
        rois = Boxes.cat([p.proposal_boxes for p in proposals])
    else:
        raise NotImplementedError(
            'Expected proposals[0].proposal_boxes to be type "Boxes", '
            f"instead got {type(proposal_boxes)}"
        )

    device, dtype = rois.tensor.device, rois.tensor.dtype
    if input_tensor_mode:
        im_info = proposals[0].image_size
        rois = rois.tensor
    else:
        im_info = torch.tensor([[sz[0], sz[1], 1.0] for sz in [x.image_size for x in proposals]])
        batch_ids = cat(
            [
                torch.full((b, 1), i, dtype=dtype, device=device)
                for i, b in enumerate(len(p) for p in proposals)
            ],
            dim=0,
        )
        rois = torch.cat([batch_ids, rois.tensor], dim=1)

    roi_pred_bbox, roi_batch_splits = torch.ops._caffe2.BBoxTransform(
        to_device(rois, "cpu"),
        to_device(box_regression, "cpu"),
        to_device(im_info, "cpu"),
        weights=box2box_transform_weights,
        apply_scale=True,
        rotated=is_rotated,
        angle_bound_on=True,
        angle_bound_lo=-180,
        angle_bound_hi=180,
        clip_angle_thresh=1.0,
        legacy_plus_one=False,
    )
    roi_pred_bbox = to_device(roi_pred_bbox, device)
    roi_batch_splits = to_device(roi_batch_splits, device)

    nms_outputs = torch.ops._caffe2.BoxWithNMSLimit(
        to_device(class_prob, "cpu"),
        to_device(roi_pred_bbox, "cpu"),
        to_device(roi_batch_splits, "cpu"),
        score_thresh=float(score_thresh),
        nms=float(nms_thresh),
        detections_per_im=int(topk_per_image),
        soft_nms_enabled=False,
        soft_nms_method="linear",
        soft_nms_sigma=0.5,
        soft_nms_min_score_thres=0.001,
        rotated=is_rotated,
        cls_agnostic_bbox_reg=cls_agnostic_bbox_reg,
        input_boxes_include_bg_cls=False,
        output_classes_include_bg_cls=False,
        legacy_plus_one=False,
    )
    roi_score_nms = to_device(nms_outputs[0], device)
    roi_bbox_nms = to_device(nms_outputs[1], device)
    roi_class_nms = to_device(nms_outputs[2], device)
    roi_batch_splits_nms = to_device(nms_outputs[3], device)
    roi_keeps_nms = to_device(nms_outputs[4], device)
    roi_keeps_size_nms = to_device(nms_outputs[5], device)
    if not tensor_mode:
        roi_class_nms = roi_class_nms.to(torch.int64)

    roi_batch_ids = cat(
        [
            torch.full((b, 1), i, dtype=dtype, device=device)
            for i, b in enumerate(int(x.item()) for x in roi_batch_splits_nms)
        ],
        dim=0,
    )

    roi_class_nms = alias(roi_class_nms, "class_nms")
    roi_score_nms = alias(roi_score_nms, "score_nms")
    roi_bbox_nms = alias(roi_bbox_nms, "bbox_nms")
    roi_batch_splits_nms = alias(roi_batch_splits_nms, "batch_splits_nms")
    roi_keeps_nms = alias(roi_keeps_nms, "keeps_nms")
    roi_keeps_size_nms = alias(roi_keeps_size_nms, "keeps_size_nms")

    results = InstancesList(
        im_info=im_info,
        indices=roi_batch_ids[:, 0],
        extra_fields={
            "pred_boxes": Caffe2Boxes(roi_bbox_nms),
            "scores": roi_score_nms,
            "pred_classes": roi_class_nms,
        },
    )

    if not tensor_mode:
        results = InstancesList.to_d2_instances_list(results)
        batch_splits = roi_batch_splits_nms.int().tolist()
        kept_indices = list(roi_keeps_nms.to(torch.int64).split(batch_splits))
    else:
        results = [results]
        kept_indices = [roi_keeps_nms]

    return results, kept_indices
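
# Hedged note on the six BoxWithNMSLimit outputs unpacked above (roles taken
# from the variable names): per-detection scores, boxes, and classes, the
# per-image detection counts (batch splits), the kept row indices into the
# pre-NMS rois, and the number of kept rows.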


class Caffe2FastRCNNOutputsInference:
    def __init__(self, tensor_mode):
        self.tensor_mode = tensor_mode

    def __call__(self, box_predictor, predictions, proposals):
        return caffe2_fast_rcnn_outputs_inference(
            self.tensor_mode, box_predictor, predictions, proposals
        )


def caffe2_mask_rcnn_inference(pred_mask_logits, pred_instances):
    """equivalent to mask_head.mask_rcnn_inference"""
    if all(isinstance(x, InstancesList) for x in pred_instances):
        assert len(pred_instances) == 1
        mask_probs_pred = pred_mask_logits.sigmoid()
        mask_probs_pred = alias(mask_probs_pred, "mask_fcn_probs")
        pred_instances[0].set("pred_masks", mask_probs_pred)
    else:
        mask_rcnn_inference(pred_mask_logits, pred_instances)


class Caffe2MaskRCNNInference:
    def __call__(self, pred_mask_logits, pred_instances):
        return caffe2_mask_rcnn_inference(pred_mask_logits, pred_instances)
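
# Note (assumption drawn from the code above): in the Caffe2 path, pred_masks
# holds the raw per-class sigmoid heatmaps (the aliased "mask_fcn_probs");
# selecting the channel of the predicted class and pasting masks into the
# image are left to downstream post-processing, unlike the eager path where
# mask_rcnn_inference does the per-class selection.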


def caffe2_keypoint_rcnn_inference(use_heatmap_max_keypoint, pred_keypoint_logits, pred_instances):
    # by default return the raw keypoint heatmaps; optionally decode them into
    # keypoint locations with the HeatmapMaxKeypoint op below
    output = alias(pred_keypoint_logits, "kps_score")
    if all(isinstance(x, InstancesList) for x in pred_instances):
        assert len(pred_instances) == 1
        if use_heatmap_max_keypoint:
            device = output.device
            output = torch.ops._caffe2.HeatmapMaxKeypoint(
                to_device(output, "cpu"),
                pred_instances[0].pred_boxes.tensor,
                should_output_softmax=True,
            )
            output = to_device(output, device)
            output = alias(output, "keypoints_out")
        pred_instances[0].set("pred_keypoints", output)
    return pred_keypoint_logits


class Caffe2KeypointRCNNInference:
    def __init__(self, use_heatmap_max_keypoint):
        self.use_heatmap_max_keypoint = use_heatmap_max_keypoint

    def __call__(self, pred_keypoint_logits, pred_instances):
        return caffe2_keypoint_rcnn_inference(
            self.use_heatmap_max_keypoint, pred_keypoint_logits, pred_instances
        )