|
|
|
import collections |
|
import math |
|
from typing import List |
|
import torch |
|
from torch import nn |
|
|
|
from detectron2.config import configurable |
|
from detectron2.layers import ShapeSpec, move_device_like |
|
from detectron2.structures import Boxes, RotatedBoxes |
|
from detectron2.utils.registry import Registry |
|
|
|
# Registry of anchor-generator classes, keyed by cfg.MODEL.ANCHOR_GENERATOR.NAME
# (consumed by `build_anchor_generator` at the bottom of this file).
ANCHOR_GENERATOR_REGISTRY = Registry("ANCHOR_GENERATOR")
ANCHOR_GENERATOR_REGISTRY.__doc__ = """
Registry for modules that creates object detection anchors for feature maps.

The registered object will be called with `obj(cfg, input_shape)`.
"""
|
|
|
|
|
class BufferList(nn.Module):
    """
    Holds a list of tensors as registered (non-persistent) buffers.

    Analogous to ``nn.ParameterList``, but for buffers: the tensors move with
    the module across devices, yet are excluded from the state dict.
    """

    def __init__(self, buffers):
        super().__init__()
        # Buffer names must be strings, so use each tensor's position as its name.
        for index, tensor in enumerate(buffers):
            self.register_buffer(str(index), tensor, persistent=False)

    def __len__(self):
        # Number of buffers registered in __init__.
        return len(self._buffers)

    def __iter__(self):
        # Yield the buffers in registration (insertion) order.
        return iter(self._buffers.values())
|
|
|
|
|
def _create_grid_offsets(
    size: List[int], stride: int, offset: float, target_device_tensor: torch.Tensor
):
    """
    Compute the x and y coordinates of every anchor center on a feature map grid.

    Args:
        size: (grid_height, grid_width) of the feature map.
        stride: stride of the feature map relative to the input image.
        offset: relative offset (in units of stride) of the first anchor center
            from the image's top-left corner.
        target_device_tensor: a tensor whose device the outputs should follow.

    Returns:
        (shift_x, shift_y): two flat float32 tensors of length
        grid_height * grid_width, giving the center coordinates in image units.
    """
    grid_height, grid_width = size
    xs = torch.arange(offset * stride, grid_width * stride, step=stride, dtype=torch.float32)
    ys = torch.arange(offset * stride, grid_height * stride, step=stride, dtype=torch.float32)
    shifts_x = move_device_like(xs, target_device_tensor)
    shifts_y = move_device_like(ys, target_device_tensor)

    # Cartesian product of the two axes; row-major (y, x) ordering matches the
    # layout of a flattened H x W feature map.
    shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x)
    return shift_x.reshape(-1), shift_y.reshape(-1)
|
|
|
|
|
def _broadcast_params(params, num_features, name): |
|
""" |
|
If one size (or aspect ratio) is specified and there are multiple feature |
|
maps, we "broadcast" anchors of that single size (or aspect ratio) |
|
over all feature maps. |
|
|
|
If params is list[float], or list[list[float]] with len(params) == 1, repeat |
|
it num_features time. |
|
|
|
Returns: |
|
list[list[float]]: param for each feature |
|
""" |
|
assert isinstance( |
|
params, collections.abc.Sequence |
|
), f"{name} in anchor generator has to be a list! Got {params}." |
|
assert len(params), f"{name} in anchor generator cannot be empty!" |
|
if not isinstance(params[0], collections.abc.Sequence): |
|
return [params] * num_features |
|
if len(params) == 1: |
|
return list(params) * num_features |
|
assert len(params) == num_features, ( |
|
f"Got {name} of length {len(params)} in anchor generator, " |
|
f"but the number of input features is {num_features}!" |
|
) |
|
return params |
|
|
|
|
|
@ANCHOR_GENERATOR_REGISTRY.register()
class DefaultAnchorGenerator(nn.Module):
    """
    Compute anchors in the standard ways described in
    "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks".
    """

    # Marked torch.jit.Final so TorchScript treats it as a compile-time constant.
    box_dim: torch.jit.Final[int] = 4
    """
    the dimension of each anchor box.
    """

    @configurable
    def __init__(self, *, sizes, aspect_ratios, strides, offset=0.5):
        """
        This interface is experimental.

        Args:
            sizes (list[list[float]] or list[float]):
                If ``sizes`` is list[list[float]], ``sizes[i]`` is the list of anchor sizes
                (i.e. sqrt of anchor area) to use for the i-th feature map.
                If ``sizes`` is list[float], ``sizes`` is used for all feature maps.
                Anchor sizes are given in absolute lengths in units of
                the input image; they do not dynamically scale if the input image size changes.
            aspect_ratios (list[list[float]] or list[float]): list of aspect ratios
                (i.e. height / width) to use for anchors. Same "broadcast" rule for `sizes` applies.
            strides (list[int]): stride of each input feature.
            offset (float): Relative offset between the center of the first anchor and the top-left
                corner of the image. Value has to be in [0, 1).
                Recommend to use 0.5, which means half stride.
        """
        super().__init__()

        self.strides = strides
        self.num_features = len(self.strides)
        # Normalize both specs to one list per feature map (see _broadcast_params).
        sizes = _broadcast_params(sizes, self.num_features, "sizes")
        aspect_ratios = _broadcast_params(aspect_ratios, self.num_features, "aspect_ratios")
        # BufferList holding one (num_cell_anchors, 4) float tensor per feature map.
        self.cell_anchors = self._calculate_anchors(sizes, aspect_ratios)

        self.offset = offset
        assert 0.0 <= self.offset < 1.0, self.offset

    @classmethod
    def from_config(cls, cfg, input_shape: List[ShapeSpec]):
        # Translate a detectron2 config node into the keyword args of __init__.
        return {
            "sizes": cfg.MODEL.ANCHOR_GENERATOR.SIZES,
            "aspect_ratios": cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS,
            "strides": [x.stride for x in input_shape],
            "offset": cfg.MODEL.ANCHOR_GENERATOR.OFFSET,
        }

    def _calculate_anchors(self, sizes, aspect_ratios):
        # One cell-anchor tensor per feature map, each centered at (0, 0).
        cell_anchors = [
            self.generate_cell_anchors(s, a).float() for s, a in zip(sizes, aspect_ratios)
        ]
        return BufferList(cell_anchors)

    @property
    @torch.jit.unused
    def num_cell_anchors(self):
        """
        Alias of `num_anchors`.
        """
        return self.num_anchors

    @property
    @torch.jit.unused
    def num_anchors(self):
        """
        Returns:
            list[int]: Each int is the number of anchors at every pixel
                location, on that feature map.
                For example, if at every pixel we use anchors of 3 aspect
                ratios and 5 sizes, the number of anchors is 15.
                (See also ANCHOR_GENERATOR.SIZES and ANCHOR_GENERATOR.ASPECT_RATIOS in config)

                In standard RPN models, `num_anchors` on every feature map is the same.
        """
        return [len(cell_anchors) for cell_anchors in self.cell_anchors]

    def _grid_anchors(self, grid_sizes: List[List[int]]):
        """
        Returns:
            list[Tensor]: #featuremap tensors, each is (#locations x #cell_anchors) x 4
        """
        anchors = []
        # NOTE(review): the buffers are collected via named_buffers() rather than
        # iterating the BufferList directly — presumably for TorchScript
        # compatibility, since this method is not decorated @torch.jit.unused.
        buffers: List[torch.Tensor] = [x[1] for x in self.cell_anchors.named_buffers()]
        for size, stride, base_anchors in zip(grid_sizes, self.strides, buffers):
            shift_x, shift_y = _create_grid_offsets(size, stride, self.offset, base_anchors)
            # XYXY boxes are shifted by the same (x, y) at both corners.
            shifts = torch.stack((shift_x, shift_y, shift_x, shift_y), dim=1)

            # Broadcast-add every cell anchor to every grid location:
            # (#locations, 1, 4) + (1, #cell_anchors, 4) -> flattened (-1, 4).
            anchors.append((shifts.view(-1, 1, 4) + base_anchors.view(1, -1, 4)).reshape(-1, 4))

        return anchors

    def generate_cell_anchors(self, sizes=(32, 64, 128, 256, 512), aspect_ratios=(0.5, 1, 2)):
        """
        Generate a tensor storing canonical anchor boxes, which are all anchor
        boxes of different sizes and aspect_ratios centered at (0, 0).
        We can later build the set of anchors for a full feature map by
        shifting and tiling these tensors (see `meth:_grid_anchors`).

        Args:
            sizes (tuple[float]):
            aspect_ratios (tuple[float]]):

        Returns:
            Tensor of shape (len(sizes) * len(aspect_ratios), 4) storing anchor boxes
                in XYXY format.
        """
        anchors = []
        for size in sizes:
            # "size" is the sqrt of the anchor area, so area = size^2.
            area = size**2.0
            for aspect_ratio in aspect_ratios:
                # Solve for w, h given:
                #   w * h = area
                #   h / w = aspect_ratio
                # => w = sqrt(area / aspect_ratio), h = aspect_ratio * w.
                w = math.sqrt(area / aspect_ratio)
                h = aspect_ratio * w
                # Box centered at the origin in XYXY format.
                x0, y0, x1, y1 = -w / 2.0, -h / 2.0, w / 2.0, h / 2.0
                anchors.append([x0, y0, x1, y1])
        return torch.tensor(anchors)

    def forward(self, features: List[torch.Tensor]):
        """
        Args:
            features (list[Tensor]): list of backbone feature maps on which to generate anchors.

        Returns:
            list[Boxes]: a list of Boxes containing all the anchors for each feature map
                (i.e. the cell anchors repeated over all locations in the feature map).
                The number of anchors of each feature map is Hi x Wi x num_cell_anchors,
                where Hi, Wi are resolution of the feature map divided by anchor stride.
        """
        # Only the spatial resolution (H, W) of each feature map is used.
        grid_sizes = [feature_map.shape[-2:] for feature_map in features]
        anchors_over_all_feature_maps = self._grid_anchors(grid_sizes)
        return [Boxes(x) for x in anchors_over_all_feature_maps]
|
|
|
|
|
@ANCHOR_GENERATOR_REGISTRY.register()
class RotatedAnchorGenerator(nn.Module):
    """
    Compute rotated anchors used by Rotated RPN (RRPN), described in
    "Arbitrary-Oriented Scene Text Detection via Rotation Proposals".
    """

    # Rotated boxes carry an extra angle component: (x_ctr, y_ctr, w, h, angle).
    box_dim: int = 5
    """
    the dimension of each anchor box.
    """

    @configurable
    def __init__(self, *, sizes, aspect_ratios, strides, angles, offset=0.5):
        """
        This interface is experimental.

        Args:
            sizes (list[list[float]] or list[float]):
                If sizes is list[list[float]], sizes[i] is the list of anchor sizes
                (i.e. sqrt of anchor area) to use for the i-th feature map.
                If sizes is list[float], the sizes are used for all feature maps.
                Anchor sizes are given in absolute lengths in units of
                the input image; they do not dynamically scale if the input image size changes.
            aspect_ratios (list[list[float]] or list[float]): list of aspect ratios
                (i.e. height / width) to use for anchors. Same "broadcast" rule for `sizes` applies.
            strides (list[int]): stride of each input feature.
            angles (list[list[float]] or list[float]): list of angles (in degrees CCW)
                to use for anchors. Same "broadcast" rule for `sizes` applies.
            offset (float): Relative offset between the center of the first anchor and the top-left
                corner of the image. Value has to be in [0, 1).
                Recommend to use 0.5, which means half stride.
        """
        super().__init__()

        self.strides = strides
        self.num_features = len(self.strides)
        # Normalize each spec to one list per feature map (see _broadcast_params).
        sizes = _broadcast_params(sizes, self.num_features, "sizes")
        aspect_ratios = _broadcast_params(aspect_ratios, self.num_features, "aspect_ratios")
        angles = _broadcast_params(angles, self.num_features, "angles")
        # BufferList holding one (num_cell_anchors, 5) float tensor per feature map.
        self.cell_anchors = self._calculate_anchors(sizes, aspect_ratios, angles)

        self.offset = offset
        assert 0.0 <= self.offset < 1.0, self.offset

    @classmethod
    def from_config(cls, cfg, input_shape: List[ShapeSpec]):
        # Translate a detectron2 config node into the keyword args of __init__.
        return {
            "sizes": cfg.MODEL.ANCHOR_GENERATOR.SIZES,
            "aspect_ratios": cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS,
            "strides": [x.stride for x in input_shape],
            "offset": cfg.MODEL.ANCHOR_GENERATOR.OFFSET,
            "angles": cfg.MODEL.ANCHOR_GENERATOR.ANGLES,
        }

    def _calculate_anchors(self, sizes, aspect_ratios, angles):
        # One cell-anchor tensor per feature map, each centered at (0, 0).
        cell_anchors = [
            self.generate_cell_anchors(size, aspect_ratio, angle).float()
            for size, aspect_ratio, angle in zip(sizes, aspect_ratios, angles)
        ]
        return BufferList(cell_anchors)

    @property
    def num_cell_anchors(self):
        """
        Alias of `num_anchors`.
        """
        return self.num_anchors

    @property
    def num_anchors(self):
        """
        Returns:
            list[int]: Each int is the number of anchors at every pixel
                location, on that feature map.
                For example, if at every pixel we use anchors of 3 aspect
                ratios, 2 sizes and 5 angles, the number of anchors is 30.
                (See also ANCHOR_GENERATOR.SIZES, ANCHOR_GENERATOR.ASPECT_RATIOS
                and ANCHOR_GENERATOR.ANGLES in config)

                In standard RRPN models, `num_anchors` on every feature map is the same.
        """
        return [len(cell_anchors) for cell_anchors in self.cell_anchors]

    def _grid_anchors(self, grid_sizes):
        # Returns one (#locations x #cell_anchors, 5) tensor per feature map.
        anchors = []
        for size, stride, base_anchors in zip(grid_sizes, self.strides, self.cell_anchors):
            shift_x, shift_y = _create_grid_offsets(size, stride, self.offset, base_anchors)
            # Only the center (x_ctr, y_ctr) is shifted; w, h, angle are unchanged.
            zeros = torch.zeros_like(shift_x)
            shifts = torch.stack((shift_x, shift_y, zeros, zeros, zeros), dim=1)

            # Broadcast-add every cell anchor to every grid location:
            # (#locations, 1, 5) + (1, #cell_anchors, 5) -> flattened (-1, 5).
            anchors.append((shifts.view(-1, 1, 5) + base_anchors.view(1, -1, 5)).reshape(-1, 5))

        return anchors

    def generate_cell_anchors(
        self,
        sizes=(32, 64, 128, 256, 512),
        aspect_ratios=(0.5, 1, 2),
        angles=(-90, -60, -30, 0, 30, 60, 90),
    ):
        """
        Generate a tensor storing canonical anchor boxes, which are all anchor
        boxes of different sizes, aspect_ratios, angles centered at (0, 0).
        We can later build the set of anchors for a full feature map by
        shifting and tiling these tensors (see `meth:_grid_anchors`).

        Args:
            sizes (tuple[float]):
            aspect_ratios (tuple[float]]):
            angles (tuple[float]]):

        Returns:
            Tensor of shape (len(sizes) * len(aspect_ratios) * len(angles), 5)
                storing anchor boxes in (x_ctr, y_ctr, w, h, angle) format.
        """
        anchors = []
        for size in sizes:
            # "size" is the sqrt of the anchor area, so area = size^2.
            area = size**2.0
            for aspect_ratio in aspect_ratios:
                # Solve for w, h given:
                #   w * h = area
                #   h / w = aspect_ratio
                # => w = sqrt(area / aspect_ratio), h = aspect_ratio * w.
                w = math.sqrt(area / aspect_ratio)
                h = aspect_ratio * w
                # One anchor per angle, centered at the origin.
                anchors.extend([0, 0, w, h, a] for a in angles)

        return torch.tensor(anchors)

    def forward(self, features):
        """
        Args:
            features (list[Tensor]): list of backbone feature maps on which to generate anchors.

        Returns:
            list[RotatedBoxes]: a list of Boxes containing all the anchors for each feature map
                (i.e. the cell anchors repeated over all locations in the feature map).
                The number of anchors of each feature map is Hi x Wi x num_cell_anchors,
                where Hi, Wi are resolution of the feature map divided by anchor stride.
        """
        # Only the spatial resolution (H, W) of each feature map is used.
        grid_sizes = [feature_map.shape[-2:] for feature_map in features]
        anchors_over_all_feature_maps = self._grid_anchors(grid_sizes)
        return [RotatedBoxes(x) for x in anchors_over_all_feature_maps]
|
|
|
|
|
def build_anchor_generator(cfg, input_shape):
    """
    Build an anchor generator from `cfg.MODEL.ANCHOR_GENERATOR.NAME`.

    The named class is looked up in ANCHOR_GENERATOR_REGISTRY and instantiated
    with ``(cfg, input_shape)``.
    """
    name = cfg.MODEL.ANCHOR_GENERATOR.NAME
    generator_cls = ANCHOR_GENERATOR_REGISTRY.get(name)
    return generator_cls(cfg, input_shape)
|
|