Spaces:
Running
Running
# Copyright (c) Facebook, Inc. and its affiliates. | |
import collections | |
import math | |
from typing import List | |
import torch | |
from torch import nn | |
from detectron2.config import configurable | |
from detectron2.layers import ShapeSpec | |
from detectron2.structures import Boxes, RotatedBoxes | |
from detectron2.utils.registry import Registry | |
# Module-level registry of anchor generators. `build_anchor_generator` looks an
# entry up by the name stored in `cfg.MODEL.ANCHOR_GENERATOR.NAME` and calls it.
ANCHOR_GENERATOR_REGISTRY = Registry("ANCHOR_GENERATOR")
# The registry's docstring is attached after construction.
ANCHOR_GENERATOR_REGISTRY.__doc__ = """
Registry for modules that creates object detection anchors for feature maps.
The registered object will be called with `obj(cfg, input_shape)`.
"""
class BufferList(nn.Module):
    """
    Container that holds a sequence of tensors as module buffers.

    It is the buffer counterpart of ``nn.ParameterList``: the tensors follow the
    module like any other buffer, and can be counted and iterated in the order
    they were given.
    """

    def __init__(self, buffers):
        """
        Args:
            buffers: iterable of tensors. The i-th tensor is registered under
                the buffer name ``str(i)``.
        """
        super().__init__()
        for index, tensor in enumerate(buffers):
            # Registered as non-persistent, so the values are left out of the
            # state dict and therefore never written to a checkpoint.
            self.register_buffer(name=str(index), tensor=tensor, persistent=False)

    def __len__(self):
        """Return how many buffers are stored."""
        return len(self._buffers)

    def __iter__(self):
        """Iterate over the stored tensors in registration order."""
        return iter(self._buffers.values())
def _create_grid_offsets(size: List[int], stride: int, offset: float, device: torch.device):
    """
    Build the flattened (x, y) shift of every location of a feature-map grid.

    Args:
        size: ``(grid_height, grid_width)`` of the feature map.
        stride: spacing between two neighbouring grid locations.
        offset: position of the first location, as a fraction of ``stride``.
        device: device on which the returned tensors are created.

    Returns:
        tuple[Tensor, Tensor]: ``(shift_x, shift_y)``, two float32 vectors with
        ``grid_height * grid_width`` elements each, enumerated row by row
        (x varies fastest).
    """
    num_rows, num_cols = size
    # Both axes start at the same fractional offset of one stride.
    first = offset * stride
    xs = torch.arange(first, num_cols * stride, step=stride, dtype=torch.float32, device=device)
    ys = torch.arange(first, num_rows * stride, step=stride, dtype=torch.float32, device=device)

    # ys is passed first so that both outputs have shape (len(ys), len(xs)).
    grid_y, grid_x = torch.meshgrid(ys, xs)
    return grid_x.reshape(-1), grid_y.reshape(-1)
def _broadcast_params(params, num_features, name):
    """
    Expand an anchor parameter so that there is one entry per feature map.

    A single size / aspect-ratio specification is "broadcast" over all feature
    maps:

    * ``params`` is a flat ``list[float]``: it is repeated ``num_features`` times.
    * ``params`` is ``list[list[float]]`` with exactly one inner list: that inner
      list is repeated ``num_features`` times.
    * otherwise ``params`` must already hold ``num_features`` inner lists and is
      returned unchanged.

    Args:
        params: the parameter value to broadcast (must be a non-empty sequence).
        num_features (int): number of input feature maps.
        name (str): parameter name, only used in assertion messages.

    Returns:
        list[list[float]]: param for each feature
    """
    sequence_type = collections.abc.Sequence
    assert isinstance(
        params, sequence_type
    ), f"{name} in anchor generator has to be a list! Got {params}."
    assert len(params), f"{name} in anchor generator cannot be empty!"

    nested = isinstance(params[0], sequence_type)
    if not nested:
        # Flat list[float]: every feature map refers to the very same object.
        return [params for _ in range(num_features)]

    count = len(params)
    if count == 1:
        return list(params) * num_features

    assert count == num_features, (
        f"Got {name} of length {len(params)} in anchor generator, "
        f"but the number of input features is {num_features}!"
    )
    return params
# Registered so that `build_anchor_generator` can look this class up by name in
# ANCHOR_GENERATOR_REGISTRY.
@ANCHOR_GENERATOR_REGISTRY.register()
class DefaultAnchorGenerator(nn.Module):
    """
    Compute anchors in the standard ways described in
    "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks".

    For every input feature map, a small set of "cell anchors" centered at (0, 0)
    is precomputed once; `forward` shifts and tiles them over all grid locations.
    """

    box_dim: torch.jit.Final[int] = 4
    """
    the dimension of each anchor box.
    """

    # `@configurable` allows construction either with the explicit keyword
    # arguments below, or as `DefaultAnchorGenerator(cfg, input_shape)`, in which
    # case `from_config` translates the config into those keyword arguments.
    @configurable
    def __init__(self, *, sizes, aspect_ratios, strides, offset=0.5):
        """
        This interface is experimental.

        Args:
            sizes (list[list[float]] or list[float]):
                If ``sizes`` is list[list[float]], ``sizes[i]`` is the list of anchor sizes
                (i.e. sqrt of anchor area) to use for the i-th feature map.
                If ``sizes`` is list[float], ``sizes`` is used for all feature maps.
                Anchor sizes are given in absolute lengths in units of
                the input image; they do not dynamically scale if the input image size changes.
            aspect_ratios (list[list[float]] or list[float]): list of aspect ratios
                (i.e. height / width) to use for anchors. Same "broadcast" rule for `sizes` applies.
            strides (list[int]): stride of each input feature.
            offset (float): Relative offset between the center of the first anchor and the top-left
                corner of the image. Value has to be in [0, 1).
                Recommend to use 0.5, which means half stride.
        """
        super().__init__()

        self.strides = strides
        self.num_features = len(self.strides)
        # Expand both parameters to exactly one list per feature map.
        sizes = _broadcast_params(sizes, self.num_features, "sizes")
        aspect_ratios = _broadcast_params(aspect_ratios, self.num_features, "aspect_ratios")
        self.cell_anchors = self._calculate_anchors(sizes, aspect_ratios)

        self.offset = offset
        assert 0.0 <= self.offset < 1.0, self.offset

    @classmethod
    def from_config(cls, cfg, input_shape: List[ShapeSpec]):
        """
        Translate a config into the keyword arguments of `__init__`.

        Args:
            cfg: config node; the ``MODEL.ANCHOR_GENERATOR`` section is read.
            input_shape (list[ShapeSpec]): one entry per input feature map; only
                its ``stride`` is used.

        Returns:
            dict: keyword arguments for `__init__`.
        """
        return {
            "sizes": cfg.MODEL.ANCHOR_GENERATOR.SIZES,
            "aspect_ratios": cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS,
            "strides": [x.stride for x in input_shape],
            "offset": cfg.MODEL.ANCHOR_GENERATOR.OFFSET,
        }

    def _calculate_anchors(self, sizes, aspect_ratios):
        """
        Build the float cell anchors of every feature map.

        Args:
            sizes (list[list[float]]): anchor sizes, one list per feature map.
            aspect_ratios (list[list[float]]): aspect ratios, one list per feature map.

        Returns:
            BufferList: one (#cell_anchors x 4) tensor per feature map.
        """
        cell_anchors = [
            self.generate_cell_anchors(s, a).float() for s, a in zip(sizes, aspect_ratios)
        ]
        return BufferList(cell_anchors)

    # Both counters are exposed as properties (read as attributes, not called).
    # `torch.jit.unused` keeps TorchScript from trying to compile them.
    @property
    @torch.jit.unused
    def num_cell_anchors(self):
        """
        Alias of `num_anchors`.
        """
        return self.num_anchors

    @property
    @torch.jit.unused
    def num_anchors(self):
        """
        Returns:
            list[int]: Each int is the number of anchors at every pixel
                location, on that feature map.
                For example, if at every pixel we use anchors of 3 aspect
                ratios and 5 sizes, the number of anchors is 15.
                (See also ANCHOR_GENERATOR.SIZES and ANCHOR_GENERATOR.ASPECT_RATIOS in config)

                In standard RPN models, `num_anchors` on every feature map is the same.
        """
        return [len(cell_anchors) for cell_anchors in self.cell_anchors]

    def _grid_anchors(self, grid_sizes: List[List[int]]):
        """
        Tile the cell anchors over every location of every feature map.

        Args:
            grid_sizes (list[list[int]]): (height, width) of each feature map.

        Returns:
            list[Tensor]: #featuremap tensors, each is (#locations x #cell_anchors) x 4
        """
        anchors = []
        # buffers() not supported by torchscript. use named_buffers() instead
        buffers: List[torch.Tensor] = [x[1] for x in self.cell_anchors.named_buffers()]
        for size, stride, base_anchors in zip(grid_sizes, self.strides, buffers):
            shift_x, shift_y = _create_grid_offsets(size, stride, self.offset, base_anchors.device)
            # The same (x, y) shift is applied to both corners of an XYXY box.
            shifts = torch.stack((shift_x, shift_y, shift_x, shift_y), dim=1)

            # Broadcast (#locations, 1, 4) + (1, #cell_anchors, 4), then flatten.
            anchors.append((shifts.view(-1, 1, 4) + base_anchors.view(1, -1, 4)).reshape(-1, 4))

        return anchors

    def generate_cell_anchors(self, sizes=(32, 64, 128, 256, 512), aspect_ratios=(0.5, 1, 2)):
        """
        Generate a tensor storing canonical anchor boxes, which are all anchor
        boxes of different sizes and aspect_ratios centered at (0, 0).
        We can later build the set of anchors for a full feature map by
        shifting and tiling these tensors (see `meth:_grid_anchors`).

        Args:
            sizes (tuple[float]):
            aspect_ratios (tuple[float]]):

        Returns:
            Tensor of shape (len(sizes) * len(aspect_ratios), 4) storing anchor boxes
                in XYXY format.
        """

        # This is different from the anchor generator defined in the original Faster R-CNN
        # code or Detectron. They yield the same AP, however the old version defines cell
        # anchors in a less natural way with a shift relative to the feature grid and
        # quantization that results in slightly different sizes for different aspect ratios.
        # See also https://github.com/facebookresearch/Detectron/issues/227

        anchors = []
        for size in sizes:
            area = size ** 2.0
            for aspect_ratio in aspect_ratios:
                # s * s = w * h
                # a = h / w
                # ... some algebra ...
                # w = sqrt(s * s / a)
                # h = a * w
                w = math.sqrt(area / aspect_ratio)
                h = aspect_ratio * w
                x0, y0, x1, y1 = -w / 2.0, -h / 2.0, w / 2.0, h / 2.0
                anchors.append([x0, y0, x1, y1])
        return torch.tensor(anchors)

    def forward(self, features: List[torch.Tensor]):
        """
        Args:
            features (list[Tensor]): list of backbone feature maps on which to generate anchors.

        Returns:
            list[Boxes]: a list of Boxes containing all the anchors for each feature map
                (i.e. the cell anchors repeated over all locations in the feature map).
                The number of anchors of each feature map is Hi x Wi x num_cell_anchors,
                where Hi, Wi are resolution of the feature map divided by anchor stride.
        """
        # Only the spatial (last two) dimensions of each feature map are needed.
        grid_sizes = [feature_map.shape[-2:] for feature_map in features]
        anchors_over_all_feature_maps = self._grid_anchors(grid_sizes)
        return [Boxes(x) for x in anchors_over_all_feature_maps]
# Registered so that `build_anchor_generator` can look this class up by name in
# ANCHOR_GENERATOR_REGISTRY.
@ANCHOR_GENERATOR_REGISTRY.register()
class RotatedAnchorGenerator(nn.Module):
    """
    Compute rotated anchors used by Rotated RPN (RRPN), described in
    "Arbitrary-Oriented Scene Text Detection via Rotation Proposals".

    For every input feature map, a small set of "cell anchors" centered at (0, 0)
    is precomputed once; `forward` shifts and tiles them over all grid locations.
    """

    box_dim: int = 5
    """
    the dimension of each anchor box.
    """

    # `@configurable` allows construction either with the explicit keyword
    # arguments below, or as `RotatedAnchorGenerator(cfg, input_shape)`, in which
    # case `from_config` translates the config into those keyword arguments.
    @configurable
    def __init__(self, *, sizes, aspect_ratios, strides, angles, offset=0.5):
        """
        This interface is experimental.

        Args:
            sizes (list[list[float]] or list[float]):
                If sizes is list[list[float]], sizes[i] is the list of anchor sizes
                (i.e. sqrt of anchor area) to use for the i-th feature map.
                If sizes is list[float], the sizes are used for all feature maps.
                Anchor sizes are given in absolute lengths in units of
                the input image; they do not dynamically scale if the input image size changes.
            aspect_ratios (list[list[float]] or list[float]): list of aspect ratios
                (i.e. height / width) to use for anchors. Same "broadcast" rule for `sizes` applies.
            strides (list[int]): stride of each input feature.
            angles (list[list[float]] or list[float]): list of angles (in degrees CCW)
                to use for anchors. Same "broadcast" rule for `sizes` applies.
            offset (float): Relative offset between the center of the first anchor and the top-left
                corner of the image. Value has to be in [0, 1).
                Recommend to use 0.5, which means half stride.
        """
        super().__init__()

        self.strides = strides
        self.num_features = len(self.strides)
        # Expand every parameter to exactly one list per feature map.
        sizes = _broadcast_params(sizes, self.num_features, "sizes")
        aspect_ratios = _broadcast_params(aspect_ratios, self.num_features, "aspect_ratios")
        angles = _broadcast_params(angles, self.num_features, "angles")
        self.cell_anchors = self._calculate_anchors(sizes, aspect_ratios, angles)

        self.offset = offset
        assert 0.0 <= self.offset < 1.0, self.offset

    @classmethod
    def from_config(cls, cfg, input_shape: List[ShapeSpec]):
        """
        Translate a config into the keyword arguments of `__init__`.

        Args:
            cfg: config node; the ``MODEL.ANCHOR_GENERATOR`` section is read.
            input_shape (list[ShapeSpec]): one entry per input feature map; only
                its ``stride`` is used.

        Returns:
            dict: keyword arguments for `__init__`.
        """
        return {
            "sizes": cfg.MODEL.ANCHOR_GENERATOR.SIZES,
            "aspect_ratios": cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS,
            "strides": [x.stride for x in input_shape],
            "offset": cfg.MODEL.ANCHOR_GENERATOR.OFFSET,
            "angles": cfg.MODEL.ANCHOR_GENERATOR.ANGLES,
        }

    def _calculate_anchors(self, sizes, aspect_ratios, angles):
        """
        Build the float cell anchors of every feature map.

        Args:
            sizes (list[list[float]]): anchor sizes, one list per feature map.
            aspect_ratios (list[list[float]]): aspect ratios, one list per feature map.
            angles (list[list[float]]): anchor angles, one list per feature map.

        Returns:
            BufferList: one (#cell_anchors x 5) tensor per feature map.
        """
        cell_anchors = [
            self.generate_cell_anchors(size, aspect_ratio, angle).float()
            for size, aspect_ratio, angle in zip(sizes, aspect_ratios, angles)
        ]
        return BufferList(cell_anchors)

    # Both counters are exposed as properties (read as attributes, not called);
    # `num_cell_anchors` simply forwards to `num_anchors`.
    @property
    def num_cell_anchors(self):
        """
        Alias of `num_anchors`.
        """
        return self.num_anchors

    @property
    def num_anchors(self):
        """
        Returns:
            list[int]: Each int is the number of anchors at every pixel
                location, on that feature map.
                For example, if at every pixel we use anchors of 3 aspect
                ratios, 2 sizes and 5 angles, the number of anchors is 30.
                (See also ANCHOR_GENERATOR.SIZES, ANCHOR_GENERATOR.ASPECT_RATIOS
                and ANCHOR_GENERATOR.ANGLES in config)

                In standard RRPN models, `num_anchors` on every feature map is the same.
        """
        return [len(cell_anchors) for cell_anchors in self.cell_anchors]

    def _grid_anchors(self, grid_sizes):
        """
        Tile the cell anchors over every location of every feature map.

        Args:
            grid_sizes (list[list[int]]): (height, width) of each feature map.

        Returns:
            list[Tensor]: #featuremap tensors, each is (#locations x #cell_anchors) x 5
        """
        anchors = []
        for size, stride, base_anchors in zip(grid_sizes, self.strides, self.cell_anchors):
            shift_x, shift_y = _create_grid_offsets(size, stride, self.offset, base_anchors.device)
            # Only the center (first two columns) moves; w, h and angle get a zero shift.
            zeros = torch.zeros_like(shift_x)
            shifts = torch.stack((shift_x, shift_y, zeros, zeros, zeros), dim=1)

            # Broadcast (#locations, 1, 5) + (1, #cell_anchors, 5), then flatten.
            anchors.append((shifts.view(-1, 1, 5) + base_anchors.view(1, -1, 5)).reshape(-1, 5))

        return anchors

    def generate_cell_anchors(
        self,
        sizes=(32, 64, 128, 256, 512),
        aspect_ratios=(0.5, 1, 2),
        angles=(-90, -60, -30, 0, 30, 60, 90),
    ):
        """
        Generate a tensor storing canonical anchor boxes, which are all anchor
        boxes of different sizes, aspect_ratios, angles centered at (0, 0).
        We can later build the set of anchors for a full feature map by
        shifting and tiling these tensors (see `meth:_grid_anchors`).

        Args:
            sizes (tuple[float]):
            aspect_ratios (tuple[float]]):
            angles (tuple[float]]):

        Returns:
            Tensor of shape (len(sizes) * len(aspect_ratios) * len(angles), 5)
                storing anchor boxes in (x_ctr, y_ctr, w, h, angle) format.
        """
        anchors = []
        for size in sizes:
            area = size ** 2.0
            for aspect_ratio in aspect_ratios:
                # s * s = w * h
                # a = h / w
                # ... some algebra ...
                # w = sqrt(s * s / a)
                # h = a * w
                w = math.sqrt(area / aspect_ratio)
                h = aspect_ratio * w
                # One anchor per angle, all sharing this width and height.
                anchors.extend([0, 0, w, h, a] for a in angles)

        return torch.tensor(anchors)

    def forward(self, features):
        """
        Args:
            features (list[Tensor]): list of backbone feature maps on which to generate anchors.

        Returns:
            list[RotatedBoxes]: a list of Boxes containing all the anchors for each feature map
                (i.e. the cell anchors repeated over all locations in the feature map).
                The number of anchors of each feature map is Hi x Wi x num_cell_anchors,
                where Hi, Wi are resolution of the feature map divided by anchor stride.
        """
        # Only the spatial (last two) dimensions of each feature map are needed.
        grid_sizes = [feature_map.shape[-2:] for feature_map in features]
        anchors_over_all_feature_maps = self._grid_anchors(grid_sizes)
        return [RotatedBoxes(x) for x in anchors_over_all_feature_maps]
def build_anchor_generator(cfg, input_shape):
    """
    Build an anchor generator from `cfg.MODEL.ANCHOR_GENERATOR.NAME`.

    Args:
        cfg: config node; ``MODEL.ANCHOR_GENERATOR.NAME`` selects the registry entry.
        input_shape: forwarded unchanged to the selected entry.

    Returns:
        Whatever the registered entry returns when called as ``entry(cfg, input_shape)``.
    """
    name = cfg.MODEL.ANCHOR_GENERATOR.NAME
    generator_factory = ANCHOR_GENERATOR_REGISTRY.get(name)
    return generator_factory(cfg, input_shape)