deanna-emery's picture
updates
93528c6
raw
history blame
7.27 kB
# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Multi scale anchor generator definition."""
import tensorflow as tf, tf_keras
# (TODO/tanzheny): consider having customized anchor offset.
class _SingleAnchorGenerator:
"""Utility to generate anchors for a single feature map.
Example:
```python
anchor_gen = _SingleAnchorGenerator(32, [.5, 1., 2.], stride=16)
anchors = anchor_gen([512, 512, 3])
```
"""
def __init__(self,
anchor_size,
scales,
aspect_ratios,
stride,
clip_boxes=False):
"""Constructs single scale anchor.
Args:
anchor_size: A single int represents the base anchor size. The anchor
height will be `anchor_size / sqrt(aspect_ratio)`, anchor width will be
`anchor_size * sqrt(aspect_ratio)`.
scales: A list/tuple, or a list/tuple of a list/tuple of positive
floats representing the actual anchor size to the base `anchor_size`.
aspect_ratios: a list/tuple of positive floats representing the ratio of
anchor width to anchor height.
stride: A single int represents the anchor stride size between center of
each anchor.
clip_boxes: Boolean to represent whether the anchor coordinates should be
clipped to the image size. Defaults to `False`.
Input shape: the size of the image, `[H, W, C]`
Output shape: the size of anchors, `[(H / stride) * (W / stride), 4]`
"""
self.anchor_size = anchor_size
self.scales = scales
self.aspect_ratios = aspect_ratios
self.stride = stride
self.clip_boxes = clip_boxes
def __call__(self, image_size):
image_height = tf.cast(image_size[0], tf.float32)
image_width = tf.cast(image_size[1], tf.float32)
k = len(self.scales) * len(self.aspect_ratios)
aspect_ratios_sqrt = tf.cast(tf.sqrt(self.aspect_ratios), dtype=tf.float32)
anchor_size = tf.cast(self.anchor_size, tf.float32)
# [K]
anchor_heights = []
anchor_widths = []
for scale in self.scales:
anchor_size_t = anchor_size * scale
anchor_height = anchor_size_t / aspect_ratios_sqrt
anchor_width = anchor_size_t * aspect_ratios_sqrt
anchor_heights.append(anchor_height)
anchor_widths.append(anchor_width)
anchor_heights = tf.concat(anchor_heights, axis=0)
anchor_widths = tf.concat(anchor_widths, axis=0)
half_anchor_heights = tf.reshape(0.5 * anchor_heights, [1, 1, k])
half_anchor_widths = tf.reshape(0.5 * anchor_widths, [1, 1, k])
stride = tf.cast(self.stride, tf.float32)
# [W]
cx = tf.range(0.5 * stride, image_width + 0.5 * stride, stride)
# [H]
cy = tf.range(0.5 * stride, image_height + 0.5 * stride, stride)
# [H, W]
cx_grid, cy_grid = tf.meshgrid(cx, cy)
# [H, W, 1]
cx_grid = tf.expand_dims(cx_grid, axis=-1)
cy_grid = tf.expand_dims(cy_grid, axis=-1)
# [H, W, K, 1]
y_min = tf.expand_dims(cy_grid - half_anchor_heights, axis=-1)
y_max = tf.expand_dims(cy_grid + half_anchor_heights, axis=-1)
x_min = tf.expand_dims(cx_grid - half_anchor_widths, axis=-1)
x_max = tf.expand_dims(cx_grid + half_anchor_widths, axis=-1)
if self.clip_boxes:
y_min = tf.maximum(tf.minimum(y_min, image_height), 0.)
y_max = tf.maximum(tf.minimum(y_max, image_height), 0.)
x_min = tf.maximum(tf.minimum(x_min, image_width), 0.)
x_max = tf.maximum(tf.minimum(x_max, image_width), 0.)
# [H, W, K, 4]
result = tf.concat([y_min, x_min, y_max, x_max], axis=-1)
shape = result.shape.as_list()
# [H, W, K * 4]
return tf.reshape(result, [shape[0], shape[1], shape[2] * shape[3]])
class AnchorGenerator():
"""Utility to generate anchors for a multiple feature maps.
Example:
```python
anchor_gen = AnchorGenerator([32, 64], [.5, 1., 2.],
strides=[16, 32])
anchors = anchor_gen([512, 512, 3])
```
"""
def __init__(self,
anchor_sizes,
scales,
aspect_ratios,
strides,
clip_boxes=False):
"""Constructs multiscale anchors.
Args:
anchor_sizes: A list of int represents the anchor size for each scale. The
anchor height will be `anchor_size / sqrt(aspect_ratio)`, anchor width
will be `anchor_size * sqrt(aspect_ratio)` for each scale.
scales: A list/tuple, or a list/tuple of a list/tuple of positive
floats representing the actual anchor size to the base `anchor_size`.
aspect_ratios: A list/tuple, or a list/tuple of a list/tuple of positive
floats representing the ratio of anchor width to anchor height.
strides: A list/tuple of ints represent the anchor stride size between
center of anchors at each scale.
clip_boxes: Boolean to represents whether the anchor coordinates should be
clipped to the image size. Defaults to `False`.
Input shape: the size of the image, `[H, W, C]`
Output shape: the size of anchors concat on each level, `[(H /
strides) * (W / strides), K * 4]`
"""
# aspect_ratio is a single list that is the same across all levels.
aspect_ratios = maybe_map_structure_for_anchor(aspect_ratios, anchor_sizes)
scales = maybe_map_structure_for_anchor(scales, anchor_sizes)
if isinstance(anchor_sizes, dict):
self.anchor_generators = {}
for k in anchor_sizes.keys():
self.anchor_generators[k] = _SingleAnchorGenerator(
anchor_sizes[k], scales[k], aspect_ratios[k], strides[k],
clip_boxes)
elif isinstance(anchor_sizes, (list, tuple)):
self.anchor_generators = []
for anchor_size, scale_list, ar_list, stride in zip(
anchor_sizes, scales, aspect_ratios, strides):
self.anchor_generators.append(
_SingleAnchorGenerator(anchor_size, scale_list, ar_list, stride,
clip_boxes))
def __call__(self, image_size):
anchor_generators = tf.nest.flatten(self.anchor_generators)
results = [anchor_gen(image_size) for anchor_gen in anchor_generators]
return tf.nest.pack_sequence_as(self.anchor_generators, results)
def maybe_map_structure_for_anchor(params, anchor_sizes):
"""broadcast the params to match anchor_sizes."""
if all(isinstance(param, (int, float)) for param in params):
if isinstance(anchor_sizes, (tuple, list)):
return [params] * len(anchor_sizes)
elif isinstance(anchor_sizes, dict):
return tf.nest.map_structure(lambda _: params, anchor_sizes)
else:
raise ValueError("the structure of `anchor_sizes` must be a tuple, "
"list, or dict, given {}".format(anchor_sizes))
else:
return params