Spaces:
Runtime error
Runtime error
# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Contains definitions of ROI generator."""
from typing import Optional, Mapping | |
# Import libraries | |
import tensorflow as tf, tf_keras | |
from official.vision.ops import box_ops | |
from official.vision.ops import nms | |
def _multilevel_propose_rois(raw_boxes: Mapping[str, tf.Tensor],
                             raw_scores: Mapping[str, tf.Tensor],
                             anchor_boxes: Mapping[str, tf.Tensor],
                             image_shape: tf.Tensor,
                             pre_nms_top_k: int = 2000,
                             pre_nms_score_threshold: float = 0.0,
                             pre_nms_min_size_threshold: float = 0.0,
                             nms_iou_threshold: float = 0.7,
                             num_proposals: int = 1000,
                             use_batched_nms: bool = False,
                             decode_boxes: bool = True,
                             clip_boxes: bool = True,
                             apply_sigmoid_to_score: bool = True):
  """Turns multilevel RPN outputs into a single ranked set of RoIs.

  Levels are visited in sorted key order. For every level the candidates are
  flattened to one list of boxes per image and then, in this order:

    1. scores are passed through a sigmoid (if `apply_sigmoid_to_score`);
    2. boxes are decoded against the anchors (if `decode_boxes`);
    3. boxes are clipped to the image (if `clip_boxes`);
    4. boxes are filtered by `box_ops.filter_boxes` (only if
       `pre_nms_min_size_threshold > 0`);
    5. NMS is applied if `nms_iou_threshold > 0`; otherwise the level simply
       keeps its top scoring boxes.

  The per-level results are concatenated along the box axis and a final top-k
  over all levels produces the output.

  The feature height, width and number of anchors of every `raw_scores` entry
  must be statically known: they are read from the static shape and multiplied
  as Python integers.

  Args:
    raw_boxes: A `dict` keyed by FPN level whose values are box tensors of
      shape [batch_size, feature_h, feature_w, num_anchors * 4].
    raw_scores: A `dict` keyed by FPN level whose values are logit tensors of
      shape [batch_size, feature_h, feature_w, num_anchors]. Its keys define
      which levels are processed.
    anchor_boxes: A `dict` keyed by FPN level whose values are anchor box
      tensors of shape [batch_size, feature_h * feature_w * num_anchors, 4].
    image_shape: A `tf.Tensor` of shape [batch_size, 2] holding the
      [height, width] of the scaled image.
    pre_nms_top_k: An `int`, the number of top scoring proposals *per level*
      to keep before NMS. With `use_batched_nms` it is forwarded as
      `max_output_size_per_class` instead of a separate top-k. Default: 2000.
    pre_nms_score_threshold: A `float` between 0 and 1, the minimal score a
      box needs to enter NMS. It is only consulted when NMS is applied.
      Default: 0, no filtering.
    pre_nms_min_size_threshold: A `float`, the minimal side length (w.r.t. the
      scaled image) a box needs to enter NMS. Default: 0, no filtering.
    nms_iou_threshold: A `float` between 0 and 1, the IoU threshold used for
      NMS. If 0.0, NMS is skipped. Default: 0.7.
    num_proposals: An `int`, the number of top scoring proposals to keep
      after NMS, both per level and *in total*. Default: 1000.
    use_batched_nms: A `bool`; if True NMS is performed with
      `tf.image.combined_non_max_suppression`. Currently only available on
      CPU/GPU. Default: False.
    decode_boxes: A `bool`; if False `raw_boxes` are used as they are and
      `anchor_boxes` do not influence the result. Default: True.
    clip_boxes: A `bool` indicating whether boxes are clipped to the scaled
      image size before applying NMS. Default: True.
    apply_sigmoid_to_score: A `bool` indicating whether a sigmoid is applied
      to `raw_scores` before applying NMS. Default: True.

  Returns:
    selected_rois: A `tf.Tensor` of shape [batch_size, K, 4] with the box
      coordinates of the selected proposals w.r.t. the scaled image, where
      K = min(number of candidates kept over all levels, num_proposals).
    selected_roi_scores: A `tf.Tensor` of shape [batch_size, K] with the
      scores of the selected proposals.
  """
  with tf.name_scope('multilevel_propose_rois'):
    # One (rois, scores) pair per processed level, in sorted level order.
    per_level_outputs = []

    # [batch_size, 2] -> [batch_size, 1, 2]; presumably so the box_ops helpers
    # can broadcast it over the box axis -- confirm against box_ops.
    image_shape = tf.expand_dims(image_shape, axis=1)

    for level in sorted(raw_scores):
      with tf.name_scope('level_%s' % level):
        level_logits = raw_scores[level]
        _, height, width, anchors_per_location = (
            level_logits.get_shape().as_list())
        num_boxes = height * width * anchors_per_location

        # Flatten the spatial grid so each image owns a flat list of boxes.
        scores = tf.reshape(level_logits, [-1, num_boxes])
        boxes = tf.reshape(raw_boxes[level], [-1, num_boxes, 4])
        anchors = tf.reshape(anchor_boxes[level], [-1, num_boxes, 4])
        anchors = tf.cast(anchors, dtype=scores.dtype)

        if apply_sigmoid_to_score:
          scores = tf.sigmoid(scores)
        if decode_boxes:
          boxes = box_ops.decode_boxes(boxes, anchors)
        if clip_boxes:
          boxes = box_ops.clip_boxes(boxes, image_shape)
        if pre_nms_min_size_threshold > 0.0:
          boxes, scores = box_ops.filter_boxes(
              boxes, scores, image_shape, pre_nms_min_size_threshold)

        # A level can never yield more boxes than it has anchors.
        pre_nms_k = min(num_boxes, pre_nms_top_k)
        post_nms_k = min(num_boxes, num_proposals)
        apply_nms = nms_iou_threshold > 0.0

        if apply_nms and use_batched_nms:
          # The combined op wants boxes as [batch, num_boxes, q, 4] and scores
          # as [batch, num_boxes, num_classes]; a singleton axis is added to
          # each to express "one class".
          level_rois, level_roi_scores, _, _ = (
              tf.image.combined_non_max_suppression(
                  tf.expand_dims(boxes, axis=2),
                  tf.expand_dims(scores, axis=-1),
                  max_output_size_per_class=pre_nms_k,
                  max_total_size=post_nms_k,
                  iou_threshold=nms_iou_threshold,
                  score_threshold=pre_nms_score_threshold,
                  pad_per_class=False,
                  clip_boxes=False))
        elif apply_nms:
          if pre_nms_score_threshold > 0.0:
            boxes, scores = box_ops.filter_boxes_by_scores(
                boxes, scores, pre_nms_score_threshold)
          boxes, scores = box_ops.top_k_boxes(boxes, scores, k=pre_nms_k)
          # Note the argument and result order here: scores come first.
          level_roi_scores, level_rois = (
              nms.sorted_non_max_suppression_padded(
                  scores,
                  boxes,
                  max_output_size=post_nms_k,
                  iou_threshold=nms_iou_threshold))
        else:
          # NMS disabled: keep the best boxes of this level as they are.
          level_rois, level_roi_scores = box_ops.top_k_boxes(
              boxes, scores, k=post_nms_k)

        per_level_outputs.append((level_rois, level_roi_scores))

    # Merge all levels along the box axis.
    all_rois = tf.concat(
        [level_rois for level_rois, _ in per_level_outputs], axis=1)
    all_roi_scores = tf.concat(
        [level_scores for _, level_scores in per_level_outputs], axis=1)

    with tf.name_scope('top_k_rois'):
      _, num_valid_rois = all_roi_scores.get_shape().as_list()
      overall_top_k = min(num_valid_rois, num_proposals)

      selected_rois, selected_roi_scores = box_ops.top_k_boxes(
          all_rois, all_roi_scores, k=overall_top_k)

    return selected_rois, selected_roi_scores
class MultilevelROIGenerator(tf_keras.layers.Layer):
  """Proposes RoIs for the second stage processing.

  The layer stores two sets of proposal settings, one used when `call` is
  invoked with a truthy `training` argument and one (the `test_*` settings)
  used otherwise.
  """

  def __init__(self,
               pre_nms_top_k: int = 2000,
               pre_nms_score_threshold: float = 0.0,
               pre_nms_min_size_threshold: float = 0.0,
               nms_iou_threshold: float = 0.7,
               num_proposals: int = 1000,
               test_pre_nms_top_k: int = 1000,
               test_pre_nms_score_threshold: float = 0.0,
               test_pre_nms_min_size_threshold: float = 0.0,
               test_nms_iou_threshold: float = 0.7,
               test_num_proposals: int = 1000,
               use_batched_nms: bool = False,
               **kwargs):
    """Initializes a ROI generator.

    The ROI generator transforms the raw predictions from RPN to ROIs.

    Args:
      pre_nms_top_k: An `int` of the number of top scores proposals to be kept
        before applying NMS.
      pre_nms_score_threshold: A `float` of the score threshold to apply before
        applying NMS. Proposals whose scores are below this threshold are
        thrown away.
      pre_nms_min_size_threshold: A `float` of the threshold of each side of the
        box (w.r.t. the scaled image). Proposals whose sides are below this
        threshold are thrown away.
      nms_iou_threshold: A `float` in [0, 1], the NMS IoU threshold.
      num_proposals: An `int` of the final number of proposals to generate.
      test_pre_nms_top_k: An `int` of the number of top scores proposals to be
        kept before applying NMS in testing.
      test_pre_nms_score_threshold: A `float` of the score threshold to apply
        before applying NMS in testing. Proposals whose scores are below this
        threshold are thrown away.
      test_pre_nms_min_size_threshold: A `float` of the threshold of each side
        of the box (w.r.t. the scaled image) in testing. Proposals whose sides
        are below this threshold are thrown away.
      test_nms_iou_threshold: A `float` in [0, 1] of the NMS IoU threshold in
        testing.
      test_num_proposals: An `int` of the final number of proposals to generate
        in testing.
      use_batched_nms: A `bool` of whether or not use
        `tf.image.combined_non_max_suppression`.
      **kwargs: Additional keyword arguments passed to Layer.
    """
    # Keys mirror the constructor argument names so that `from_config` can
    # rebuild the layer with `cls(**config)`.
    self._config_dict = {
        'pre_nms_top_k': pre_nms_top_k,
        'pre_nms_score_threshold': pre_nms_score_threshold,
        'pre_nms_min_size_threshold': pre_nms_min_size_threshold,
        'nms_iou_threshold': nms_iou_threshold,
        'num_proposals': num_proposals,
        'test_pre_nms_top_k': test_pre_nms_top_k,
        'test_pre_nms_score_threshold': test_pre_nms_score_threshold,
        'test_pre_nms_min_size_threshold': test_pre_nms_min_size_threshold,
        'test_nms_iou_threshold': test_nms_iou_threshold,
        'test_num_proposals': test_num_proposals,
        'use_batched_nms': use_batched_nms,
    }
    super().__init__(**kwargs)

  def call(self,
           raw_boxes: Mapping[str, tf.Tensor],
           raw_scores: Mapping[str, tf.Tensor],
           anchor_boxes: Mapping[str, tf.Tensor],
           image_shape: tf.Tensor,
           training: Optional[bool] = None):
    """Proposes RoIs given a group of candidates from different FPN levels.

    The following describes the steps:
      1. For each individual level:
        a. Apply sigmoid transform.
        b. Decode boxes.
        c. Clip boxes.
        d. Filter small boxes and those fall outside image if specified.
        e. Apply pre-NMS filtering including pre-NMS top k and score
           thresholding.
        f. Apply NMS.
      2. Aggregate post-NMS boxes from each level.
      3. Apply an overall top k to generate the final selected RoIs.

    Args:
      raw_boxes: A `dict` with keys representing FPN levels and values
        representing box tensors of shape
        [batch, feature_h, feature_w, num_anchors * 4].
      raw_scores: A `dict` with keys representing FPN levels and values
        representing logit tensors of shape
        [batch, feature_h, feature_w, num_anchors].
      anchor_boxes: A `dict` with keys representing FPN levels and values
        representing anchor box tensors of shape
        [batch, feature_h * feature_w * num_anchors, 4].
      image_shape: A `tf.Tensor` of shape [batch, 2] where the last dimension
        are [height, width] of the scaled image.
      training: A `bool` that indicates whether it is in training mode. Any
        falsy value, including the default `None`, selects the `test_*`
        settings.

    Returns:
      roi_boxes: A `tf.Tensor` of shape [batch, num_proposals, 4], the proposed
        ROIs in the scaled image coordinate.
      roi_scores: A `tf.Tensor` of shape [batch, num_proposals], scores of the
        proposed ROIs.
    """
    # Inference settings are stored under the same keys with a `test_` prefix.
    prefix = '' if training else 'test_'
    config = self._config_dict

    roi_boxes, roi_scores = _multilevel_propose_rois(
        raw_boxes,
        raw_scores,
        anchor_boxes,
        image_shape,
        pre_nms_top_k=config[prefix + 'pre_nms_top_k'],
        pre_nms_score_threshold=config[prefix + 'pre_nms_score_threshold'],
        pre_nms_min_size_threshold=(
            config[prefix + 'pre_nms_min_size_threshold']),
        nms_iou_threshold=config[prefix + 'nms_iou_threshold'],
        num_proposals=config[prefix + 'num_proposals'],
        use_batched_nms=config['use_batched_nms'],
        decode_boxes=True,
        clip_boxes=True,
        apply_sigmoid_to_score=True)
    return roi_boxes, roi_scores

  def get_config(self):
    """Returns the constructor arguments of this layer as a new `dict`."""
    # Hand out a copy so that callers editing the result cannot silently
    # change the settings used by `call`.
    return dict(self._config_dict)

  @classmethod
  def from_config(cls, config, custom_objects=None):
    """Builds a layer from a `dict` produced by `get_config`.

    Args:
      config: A `dict` whose entries are passed to the constructor as keyword
        arguments.
      custom_objects: Unused; accepted for interface compatibility.

    Returns:
      A new instance of `cls`.
    """
    return cls(**config)