Spaces:
Runtime error
Runtime error
# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""ROI-related ops.""" | |
from __future__ import absolute_import | |
from __future__ import division | |
from __future__ import print_function | |
import tensorflow as tf, tf_keras | |
from official.legacy.detection.ops import nms | |
from official.legacy.detection.utils import box_utils | |
def multilevel_propose_rois(rpn_boxes,
                            rpn_scores,
                            anchor_boxes,
                            image_shape,
                            rpn_pre_nms_top_k=2000,
                            rpn_post_nms_top_k=1000,
                            rpn_nms_threshold=0.7,
                            rpn_score_threshold=0.0,
                            rpn_min_size_threshold=0.0,
                            decode_boxes=True,
                            clip_boxes=True,
                            use_batched_nms=False,
                            apply_sigmoid_to_score=True):
  """Proposes RoIs given a group of candidates from different FPN levels.

  The following describes the steps:
    1. For each individual level:
      a. Apply sigmoid transform if specified.
      b. Decode boxes if specified.
      c. Clip boxes if specified.
      d. Filter small boxes and those fall outside image if specified.
      e. Apply pre-NMS filtering including pre-NMS top k and score thresholding.
      f. Apply NMS.
    2. Aggregate post-NMS boxes from each level.
    3. Apply an overall top k to generate the final selected RoIs.

  Args:
    rpn_boxes: a dict with keys representing FPN levels and values representing
      box tensors of shape [batch_size, feature_h, feature_w, num_anchors * 4].
    rpn_scores: a dict with keys representing FPN levels and values representing
      logit tensors of shape [batch_size, feature_h, feature_w, num_anchors].
      The spatial and anchor dimensions must be statically known because they
      are multiplied together as Python integers.
    anchor_boxes: a dict with keys representing FPN levels and values
      representing anchor box tensors of shape [batch_size, feature_h,
      feature_w, num_anchors * 4]. Only accessed when `decode_boxes` is True.
    image_shape: a tensor of shape [batch_size, 2] where the last dimension are
      [height, width] of the scaled image.
    rpn_pre_nms_top_k: an integer of top scoring RPN proposals *per level* to
      keep before applying NMS. Default: 2000.
    rpn_post_nms_top_k: an integer of top scoring RPN proposals *in total* to
      keep after applying NMS. Default: 1000.
    rpn_nms_threshold: a float between 0 and 1 representing the IoU threshold
      used for NMS. If 0.0, no NMS is applied and each level simply keeps its
      top `rpn_post_nms_top_k` boxes by score. Default: 0.7.
    rpn_score_threshold: a float between 0 and 1 representing the minimal box
      score to keep before applying NMS. This is often used as a pre-filtering
      step for better performance. If 0, no filtering is applied. Default: 0.
    rpn_min_size_threshold: a float representing the minimal box size in each
      side (w.r.t. the scaled image) to keep before applying NMS. This is often
      used as a pre-filtering step for better performance. If 0, no filtering is
      applied. Default: 0.
    decode_boxes: a boolean indicating whether `rpn_boxes` needs to be decoded
      using `anchor_boxes`. If False, use `rpn_boxes` directly and ignore
      `anchor_boxes`. Default: True.
    clip_boxes: a boolean indicating whether boxes are first clipped to the
      scaled image size before applying NMS. If False, no clipping is applied.
      Default: True.
    use_batched_nms: a boolean indicating whether NMS is applied in batch using
      `tf.image.combined_non_max_suppression`. Currently only available in
      CPU/GPU. Default: False.
    apply_sigmoid_to_score: a boolean indicating whether apply sigmoid to
      `rpn_scores` before applying NMS. Default: True.

  Returns:
    selected_rois: a tensor of shape [batch_size, k, 4] representing the box
      coordinates of the selected proposals w.r.t. the scaled image, where
      k = min(total number of per-level RoIs, rpn_post_nms_top_k).
    selected_roi_scores: a tensor with the scores of the selected proposals, as
      returned by `box_utils.top_k_boxes`.
      NOTE(review): the concatenated per-level scores are unpacked as rank 2
      ([batch_size, num_rois]), so this is presumably [batch_size, k] rather
      than the [batch_size, k, 1] that was previously documented - confirm.
  """
  with tf.name_scope('multilevel_propose_rois'):
    rois = []
    roi_scores = []
    # [batch_size, 2] -> [batch_size, 1, 2]; this is the form handed to
    # box_utils.clip_boxes / box_utils.filter_boxes below.
    image_shape = tf.expand_dims(image_shape, axis=1)
    for level in sorted(rpn_scores.keys()):
      with tf.name_scope('level_%d' % level):
        _, feature_h, feature_w, num_anchors_per_location = (
            rpn_scores[level].get_shape().as_list())
        num_boxes = feature_h * feature_w * num_anchors_per_location

        # Flatten the spatial/anchor axes into a single per-image box axis.
        this_level_scores = tf.reshape(rpn_scores[level], [-1, num_boxes])
        this_level_boxes = tf.reshape(rpn_boxes[level], [-1, num_boxes, 4])

        if apply_sigmoid_to_score:
          this_level_scores = tf.sigmoid(this_level_scores)

        if decode_boxes:
          # Anchors are only touched here so that `anchor_boxes` really is
          # ignored when decoding is disabled, as documented.
          this_level_anchors = tf.cast(
              tf.reshape(anchor_boxes[level], [-1, num_boxes, 4]),
              dtype=this_level_scores.dtype)
          this_level_boxes = box_utils.decode_boxes(this_level_boxes,
                                                    this_level_anchors)
        if clip_boxes:
          this_level_boxes = box_utils.clip_boxes(this_level_boxes, image_shape)

        if rpn_min_size_threshold > 0.0:
          this_level_boxes, this_level_scores = box_utils.filter_boxes(
              this_level_boxes, this_level_scores, image_shape,
              rpn_min_size_threshold)

        # A level cannot contribute more boxes than it has.
        this_level_pre_nms_top_k = min(num_boxes, rpn_pre_nms_top_k)
        this_level_post_nms_top_k = min(num_boxes, rpn_post_nms_top_k)
        if rpn_nms_threshold > 0.0:
          if use_batched_nms:
            # Boxes get a singleton "class" axis and scores a singleton class
            # dimension so the op treats every box as the same class.
            this_level_rois, this_level_roi_scores, _, _ = (
                tf.image.combined_non_max_suppression(
                    tf.expand_dims(this_level_boxes, axis=2),
                    tf.expand_dims(this_level_scores, axis=-1),
                    max_output_size_per_class=this_level_pre_nms_top_k,
                    max_total_size=this_level_post_nms_top_k,
                    iou_threshold=rpn_nms_threshold,
                    score_threshold=rpn_score_threshold,
                    pad_per_class=False,
                    clip_boxes=False))
          else:
            if rpn_score_threshold > 0.0:
              this_level_boxes, this_level_scores = (
                  box_utils.filter_boxes_by_scores(this_level_boxes,
                                                   this_level_scores,
                                                   rpn_score_threshold))
            this_level_boxes, this_level_scores = box_utils.top_k_boxes(
                this_level_boxes, this_level_scores, k=this_level_pre_nms_top_k)
            # Note the (scores, boxes) argument and return order of this op.
            this_level_roi_scores, this_level_rois = (
                nms.sorted_non_max_suppression_padded(
                    this_level_scores,
                    this_level_boxes,
                    max_output_size=this_level_post_nms_top_k,
                    iou_threshold=rpn_nms_threshold))
        else:
          # No NMS: keep the top-scoring boxes of this level. The candidates
          # live in `this_level_boxes`; `this_level_rois` is not defined yet on
          # this path, so it must not be used as the input here.
          this_level_rois, this_level_roi_scores = box_utils.top_k_boxes(
              this_level_boxes, this_level_scores, k=this_level_post_nms_top_k)

        rois.append(this_level_rois)
        roi_scores.append(this_level_roi_scores)

    # Merge the per-level results along the box axis.
    all_rois = tf.concat(rois, axis=1)
    all_roi_scores = tf.concat(roi_scores, axis=1)

    with tf.name_scope('top_k_rois'):
      _, num_valid_rois = all_roi_scores.get_shape().as_list()
      overall_top_k = min(num_valid_rois, rpn_post_nms_top_k)

      selected_rois, selected_roi_scores = box_utils.top_k_boxes(
          all_rois, all_roi_scores, k=overall_top_k)

    return selected_rois, selected_roi_scores
class ROIGenerator(tf_keras.layers.Layer):
  """Keras layer that turns multilevel RPN outputs into RoI proposals.

  Two sets of proposal hyper-parameters are stored: one used when the layer is
  called with a true `is_training`, and a `test_*` set used otherwise.
  """

  def __init__(self, params):
    """Copies the proposal hyper-parameters off `params`.

    Args:
      params: a config object exposing `rpn_pre_nms_top_k`,
        `rpn_post_nms_top_k`, `rpn_nms_threshold`, `rpn_score_threshold`,
        `rpn_min_size_threshold`, their `test_`-prefixed counterparts, and
        `use_batched_nms`.
    """
    # Settings applied in training mode.
    self._rpn_pre_nms_top_k = params.rpn_pre_nms_top_k
    self._rpn_post_nms_top_k = params.rpn_post_nms_top_k
    self._rpn_nms_threshold = params.rpn_nms_threshold
    self._rpn_score_threshold = params.rpn_score_threshold
    self._rpn_min_size_threshold = params.rpn_min_size_threshold
    # Settings applied in inference mode.
    self._test_rpn_pre_nms_top_k = params.test_rpn_pre_nms_top_k
    self._test_rpn_post_nms_top_k = params.test_rpn_post_nms_top_k
    self._test_rpn_nms_threshold = params.test_rpn_nms_threshold
    self._test_rpn_score_threshold = params.test_rpn_score_threshold
    self._test_rpn_min_size_threshold = params.test_rpn_min_size_threshold
    # Shared by both modes.
    self._use_batched_nms = params.use_batched_nms
    super().__init__(autocast=False)

  def call(self, boxes, scores, anchor_boxes, image_shape, is_training):
    """Generates RoI proposals.

    Delegates to `multilevel_propose_rois` with box decoding, clipping and
    score sigmoid all enabled, using the hyper-parameter set that matches
    `is_training`.

    Args:
      boxes: a dict with keys representing FPN levels and values representing
        box tensors of shape [batch_size, feature_h, feature_w,
        num_anchors * 4].
      scores: a dict with keys representing FPN levels and values representing
        logit tensors of shape [batch_size, feature_h, feature_w, num_anchors].
      anchor_boxes: a dict with keys representing FPN levels and values
        representing anchor box tensors of shape [batch_size, feature_h,
        feature_w, num_anchors * 4].
      image_shape: a tensor of shape [batch_size, 2] where the last dimension
        are [height, width] of the scaled image.
      is_training: a bool indicating whether it is in training or inference
        mode.

    Returns:
      proposed_rois: a tensor of shape [batch_size, rpn_post_nms_top_k, 4],
        representing the box coordinates of the proposed RoIs w.r.t. the
        scaled image.
      proposed_roi_scores: a tensor holding the scores of the proposed RoIs,
        exactly as returned by `multilevel_propose_rois`.
    """
    if is_training:
      pre_nms_top_k = self._rpn_pre_nms_top_k
      post_nms_top_k = self._rpn_post_nms_top_k
      nms_threshold = self._rpn_nms_threshold
      score_threshold = self._rpn_score_threshold
      min_size_threshold = self._rpn_min_size_threshold
    else:
      pre_nms_top_k = self._test_rpn_pre_nms_top_k
      post_nms_top_k = self._test_rpn_post_nms_top_k
      nms_threshold = self._test_rpn_nms_threshold
      score_threshold = self._test_rpn_score_threshold
      min_size_threshold = self._test_rpn_min_size_threshold

    return multilevel_propose_rois(
        boxes,
        scores,
        anchor_boxes,
        image_shape,
        rpn_pre_nms_top_k=pre_nms_top_k,
        rpn_post_nms_top_k=post_nms_top_k,
        rpn_nms_threshold=nms_threshold,
        rpn_score_threshold=score_threshold,
        rpn_min_size_threshold=min_size_threshold,
        decode_boxes=True,
        clip_boxes=True,
        use_batched_nms=self._use_batched_nms,
        apply_sigmoid_to_score=True)
class OlnROIGenerator(ROIGenerator):
  """Proposes RoIs for the second stage processing (OLN variant).

  Compared with `ROIGenerator`, proposals may be decoded from boxes in
  left-right-top-bottom format and may be ranked by a separate objectness
  score (e.g., centerness) instead of the RPN scores.
  """

  # NOTE: this class defines `__call__` (not `call`), so it replaces the
  # `__call__` inherited through `ROIGenerator` for instances of this class.
  def __call__(self, boxes, scores, anchor_boxes, image_shape, is_training,
               is_box_lrtb=False, object_scores=None):
    """Generates RoI proposals.

    Args:
      boxes: a dict with keys representing FPN levels and values representing
        box tensors of shape [batch_size, feature_h, feature_w,
        num_anchors * 4].
      scores: a dict with keys representing FPN levels and values representing
        logit tensors of shape [batch_size, feature_h, feature_w, num_anchors].
      anchor_boxes: a dict with keys representing FPN levels and values
        representing anchor box tensors of shape [batch_size, feature_h,
        feature_w, num_anchors * 4].
      image_shape: a tensor of shape [batch_size, 2] where the last dimension
        are [height, width] of the scaled image.
      is_training: a bool indicating whether it is in training or inference
        mode.
      is_box_lrtb: a bool indicating whether boxes are in lrtb (=left,right,top,
        bottom) format.
      object_scores: another objectness score (e.g., centerness). In OLN, we use
        object_scores=centerness as a replacement of the scores at each level.
        A dict with keys representing FPN levels and values representing logit
        tensors of shape [batch_size, feature_h, feature_w, num_anchors].

    Returns:
      proposed_rois: a tensor of shape [batch_size, rpn_post_nms_top_k, 4],
        representing the box coordinates of the proposed RoIs w.r.t. the
        scaled image.
      proposed_roi_scores: a tensor holding the scores of the proposed RoIs,
        exactly as returned by `oln_multilevel_propose_rois`.
    """
    # Pick the hyper-parameter set that matches the current mode.
    if is_training:
      pre_nms_top_k = self._rpn_pre_nms_top_k
      post_nms_top_k = self._rpn_post_nms_top_k
      nms_threshold = self._rpn_nms_threshold
      score_threshold = self._rpn_score_threshold
      min_size_threshold = self._rpn_min_size_threshold
    else:
      pre_nms_top_k = self._test_rpn_pre_nms_top_k
      post_nms_top_k = self._test_rpn_post_nms_top_k
      nms_threshold = self._test_rpn_nms_threshold
      score_threshold = self._test_rpn_score_threshold
      min_size_threshold = self._test_rpn_min_size_threshold

    proposed_rois, proposed_roi_scores = self.oln_multilevel_propose_rois(
        boxes,
        scores,
        anchor_boxes,
        image_shape,
        rpn_pre_nms_top_k=pre_nms_top_k,
        rpn_post_nms_top_k=post_nms_top_k,
        rpn_nms_threshold=nms_threshold,
        rpn_score_threshold=score_threshold,
        rpn_min_size_threshold=min_size_threshold,
        decode_boxes=True,
        clip_boxes=True,
        use_batched_nms=self._use_batched_nms,
        apply_sigmoid_to_score=True,
        is_box_lrtb=is_box_lrtb,
        rpn_object_scores=object_scores,)
    return proposed_rois, proposed_roi_scores

  def oln_multilevel_propose_rois(self,
                                  rpn_boxes,
                                  rpn_scores,
                                  anchor_boxes,
                                  image_shape,
                                  rpn_pre_nms_top_k=2000,
                                  rpn_post_nms_top_k=1000,
                                  rpn_nms_threshold=0.7,
                                  rpn_score_threshold=0.0,
                                  rpn_min_size_threshold=0.0,
                                  decode_boxes=True,
                                  clip_boxes=True,
                                  use_batched_nms=False,
                                  apply_sigmoid_to_score=True,
                                  is_box_lrtb=False,
                                  rpn_object_scores=None,):
    """Proposes RoIs given a group of candidates from different FPN levels.

    The following describes the steps:
      1. For each individual level:
        a. Adjust scores for each level if specified by rpn_object_scores.
        b. Apply sigmoid transform if specified.
        c. Decode boxes (either of xyhw or left-right-top-bottom format) if
          specified.
        d. Clip boxes if specified.
        e. Filter small boxes and those fall outside image if specified.
        f. Apply pre-NMS filtering including pre-NMS top k and score
          thresholding.
        g. Apply NMS.
      2. Aggregate post-NMS boxes from each level.
      3. Apply an overall top k to generate the final selected RoIs.

    Args:
      rpn_boxes: a dict with keys representing FPN levels and values
        representing box tensors of shape [batch_size, feature_h, feature_w,
        num_anchors * 4].
      rpn_scores: a dict with keys representing FPN levels and values
        representing logit tensors of shape [batch_size, feature_h, feature_w,
        num_anchors]. The spatial and anchor dimensions must be statically
        known because they are multiplied together as Python integers.
      anchor_boxes: a dict with keys representing FPN levels and values
        representing anchor box tensors of shape [batch_size, feature_h,
        feature_w, num_anchors * 4]. Only accessed when `decode_boxes` is True.
      image_shape: a tensor of shape [batch_size, 2] where the last dimension
        are [height, width] of the scaled image.
      rpn_pre_nms_top_k: an integer of top scoring RPN proposals *per level* to
        keep before applying NMS. Default: 2000.
      rpn_post_nms_top_k: an integer of top scoring RPN proposals *in total* to
        keep after applying NMS. Default: 1000.
      rpn_nms_threshold: a float between 0 and 1 representing the IoU threshold
        used for NMS. If 0.0, no NMS is applied and each level simply keeps its
        top `rpn_post_nms_top_k` boxes by score. Default: 0.7.
      rpn_score_threshold: a float between 0 and 1 representing the minimal box
        score to keep before applying NMS. This is often used as a pre-filtering
        step for better performance. If 0, no filtering is applied. Default: 0.
      rpn_min_size_threshold: a float representing the minimal box size in each
        side (w.r.t. the scaled image) to keep before applying NMS. This is
        often used as a pre-filtering step for better performance. If 0, no
        filtering is applied. Default: 0.
      decode_boxes: a boolean indicating whether `rpn_boxes` needs to be decoded
        using `anchor_boxes`. If False, use `rpn_boxes` directly and ignore
        `anchor_boxes`. Default: True.
      clip_boxes: a boolean indicating whether boxes are first clipped to the
        scaled image size before applying NMS. If False, no clipping is
        applied. Default: True.
      use_batched_nms: a boolean indicating whether NMS is applied in batch
        using `tf.image.combined_non_max_suppression`. Currently only available
        in CPU/GPU. Default: False.
      apply_sigmoid_to_score: a boolean indicating whether apply sigmoid to
        `rpn_scores` before applying NMS. Default: True.
      is_box_lrtb: a bool indicating whether boxes are in lrtb (=left,right,top,
        bottom) format.
      rpn_object_scores: a predicted objectness score (e.g., centerness). In
        OLN, we use object_scores=centerness as a replacement of the scores at
        each level. A dict with keys representing FPN levels and values
        representing logit tensors of shape [batch_size, feature_h, feature_w,
        num_anchors]. A `None` or empty value leaves `rpn_scores` in use.

    Returns:
      selected_rois: a tensor of shape [batch_size, k, 4] representing the box
        coordinates of the selected proposals w.r.t. the scaled image, where
        k = min(total number of per-level RoIs, rpn_post_nms_top_k).
      selected_roi_scores: a tensor with the scores of the selected proposals,
        as returned by `box_utils.top_k_boxes`.
        NOTE(review): the concatenated per-level scores are unpacked as rank 2
        ([batch_size, num_rois]), so this is presumably [batch_size, k] rather
        than the [batch_size, k, 1] that was previously documented - confirm.
    """
    with tf.name_scope('multilevel_propose_rois'):
      rois = []
      roi_scores = []
      # [batch_size, 2] -> [batch_size, 1, 2]; this is the form handed to
      # box_utils.clip_boxes / box_utils.filter_boxes below.
      image_shape = tf.expand_dims(image_shape, axis=1)
      for level in sorted(rpn_scores.keys()):
        with tf.name_scope('level_%d' % level):
          _, feature_h, feature_w, num_anchors_per_location = (
              rpn_scores[level].get_shape().as_list())
          num_boxes = feature_h * feature_w * num_anchors_per_location

          # Flatten the spatial/anchor axes into a single per-image box axis.
          this_level_scores = tf.reshape(rpn_scores[level], [-1, num_boxes])
          this_level_boxes = tf.reshape(rpn_boxes[level], [-1, num_boxes, 4])

          if rpn_object_scores:
            # Rank proposals by the objectness score instead of the RPN score,
            # keeping the RPN score dtype.
            this_level_object_scores = rpn_object_scores[level]
            this_level_object_scores = tf.reshape(this_level_object_scores,
                                                  [-1, num_boxes])
            this_level_object_scores = tf.cast(this_level_object_scores,
                                               this_level_scores.dtype)
            this_level_scores = this_level_object_scores

          if apply_sigmoid_to_score:
            this_level_scores = tf.sigmoid(this_level_scores)

          if decode_boxes:
            # Anchors are only touched here so that `anchor_boxes` really is
            # ignored when decoding is disabled, as documented.
            this_level_anchors = tf.cast(
                tf.reshape(anchor_boxes[level], [-1, num_boxes, 4]),
                dtype=this_level_scores.dtype)
            if is_box_lrtb:  # Box in left-right-top-bottom format.
              this_level_boxes = box_utils.decode_boxes_lrtb(
                  this_level_boxes, this_level_anchors)
            else:  # Box in standard x-y-h-w format.
              this_level_boxes = box_utils.decode_boxes(
                  this_level_boxes, this_level_anchors)

          if clip_boxes:
            this_level_boxes = box_utils.clip_boxes(
                this_level_boxes, image_shape)

          if rpn_min_size_threshold > 0.0:
            this_level_boxes, this_level_scores = box_utils.filter_boxes(
                this_level_boxes, this_level_scores, image_shape,
                rpn_min_size_threshold)

          # A level cannot contribute more boxes than it has.
          this_level_pre_nms_top_k = min(num_boxes, rpn_pre_nms_top_k)
          this_level_post_nms_top_k = min(num_boxes, rpn_post_nms_top_k)
          if rpn_nms_threshold > 0.0:
            if use_batched_nms:
              # Boxes get a singleton "class" axis and scores a singleton class
              # dimension so the op treats every box as the same class.
              this_level_rois, this_level_roi_scores, _, _ = (
                  tf.image.combined_non_max_suppression(
                      tf.expand_dims(this_level_boxes, axis=2),
                      tf.expand_dims(this_level_scores, axis=-1),
                      max_output_size_per_class=this_level_pre_nms_top_k,
                      max_total_size=this_level_post_nms_top_k,
                      iou_threshold=rpn_nms_threshold,
                      score_threshold=rpn_score_threshold,
                      pad_per_class=False,
                      clip_boxes=False))
            else:
              if rpn_score_threshold > 0.0:
                this_level_boxes, this_level_scores = (
                    box_utils.filter_boxes_by_scores(this_level_boxes,
                                                     this_level_scores,
                                                     rpn_score_threshold))
              this_level_boxes, this_level_scores = box_utils.top_k_boxes(
                  this_level_boxes, this_level_scores,
                  k=this_level_pre_nms_top_k)
              # Note the (scores, boxes) argument and return order of this op.
              this_level_roi_scores, this_level_rois = (
                  nms.sorted_non_max_suppression_padded(
                      this_level_scores,
                      this_level_boxes,
                      max_output_size=this_level_post_nms_top_k,
                      iou_threshold=rpn_nms_threshold))
          else:
            # No NMS: keep the top-scoring boxes of this level. The candidates
            # live in `this_level_boxes`; `this_level_rois` is not defined yet
            # on this path, so it must not be used as the input here.
            this_level_rois, this_level_roi_scores = box_utils.top_k_boxes(
                this_level_boxes, this_level_scores,
                k=this_level_post_nms_top_k)

          rois.append(this_level_rois)
          roi_scores.append(this_level_roi_scores)

      # Merge the per-level results along the box axis.
      all_rois = tf.concat(rois, axis=1)
      all_roi_scores = tf.concat(roi_scores, axis=1)

      with tf.name_scope('top_k_rois'):
        _, num_valid_rois = all_roi_scores.get_shape().as_list()
        overall_top_k = min(num_valid_rois, rpn_post_nms_top_k)

        selected_rois, selected_roi_scores = box_utils.top_k_boxes(
            all_rois, all_roi_scores, k=overall_top_k)

      return selected_rois, selected_roi_scores