Spaces:
Runtime error
Runtime error
# Copyright 2023 The TensorFlow Authors. All Rights Reserved. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
"""Tests for detection_generator.py.""" | |
# Import libraries | |
from absl.testing import parameterized | |
import numpy as np | |
import tensorflow as tf, tf_keras | |
from official.vision.modeling.layers import detection_generator | |
from official.vision.ops import anchor | |
class SelectTopKScoresTest(tf.test.TestCase): | |
def testSelectTopKScores(self): | |
pre_nms_num_boxes = 2 | |
scores_data = [[[0.2, 0.2], [0.1, 0.9], [0.5, 0.1], [0.3, 0.5]]] | |
scores_in = tf.constant(scores_data, dtype=tf.float32) | |
top_k_scores, top_k_indices = detection_generator._select_top_k_scores( | |
scores_in, pre_nms_num_detections=pre_nms_num_boxes) | |
expected_top_k_scores = np.array([[[0.5, 0.9], [0.3, 0.5]]], | |
dtype=np.float32) | |
expected_top_k_indices = [[[2, 1], [3, 3]]] | |
self.assertAllEqual(top_k_scores.numpy(), expected_top_k_scores) | |
self.assertAllEqual(top_k_indices.numpy(), expected_top_k_indices) | |
class DetectionGeneratorTest( | |
parameterized.TestCase, tf.test.TestCase): | |
def testDetectionsOutputShape(self, nms_version, use_cpu_nms, soft_nms_sigma, | |
use_sigmoid_probability): | |
max_num_detections = 10 | |
num_classes = 4 | |
pre_nms_top_k = 5000 | |
pre_nms_score_threshold = 0.01 | |
batch_size = 1 | |
kwargs = { | |
'apply_nms': True, | |
'pre_nms_top_k': pre_nms_top_k, | |
'pre_nms_score_threshold': pre_nms_score_threshold, | |
'nms_iou_threshold': 0.5, | |
'max_num_detections': max_num_detections, | |
'nms_version': nms_version, | |
'use_cpu_nms': use_cpu_nms, | |
'soft_nms_sigma': soft_nms_sigma, | |
'use_sigmoid_probability': use_sigmoid_probability, | |
} | |
generator = detection_generator.DetectionGenerator(**kwargs) | |
cls_outputs_all = ( | |
np.random.rand(84, num_classes) - 0.5) * 3 # random 84x3 outputs. | |
box_outputs_all = np.random.rand(84, 4 * num_classes) # random 84 boxes. | |
anchor_boxes_all = np.random.rand(84, 4) # random 84 boxes. | |
class_outputs = tf.reshape( | |
tf.convert_to_tensor(cls_outputs_all, dtype=tf.float32), | |
[1, 84, num_classes]) | |
box_outputs = tf.reshape( | |
tf.convert_to_tensor(box_outputs_all, dtype=tf.float32), | |
[1, 84, 4 * num_classes]) | |
anchor_boxes = tf.reshape( | |
tf.convert_to_tensor(anchor_boxes_all, dtype=tf.float32), | |
[1, 84, 4]) | |
image_info = tf.constant( | |
[[[1000, 1000], [100, 100], [0.1, 0.1], [0, 0]]], | |
dtype=tf.float32) | |
results = generator( | |
box_outputs, class_outputs, anchor_boxes, image_info[:, 1, :]) | |
boxes = results['detection_boxes'] | |
classes = results['detection_classes'] | |
scores = results['detection_scores'] | |
valid_detections = results['num_detections'] | |
self.assertEqual(boxes.numpy().shape, (batch_size, max_num_detections, 4)) | |
self.assertEqual(scores.numpy().shape, (batch_size, max_num_detections,)) | |
self.assertEqual(classes.numpy().shape, (batch_size, max_num_detections,)) | |
self.assertEqual(valid_detections.numpy().shape, (batch_size,)) | |
def test_serialize_deserialize(self): | |
kwargs = { | |
'apply_nms': True, | |
'pre_nms_top_k': 1000, | |
'pre_nms_score_threshold': 0.1, | |
'nms_iou_threshold': 0.5, | |
'max_num_detections': 10, | |
'nms_version': 'v2', | |
'use_cpu_nms': False, | |
'soft_nms_sigma': None, | |
'use_sigmoid_probability': False, | |
} | |
generator = detection_generator.DetectionGenerator(**kwargs) | |
expected_config = dict(kwargs) | |
self.assertEqual(generator.get_config(), expected_config) | |
new_generator = ( | |
detection_generator.DetectionGenerator.from_config( | |
generator.get_config())) | |
self.assertAllEqual(generator.get_config(), new_generator.get_config()) | |
class MultilevelDetectionGeneratorTest( | |
parameterized.TestCase, tf.test.TestCase): | |
def testDetectionsOutputShape( | |
self, | |
nms_version, | |
has_att_heads, | |
use_cpu_nms, | |
soft_nms_sigma, | |
use_regular_nms, | |
use_class_agnostic_nms, | |
): | |
min_level = 4 | |
max_level = 6 | |
num_scales = 2 | |
max_num_detections = 10 | |
aspect_ratios = [1.0, 2.0] | |
anchor_scale = 2.0 | |
output_size = [64, 64] | |
num_classes = 4 | |
pre_nms_top_k = 5000 | |
pre_nms_score_threshold = 0.01 | |
batch_size = 1 | |
tflite_post_processing_config = { | |
'max_detections': max_num_detections, | |
'max_classes_per_detection': 1, | |
'use_regular_nms': use_regular_nms, | |
'nms_score_threshold': 0.01, | |
'nms_iou_threshold': 0.5, | |
'input_image_size': [224, 224], | |
'normalize_anchor_coordinates': True, | |
} | |
kwargs = { | |
'apply_nms': True, | |
'pre_nms_top_k': pre_nms_top_k, | |
'pre_nms_score_threshold': pre_nms_score_threshold, | |
'nms_iou_threshold': 0.5, | |
'max_num_detections': max_num_detections, | |
'nms_version': nms_version, | |
'use_cpu_nms': use_cpu_nms, | |
'soft_nms_sigma': soft_nms_sigma, | |
'tflite_post_processing_config': tflite_post_processing_config, | |
'use_class_agnostic_nms': use_class_agnostic_nms, | |
} | |
input_anchor = anchor.build_anchor_generator(min_level, max_level, | |
num_scales, aspect_ratios, | |
anchor_scale) | |
anchor_boxes = input_anchor(output_size) | |
cls_outputs_all = ( | |
np.random.rand(84, num_classes) - 0.5) * 3 # random 84x3 outputs. | |
box_outputs_all = np.random.rand(84, 4) # random 84 boxes. | |
class_outputs = { | |
'4': | |
tf.reshape( | |
tf.convert_to_tensor(cls_outputs_all[0:64], dtype=tf.float32), | |
[1, 8, 8, num_classes]), | |
'5': | |
tf.reshape( | |
tf.convert_to_tensor(cls_outputs_all[64:80], dtype=tf.float32), | |
[1, 4, 4, num_classes]), | |
'6': | |
tf.reshape( | |
tf.convert_to_tensor(cls_outputs_all[80:84], dtype=tf.float32), | |
[1, 2, 2, num_classes]), | |
} | |
box_outputs = { | |
'4': tf.reshape(tf.convert_to_tensor( | |
box_outputs_all[0:64], dtype=tf.float32), [1, 8, 8, 4]), | |
'5': tf.reshape(tf.convert_to_tensor( | |
box_outputs_all[64:80], dtype=tf.float32), [1, 4, 4, 4]), | |
'6': tf.reshape(tf.convert_to_tensor( | |
box_outputs_all[80:84], dtype=tf.float32), [1, 2, 2, 4]), | |
} | |
if has_att_heads: | |
att_outputs_all = np.random.rand(84, 1) # random attributes. | |
att_outputs = { | |
'depth': { | |
'4': | |
tf.reshape( | |
tf.convert_to_tensor( | |
att_outputs_all[0:64], dtype=tf.float32), | |
[1, 8, 8, 1]), | |
'5': | |
tf.reshape( | |
tf.convert_to_tensor( | |
att_outputs_all[64:80], dtype=tf.float32), | |
[1, 4, 4, 1]), | |
'6': | |
tf.reshape( | |
tf.convert_to_tensor( | |
att_outputs_all[80:84], dtype=tf.float32), | |
[1, 2, 2, 1]), | |
} | |
} | |
else: | |
att_outputs = None | |
image_info = tf.constant([[[1000, 1000], [100, 100], [0.1, 0.1], [0, 0]]], | |
dtype=tf.float32) | |
generator = detection_generator.MultilevelDetectionGenerator(**kwargs) | |
results = generator(box_outputs, class_outputs, anchor_boxes, | |
image_info[:, 1, :], att_outputs) | |
boxes = results['detection_boxes'] | |
classes = results['detection_classes'] | |
scores = results['detection_scores'] | |
valid_detections = results['num_detections'] | |
if nms_version == 'tflite': | |
# When nms_version is `tflite`, all output tensors are empty as the actual | |
# post-processing happens in the TFLite model. | |
self.assertEqual(boxes.numpy().shape, ()) | |
self.assertEqual(scores.numpy().shape, ()) | |
self.assertEqual(classes.numpy().shape, ()) | |
self.assertEqual(valid_detections.numpy().shape, ()) | |
else: | |
self.assertEqual(boxes.numpy().shape, (batch_size, max_num_detections, 4)) | |
self.assertEqual(scores.numpy().shape, ( | |
batch_size, | |
max_num_detections, | |
)) | |
self.assertEqual(classes.numpy().shape, ( | |
batch_size, | |
max_num_detections, | |
)) | |
self.assertEqual(valid_detections.numpy().shape, (batch_size,)) | |
if has_att_heads: | |
for att in results['detection_attributes'].values(): | |
self.assertEqual(att.numpy().shape, | |
(batch_size, max_num_detections, 1)) | |
def test_decode_multilevel_outputs_and_pre_nms_top_k(self): | |
named_params = { | |
'apply_nms': True, | |
'pre_nms_top_k': 5, | |
'pre_nms_score_threshold': 0.05, | |
'nms_iou_threshold': 0.5, | |
'max_num_detections': 2, | |
'nms_version': 'v3', | |
'use_cpu_nms': False, | |
'soft_nms_sigma': None, | |
} | |
generator = detection_generator.MultilevelDetectionGenerator(**named_params) | |
# 2 classes, 3 boxes per pixel, 2 levels '1': 2x2, '2':1x1 | |
background = [1, 0, 0] | |
first = [0, 1, 0] | |
second = [0, 0, 1] | |
some = [0, 0.5, 0.5] | |
class_outputs = { | |
'1': | |
tf.constant([[[ | |
first + background + first, first + background + second | |
], [second + background + first, second + background + second]]], | |
dtype=tf.float32), | |
'2': | |
tf.constant([[[background + some + background]]], dtype=tf.float32), | |
} | |
box_outputs = { | |
'1': tf.zeros(shape=[1, 2, 2, 12], dtype=tf.float32), | |
'2': tf.zeros(shape=[1, 1, 1, 12], dtype=tf.float32) | |
} | |
anchor_boxes = { | |
'1': | |
tf.random.uniform( | |
shape=[2, 2, 12], minval=1., maxval=99., dtype=tf.float32), | |
'2': | |
tf.random.uniform( | |
shape=[1, 1, 12], minval=1., maxval=99., dtype=tf.float32), | |
} | |
boxes, scores = generator._decode_multilevel_outputs_and_pre_nms_top_k( | |
box_outputs, class_outputs, anchor_boxes, | |
tf.constant([[100, 100]], dtype=tf.float32)) | |
self.assertAllClose( | |
scores, | |
tf.sigmoid( | |
tf.constant([[[1, 1, 1, 1, 0.5], [1, 1, 1, 1, 0.5]]], | |
dtype=tf.float32))) | |
self.assertAllClose( | |
tf.squeeze(boxes), | |
tf.stack([ | |
# Where the first is + some as last | |
tf.stack([ | |
anchor_boxes['1'][0, 0, 0:4], anchor_boxes['1'][0, 0, 8:12], | |
anchor_boxes['1'][0, 1, 0:4], anchor_boxes['1'][1, 0, 8:12], | |
anchor_boxes['2'][0, 0, 4:8] | |
]), | |
# Where the second is + some as last | |
tf.stack([ | |
anchor_boxes['1'][0, 1, 8:12], anchor_boxes['1'][1, 0, 0:4], | |
anchor_boxes['1'][1, 1, 0:4], anchor_boxes['1'][1, 1, 8:12], | |
anchor_boxes['2'][0, 0, 4:8] | |
]), | |
])) | |
def test_serialize_deserialize(self): | |
tflite_post_processing_config = { | |
'max_detections': 100, | |
'max_classes_per_detection': 1, | |
'use_regular_nms': True, | |
'nms_score_threshold': 0.01, | |
'nms_iou_threshold': 0.5, | |
'input_image_size': [224, 224], | |
} | |
kwargs = { | |
'apply_nms': True, | |
'pre_nms_top_k': 1000, | |
'pre_nms_score_threshold': 0.1, | |
'nms_iou_threshold': 0.5, | |
'max_num_detections': 10, | |
'nms_version': 'v2', | |
'use_cpu_nms': False, | |
'soft_nms_sigma': None, | |
'tflite_post_processing_config': tflite_post_processing_config, | |
'return_decoded': False, | |
'use_class_agnostic_nms': False, | |
'box_coder_weights': None, | |
} | |
generator = detection_generator.MultilevelDetectionGenerator(**kwargs) | |
expected_config = dict(kwargs) | |
self.assertEqual(generator.get_config(), expected_config) | |
new_generator = ( | |
detection_generator.MultilevelDetectionGenerator.from_config( | |
generator.get_config())) | |
self.assertAllEqual(generator.get_config(), new_generator.get_config()) | |
if __name__ == '__main__': | |
tf.test.main() | |