# Copyright 2023 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """A module for helper tensorflow ops. This is originally implemented in TensorFlow Object Detection API. """ import tensorflow as tf, tf_keras from official.vision.utils.object_detection import shape_utils def indices_to_dense_vector(indices, size, indices_value=1., default_value=0, dtype=tf.float32): """Creates dense vector with indices set to specific value and rest to zeros. This function exists because it is unclear if it is safe to use tf.sparse_to_dense(indices, [size], 1, validate_indices=False) with indices which are not ordered. This function accepts a dynamic size (e.g. tf.shape(tensor)[0]) Args: indices: 1d Tensor with integer indices which are to be set to indices_values. size: scalar with size (integer) of output Tensor. indices_value: values of elements specified by indices in the output vector default_value: values of other elements in the output vector. dtype: data type. Returns: dense 1D Tensor of shape [size] with indices set to indices_values and the rest set to default_value. """ size = tf.cast(size, dtype=tf.int32) zeros = tf.ones([size], dtype=dtype) * default_value values = tf.ones_like(indices, dtype=dtype) * indices_value return tf.dynamic_stitch( [tf.range(size), tf.cast(indices, dtype=tf.int32)], [zeros, values]) def matmul_gather_on_zeroth_axis(params, indices, scope=None): """Matrix multiplication based implementation of tf.gather on zeroth axis. TODO(rathodv, jonathanhuang): enable sparse matmul option. Args: params: A float32 Tensor. The tensor from which to gather values. Must be at least rank 1. indices: A Tensor. Must be one of the following types: int32, int64. Must be in range [0, params.shape[0]) scope: A name for the operation (optional). Returns: A Tensor. Has the same type as params. Values from params gathered from indices given by indices, with shape indices.shape + params.shape[1:]. """ scope = scope or 'MatMulGather' with tf.name_scope(scope): params_shape = shape_utils.combined_static_and_dynamic_shape(params) indices_shape = shape_utils.combined_static_and_dynamic_shape(indices) params2d = tf.reshape(params, [params_shape[0], -1]) indicator_matrix = tf.one_hot(indices, params_shape[0]) gathered_result_flattened = tf.matmul(indicator_matrix, params2d) return tf.reshape(gathered_result_flattened, tf.stack(indices_shape + params_shape[1:])) def merge_boxes_with_multiple_labels( boxes, classes, confidences, num_classes, quantization_bins=10000 ): """Merges boxes with same coordinates and returns K-hot encoded classes. Args: boxes: A tf.float32 tensor with shape [N, 4] holding N boxes. Only normalized coordinates are allowed. classes: A tf.int32 tensor with shape [N] holding class indices. The class index starts at 0. confidences: A tf.float32 tensor with shape [N] holding class confidences. num_classes: total number of classes to use for K-hot encoding. quantization_bins: the number of bins used to quantize the box coordinate. Returns: merged_boxes: A tf.float32 tensor with shape [N', 4] holding boxes, where N' <= N. class_encodings: A tf.int32 tensor with shape [N', num_classes] holding K-hot encodings for the merged boxes. confidence_encodings: A tf.float32 tensor with shape [N', num_classes] holding encodings of confidences for the merged boxes. merged_box_indices: A tf.int32 tensor with shape [N'] holding original indices of the boxes. """ quantized_boxes = tf.cast(boxes * (quantization_bins - 1), dtype=tf.int64) ymin, xmin, ymax, xmax = tf.unstack(quantized_boxes, axis=1) hashcodes = ( ymin + xmin * quantization_bins + ymax * quantization_bins * quantization_bins + xmax * quantization_bins * quantization_bins * quantization_bins ) unique_hashcodes, unique_indices = tf.unique(hashcodes) num_boxes = tf.shape(boxes)[0] num_unique_boxes = tf.shape(unique_hashcodes)[0] merged_box_indices = tf.math.unsorted_segment_min( tf.range(num_boxes), unique_indices, num_unique_boxes ) merged_boxes = tf.gather(boxes, merged_box_indices) unique_indices = tf.cast(unique_indices, dtype=tf.int64) classes = tf.cast(classes, dtype=tf.int64) def map_box_encodings(i): """Produces box K-hot and score encodings for each class index.""" box_mask = tf.equal(unique_indices, i * tf.ones(num_boxes, dtype=tf.int64)) box_mask = tf.reshape(box_mask, [-1]) box_indices = tf.boolean_mask(classes, box_mask) box_confidences = tf.boolean_mask(confidences, box_mask) box_indices = tf.cast(box_indices, dtype=tf.int64) if tf.rank(box_indices) == 1: box_indices = tf.expand_dims(box_indices, axis=-1) box_class_encodings = tf.SparseTensor( box_indices, tf.squeeze(tf.ones_like(box_indices, dtype=tf.int64), axis=-1), [num_classes], ) box_class_encodings = tf.sparse.reorder(box_class_encodings) box_class_encodings = tf.sparse.to_dense(box_class_encodings) if tf.rank(box_confidences) > 1: box_confidences = tf.squeeze(box_confidences, axis=-1) box_confidence_encodings = tf.SparseTensor( box_indices, box_confidences, [num_classes], ) box_confidence_encodings = tf.sparse.reorder(box_confidence_encodings) box_confidence_encodings = tf.sparse.to_dense(box_confidence_encodings) return box_class_encodings, box_confidence_encodings # Important to avoid int32 here since there is no GPU kernel for int32. # int64 and float32 are fine. class_encodings, confidence_encodings = tf.nest.map_structure( tf.stop_gradient, tf.map_fn( map_box_encodings, tf.range(tf.cast(num_unique_boxes, dtype=tf.int64)), dtype=(tf.int64, tf.float32), ), ) merged_boxes = tf.reshape(merged_boxes, [-1, 4]) class_encodings = tf.cast(class_encodings, dtype=tf.int32) class_encodings = tf.reshape(class_encodings, [-1, num_classes]) confidence_encodings = tf.reshape(confidence_encodings, [-1, num_classes]) merged_box_indices = tf.reshape(merged_box_indices, [-1]) return ( merged_boxes, class_encodings, confidence_encodings, merged_box_indices, )