deanna-emery's picture
updates
5672777
raw
history blame
7.08 kB
# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A module for helper tensorflow ops.
This was originally implemented in the TensorFlow Object Detection API.
"""
import tensorflow as tf, tf_keras
from official.vision.utils.object_detection import shape_utils
def indices_to_dense_vector(indices,
                            size,
                            indices_value=1.,
                            default_value=0,
                            dtype=tf.float32):
  """Builds a dense 1-D tensor with selected positions set to a given value.

  Every element of the output is `default_value`, except the positions listed
  in `indices`, which are set to `indices_value`.

  This helper exists because it is unclear whether
  `tf.sparse_to_dense(indices, [size], 1, validate_indices=False)` is safe to
  use with indices that are not ordered. A dynamic `size` (e.g.
  `tf.shape(tensor)[0]`) is accepted.

  Args:
    indices: 1d Tensor with integer indices which are to be set to
      `indices_value`.
    size: scalar with size (integer) of output Tensor.
    indices_value: value of the elements selected by `indices` in the output
      vector.
    default_value: value of all other elements in the output vector.
    dtype: data type of the output.

  Returns:
    Dense 1D Tensor of shape [size] with `indices` set to `indices_value` and
    the rest set to `default_value`.
  """
  length = tf.cast(size, dtype=tf.int32)
  positions = tf.cast(indices, dtype=tf.int32)

  background = tf.ones([length], dtype=dtype) * default_value
  overrides = tf.ones_like(indices, dtype=dtype) * indices_value

  # The background covers every position 0..length-1. The overrides are listed
  # second so that, where a position appears in both index lists, the override
  # value is the one kept in the stitched result.
  return tf.dynamic_stitch(
      [tf.range(length), positions], [background, overrides])
def matmul_gather_on_zeroth_axis(params, indices, scope=None):
  """Gathers rows of `params` along axis 0 using a one-hot matrix product.

  Behaves like `tf.gather` on the zeroth axis, but is expressed as a matrix
  multiplication between a one-hot encoding of `indices` and `params`
  flattened to two dimensions.

  TODO(rathodv, jonathanhuang): enable sparse matmul option.

  Args:
    params: A float32 Tensor. The tensor from which to gather values. Must be
      at least rank 1.
    indices: A Tensor. Must be one of the following types: int32, int64. Must
      be in range [0, params.shape[0])
    scope: A name for the operation (optional).

  Returns:
    A Tensor. Has the same type as params. Values from params gathered
    from indices given by indices, with shape indices.shape + params.shape[1:].
  """
  with tf.name_scope(scope or 'MatMulGather'):
    params_dims = shape_utils.combined_static_and_dynamic_shape(params)
    indices_dims = shape_utils.combined_static_and_dynamic_shape(indices)
    num_rows = params_dims[0]

    # Collapse all trailing axes so that every entry along axis 0 becomes one
    # row of a 2-D matrix.
    flat_params = tf.reshape(params, [num_rows, -1])
    # Each one-hot row selects exactly one row of `flat_params`.
    selector = tf.one_hot(indices, num_rows)
    flat_gathered = tf.matmul(selector, flat_params)

    # Restore the trailing axes of `params` behind the axes of `indices`.
    output_shape = tf.stack(indices_dims + params_dims[1:])
    return tf.reshape(flat_gathered, output_shape)
def merge_boxes_with_multiple_labels(
    boxes, classes, confidences, num_classes, quantization_bins=10000
):
  """Merges boxes with same coordinates and returns K-hot encoded classes.

  Each box coordinate is quantized to an integer bin and the four bins are
  packed into a single int64 hash code. Boxes that share a hash code are
  treated as the same box. For every distinct box, the classes and confidences
  of all of its occurrences are scattered into vectors of length
  `num_classes`.

  NOTE(review): a box that is listed twice with the same class produces a
  repeated sparse index; `tf.sparse.to_dense` validates indices by default, so
  such input is presumably rejected -- confirm against callers.

  Args:
    boxes: A tf.float32 tensor with shape [N, 4] holding N boxes. Only
      normalized coordinates are allowed.
    classes: A tf.int32 tensor with shape [N] holding class indices. The class
      index starts at 0.
    confidences: A tf.float32 tensor with shape [N] holding class confidences.
    num_classes: total number of classes to use for K-hot encoding.
    quantization_bins: the number of bins used to quantize the box coordinate.

  Returns:
    merged_boxes: A tf.float32 tensor with shape [N', 4] holding boxes,
      where N' <= N.
    class_encodings: A tf.int32 tensor with shape [N', num_classes] holding
      K-hot encodings for the merged boxes.
    confidence_encodings: A tf.float32 tensor with shape [N', num_classes]
      holding encodings of confidences for the merged boxes.
    merged_box_indices: A tf.int32 tensor with shape [N'] holding original
      indices of the boxes.
  """
  # Pack the four quantized coordinates into one integer per box, using
  # `quantization_bins` as the base, so equal boxes get equal hash codes.
  quantized_boxes = tf.cast(boxes * (quantization_bins - 1), dtype=tf.int64)
  ymin, xmin, ymax, xmax = tf.unstack(quantized_boxes, axis=1)
  hashcodes = (
      ymin
      + xmin * quantization_bins
      + ymax * quantization_bins * quantization_bins
      + xmax * quantization_bins * quantization_bins * quantization_bins
  )

  # `unique_indices[n]` is the id of the distinct box that original box `n`
  # belongs to.
  unique_hashcodes, unique_indices = tf.unique(hashcodes)
  num_boxes = tf.shape(boxes)[0]
  num_unique_boxes = tf.shape(unique_hashcodes)[0]

  # Represent every distinct box by the smallest original index in its group.
  merged_box_indices = tf.math.unsorted_segment_min(
      tf.range(num_boxes), unique_indices, num_unique_boxes
  )
  merged_boxes = tf.gather(boxes, merged_box_indices)

  unique_indices = tf.cast(unique_indices, dtype=tf.int64)
  classes = tf.cast(classes, dtype=tf.int64)

  def map_box_encodings(i):
    """Produces the K-hot class and confidence vectors for distinct box `i`."""
    # Select the original entries that were merged into distinct box `i`. The
    # scalar `i` broadcasts against the per-box group ids.
    box_mask = tf.reshape(tf.equal(unique_indices, i), [-1])
    box_indices = tf.boolean_mask(classes, box_mask)
    box_confidences = tf.boolean_mask(confidences, box_mask)

    box_indices = tf.cast(box_indices, dtype=tf.int64)
    # The sparse indices below need a trailing axis; add it when the selected
    # class ids come back as a flat vector.
    if tf.rank(box_indices) == 1:
      box_indices = tf.expand_dims(box_indices, axis=-1)

    box_class_encodings = tf.SparseTensor(
        box_indices,
        tf.squeeze(tf.ones_like(box_indices, dtype=tf.int64), axis=-1),
        [num_classes],
    )
    # Class ids arrive in arbitrary order; reorder before densifying.
    box_class_encodings = tf.sparse.reorder(box_class_encodings)
    box_class_encodings = tf.sparse.to_dense(box_class_encodings)

    # Sparse values must be a flat vector; drop a trailing axis if present.
    if tf.rank(box_confidences) > 1:
      box_confidences = tf.squeeze(box_confidences, axis=-1)

    box_confidence_encodings = tf.SparseTensor(
        box_indices,
        box_confidences,
        [num_classes],
    )
    box_confidence_encodings = tf.sparse.reorder(box_confidence_encodings)
    box_confidence_encodings = tf.sparse.to_dense(box_confidence_encodings)

    return box_class_encodings, box_confidence_encodings

  # Important to avoid int32 here since there is no GPU kernel for int32.
  # int64 and float32 are fine.
  class_encodings, confidence_encodings = tf.nest.map_structure(
      tf.stop_gradient,
      tf.map_fn(
          map_box_encodings,
          tf.range(tf.cast(num_unique_boxes, dtype=tf.int64)),
          # `dtype=` is deprecated in tf.map_fn; `fn_output_signature=` is the
          # supported way to declare the output types.
          fn_output_signature=(tf.int64, tf.float32),
      ),
  )

  # Pin the output shapes explicitly.
  merged_boxes = tf.reshape(merged_boxes, [-1, 4])
  class_encodings = tf.cast(class_encodings, dtype=tf.int32)
  class_encodings = tf.reshape(class_encodings, [-1, num_classes])
  confidence_encodings = tf.reshape(confidence_encodings, [-1, num_classes])
  merged_box_indices = tf.reshape(merged_box_indices, [-1])
  return (
      merged_boxes,
      class_encodings,
      confidence_encodings,
      merged_box_indices,
  )