Spaces:
Sleeping
Sleeping
File size: 7,076 Bytes
5672777 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 |
# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A module for helper tensorflow ops.
This was originally implemented in the TensorFlow Object Detection API.
"""
import tensorflow as tf, tf_keras
from official.vision.utils.object_detection import shape_utils
def indices_to_dense_vector(indices,
                            size,
                            indices_value=1.,
                            default_value=0,
                            dtype=tf.float32):
  """Creates a dense vector with chosen indices set to a specific value.

  Every position listed in `indices` receives `indices_value`; every other
  position receives `default_value`.

  This helper exists because it is unclear whether
    tf.sparse_to_dense(indices, [size], 1, validate_indices=False)
  is safe to use with indices that are not ordered.

  `size` may be dynamic (e.g. tf.shape(tensor)[0]).

  Args:
    indices: 1d Tensor with integer indices which are to be set to
      indices_value.
    size: scalar with size (integer) of output Tensor.
    indices_value: value of the elements selected by indices in the output
      vector.
    default_value: value of all other elements in the output vector.
    dtype: data type of the output vector.

  Returns:
    dense 1D Tensor of shape [size] with indices set to indices_value and the
    rest set to default_value.
  """
  length = tf.cast(size, dtype=tf.int32)
  # Despite being built from tf.ones, these hold the two fill values.
  background = tf.ones([length], dtype=dtype) * default_value
  foreground = tf.ones_like(indices, dtype=dtype) * indices_value
  all_positions = tf.range(length)
  marked_positions = tf.cast(indices, dtype=tf.int32)
  # The stitch writes the background at every position first; the second
  # (indices, data) pair is merged afterwards and overrides it at `indices`.
  return tf.dynamic_stitch(
      indices=[all_positions, marked_positions],
      data=[background, foreground])
def matmul_gather_on_zeroth_axis(params, indices, scope=None):
  """Matrix multiplication based implementation of tf.gather on zeroth axis.

  `params` is flattened to 2-D, multiplied by a one-hot indicator matrix built
  from `indices`, and the product is reshaped back to
  indices.shape + params.shape[1:].

  TODO(rathodv, jonathanhuang): enable sparse matmul option.

  Args:
    params: A Tensor, typically float32. The tensor from which to gather
      values. Must be at least rank 1 and of a dtype that tf.matmul supports.
    indices: A Tensor. Must be one of the following types: int32, int64. Must
      be in range [0, params.shape[0])
    scope: A name for the operation (optional). Defaults to 'MatMulGather'.

  Returns:
    A Tensor. Has the same type as params. Values from params gathered
    from indices given by indices, with shape indices.shape + params.shape[1:].
  """
  scope = scope or 'MatMulGather'
  with tf.name_scope(scope):
    # NOTE(review): the helper is presumably returning Python lists of
    # per-dimension sizes (they are concatenated with `+` below) -- confirm
    # against shape_utils.
    params_shape = shape_utils.combined_static_and_dynamic_shape(params)
    indices_shape = shape_utils.combined_static_and_dynamic_shape(indices)
    params2d = tf.reshape(params, [params_shape[0], -1])
    # Build the indicator in the dtype of `params`: tf.one_hot defaults to
    # float32, which tf.matmul would reject for non-float32 params (e.g.
    # bfloat16/float16 under mixed precision).
    indicator_matrix = tf.one_hot(
        indices, params_shape[0], dtype=params.dtype)
    gathered_result_flattened = tf.matmul(indicator_matrix, params2d)
    return tf.reshape(gathered_result_flattened,
                      tf.stack(indices_shape + params_shape[1:]))
def merge_boxes_with_multiple_labels(
    boxes, classes, confidences, num_classes, quantization_bins=10000
):
  """Merges boxes with same coordinates and returns K-hot encoded classes.

  Boxes are considered identical when all four coordinates fall into the same
  quantization bin. Each group of identical boxes is collapsed into a single
  box whose class and confidence rows combine the labels of the whole group.

  Args:
    boxes: A tf.float32 tensor with shape [N, 4] holding N boxes. Only
      normalized coordinates are allowed.
    classes: A tf.int32 tensor with shape [N] holding class indices. The class
      index starts at 0.
    confidences: A tf.float32 tensor with shape [N] holding class confidences.
    num_classes: total number of classes to use for K-hot encoding.
    quantization_bins: the number of bins used to quantize the box coordinate.

  Returns:
    merged_boxes: A tf.float32 tensor with shape [N', 4] holding boxes,
      where N' <= N.
    class_encodings: A tf.int32 tensor with shape [N', num_classes] holding
      K-hot encodings for the merged boxes.
    confidence_encodings: A tf.float32 tensor with shape [N', num_classes]
      holding encodings of confidences for the merged boxes.
    merged_box_indices: A tf.int32 tensor with shape [N'] holding original
      indices of the boxes. For each merged box this is the smallest original
      index among the boxes that were merged into it.
  """
  # Quantize each coordinate and pack the four bins into one int64 hash
  # (base-`quantization_bins` digits), so equal hashes mean equal boxes.
  quantized_boxes = tf.cast(boxes * (quantization_bins - 1), dtype=tf.int64)
  ymin, xmin, ymax, xmax = tf.unstack(quantized_boxes, axis=1)
  hashcodes = (
      ymin
      + xmin * quantization_bins
      + ymax * quantization_bins * quantization_bins
      + xmax * quantization_bins * quantization_bins * quantization_bins
  )
  # unique_indices[k] is the id of the unique box that original box k maps to.
  unique_hashcodes, unique_indices = tf.unique(hashcodes)
  num_boxes = tf.shape(boxes)[0]
  num_unique_boxes = tf.shape(unique_hashcodes)[0]
  # Represent every unique box by its first (lowest-index) occurrence.
  merged_box_indices = tf.math.unsorted_segment_min(
      tf.range(num_boxes), unique_indices, num_unique_boxes
  )
  merged_boxes = tf.gather(boxes, merged_box_indices)
  unique_indices = tf.cast(unique_indices, dtype=tf.int64)
  classes = tf.cast(classes, dtype=tf.int64)

  def map_box_encodings(i):
    """Produces box K-hot and score encodings for each class index."""
    # Select the original boxes that belong to unique box `i`; the scalar `i`
    # broadcasts against the per-box ids.
    box_mask = tf.equal(unique_indices, i)
    box_mask = tf.reshape(box_mask, [-1])
    box_indices = tf.boolean_mask(classes, box_mask)
    box_confidences = tf.boolean_mask(confidences, box_mask)
    box_indices = tf.cast(box_indices, dtype=tf.int64)
    # tf.SparseTensor needs indices of shape [num_entries, 1] for a 1-D
    # dense shape.
    if tf.rank(box_indices) == 1:
      box_indices = tf.expand_dims(box_indices, axis=-1)
    # NOTE(review): a box listed twice with the same class yields duplicate
    # sparse indices -- confirm callers never pass such input.
    box_class_encodings = tf.SparseTensor(
        box_indices,
        tf.squeeze(tf.ones_like(box_indices, dtype=tf.int64), axis=-1),
        [num_classes],
    )
    box_class_encodings = tf.sparse.reorder(box_class_encodings)
    box_class_encodings = tf.sparse.to_dense(box_class_encodings)
    # Sparse values must be 1-D, so drop a trailing axis if one is present.
    if tf.rank(box_confidences) > 1:
      box_confidences = tf.squeeze(box_confidences, axis=-1)
    box_confidence_encodings = tf.SparseTensor(
        box_indices,
        box_confidences,
        [num_classes],
    )
    box_confidence_encodings = tf.sparse.reorder(box_confidence_encodings)
    box_confidence_encodings = tf.sparse.to_dense(box_confidence_encodings)
    return box_class_encodings, box_confidence_encodings

  # Important to avoid int32 here since there is no GPU kernel for int32.
  # int64 and float32 are fine.
  class_encodings, confidence_encodings = tf.nest.map_structure(
      tf.stop_gradient,
      tf.map_fn(
          map_box_encodings,
          tf.range(tf.cast(num_unique_boxes, dtype=tf.int64)),
          # `fn_output_signature` replaces the deprecated `dtype` argument.
          fn_output_signature=(tf.int64, tf.float32),
      ),
  )
  # The reshapes pin the output ranks and the trailing dimensions.
  merged_boxes = tf.reshape(merged_boxes, [-1, 4])
  class_encodings = tf.cast(class_encodings, dtype=tf.int32)
  class_encodings = tf.reshape(class_encodings, [-1, num_classes])
  confidence_encodings = tf.reshape(confidence_encodings, [-1, num_classes])
  merged_box_indices = tf.reshape(merged_box_indices, [-1])
  return (
      merged_boxes,
      class_encodings,
      confidence_encodings,
      merged_box_indices,
  )
|