File size: 7,076 Bytes
5672777
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""A module for helper tensorflow ops.

This was originally implemented in the TensorFlow Object Detection API.
"""

import tensorflow as tf, tf_keras

from official.vision.utils.object_detection import shape_utils


def indices_to_dense_vector(indices,
                            size,
                            indices_value=1.,
                            default_value=0,
                            dtype=tf.float32):
  """Builds a 1-D tensor holding `indices_value` at `indices`, default elsewhere.

  The vector is assembled with tf.dynamic_stitch instead of a sparse-to-dense
  conversion, because it is unclear whether
    tf.sparse_to_dense(indices, [size], 1, validate_indices=False)
  is safe when `indices` are not ordered. A dynamic `size` such as
  tf.shape(tensor)[0] is accepted.

  Args:
    indices: 1d Tensor with integer indices which are to be set to
      `indices_value`.
    size: scalar (integer) giving the length of the output Tensor.
    indices_value: value written at the positions listed in `indices`.
    default_value: value written at every other position.
    dtype: data type of the output vector.

  Returns:
    Dense 1D Tensor of shape [size] with the entries at `indices` set to
      `indices_value` and the remaining entries set to `default_value`.
  """
  length = tf.cast(size, dtype=tf.int32)

  # One value per output slot (the default), plus one value per requested index.
  background = tf.ones([length], dtype=dtype) * default_value
  foreground = tf.ones_like(indices, dtype=dtype) * indices_value

  # The full range is stitched first and the requested indices second, so the
  # entries for `indices` are listed after the defaults for the same slots.
  positions = [tf.range(length), tf.cast(indices, dtype=tf.int32)]
  payloads = [background, foreground]
  return tf.dynamic_stitch(positions, payloads)


def matmul_gather_on_zeroth_axis(params, indices, scope=None):
  """Matrix multiplication based implementation of tf.gather on zeroth axis.

  `params` is flattened to 2D as [params.shape[0], -1], `indices` is turned
  into a one-hot indicator matrix of depth params.shape[0], and the product of
  the two is reshaped to indices.shape + params.shape[1:].

  TODO(rathodv, jonathanhuang): enable sparse matmul option.

  Args:
    params: A Tensor. The tensor from which to gather values. Must be at least
      rank 1. float32 behaves exactly as before; other dtypes are accepted as
      long as tf.matmul supports them, because the indicator matrix is now
      created in the dtype of `params`.
    indices: A Tensor. Must be one of the following types: int32, int64. Must be
      in range [0, params.shape[0])
    scope: A name for the operation (optional). Defaults to 'MatMulGather'.

  Returns:
    A Tensor. Has the same type as params. Values from params gathered
    from indices given by indices, with shape indices.shape + params.shape[1:].
  """
  scope = scope or 'MatMulGather'
  with tf.name_scope(scope):
    params_shape = shape_utils.combined_static_and_dynamic_shape(params)
    indices_shape = shape_utils.combined_static_and_dynamic_shape(indices)
    # Collapse every trailing dimension so the gather becomes a 2D matmul.
    params2d = tf.reshape(params, [params_shape[0], -1])
    # tf.one_hot produces float32 unless told otherwise, which makes tf.matmul
    # fail with a dtype mismatch for non-float32 params. Match the dtype of the
    # reshaped params instead (for float32 params this is a no-op).
    indicator_matrix = tf.one_hot(
        indices, params_shape[0], dtype=params2d.dtype)
    gathered_result_flattened = tf.matmul(indicator_matrix, params2d)
    # Restore the trailing dimensions of params behind the shape of indices.
    return tf.reshape(gathered_result_flattened,
                      tf.stack(indices_shape + params_shape[1:]))


def merge_boxes_with_multiple_labels(
    boxes, classes, confidences, num_classes, quantization_bins=10000
):
  """Merges boxes with same coordinates and returns K-hot encoded classes.

  Box coordinates are quantized into `quantization_bins` bins and combined
  into a single int64 hash code per box; boxes sharing a hash code are treated
  as the same box. For each group, the box with the smallest original index is
  kept as the representative.

  Args:
    boxes: A tf.float32 tensor with shape [N, 4] holding N boxes. Only
      normalized coordinates are allowed.
    classes: A tf.int32 tensor with shape [N] holding class indices. The class
      index starts at 0.
    confidences: A tf.float32 tensor with shape [N] holding class confidences.
    num_classes: total number of classes to use for K-hot encoding.
    quantization_bins: the number of bins used to quantize the box coordinate.

  Returns:
    merged_boxes: A tf.float32 tensor with shape [N', 4] holding boxes,
      where N' <= N.
    class_encodings: A tf.int32 tensor with shape [N', num_classes] holding
      K-hot encodings for the merged boxes.
    confidence_encodings: A tf.float32 tensor with shape [N', num_classes]
      holding encodings of confidences for the merged boxes. Classes that are
      not attached to a box are left at 0. If the same (box, class) pair
      occurs more than once, a single one of its confidences is kept; which
      one is not specified.
    merged_box_indices: A tf.int32 tensor with shape [N'] holding original
      indices of the boxes.
  """
  # Quantize to integers so that equal boxes can be found by comparing one
  # int64 hash code per box instead of four floats.
  quantized_boxes = tf.cast(boxes * (quantization_bins - 1), dtype=tf.int64)
  ymin, xmin, ymax, xmax = tf.unstack(quantized_boxes, axis=1)
  hashcodes = (
      ymin
      + xmin * quantization_bins
      + ymax * quantization_bins * quantization_bins
      + xmax * quantization_bins * quantization_bins * quantization_bins
  )
  unique_hashcodes, unique_indices = tf.unique(hashcodes)
  num_boxes = tf.shape(boxes)[0]
  num_unique_boxes = tf.shape(unique_hashcodes)[0]
  # For every group of identical boxes keep the smallest original index.
  merged_box_indices = tf.math.unsorted_segment_min(
      tf.range(num_boxes), unique_indices, num_unique_boxes
  )
  merged_boxes = tf.gather(boxes, merged_box_indices)
  unique_indices = tf.cast(unique_indices, dtype=tf.int64)
  classes = tf.cast(classes, dtype=tf.int64)

  def map_box_encodings(i):
    """Produces box K-hot and score encodings for each class index."""
    # Select the original boxes that were merged into unique box `i`. The
    # scalar `i` broadcasts against the per-box group ids, so no vector of
    # repeated `i` values has to be materialized.
    box_mask = tf.equal(unique_indices, i)
    box_mask = tf.reshape(box_mask, [-1])
    box_indices = tf.boolean_mask(classes, box_mask)
    box_confidences = tf.boolean_mask(confidences, box_mask)
    box_indices = tf.cast(box_indices, dtype=tf.int64)

    # tf.SparseTensor expects a 2D indices tensor.
    if tf.rank(box_indices) == 1:
      box_indices = tf.expand_dims(box_indices, axis=-1)

    box_class_encodings = tf.SparseTensor(
        box_indices,
        tf.squeeze(tf.ones_like(box_indices, dtype=tf.int64), axis=-1),
        [num_classes],
    )
    box_class_encodings = tf.sparse.reorder(box_class_encodings)
    # Index validation rejects repeated indices, which would make the whole
    # op fail as soon as one box carries the same class label twice. Disable
    # it so duplicated labels are tolerated.
    box_class_encodings = tf.sparse.to_dense(
        box_class_encodings, validate_indices=False
    )

    if tf.rank(box_confidences) > 1:
      box_confidences = tf.squeeze(box_confidences, axis=-1)

    box_confidence_encodings = tf.SparseTensor(
        box_indices,
        box_confidences,
        [num_classes],
    )
    box_confidence_encodings = tf.sparse.reorder(box_confidence_encodings)
    # Same reasoning as for the class encodings above.
    box_confidence_encodings = tf.sparse.to_dense(
        box_confidence_encodings, validate_indices=False
    )

    return box_class_encodings, box_confidence_encodings

  # Important to avoid int32 here since there is no GPU kernel for int32.
  # int64 and float32 are fine.
  # `fn_output_signature` replaces the deprecated `dtype` argument of
  # tf.map_fn; the two are equivalent.
  class_encodings, confidence_encodings = tf.nest.map_structure(
      tf.stop_gradient,
      tf.map_fn(
          map_box_encodings,
          tf.range(tf.cast(num_unique_boxes, dtype=tf.int64)),
          fn_output_signature=(tf.int64, tf.float32),
      ),
  )

  # Reshape the outputs to the documented shapes.
  merged_boxes = tf.reshape(merged_boxes, [-1, 4])
  class_encodings = tf.cast(class_encodings, dtype=tf.int32)
  class_encodings = tf.reshape(class_encodings, [-1, num_classes])
  confidence_encodings = tf.reshape(confidence_encodings, [-1, num_classes])
  merged_box_indices = tf.reshape(merged_box_indices, [-1])
  return (
      merged_boxes,
      class_encodings,
      confidence_encodings,
      merged_box_indices,
  )