|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Utils used to manipulate tensor shapes.""" |
|
|
|
from __future__ import absolute_import |
|
from __future__ import division |
|
from __future__ import print_function |
|
|
|
from six.moves import zip |
|
import tensorflow.compat.v1 as tf |
|
|
|
from object_detection.utils import static_shape |
|
|
|
|
|
# Module-level alias: binds `static_shape.get_dim_as_int` under the same name
# in this module's namespace.
get_dim_as_int = static_shape.get_dim_as_int
|
|
|
|
|
def _is_tensor(t):
  """Tells whether `t` is one of the TensorFlow tensor-like types.

  Args:
    t: the object to inspect.

  Returns:
    True if `t` is an instance of tf.Tensor, tf.SparseTensor or tf.Variable,
    False otherwise.
  """
  tensor_types = (tf.Tensor, tf.SparseTensor, tf.Variable)
  return isinstance(t, tensor_types)
|
|
|
|
|
def _set_dim_0(t, d0):
  """Overrides the static size of the first dimension of a tensor.

  Args:
    t: the input tensor; its rank is expected to be at least 1.
    d0: an integer giving the size of the 0-th dimension of `t`.

  Returns:
    The same tensor `t`, after its static shape has been updated so that the
    0-th dimension is `d0`.
  """
  new_static_shape = t.shape.as_list()
  # Only the leading entry changes; the other dimensions are passed back as is.
  new_static_shape[0] = d0
  t.set_shape(new_static_shape)
  return t
|
|
|
|
|
def pad_tensor(t, length):
  """Pads the input tensor with 0s along the first dimension up to the length.

  Args:
    t: the input tensor, assuming the rank is at least 1 and statically known.
    length: an integer, a scalar tensor, or a tensor of shape [1], indicating
      the first dimension of the input tensor t after padding, assuming
      length >= t.shape[0].

  Returns:
    padded_t: the padded tensor, whose first dimension is length. If the length
      is not a tensor (e.g. a Python integer), the first dimension of padded_t
      is set to length statically.
  """
  # The paddings are assembled as a plain Python list with one [before, after]
  # pair per dimension; only the "after" amount of dimension 0 is non-zero.
  rank = len(t.get_shape())
  t_d0 = tf.shape(t)[0]

  if _is_tensor(length) and len(length.get_shape()) != 0:
    # `length` is a rank-1 tensor: use its first element as the target size.
    target_d0 = length[0]
  else:
    # `length` is a Python integer (or other scalar) or a scalar tensor.
    target_d0 = length

  paddings = [[0, 0] for _ in range(rank)]
  paddings[0][1] = target_d0 - t_d0
  padded_t = tf.pad(t, paddings)

  # With a non-tensor length the output size is known up front, so record it
  # on the static shape, mirroring what clip_tensor does.
  if not _is_tensor(length):
    padded_t = _set_dim_0(padded_t, length)
  return padded_t
|
|
|
|
|
def clip_tensor(t, length):
  """Keeps only the first `length` entries along the first dimension.

  Args:
    t: the input tensor, assuming the rank is at least 1.
    length: a tensor of shape [1] or an integer, indicating the first dimension
      of the input tensor t after clipping, assuming length <= t.shape[0].

  Returns:
    clipped_t: the clipped tensor, whose first dimension is length. If the
      length is not a tensor, the first dimension of clipped_t is set to
      length statically.
  """
  kept_indices = tf.range(length)
  clipped_t = tf.gather(t, kept_indices)
  if _is_tensor(length):
    # Dynamic length: nothing can be recorded on the static shape.
    return clipped_t
  return _set_dim_0(clipped_t, length)
|
|
|
|
|
def pad_or_clip_tensor(t, length):
  """Pads or clips the input tensor along its first dimension only.

  The work is delegated to `pad_or_clip_nd`, with `length` as the requested
  size of dimension 0 and the static sizes of `t` for every other dimension.

  Args:
    t: the input tensor, assuming the rank is at least 1.
    length: a tensor of shape [1] or an integer, indicating the first dimension
      of the input tensor t after processing.

  Returns:
    processed_t: the processed tensor, whose first dimension is length. If the
      length is an integer, the first dimension of the processed tensor is set
      to length statically.
  """
  trailing_dims = t.shape.as_list()[1:]
  target_shape = [length] + trailing_dims
  return pad_or_clip_nd(t, target_shape)
|
|
|
|
|
def pad_or_clip_nd(tensor, output_shape):
  """Pads or clips every dimension of a tensor to a requested size.

  Args:
    tensor: Input tensor to pad or clip.
    output_shape: A list of integers / scalar tensors (or None for dynamic dim)
      representing the size to pad or clip each dimension of the input tensor.

  Returns:
    Input tensor padded and clipped to the output shape. Entries of
    `output_shape` that are not tf.Tensor instances are also recorded on the
    static shape of the result.
  """
  dynamic_shape = tf.shape(tensor)

  # Step 1: clip. For each dimension the slice size is the requested size when
  # the input is larger than requested, and -1 (keep everything) otherwise.
  slice_sizes = []
  for axis, target in enumerate(output_shape):
    if target is None:
      slice_sizes.append(-1)
    else:
      slice_sizes.append(
          tf.where(dynamic_shape[axis] - target > 0, target, -1))
  slice_begin = tf.zeros(len(slice_sizes), dtype=tf.int32)
  clipped = tf.slice(tensor, begin=slice_begin, size=slice_sizes)

  # Step 2: pad. Whatever is still missing after clipping is appended at the
  # end of each dimension; nothing is ever added in front.
  clipped_shape = tf.shape(clipped)
  pad_after = []
  for axis, target in enumerate(output_shape):
    pad_after.append(0 if target is None else target - clipped_shape[axis])
  pad_before = tf.zeros(len(pad_after), dtype=tf.int32)
  paddings = tf.stack([pad_before, pad_after], axis=1)
  padded = tf.pad(clipped, paddings=paddings)

  # Step 3: expose the sizes that were given as non-tensors on the static shape.
  static_dims = [
      None if isinstance(dim, tf.Tensor) else dim for dim in output_shape
  ]
  padded.set_shape(static_dims)
  return padded
|
|
|
|
|
def combined_static_and_dynamic_shape(tensor):
  """Builds a per-dimension shape list, preferring static sizes.

  For every dimension the statically known size is used when there is one;
  otherwise the matching entry of `tf.shape(tensor)` is used. This is useful
  to preserve static shapes when available in reshape operation.

  Args:
    tensor: A tensor of any type.

  Returns:
    A list of size tensor.shape.ndims containing integers or a scalar tensor.
  """
  static_dims = tensor.shape.as_list()
  dynamic_dims = tf.shape(tensor)
  return [
      dynamic_dims[axis] if size is None else size
      for axis, size in enumerate(static_dims)
  ]
|
|
|
|
|
def static_or_dynamic_map_fn(fn, elems, dtype=None,
                             parallel_iterations=32, back_prop=True):
  """Maps `fn` over the first dimension, unrolling statically when possible.

  When the leading dimension of `elems` is statically known (and non-zero),
  the inputs are unstacked, `fn` is called once per slice in a plain Python
  loop, and the results are stacked again. This allows graphs to be acyclic
  when the batch size is static. Otherwise the call is forwarded to tf.map_fn.
  For comparison, see https://www.tensorflow.org/api_docs/python/tf/map_fn.

  Note that `static_or_dynamic_map_fn` currently is not *fully* interchangeable
  with the default tf.map_fn function as it does not accept nested inputs (only
  Tensors or lists of Tensors). Likewise, on the unrolled path the output of
  `fn` can only be a Tensor or list of Tensors.

  TODO(jonathanhuang): make this function fully interchangeable with tf.map_fn.

  Args:
    fn: The callable to be performed. It is called with one argument per
      slice: a single Tensor when `elems` is a Tensor, or a tuple with one
      slice per entry when `elems` is a list.
    elems: A tensor or list of tensors, each of which will be unpacked along
      their first dimension. The sequence of the resulting slices will be
      applied to fn.
    dtype: (optional) The output type(s) of fn. Only forwarded to tf.map_fn
      when the native implementation is used.
    parallel_iterations: (optional) number of batch items to process in
      parallel. Only forwarded to tf.map_fn when the native implementation is
      used; defaults to 32.
    back_prop: (optional) True enables support for back propagation. Only
      forwarded to tf.map_fn when the native implementation is used.

  Returns:
    A tensor or list of tensors. Each tensor packs the results of applying fn
    to tensors unpacked from elems along the first dimension, from first to
    last.

  Raises:
    ValueError: if `elems` is not a Tensor or a list of Tensors.
    ValueError: if, on the unrolled path, `fn` does not return a Tensor or a
      list of Tensors.
  """
  if isinstance(elems, list):
    if any(not isinstance(elem, tf.Tensor) for elem in elems):
      raise ValueError('`elems` must be a Tensor or list of Tensors.')

    shapes = [elem.shape.as_list() for elem in elems]
    # Unrolling needs every input to have rank >= 1 and the same known,
    # non-zero leading dimension; anything else goes through tf.map_fn.
    can_unroll = all(
        shape and shape[0] and shape[0] == shapes[0][0] for shape in shapes)
    if not can_unroll:
      return tf.map_fn(fn, elems, dtype, parallel_iterations, back_prop)
    unstacked = [tf.unstack(elem) for elem in elems]
    outputs = [fn(slices) for slices in zip(*unstacked)]
  else:
    if not isinstance(elems, tf.Tensor):
      raise ValueError('`elems` must be a Tensor or list of Tensors.')
    shape = elems.shape.as_list()
    if not (shape and shape[0]):
      return tf.map_fn(fn, elems, dtype, parallel_iterations, back_prop)
    outputs = [fn(single_slice) for single_slice in tf.unstack(elems)]

  # Case 1: every call produced a single Tensor -> one stacked Tensor.
  if all(isinstance(result, tf.Tensor) for result in outputs):
    return tf.stack(outputs)

  # Case 2: every call produced a list of Tensors -> stack position-wise.
  all_tensor_lists = all(
      isinstance(result, list) and
      all(isinstance(entry, tf.Tensor) for entry in result)
      for result in outputs)
  if all_tensor_lists:
    return [tf.stack(per_position) for per_position in zip(*outputs)]

  raise ValueError('`fn` should return a Tensor or a list of Tensors.')
|
|
|
|
|
def check_min_image_dim(min_dim, image_tensor):
  """Verifies that the image height and width are at least `min_dim`.

  The static height and width are read through `static_shape.get_height` and
  `static_shape.get_width`. When both are known the check is done immediately,
  at graph construction time. When either is unknown, an Assert on entries 1
  and 2 of the dynamic shape is attached as a control dependency instead.

  Args:
    min_dim: The minimum number of pixels along the width and height of the
      image.
    image_tensor: The image tensor to check size for.

  Returns:
    If `image_tensor` has dynamic size, an identity of `image_tensor` that
    carries the Assert control dependency. Otherwise `image_tensor` itself.

  Raises:
    ValueError: if the static height or width of `image_tensor` is smaller
      than `min_dim`.
  """
  static_dims = image_tensor.get_shape()
  height = static_shape.get_height(static_dims)
  width = static_shape.get_width(static_dims)

  if height is not None and width is not None:
    # Both spatial sizes are static: validate right away.
    if height < min_dim or width < min_dim:
      raise ValueError(
          'image size must be >= %d in both height and width; image dim = %d,%d' %
          (min_dim, height, width))
    return image_tensor

  # At least one spatial size is only known at run time: defer the check.
  height_ok = tf.greater_equal(tf.shape(image_tensor)[1], min_dim)
  width_ok = tf.greater_equal(tf.shape(image_tensor)[2], min_dim)
  shape_assert = tf.Assert(
      tf.logical_and(height_ok, width_ok),
      ['image size must be >= {} in both height and width.'.format(min_dim)])
  with tf.control_dependencies([shape_assert]):
    return tf.identity(image_tensor)
|
|
|
|
|
def assert_shape_equal(shape_a, shape_b):
  """Checks that two shapes are identical.

  When every entry of both shapes is a Python integer the comparison happens
  immediately and a mismatch raises a ValueError. Otherwise the comparison is
  deferred to a tf.assert_equal op.

  Args:
    shape_a: a list containing shape of the first tensor.
    shape_b: a list containing shape of the second tensor.

  Returns:
    A tf.no_op() when both shapes are fully static and equal, or the
    tf.assert_equal() op when at least one entry is not a Python integer.

  Raises:
    ValueError: When shapes are both static and unequal.
  """
  def _all_ints(shape):
    return all(isinstance(dim, int) for dim in shape)

  if not (_all_ints(shape_a) and _all_ints(shape_b)):
    # Some dimension is dynamic: let TensorFlow do the comparison.
    return tf.assert_equal(shape_a, shape_b)
  if shape_a != shape_b:
    raise ValueError('Unequal shapes {}, {}'.format(shape_a, shape_b))
  return tf.no_op()
|
|
|
|
|
def assert_shape_equal_along_first_dimension(shape_a, shape_b):
  """Checks that two shapes agree on their 0th dimension.

  When the first entry of both shapes is a Python integer the comparison
  happens immediately and a mismatch raises a ValueError. Otherwise the
  comparison is deferred to a tf.assert_equal op.

  Args:
    shape_a: a list containing shape of the first tensor.
    shape_b: a list containing shape of the second tensor.

  Returns:
    A tf.no_op() when both first dimensions are static and equal, or the
    tf.assert_equal() op when at least one of them is not a Python integer.

  Raises:
    ValueError: When the first dimensions are both static and unequal.
  """
  first_a = shape_a[0]
  first_b = shape_b[0]
  if not (isinstance(first_a, int) and isinstance(first_b, int)):
    # A leading dimension is dynamic: let TensorFlow do the comparison.
    return tf.assert_equal(first_a, first_b)
  if first_a != first_b:
    raise ValueError('Unequal first dimension {}, {}'.format(
        first_a, first_b))
  return tf.no_op()
|
|
|
|
|
def assert_box_normalized(boxes, maximum_normalized_coordinate=1.1):
  """Builds an assertion that all box coordinates lie in a normalized range.

  The assertion holds when the smallest value in `boxes` is >= 0 and the
  largest value is <= `maximum_normalized_coordinate`.

  Args:
    boxes: a tensor of shape [N, 4] where N is the number of boxes.
    maximum_normalized_coordinate: Maximum coordinate value to be considered
      as normalized, default to 1.1.

  Returns:
    a tf.Assert op which fails when the input box tensor is not normalized.
    `boxes` is passed as the data reported by the assertion.
  """
  lowest = tf.reduce_min(boxes)
  highest = tf.reduce_max(boxes)
  below_upper_bound = tf.less_equal(highest, maximum_normalized_coordinate)
  above_lower_bound = tf.greater_equal(lowest, 0)
  within_range = tf.logical_and(below_upper_bound, above_lower_bound)
  return tf.Assert(within_range, [boxes])
|
|
|
|
|
def flatten_dimensions(inputs, first, last):
  """Merges the dimensions in [first, last) of a `K-d` tensor into one.

  Converts `inputs` with shape [D0, D1, ..., D(K-1)] into a tensor of shape
  [D0, D1, ..., D(first) * D(first+1) * ... * D(last-1), D(last), ..., D(K-1)].

  Example:
  `inputs` is a tensor with initial shape [10, 5, 20, 20, 3].
  new_tensor = flatten_dimensions(inputs, first=1, last=3)
  new_tensor.shape -> [10, 100, 20, 3].

  Args:
    inputs: a tensor with shape [D0, D1, ..., D(K-1)].
    first: first value for the range of dimensions to flatten.
    last: last value for the range of dimensions to flatten. Note that the last
      dimension itself is excluded.

  Returns:
    a tensor with shape
    [D0, D1, ..., D(first) * D(first + 1) * ... * D(last - 1), D(last), ...,
     D(K-1)].

  Raises:
    ValueError: if `first` >= inputs.shape.ndims or `last` >
      inputs.shape.ndims.
  """
  ndims = inputs.shape.ndims
  if first >= ndims or last > ndims:
    raise ValueError('`first` and `last` must be less than inputs.shape.ndims. '
                     'found {} and {} respectively while ndims is {}'.format(
                         first, last, ndims))

  dims = combined_static_and_dynamic_shape(inputs)
  leading = dims[:first]
  trailing = dims[last:]
  # keepdims=True keeps the product as a length-1 vector so that it can be
  # concatenated with the untouched leading and trailing dimensions.
  merged = tf.reduce_prod(dims[first:last], keepdims=True)
  target_shape = tf.concat([leading, merged, trailing], axis=0)
  return tf.reshape(inputs, target_shape)
|
|
|
|
|
def flatten_first_n_dimensions(inputs, n):
  """Merges the first n dimensions of a `K-d` tensor into a single one.

  Converts `inputs` with shape [D0, D1, ..., D(K-1)] into a tensor of shape
  [D0 * D1 * ... * D(n-1), D(n), ... D(K-1)], i.e. a `(K-n+1)-d` tensor. This
  is `flatten_dimensions` applied to the range [0, n).

  Example:
  `inputs` is a tensor with initial shape [10, 5, 20, 20, 3].
  new_tensor = flatten_first_n_dimensions(inputs, 2)
  new_tensor.shape -> [50, 20, 20, 3].

  Args:
    inputs: a tensor with shape [D0, D1, ..., D(K-1)].
    n: The number of dimensions to flatten.

  Returns:
    a tensor with shape [D0 * D1 * ... * D(n-1), D(n), ... D(K-1)].
  """
  first_dim, end_dim = 0, n
  return flatten_dimensions(inputs, first_dim, end_dim)
|
|
|
|
|
def expand_first_dimension(inputs, dims):
  """Splits the first dimension of a `K-d` tensor into several dimensions.

  Converts `inputs` with shape [D0, D1, ..., D(K-1)] into a tensor of shape
  [dims[0], dims[1], ..., dims[-1], D1, ..., D(k-1)]. The reshape is guarded
  by an assertion that D0 equals the product of `dims`.

  Example:
  `inputs` is a tensor with shape [50, 20, 20, 3].
  new_tensor = expand_first_dimension(inputs, [10, 5]).
  new_tensor.shape -> [10, 5, 20, 20, 3].

  Args:
    inputs: a tensor with shape [D0, D1, ..., D(K-1)].
    dims: List with new dimensions to expand first axis into. The length of
      `dims` is typically 2 or larger.

  Returns:
    a tensor with shape [dims[0], dims[1], ..., dims[-1], D1, ..., D(k-1)].
  """
  current_shape = combined_static_and_dynamic_shape(inputs)
  target_shape = tf.stack(dims + current_shape[1:])

  # The new leading dimensions must multiply out to the old first dimension.
  dims_product = tf.reduce_prod(tf.stack(dims))
  size_check = tf.assert_equal(
      current_shape[0], dims_product,
      message=('First dimension of `inputs` cannot be expanded into provided '
               '`dims`'))

  with tf.control_dependencies([size_check]):
    return tf.reshape(inputs, target_shape)
|
|
|
|
|
def resize_images_and_return_shapes(inputs, image_resizer_fn):
  """Resizes images using the given function and returns their true shapes.

  `image_resizer_fn` is applied to every image of the batch through
  `static_or_dynamic_map_fn`, with float32 and int32 declared as the two
  output types.

  Args:
    inputs: a float32 Tensor representing a batch of inputs of shape
      [batch_size, height, width, channels].
    image_resizer_fn: a function which takes in a single image and outputs
      a resized image and its original shape.

  Returns:
    resized_inputs: The inputs resized according to image_resizer_fn.
    true_image_shapes: A integer tensor of shape [batch_size, 3]
      representing the height, width and number of channels in inputs.

  Raises:
    ValueError: if the dtype of `inputs` is not tf.float32.
  """
  # Compare dtypes by value rather than by object identity: equality is the
  # documented way to test whether two dtypes denote the same type.
  if inputs.dtype != tf.float32:
    raise ValueError('`resize_images_and_return_shapes` expects a'
                     ' tf.float32 tensor')

  outputs = static_or_dynamic_map_fn(
      image_resizer_fn,
      elems=inputs,
      dtype=[tf.float32, tf.int32])
  resized_inputs = outputs[0]
  true_image_shapes = outputs[1]

  return resized_inputs, true_image_shapes
|
|