# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for movinet_layers.py."""

from absl.testing import parameterized
import tensorflow as tf
import tf_keras

from official.projects.movinet.modeling import movinet_layers
from official.vision.modeling.layers import nn_layers

# Chunk lengths (measured along axis 1) used when feeding the 4-step test
# inputs to streaming layers: one step at a time, two at a time, all at once.
_FRAMES_PER_CHUNK = (1, 2, 4)


def _split_frames(inputs, frames_per_chunk):
  """Splits `inputs` along axis 1 into equally sized consecutive chunks.

  Args:
    inputs: tensor with a statically known size on axis 1.
    frames_per_chunk: divisor applied to the axis-1 size; the tensor is cut
      into `inputs.shape[1] // frames_per_chunk` pieces, so each piece holds
      `frames_per_chunk` entries when the size divides evenly.

  Returns:
    The list of chunk tensors produced by `tf.split`.
  """
  return tf.split(inputs, inputs.shape[1] // frames_per_chunk, axis=1)


def _run_streaming(layer, inputs, frames_per_chunk):
  """Feeds `inputs` to `layer` chunk by chunk, threading `states` through.

  Args:
    layer: callable invoked as `layer(chunk, states=states)` and returning an
      `(output, states)` pair.
    inputs: tensor that is split along axis 1 before being fed to `layer`.
    frames_per_chunk: chunk length forwarded to `_split_frames`.

  Returns:
    The per-chunk outputs concatenated back together along axis 1.
  """
  states = {}  # Every streaming run starts from an empty state dict.
  outputs = []
  for chunk in _split_frames(inputs, frames_per_chunk):
    output, states = layer(chunk, states=states)
    outputs.append(output)
  return tf.concat(outputs, axis=1)


def _ramp_inputs():
  """Returns a [1, 4, 2, 1, 3] float32 tensor equal to `t + 1` at axis-1 index t."""
  ramp = tf.range(4, dtype=tf.float32) + 1.
  ramp = tf.reshape(ramp, [1, 4, 1, 1, 1])
  return tf.tile(ramp, [1, 1, 2, 1, 3])


class MovinetLayersTest(parameterized.TestCase, tf.test.TestCase):
  """Unit tests for the layers defined in `movinet_layers`."""

  def _check_stream_conv_block(self, expected_values, **block_kwargs):
    """Checks that `StreamConvBlock` reproduces `ConvBlock`.

    Both blocks are built from the same arguments and applied to an all-ones
    [1, 4, 2, 2, 3] input. The streaming block is checked once on the whole
    input and once per chunk length in `_FRAMES_PER_CHUNK`.

    Args:
      expected_values: nested list the chunked output is compared against.
      **block_kwargs: extra constructor arguments (e.g. `conv_type`) passed
        to both blocks on top of the shared configuration.
    """
    shared_kwargs = dict(
        filters=3,
        kernel_size=(3, 3, 3),
        strides=(1, 2, 2),
        causal=True,
        kernel_initializer='ones',
        use_bias=False,
        activation='relu',
        **block_kwargs,
    )
    conv_block = movinet_layers.ConvBlock(**shared_kwargs)
    stream_conv_block = movinet_layers.StreamConvBlock(**shared_kwargs)

    inputs = tf.ones([1, 4, 2, 2, 3])
    expected = conv_block(inputs)

    # Called without `states`, on the whole input in one go.
    predicted_disabled, _ = stream_conv_block(inputs)

    self.assertEqual(predicted_disabled.shape, expected.shape)
    self.assertAllClose(predicted_disabled, expected)

    for frames_per_chunk in _FRAMES_PER_CHUNK:
      predicted = _run_streaming(stream_conv_block, inputs, frames_per_chunk)

      self.assertEqual(predicted.shape, expected.shape)
      self.assertAllClose(predicted, expected)
      self.assertAllClose(predicted, expected_values)

  def test_squeeze3d(self):
    """Squeeze3D turns a [5, 1, 1, 1, 3] tensor of ones into a [5, 3] one."""
    squeeze = movinet_layers.Squeeze3D()
    inputs = tf.ones([5, 1, 1, 1, 3])
    predicted = squeeze(inputs)

    expected = tf.ones([5, 3])

    self.assertEqual(predicted.shape, expected.shape)
    self.assertAllEqual(predicted, expected)

  def test_mobile_conv2d(self):
    """All-ones 3x3 'same' MobileConv2D on all-ones input yields 12s."""
    conv2d = movinet_layers.MobileConv2D(
        filters=3,
        kernel_size=(3, 3),
        strides=(1, 1),
        padding='same',
        kernel_initializer='ones',
        use_bias=False,
        use_depthwise=False,
        use_temporal=False,
        use_buffered_input=True,
    )
    inputs = tf.ones([1, 2, 2, 2, 3])

    predicted = conv2d(inputs)

    expected = tf.constant(
        [[[[[12., 12., 12.],
            [12., 12., 12.]],
           [[12., 12., 12.],
            [12., 12., 12.]]],
          [[[12., 12., 12.],
            [12., 12., 12.]],
           [[12., 12., 12.],
            [12., 12., 12.]]]]])

    self.assertEqual(predicted.shape, expected.shape)
    self.assertAllClose(predicted, expected)

  def test_mobile_conv2d_bn(self):
    """Same setup as `test_mobile_conv2d`, with a batch norm op attached."""
    batch_norm_op = tf_keras.layers.BatchNormalization(
        momentum=0.9,
        epsilon=1.,
        name='bn')
    conv2d = movinet_layers.MobileConv2D(
        filters=3,
        kernel_size=(3, 3),
        strides=(1, 1),
        padding='same',
        kernel_initializer='ones',
        use_bias=False,
        use_depthwise=False,
        use_temporal=False,
        use_buffered_input=True,
        batch_norm_op=batch_norm_op,
    )
    inputs = tf.ones([1, 2, 2, 2, 3])

    predicted = conv2d(inputs)

    # NOTE(review): 8.48528 ~= 12 / sqrt(2), which is what an untrained batch
    # norm (moving variance 1) with epsilon=1 would produce -- confirm.
    expected = tf.constant(
        [[[[[8.48528, 8.48528, 8.48528],
            [8.48528, 8.48528, 8.48528]],
           [[8.48528, 8.48528, 8.48528],
            [8.48528, 8.48528, 8.48528]]],
          [[[8.48528, 8.48528, 8.48528],
            [8.48528, 8.48528, 8.48528]],
           [[8.48528, 8.48528, 8.48528],
            [8.48528, 8.48528, 8.48528]]]]])

    self.assertEqual(predicted.shape, expected.shape)
    self.assertAllClose(predicted, expected)

  def test_mobile_conv2d_activation(self):
    """Same setup as `test_mobile_conv2d`, with relu6 capping outputs at 6."""
    conv2d = movinet_layers.MobileConv2D(
        filters=3,
        kernel_size=(3, 3),
        strides=(1, 1),
        padding='same',
        kernel_initializer='ones',
        use_bias=False,
        use_depthwise=False,
        use_temporal=False,
        use_buffered_input=True,
        activation_op=tf.nn.relu6,
    )
    inputs = tf.ones([1, 2, 2, 2, 3])

    predicted = conv2d(inputs)

    expected = tf.constant(
        [[[[[6., 6., 6.],
            [6., 6., 6.]],
           [[6., 6., 6.],
            [6., 6., 6.]]],
          [[[6., 6., 6.],
            [6., 6., 6.]],
           [[6., 6., 6.],
            [6., 6., 6.]]]]])

    self.assertEqual(predicted.shape, expected.shape)
    self.assertAllClose(predicted, expected)

  def test_mobile_conv2d_temporal(self):
    """Depthwise temporal MobileConv2D on an input pre-padded on axis 1."""
    conv2d = movinet_layers.MobileConv2D(
        filters=3,
        kernel_size=(3, 1),
        strides=(1, 1),
        padding='causal',
        kernel_initializer='ones',
        use_bias=False,
        use_depthwise=True,
        use_temporal=True,
        use_buffered_input=True,
    )
    inputs = tf.ones([1, 2, 2, 1, 3])
    # Two zero entries are prepended on axis 1 before the layer is called.
    paddings = [[0, 0], [2, 0], [0, 0], [0, 0], [0, 0]]
    padded_inputs = tf.pad(inputs, paddings)

    predicted = conv2d(padded_inputs)

    expected = tf.constant(
        [[[[[1., 1., 1.]],
           [[1., 1., 1.]]],
          [[[2., 2., 2.]],
           [[2., 2., 2.]]]]])

    self.assertEqual(predicted.shape, expected.shape)
    self.assertAllClose(predicted, expected)

  def test_stream_buffer(self):
    """StreamBuffer + buffered-input Conv3D matches an unbuffered Conv3D."""
    conv3d_stream = nn_layers.Conv3D(
        filters=3,
        kernel_size=(3, 3, 3),
        strides=(1, 2, 2),
        padding='causal',
        kernel_initializer='ones',
        use_bias=False,
        use_buffered_input=True,
    )
    buffer = movinet_layers.StreamBuffer(buffer_size=2)

    conv3d = nn_layers.Conv3D(
        filters=3,
        kernel_size=(3, 3, 3),
        strides=(1, 2, 2),
        padding='causal',
        kernel_initializer='ones',
        use_bias=False,
        use_buffered_input=False,
    )

    inputs = tf.ones([1, 4, 2, 2, 3])
    expected = conv3d(inputs)

    for frames_per_chunk in _FRAMES_PER_CHUNK:
      states = {}
      predicted = []
      for frame in _split_frames(inputs, frames_per_chunk):
        # The buffer output, not the raw chunk, is what the conv consumes.
        x, states = buffer(frame, states=states)
        x = conv3d_stream(x)
        predicted.append(x)
      predicted = tf.concat(predicted, axis=1)

      self.assertEqual(predicted.shape, expected.shape)
      self.assertAllClose(predicted, expected)
      self.assertAllClose(
          predicted,
          [[[[[12., 12., 12.]]],
            [[[24., 24., 24.]]],
            [[[36., 36., 36.]]],
            [[[36., 36., 36.]]]]])

  def test_stream_conv_block_2plus1d(self):
    """StreamConvBlock matches ConvBlock for `conv_type='2plus1d'`."""
    self._check_stream_conv_block(
        [[[[[35.9640400, 35.9640400, 35.9640400]]],
          [[[71.9280700, 71.9280700, 71.9280700]]],
          [[[107.892105, 107.892105, 107.892105]]],
          [[[107.892105, 107.892105, 107.892105]]]]],
        conv_type='2plus1d',
    )

  def test_stream_conv_block_3d_2plus1d(self):
    """StreamConvBlock matches ConvBlock for `conv_type='3d_2plus1d'`."""
    self._check_stream_conv_block(
        [[[[[35.9640400, 35.9640400, 35.9640400]]],
          [[[71.9280700, 71.9280700, 71.9280700]]],
          [[[107.892105, 107.892105, 107.892105]]],
          [[[107.892105, 107.892105, 107.892105]]]]],
        conv_type='3d_2plus1d',
    )

  def test_stream_conv_block(self):
    """StreamConvBlock matches ConvBlock with the default `conv_type`."""
    self._check_stream_conv_block(
        [[[[[11.994005, 11.994005, 11.994005]]],
          [[[23.988010, 23.988010, 23.988010]]],
          [[[35.982014, 35.982014, 35.982014]]],
          [[[35.982014, 35.982014, 35.982014]]]]])

  def test_stream_squeeze_excitation(self):
    """Chunked StreamSqueezeExcitation output matches the one-shot output."""
    se = movinet_layers.StreamSqueezeExcitation(
        3, causal=True, kernel_initializer='ones')

    inputs = _ramp_inputs()

    expected, _ = se(inputs)

    for frames_per_chunk in _FRAMES_PER_CHUNK:
      predicted = _run_streaming(se, inputs, frames_per_chunk)

      self.assertEqual(predicted.shape, expected.shape)
      # Positional arguments are rtol and atol.
      self.assertAllClose(predicted, expected, 1e-5, 1e-5)
      self.assertAllClose(
          predicted,
          [[[[[0.9998109, 0.9998109, 0.9998109]],
             [[0.9998109, 0.9998109, 0.9998109]]],
            [[[1.9999969, 1.9999969, 1.9999969]],
             [[1.9999969, 1.9999969, 1.9999969]]],
            [[[3., 3., 3.]],
             [[3., 3., 3.]]],
            [[[4., 4., 4.]],
             [[4., 4., 4.]]]]],
          1e-5,
          1e-5)

  def test_stream_squeeze_excitation_2plus3d(self):
    """Same check as above for `se_type='2plus3d'` with hard activations."""
    se = movinet_layers.StreamSqueezeExcitation(
        3,
        se_type='2plus3d',
        causal=True,
        activation='hard_swish',
        gating_activation='hard_sigmoid',
        kernel_initializer='ones')

    inputs = _ramp_inputs()

    expected, _ = se(inputs)

    for frames_per_chunk in _FRAMES_PER_CHUNK:
      predicted = _run_streaming(se, inputs, frames_per_chunk)

      self.assertEqual(predicted.shape, expected.shape)
      self.assertAllClose(predicted, expected, atol=1e-4)
      self.assertAllClose(
          predicted,
          [[[[[1., 1., 1.]],
             [[1., 1., 1.]]],
            [[[2., 2., 2.]],
             [[2., 2., 2.]]],
            [[[3., 3., 3.]],
             [[3., 3., 3.]]],
            [[[4., 4., 4.]],
             [[4., 4., 4.]]]]],
          atol=1e-4)

  def test_stream_movinet_block(self):
    """Chunked causal MovinetBlock output matches the one-shot output."""
    block = movinet_layers.MovinetBlock(
        out_filters=3,
        expand_filters=6,
        kernel_size=(3, 3, 3),
        strides=(1, 2, 2),
        causal=True,
    )

    inputs = _ramp_inputs()

    expected, _ = block(inputs)

    for frames_per_chunk in _FRAMES_PER_CHUNK:
      predicted = _run_streaming(block, inputs, frames_per_chunk)

      self.assertEqual(predicted.shape, expected.shape)
      self.assertAllClose(predicted, expected)

  def test_stream_movinet_block_none_se(self):
    """MovinetBlock with `se_type='none'` streams and reports one state."""
    block = movinet_layers.MovinetBlock(
        out_filters=3,
        expand_filters=6,
        kernel_size=(3, 3, 3),
        strides=(1, 2, 2),
        causal=True,
        se_type='none',
        state_prefix='test',
    )

    inputs = _ramp_inputs()

    expected, expected_states = block(inputs)

    # The one-shot call must expose exactly the prefixed stream-buffer state;
    # this does not depend on the chunking, so it is checked once.
    self.assertAllEqual(list(expected_states), ['test_stream_buffer'])

    for frames_per_chunk in _FRAMES_PER_CHUNK:
      predicted = _run_streaming(block, inputs, frames_per_chunk)

      self.assertEqual(predicted.shape, expected.shape)
      self.assertAllClose(predicted, expected)

  def test_stream_classifier_head(self):
    """ClassifierHead on the last streamed Head output matches one-shot."""
    head = movinet_layers.Head(project_filters=5)
    classifier_head = movinet_layers.ClassifierHead(
        head_filters=10, num_classes=4)

    inputs = _ramp_inputs()

    x, _ = head(inputs)
    expected = classifier_head(x)

    for frames_per_chunk in _FRAMES_PER_CHUNK:
      states = {}
      # Only the head output for the final chunk is classified.
      for frame in _split_frames(inputs, frames_per_chunk):
        x, states = head(frame, states=states)
      predicted = classifier_head(x)

      self.assertEqual(predicted.shape, expected.shape)
      self.assertAllClose(predicted, expected)


if __name__ == '__main__':
  tf.test.main()