Spaces:
Runtime error
Runtime error
# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for movinet_layers.py."""
from absl.testing import parameterized | |
import tensorflow as tf, tf_keras | |
from official.projects.movinet.modeling import movinet_layers | |
from official.vision.modeling.layers import nn_layers | |
class MovinetLayersTest(parameterized.TestCase, tf.test.TestCase):
  """Unit tests for the layers in `movinet_layers`.

  Most streaming tests follow one pattern: run a layer once on the whole
  input, then feed the same input chunk by chunk along axis 1 while passing
  the returned `states` into the next call, and assert both runs agree.
  """

  # Chunk lengths along axis 1 tried by every streaming test. All streaming
  # inputs in this class have 4 entries on that axis, so these values yield
  # 4, 2 and 1 chunks respectively.
  _FRAMES_PER_CHUNK = (1, 2, 4)

  def _ramp_inputs(self):
    """Builds the ramp tensor shared by several streaming tests.

    Returns:
      A float32 tensor of shape [1, 4, 2, 1, 3] in which every element of
      slice `i` along axis 1 equals `i + 1`.
    """
    ramp = tf.range(4, dtype=tf.float32) + 1.
    ramp = tf.reshape(ramp, [1, 4, 1, 1, 1])
    return tf.tile(ramp, [1, 1, 2, 1, 3])

  def _run_in_chunks(self, step, inputs, frames_per_chunk):
    """Feeds `inputs` to `step` chunk by chunk along axis 1.

    Args:
      step: callable invoked as `step(chunk, states=states)` and returning an
        `(output, states)` pair. The first call receives an empty dict; each
        later call receives the states returned by the previous one.
      inputs: tensor to split along axis 1.
      frames_per_chunk: length of each chunk along axis 1. The number of
        chunks is `inputs.shape[1] // frames_per_chunk`.

    Returns:
      The list of per-chunk outputs, in call order.
    """
    num_chunks = inputs.shape[1] // frames_per_chunk
    states = {}
    outputs = []
    for chunk in tf.split(inputs, num_chunks, axis=1):
      output, states = step(chunk, states=states)
      outputs.append(output)
    return outputs

  def test_squeeze3d(self):
    """Squeeze3D maps a [5, 1, 1, 1, 3] tensor of ones to a [5, 3] one."""
    squeeze = movinet_layers.Squeeze3D()
    inputs = tf.ones([5, 1, 1, 1, 3])

    predicted = squeeze(inputs)
    expected = tf.ones([5, 3])

    self.assertEqual(predicted.shape, expected.shape)
    self.assertAllEqual(predicted, expected)

  def test_mobile_conv2d(self):
    """An all-ones 3x3 MobileConv2D on an all-ones input outputs 12."""
    conv2d = movinet_layers.MobileConv2D(
        filters=3,
        kernel_size=(3, 3),
        strides=(1, 1),
        padding='same',
        kernel_initializer='ones',
        use_bias=False,
        use_depthwise=False,
        use_temporal=False,
        use_buffered_input=True,
    )
    inputs = tf.ones([1, 2, 2, 2, 3])

    predicted = conv2d(inputs)
    expected = tf.fill([1, 2, 2, 2, 3], 12.)

    self.assertEqual(predicted.shape, expected.shape)
    self.assertAllClose(predicted, expected)

  def test_mobile_conv2d_bn(self):
    """MobileConv2D applies the supplied batch norm op to its output."""
    batch_norm_op = tf_keras.layers.BatchNormalization(
        momentum=0.9,
        epsilon=1.,
        name='bn')
    conv2d = movinet_layers.MobileConv2D(
        filters=3,
        kernel_size=(3, 3),
        strides=(1, 1),
        padding='same',
        kernel_initializer='ones',
        use_bias=False,
        use_depthwise=False,
        use_temporal=False,
        use_buffered_input=True,
        batch_norm_op=batch_norm_op,
    )
    inputs = tf.ones([1, 2, 2, 2, 3])

    predicted = conv2d(inputs)
    # NOTE(review): 8.48528 is 12 / sqrt(2); presumably the un-normalized
    # output of 12 divided by sqrt(unit moving variance + epsilon=1) --
    # confirm against the BatchNormalization defaults.
    expected = tf.fill([1, 2, 2, 2, 3], 8.48528)

    self.assertEqual(predicted.shape, expected.shape)
    self.assertAllClose(predicted, expected)

  def test_mobile_conv2d_activation(self):
    """MobileConv2D applies the supplied activation op (relu6 caps at 6)."""
    conv2d = movinet_layers.MobileConv2D(
        filters=3,
        kernel_size=(3, 3),
        strides=(1, 1),
        padding='same',
        kernel_initializer='ones',
        use_bias=False,
        use_depthwise=False,
        use_temporal=False,
        use_buffered_input=True,
        activation_op=tf.nn.relu6,
    )
    inputs = tf.ones([1, 2, 2, 2, 3])

    predicted = conv2d(inputs)
    expected = tf.fill([1, 2, 2, 2, 3], 6.)

    self.assertEqual(predicted.shape, expected.shape)
    self.assertAllClose(predicted, expected)

  def test_mobile_conv2d_temporal(self):
    """Temporal depthwise MobileConv2D on an input padded with zero frames."""
    conv2d = movinet_layers.MobileConv2D(
        filters=3,
        kernel_size=(3, 1),
        strides=(1, 1),
        padding='causal',
        kernel_initializer='ones',
        use_bias=False,
        use_depthwise=True,
        use_temporal=True,
        use_buffered_input=True,
    )
    inputs = tf.ones([1, 2, 2, 1, 3])
    # Prepend two all-zero slices on axis 1 before calling the layer.
    paddings = [[0, 0], [2, 0], [0, 0], [0, 0], [0, 0]]
    padded_inputs = tf.pad(inputs, paddings)

    predicted = conv2d(padded_inputs)
    expected = tf.constant(
        [[[[[1., 1., 1.]],
           [[1., 1., 1.]]],
          [[[2., 2., 2.]],
           [[2., 2., 2.]]]]])

    self.assertEqual(predicted.shape, expected.shape)
    self.assertAllClose(predicted, expected)

  def test_stream_buffer(self):
    """StreamBuffer + buffered-input Conv3D matches a plain causal Conv3D."""
    conv3d_stream = nn_layers.Conv3D(
        filters=3,
        kernel_size=(3, 3, 3),
        strides=(1, 2, 2),
        padding='causal',
        kernel_initializer='ones',
        use_bias=False,
        use_buffered_input=True,
    )
    buffer = movinet_layers.StreamBuffer(buffer_size=2)
    conv3d = nn_layers.Conv3D(
        filters=3,
        kernel_size=(3, 3, 3),
        strides=(1, 2, 2),
        padding='causal',
        kernel_initializer='ones',
        use_bias=False,
        use_buffered_input=False,
    )
    inputs = tf.ones([1, 4, 2, 2, 3])
    expected = conv3d(inputs)

    def buffered_conv(chunk, states):
      """Passes one chunk through the buffer, then the streaming conv."""
      buffered, states = buffer(chunk, states=states)
      return conv3d_stream(buffered), states

    for frames_per_chunk in self._FRAMES_PER_CHUNK:
      with self.subTest(frames_per_chunk=frames_per_chunk):
        outputs = self._run_in_chunks(buffered_conv, inputs, frames_per_chunk)
        predicted = tf.concat(outputs, axis=1)

        self.assertEqual(predicted.shape, expected.shape)
        self.assertAllClose(predicted, expected)
        self.assertAllClose(
            predicted,
            [[[[[12., 12., 12.]]],
              [[[24., 24., 24.]]],
              [[[36., 36., 36.]]],
              [[[36., 36., 36.]]]]])

  def test_stream_conv_block_2plus1d(self):
    """StreamConvBlock ('2plus1d') agrees with ConvBlock when streamed."""
    conv_block = movinet_layers.ConvBlock(
        filters=3,
        kernel_size=(3, 3, 3),
        strides=(1, 2, 2),
        causal=True,
        kernel_initializer='ones',
        use_bias=False,
        activation='relu',
        conv_type='2plus1d',
    )
    stream_conv_block = movinet_layers.StreamConvBlock(
        filters=3,
        kernel_size=(3, 3, 3),
        strides=(1, 2, 2),
        causal=True,
        kernel_initializer='ones',
        use_bias=False,
        activation='relu',
        conv_type='2plus1d',
    )
    inputs = tf.ones([1, 4, 2, 2, 3])
    expected = conv_block(inputs)

    # Calling the streaming block without states must match the plain block.
    predicted_disabled, _ = stream_conv_block(inputs)
    self.assertEqual(predicted_disabled.shape, expected.shape)
    self.assertAllClose(predicted_disabled, expected)

    for frames_per_chunk in self._FRAMES_PER_CHUNK:
      with self.subTest(frames_per_chunk=frames_per_chunk):
        outputs = self._run_in_chunks(
            stream_conv_block, inputs, frames_per_chunk)
        predicted = tf.concat(outputs, axis=1)

        self.assertEqual(predicted.shape, expected.shape)
        self.assertAllClose(predicted, expected)
        self.assertAllClose(
            predicted,
            [[[[[35.9640400, 35.9640400, 35.9640400]]],
              [[[71.9280700, 71.9280700, 71.9280700]]],
              [[[107.892105, 107.892105, 107.892105]]],
              [[[107.892105, 107.892105, 107.892105]]]]])

  def test_stream_conv_block_3d_2plus1d(self):
    """StreamConvBlock ('3d_2plus1d') agrees with ConvBlock when streamed."""
    conv_block = movinet_layers.ConvBlock(
        filters=3,
        kernel_size=(3, 3, 3),
        strides=(1, 2, 2),
        causal=True,
        kernel_initializer='ones',
        use_bias=False,
        activation='relu',
        conv_type='3d_2plus1d',
    )
    stream_conv_block = movinet_layers.StreamConvBlock(
        filters=3,
        kernel_size=(3, 3, 3),
        strides=(1, 2, 2),
        causal=True,
        kernel_initializer='ones',
        use_bias=False,
        activation='relu',
        conv_type='3d_2plus1d',
    )
    inputs = tf.ones([1, 4, 2, 2, 3])
    expected = conv_block(inputs)

    # Calling the streaming block without states must match the plain block.
    predicted_disabled, _ = stream_conv_block(inputs)
    self.assertEqual(predicted_disabled.shape, expected.shape)
    self.assertAllClose(predicted_disabled, expected)

    for frames_per_chunk in self._FRAMES_PER_CHUNK:
      with self.subTest(frames_per_chunk=frames_per_chunk):
        outputs = self._run_in_chunks(
            stream_conv_block, inputs, frames_per_chunk)
        predicted = tf.concat(outputs, axis=1)

        self.assertEqual(predicted.shape, expected.shape)
        self.assertAllClose(predicted, expected)
        self.assertAllClose(
            predicted,
            [[[[[35.9640400, 35.9640400, 35.9640400]]],
              [[[71.9280700, 71.9280700, 71.9280700]]],
              [[[107.892105, 107.892105, 107.892105]]],
              [[[107.892105, 107.892105, 107.892105]]]]])

  def test_stream_conv_block(self):
    """StreamConvBlock (default conv type) agrees with ConvBlock."""
    conv_block = movinet_layers.ConvBlock(
        filters=3,
        kernel_size=(3, 3, 3),
        strides=(1, 2, 2),
        causal=True,
        kernel_initializer='ones',
        use_bias=False,
        activation='relu',
    )
    stream_conv_block = movinet_layers.StreamConvBlock(
        filters=3,
        kernel_size=(3, 3, 3),
        strides=(1, 2, 2),
        causal=True,
        kernel_initializer='ones',
        use_bias=False,
        activation='relu',
    )
    inputs = tf.ones([1, 4, 2, 2, 3])
    expected = conv_block(inputs)

    # Calling the streaming block without states must match the plain block.
    predicted_disabled, _ = stream_conv_block(inputs)
    self.assertEqual(predicted_disabled.shape, expected.shape)
    self.assertAllClose(predicted_disabled, expected)

    for frames_per_chunk in self._FRAMES_PER_CHUNK:
      with self.subTest(frames_per_chunk=frames_per_chunk):
        outputs = self._run_in_chunks(
            stream_conv_block, inputs, frames_per_chunk)
        predicted = tf.concat(outputs, axis=1)

        self.assertEqual(predicted.shape, expected.shape)
        self.assertAllClose(predicted, expected)
        self.assertAllClose(
            predicted,
            [[[[[11.994005, 11.994005, 11.994005]]],
              [[[23.988010, 23.988010, 23.988010]]],
              [[[35.982014, 35.982014, 35.982014]]],
              [[[35.982014, 35.982014, 35.982014]]]]])

  def test_stream_squeeze_excitation(self):
    """Causal StreamSqueezeExcitation gives the same output when streamed."""
    se = movinet_layers.StreamSqueezeExcitation(
        3, causal=True, kernel_initializer='ones')
    inputs = self._ramp_inputs()
    expected, _ = se(inputs)

    for frames_per_chunk in self._FRAMES_PER_CHUNK:
      with self.subTest(frames_per_chunk=frames_per_chunk):
        outputs = self._run_in_chunks(se, inputs, frames_per_chunk)
        predicted = tf.concat(outputs, axis=1)

        self.assertEqual(predicted.shape, expected.shape)
        self.assertAllClose(predicted, expected, rtol=1e-5, atol=1e-5)
        self.assertAllClose(
            predicted,
            [[[[[0.9998109, 0.9998109, 0.9998109]],
               [[0.9998109, 0.9998109, 0.9998109]]],
              [[[1.9999969, 1.9999969, 1.9999969]],
               [[1.9999969, 1.9999969, 1.9999969]]],
              [[[3., 3., 3.]],
               [[3., 3., 3.]]],
              [[[4., 4., 4.]],
               [[4., 4., 4.]]]]],
            rtol=1e-5,
            atol=1e-5)

  def test_stream_squeeze_excitation_2plus3d(self):
    """'2plus3d' StreamSqueezeExcitation gives the same output streamed."""
    se = movinet_layers.StreamSqueezeExcitation(
        3,
        se_type='2plus3d',
        causal=True,
        activation='hard_swish',
        gating_activation='hard_sigmoid',
        kernel_initializer='ones')
    inputs = self._ramp_inputs()
    expected, _ = se(inputs)

    for frames_per_chunk in self._FRAMES_PER_CHUNK:
      with self.subTest(frames_per_chunk=frames_per_chunk):
        outputs = self._run_in_chunks(se, inputs, frames_per_chunk)
        predicted = tf.concat(outputs, axis=1)

        self.assertEqual(predicted.shape, expected.shape)
        self.assertAllClose(predicted, expected, atol=1e-4)
        self.assertAllClose(
            predicted,
            [[[[[1., 1., 1.]],
               [[1., 1., 1.]]],
              [[[2., 2., 2.]],
               [[2., 2., 2.]]],
              [[[3., 3., 3.]],
               [[3., 3., 3.]]],
              [[[4., 4., 4.]],
               [[4., 4., 4.]]]]],
            atol=1e-4)

  def test_stream_movinet_block(self):
    """Causal MovinetBlock gives the same output whole and streamed."""
    block = movinet_layers.MovinetBlock(
        out_filters=3,
        expand_filters=6,
        kernel_size=(3, 3, 3),
        strides=(1, 2, 2),
        causal=True,
    )
    inputs = self._ramp_inputs()
    expected, _ = block(inputs)

    for frames_per_chunk in self._FRAMES_PER_CHUNK:
      with self.subTest(frames_per_chunk=frames_per_chunk):
        outputs = self._run_in_chunks(block, inputs, frames_per_chunk)
        predicted = tf.concat(outputs, axis=1)

        self.assertEqual(predicted.shape, expected.shape)
        self.assertAllClose(predicted, expected)

  def test_stream_movinet_block_none_se(self):
    """MovinetBlock with se_type='none' streams and only keeps buffer state."""
    block = movinet_layers.MovinetBlock(
        out_filters=3,
        expand_filters=6,
        kernel_size=(3, 3, 3),
        strides=(1, 2, 2),
        causal=True,
        se_type='none',
        state_prefix='test',
    )
    inputs = self._ramp_inputs()
    expected, expected_states = block(inputs)

    for frames_per_chunk in self._FRAMES_PER_CHUNK:
      with self.subTest(frames_per_chunk=frames_per_chunk):
        outputs = self._run_in_chunks(block, inputs, frames_per_chunk)
        predicted = tf.concat(outputs, axis=1)

        self.assertEqual(predicted.shape, expected.shape)
        self.assertAllClose(predicted, expected)

    # The states come from the full-input call, so this check does not depend
    # on the chunking and is made once.
    self.assertAllEqual(list(expected_states.keys()), ['test_stream_buffer'])

  def test_stream_classifier_head(self):
    """ClassifierHead on the last streamed Head output matches the full run."""
    head = movinet_layers.Head(project_filters=5)
    classifier_head = movinet_layers.ClassifierHead(
        head_filters=10, num_classes=4)
    inputs = self._ramp_inputs()

    full_output, _ = head(inputs)
    expected = classifier_head(full_output)

    for frames_per_chunk in self._FRAMES_PER_CHUNK:
      with self.subTest(frames_per_chunk=frames_per_chunk):
        outputs = self._run_in_chunks(head, inputs, frames_per_chunk)
        # Only the Head output for the final chunk is classified.
        predicted = classifier_head(outputs[-1])

        self.assertEqual(predicted.shape, expected.shape)
        self.assertAllClose(predicted, expected)
if __name__ == '__main__':
  # Run the test cases in this module when the file is executed as a script.
  tf.test.main()