# Spaces: Runtime error (page-banner residue captured with this file; not part of the module)
# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for movinet.py."""
from absl.testing import parameterized | |
import tensorflow as tf, tf_keras | |
from official.projects.movinet.modeling import movinet | |
class MoViNetTest(parameterized.TestCase, tf.test.TestCase):
  """Tests for the `movinet.Movinet` backbone.

  Every test builds a causal network with `model_id='a0'`. The tests cover
  endpoint shapes, agreement between streaming (frame-by-frame) and
  whole-clip inference for several configurations, and a config round trip.
  """

  def _assert_a0_endpoint_shapes(self, endpoints):
    """Asserts the endpoint shapes expected for a [1, 8, 128, 128, 3] input.

    Args:
      endpoints: Mapping from endpoint name to the corresponding output, as
        returned in the first element of the backbone call.
    """
    expected_shapes = (
        ('stem', [1, 8, 64, 64, 8]),
        ('block0_layer0', [1, 8, 32, 32, 8]),
        ('block1_layer0', [1, 8, 16, 16, 32]),
        ('block2_layer0', [1, 8, 8, 8, 56]),
        ('block3_layer0', [1, 8, 8, 8, 56]),
        ('block4_layer0', [1, 8, 4, 4, 104]),
        ('head', [1, 1, 1, 1, 480]),
    )
    for name, shape in expected_shapes:
      self.assertAllEqual(
          endpoints[name].shape, shape,
          msg=f'Unexpected shape for endpoint {name!r}')

  def _assert_stream_matches_batch(self, backbone):
    """Checks frame-by-frame inference against a single whole-clip call.

    A [1, 5, 128, 128, 3] tensor of ones is passed through `backbone` once as
    a whole, and once split along axis 1 into single-frame inputs while the
    states returned by each call are fed into the next. The 'head' output of
    the last streamed call must match the whole-clip 'head' output averaged
    over axis 1.

    Args:
      backbone: Network under test. It must provide `init_states` and accept
        a dict of states plus an 'image' entry, returning
        `(endpoints, states)`.

    Returns:
      The initial states dict produced by `backbone.init_states`, so callers
      can run further checks on it.
    """
    inputs = tf.ones([1, 5, 128, 128, 3])
    init_states = backbone.init_states(tf.shape(inputs))

    # Reference result: the whole clip in one call.
    expected_endpoints, _ = backbone({**init_states, 'image': inputs})

    # Streaming result: one frame per call, threading the states through.
    states = init_states
    for frame in tf.split(inputs, inputs.shape[1], axis=1):
      predicted_endpoints, states = backbone({**states, 'image': frame})
    predicted = predicted_endpoints['head']

    # The expected final output is simply the mean across frames.
    expected = tf.reduce_mean(expected_endpoints['head'], 1, keepdims=True)

    self.assertEqual(predicted.shape, expected.shape)
    self.assertAllClose(predicted, expected, 1e-5, 1e-5)
    return init_states

  def test_network_creation(self):
    """Test creation of MoViNet family models."""
    tf_keras.backend.set_image_data_format('channels_last')

    network = movinet.Movinet(
        model_id='a0',
        causal=True,
    )
    inputs = tf_keras.Input(shape=(8, 128, 128, 3), batch_size=1)
    endpoints, states = network(inputs)

    self._assert_a0_endpoint_shapes(endpoints)
    self.assertNotEmpty(states)

  def test_network_with_states(self):
    """Test creation of MoViNet family models with states."""
    tf_keras.backend.set_image_data_format('channels_last')

    backbone = movinet.Movinet(
        model_id='a0',
        causal=True,
        use_external_states=True,
    )
    inputs = tf.ones([1, 8, 128, 128, 3])

    init_states = backbone.init_states(tf.shape(inputs))
    endpoints, new_states = backbone({**init_states, 'image': inputs})

    self._assert_a0_endpoint_shapes(endpoints)
    self.assertNotEmpty(init_states)
    self.assertNotEmpty(new_states)

  def test_movinet_stream(self):
    """Test if the backbone can be run in streaming mode."""
    tf_keras.backend.set_image_data_format('channels_last')

    backbone = movinet.Movinet(
        model_id='a0',
        causal=True,
        use_external_states=True,
    )
    self._assert_stream_matches_batch(backbone)

  def test_movinet_stream_nse(self):
    """Test if the backbone can be run in streaming mode w/o SE layer."""
    tf_keras.backend.set_image_data_format('channels_last')

    backbone = movinet.Movinet(
        model_id='a0',
        causal=True,
        use_external_states=True,
        se_type='none',
    )
    init_states = self._assert_stream_matches_batch(backbone)

    # Check contents in the states dictionary.
    state_keys = list(init_states.keys())
    self.assertIn('state_head_pool_buffer', state_keys)
    self.assertIn('state_head_pool_frame_count', state_keys)
    state_keys.remove('state_head_pool_buffer')
    state_keys.remove('state_head_pool_frame_count')
    # From now on, there are only 'stream_buffer' for the convolutions.
    for state_key in state_keys:
      self.assertIn(
          'stream_buffer', state_key,
          msg=f'Expecting stream_buffer only, found {state_key}')

  def test_movinet_2plus1d_stream(self):
    """Test streaming mode with `conv_type='2plus1d'`."""
    tf_keras.backend.set_image_data_format('channels_last')

    backbone = movinet.Movinet(
        model_id='a0',
        causal=True,
        conv_type='2plus1d',
        use_external_states=True,
    )
    self._assert_stream_matches_batch(backbone)

  def test_movinet_3d_2plus1d_stream(self):
    """Test streaming mode with `conv_type='3d_2plus1d'`."""
    tf_keras.backend.set_image_data_format('channels_last')

    backbone = movinet.Movinet(
        model_id='a0',
        causal=True,
        conv_type='3d_2plus1d',
        use_external_states=True,
    )
    self._assert_stream_matches_batch(backbone)

  def test_serialize_deserialize(self):
    """Test that a network rebuilt from its config reports the same config."""
    # Create a network object with an explicit set of config options.
    kwargs = dict(
        model_id='a0',
        causal=True,
        use_positional_encoding=True,
        use_external_states=True,
    )
    network = movinet.Movinet(**kwargs)

    # Create another network object from the first object's config.
    new_network = movinet.Movinet.from_config(network.get_config())

    # Validate that the config can be forced to JSON.
    _ = new_network.to_json()

    # If the serialization was successful, the new config should match the old.
    self.assertAllEqual(network.get_config(), new_network.get_config())
if __name__ == '__main__':
  # Run the test cases in this module through TensorFlow's test runner when
  # the file is executed as a script.
  tf.test.main()