# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for nn_blocks.""" | |
from typing import Any, Iterable, Tuple | |
# Import libraries | |
from absl.testing import parameterized | |
import numpy as np | |
import tensorflow as tf, tf_keras | |
from tensorflow.python.distribute import combinations | |
from tensorflow.python.distribute import strategy_combinations | |
from official.vision.modeling.layers import nn_blocks | |
from official.vision.modeling.layers import nn_layers | |


def distribution_strategy_combinations() -> Iterable[Tuple[Any, ...]]:
  """Returns the combinations of end-to-end tests to run."""
  return combinations.combine(
      distribution=[
          strategy_combinations.default_strategy,
          strategy_combinations.cloud_tpu_strategy,
          strategy_combinations.one_device_strategy_gpu,
      ],
  )


class NNBlocksTest(parameterized.TestCase, tf.test.TestCase):
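
  # NOTE: hypothetical parameterization, restored so the test method below is
  # runnable; each tuple is (block_fn, strides, use_projection,
  # stochastic_depth_drop_rate, se_ratio). The upstream values may differ.
  @parameterized.parameters(
      (nn_blocks.ResidualBlock, 1, False, 0.0, None),
      (nn_blocks.ResidualBlock, 2, True, 0.2, 0.25),
  )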
  def test_residual_block_creation(self, block_fn, strides, use_projection,
                                   stochastic_depth_drop_rate, se_ratio):
    input_size = 128
    filter_size = 256
    inputs = tf_keras.Input(
        shape=(input_size, input_size, filter_size), batch_size=1)
    block = block_fn(
        filter_size,
        strides,
        use_projection=use_projection,
        se_ratio=se_ratio,
        stochastic_depth_drop_rate=stochastic_depth_drop_rate,
    )

    features = block(inputs)

    self.assertAllEqual(
        [1, input_size // strides, input_size // strides, filter_size],
        features.shape.as_list())

  def test_layerscale_call(self):
    # Set up test inputs
    input_shape = (2, 3, 4)
    init_values = 1e-4
    inputs = tf.ones(input_shape, dtype=tf.float32)

    # Instantiate LayerScale object
    layer_scale = nn_blocks.LayerScale(init_values)

    # Call LayerScale object on test inputs
    output = layer_scale(inputs)

    # Check output shape
    expected_output_shape = input_shape
    self.assertAllEqual(output.shape, expected_output_shape)

    # Check that output values are correct
    expected_output_values = init_values * np.ones(input_shape)
    self.assertAllClose(
        output.numpy(), expected_output_values, rtol=1e-5, atol=1e-5)

  def test_layerscale_training(self):
    # Verify that gamma values have changed from their initial values after a
    # single training step.

    # Set up test inputs
    input_shape = (1, 3, 4)
    init_values = 1e-4
    inputs = tf.ones(input_shape, dtype=tf.float32)
    targets = tf.ones(input_shape, dtype=tf.float32)

    # Instantiate LayerScale object
    layer_scale = nn_blocks.LayerScale(init_values)

    # Define optimizer and loss function
    optimizer = tf_keras.optimizers.Adam()
    loss_fn = tf_keras.losses.MeanSquaredError()

    # Train the model for one step
    with tf.GradientTape() as tape:
      predictions = layer_scale(inputs)
      loss = loss_fn(targets, predictions)
    grads = tape.gradient(loss, layer_scale.trainable_variables)
    optimizer.apply_gradients(zip(grads, layer_scale.trainable_variables))

    # Check that gamma values have changed
    updated_gamma = layer_scale.gamma.numpy()[0, 0, 0]
    self.assertNotEqual(updated_gamma, init_values)
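
  # NOTE: hypothetical parameterization (block_fn, strides, use_projection,
  # stochastic_depth_drop_rate, se_ratio); the upstream values may differ.
  @parameterized.parameters(
      (nn_blocks.BottleneckBlock, 1, False, 0.0, None),
      (nn_blocks.BottleneckBlock, 2, True, 0.2, 0.25),
  )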
  def test_bottleneck_block_creation(self, block_fn, strides, use_projection,
                                     stochastic_depth_drop_rate, se_ratio):
    input_size = 128
    filter_size = 256
    inputs = tf_keras.Input(
        shape=(input_size, input_size, filter_size * 4), batch_size=1)
    block = block_fn(
        filter_size,
        strides,
        use_projection=use_projection,
        se_ratio=se_ratio,
        stochastic_depth_drop_rate=stochastic_depth_drop_rate)

    features = block(inputs)

    self.assertAllEqual(
        [1, input_size // strides, input_size // strides, filter_size * 4],
        features.shape.as_list())
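
  # NOTE: hypothetical parameterization (block_fn, expand_ratio, strides,
  # se_ratio, stochastic_depth_drop_rate); the upstream values may differ.
  @parameterized.parameters(
      (nn_blocks.InvertedBottleneckBlock, 1, 1, None, None),
      (nn_blocks.InvertedBottleneckBlock, 6, 1, None, None),
      (nn_blocks.InvertedBottleneckBlock, 1, 2, None, None),
      (nn_blocks.InvertedBottleneckBlock, 1, 1, 0.2, None),
      (nn_blocks.InvertedBottleneckBlock, 1, 1, None, 0.2),
  )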
  def test_invertedbottleneck_block_creation(self, block_fn, expand_ratio,
                                             strides, se_ratio,
                                             stochastic_depth_drop_rate):
    input_size = 128
    in_filters = 24
    out_filters = 40
    inputs = tf_keras.Input(
        shape=(input_size, input_size, in_filters), batch_size=1)
    block = block_fn(
        in_filters=in_filters,
        out_filters=out_filters,
        expand_ratio=expand_ratio,
        strides=strides,
        se_ratio=se_ratio,
        stochastic_depth_drop_rate=stochastic_depth_drop_rate)

    features = block(inputs)

    self.assertAllEqual(
        [1, input_size // strides, input_size // strides, out_filters],
        features.shape.as_list())
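
  # NOTE: hypothetical parameterization (block_fn, strides,
  # input_compression_ratio, output_compression_ratio); the upstream values
  # may differ.
  @parameterized.parameters(
      (nn_blocks.TuckerConvBlock, 1, 0.25, 0.25),
      (nn_blocks.TuckerConvBlock, 2, 0.25, 0.25),
  )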
  def test_tucker_conv_block(self, block_fn, strides, input_compression_ratio,
                             output_compression_ratio):
    input_size = 128
    in_filters = 24
    out_filters = 24
    inputs = tf_keras.Input(
        shape=(input_size, input_size, in_filters), batch_size=1)
    block = block_fn(
        in_filters=in_filters,
        out_filters=out_filters,
        input_compression_ratio=input_compression_ratio,
        output_compression_ratio=output_compression_ratio,
        strides=strides)

    features = block(inputs)

    self.assertAllEqual(
        [1, input_size // strides, input_size // strides, out_filters],
        features.shape.as_list())


class ResidualInnerTest(parameterized.TestCase, tf.test.TestCase):
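
  # Run the shape check under each distribution strategy returned by
  # distribution_strategy_combinations(); restored so `distribution` is
  # supplied to the test method.
  @combinations.generate(distribution_strategy_combinations())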
  def test_shape(self, distribution):
    bsz, h, w, c = 8, 32, 32, 32
    filters = 64
    strides = 2

    input_tensor = tf.random.uniform(shape=[bsz, h, w, c])
    with distribution.scope():
      test_layer = nn_blocks.ResidualInner(filters, strides)
      output = test_layer(input_tensor)
      expected_output_shape = [bsz, h // strides, w // strides, filters]
      self.assertEqual(expected_output_shape, output.shape.as_list())


class BottleneckResidualInnerTest(parameterized.TestCase, tf.test.TestCase):
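
  # Run under each distribution strategy returned by
  # distribution_strategy_combinations().
  @combinations.generate(distribution_strategy_combinations())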
  def test_shape(self, distribution):
    bsz, h, w, c = 8, 32, 32, 32
    filters = 64
    strides = 2

    input_tensor = tf.random.uniform(shape=[bsz, h, w, c])
    with distribution.scope():
      test_layer = nn_blocks.BottleneckResidualInner(filters, strides)
      output = test_layer(input_tensor)
      expected_output_shape = [bsz, h // strides, w // strides, filters * 4]
      self.assertEqual(expected_output_shape, output.shape.as_list())


class DepthwiseSeparableConvBlockTest(parameterized.TestCase, tf.test.TestCase):
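
  # Run under each distribution strategy returned by
  # distribution_strategy_combinations().
  @combinations.generate(distribution_strategy_combinations())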
  def test_shape(self, distribution):
    batch_size, height, width, num_channels = 8, 32, 32, 32
    num_filters = 64
    strides = 2

    input_tensor = tf.random.normal(
        shape=[batch_size, height, width, num_channels])
    with distribution.scope():
      block = nn_blocks.DepthwiseSeparableConvBlock(
          num_filters, strides=strides)
      config_dict = block.get_config()
      recreate_block = nn_blocks.DepthwiseSeparableConvBlock(**config_dict)

      output_tensor = block(input_tensor)
      expected_output_shape = [
          batch_size, height // strides, width // strides, num_filters
      ]
      self.assertEqual(output_tensor.shape.as_list(), expected_output_shape)

      output_tensor = recreate_block(input_tensor)
      self.assertEqual(output_tensor.shape.as_list(), expected_output_shape)


class ReversibleLayerTest(parameterized.TestCase, tf.test.TestCase):
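
  # Run under each distribution strategy returned by
  # distribution_strategy_combinations().
  @combinations.generate(distribution_strategy_combinations())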
  def test_downsampling_non_reversible_step(self, distribution):
    bsz, h, w, c = 8, 32, 32, 32
    filters = 64
    strides = 2

    input_tensor = tf.random.uniform(shape=[bsz, h, w, c])
    with distribution.scope():
      f = nn_blocks.ResidualInner(
          filters=filters // 2, strides=strides, batch_norm_first=True)
      g = nn_blocks.ResidualInner(
          filters=filters // 2, strides=1, batch_norm_first=True)
      test_layer = nn_blocks.ReversibleLayer(f, g)
      test_layer.build(input_tensor.shape)
      optimizer = tf_keras.optimizers.SGD(learning_rate=0.01)

    def step_fn():
      with tf.GradientTape() as tape:
        output = test_layer(input_tensor, training=True)
      grads = tape.gradient(output, test_layer.trainable_variables)
      # Check that applying gradients with the optimizer works.
      optimizer.apply_gradients(zip(grads, test_layer.trainable_variables))
      return output

    replica_output = distribution.run(step_fn)
    outputs = distribution.experimental_local_results(replica_output)

    # Assert forward pass shape.
    expected_output_shape = [bsz, h // strides, w // strides, filters]
    for output in outputs:
      self.assertEqual(expected_output_shape, output.shape.as_list())
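
  # Run under each distribution strategy returned by
  # distribution_strategy_combinations().
  @combinations.generate(distribution_strategy_combinations())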
  def test_reversible_step(self, distribution):
    # Reversible layers satisfy: (a) strides = 1, (b) in_filter = out_filter.
    bsz, h, w, c = 8, 32, 32, 32
    filters = c
    strides = 1

    input_tensor = tf.random.uniform(shape=[bsz, h, w, c])
    with distribution.scope():
      f = nn_blocks.ResidualInner(
          filters=filters // 2, strides=strides, batch_norm_first=False)
      g = nn_blocks.ResidualInner(
          filters=filters // 2, strides=1, batch_norm_first=False)
      test_layer = nn_blocks.ReversibleLayer(f, g)
      test_layer(input_tensor, training=False)  # init weights
      optimizer = tf_keras.optimizers.SGD(learning_rate=0.01)

    def step_fn():
      with tf.GradientTape() as tape:
        output = test_layer(input_tensor, training=True)
      grads = tape.gradient(output, test_layer.trainable_variables)
      # Check that applying gradients with the optimizer works.
      optimizer.apply_gradients(zip(grads, test_layer.trainable_variables))
      return output

    def fwd():
      test_layer(input_tensor)

    distribution.run(fwd)  # Initialize variables.
    prev_variables = tf.identity_n(test_layer.trainable_variables)
    replica_output = distribution.run(step_fn)
    outputs = distribution.experimental_local_results(replica_output)

    # Assert that the variable values have changed.
    for v0, v1 in zip(prev_variables, test_layer.trainable_variables):
      self.assertNotAllEqual(v0, v1)

    # Assert forward pass shape.
    expected_output_shape = [bsz, h // strides, w // strides, filters]
    for output in outputs:
      self.assertEqual(expected_output_shape, output.shape.as_list())
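
  # Run under each distribution strategy returned by
  # distribution_strategy_combinations().
  @combinations.generate(distribution_strategy_combinations())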
  def test_manual_gradients_correctness(self, distribution):
    bsz, h, w, c = 8, 32, 32, 32
    filters = c
    strides = 1

    input_tensor = tf.random.uniform(shape=[bsz, h, w, c * 4])  # bottleneck
    with distribution.scope():
      f_manual = nn_blocks.BottleneckResidualInner(
          filters=filters // 2, strides=strides, batch_norm_first=False)
      g_manual = nn_blocks.BottleneckResidualInner(
          filters=filters // 2, strides=1, batch_norm_first=False)
      manual_grad_layer = nn_blocks.ReversibleLayer(f_manual, g_manual)
      manual_grad_layer(input_tensor, training=False)  # init weights

      f_auto = nn_blocks.BottleneckResidualInner(
          filters=filters // 2, strides=strides, batch_norm_first=False)
      g_auto = nn_blocks.BottleneckResidualInner(
          filters=filters // 2, strides=1, batch_norm_first=False)
      auto_grad_layer = nn_blocks.ReversibleLayer(
          f_auto, g_auto, manual_grads=False)
      auto_grad_layer(input_tensor)  # init weights
      # Clone all weights (tf_keras.layers.Layer has no .clone()).
      auto_grad_layer._f.set_weights(manual_grad_layer._f.get_weights())
      auto_grad_layer._g.set_weights(manual_grad_layer._g.get_weights())

    def manual_fn():
      with tf.GradientTape() as tape:
        output = manual_grad_layer(input_tensor, training=True)
      grads = tape.gradient(output, manual_grad_layer.trainable_variables)
      return grads

    def auto_fn():
      with tf.GradientTape() as tape:
        output = auto_grad_layer(input_tensor, training=True)
      grads = tape.gradient(output, auto_grad_layer.trainable_variables)
      return grads

    manual_grads = distribution.run(manual_fn)
    auto_grads = distribution.run(auto_fn)

    # Assert that the manually computed gradients are close to those from
    # automatic differentiation.
    for manual_grad, auto_grad in zip(manual_grads, auto_grads):
      self.assertAllClose(
          distribution.experimental_local_results(manual_grad),
          distribution.experimental_local_results(auto_grad),
          atol=5e-3,
          rtol=5e-3)

    # Verify that the BN moving mean and variance are correct.
    for manual_var, auto_var in zip(manual_grad_layer.non_trainable_variables,
                                    auto_grad_layer.non_trainable_variables):
      self.assertAllClose(manual_var, auto_var)


# Test class that wraps a standard attention layer. If this layer is called
# at any point, the list passed to the config object will be filled with a
# boolean 'True'. We register this class as a Keras serializable so we can
# test serialization below.
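# NOTE: the registration decorator below is assumed from the comment above;
# the package name 'TestOnlyAttention' is illustrative. It is needed for the
# from_config restoration tests further down.
@tf_keras.utils.register_keras_serializable(package='TestOnlyAttention')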
class ValidatedAttentionLayer(nn_layers.MultiHeadAttention):

  def __init__(self, call_list, **kwargs):
    super(ValidatedAttentionLayer, self).__init__(**kwargs)
    self.list = call_list

  def call(
      self,
      query,
      value,
      attention_mask=None,
      return_attention_scores=False,
  ):
    self.list.append(True)
    return super(ValidatedAttentionLayer, self).call(
        query,
        value,
        attention_mask=attention_mask,
        return_attention_scores=return_attention_scores)

  def get_config(self):
    config = super(ValidatedAttentionLayer, self).get_config()
    config['call_list'] = self.list
    return config


# Test class that implements a simple feedforward layer. If this layer is
# called at any point, the list passed to the config object will be filled
# with a boolean 'True'. We register this class as a Keras serializable so we
# can test serialization below.
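# NOTE: as above, the registration decorator is assumed from the comment; the
# package name 'TestOnlyFeedforward' is illustrative.
@tf_keras.utils.register_keras_serializable(package='TestOnlyFeedforward')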
class ValidatedFeedforwardLayer(tf_keras.layers.Layer):

  def __init__(self, call_list, activation, **kwargs):
    super(ValidatedFeedforwardLayer, self).__init__(**kwargs)
    self.list = call_list
    self.activation = activation

  def build(self, input_shape):
    hidden_size = input_shape[-1]
    self._feedforward_dense = tf_keras.layers.EinsumDense(
        '...x,xy->...y',
        output_shape=hidden_size,
        bias_axes='y',
        activation=self.activation,
        name='feedforward')

  def call(self, inputs):
    self.list.append(True)
    return self._feedforward_dense(inputs)

  def get_config(self):
    config = super(ValidatedFeedforwardLayer, self).get_config()
    config['call_list'] = []
    config['activation'] = self.activation
    return config


class TransformerLayerTest(tf.test.TestCase, parameterized.TestCase):

  def tearDown(self):
    super(TransformerLayerTest, self).tearDown()
    tf_keras.mixed_precision.set_global_policy('float32')
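
  # NOTE: assumed values for max_attention_inference_parallelism, restored so
  # the parameterized test below is runnable; the upstream values may differ.
  @parameterized.parameters(None, 2)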
  def test_layer_creation(self, max_attention_inference_parallelism):
    sequence_length = 21
    width = 80

    attention_layer_cfg = {
        'num_heads': 10,
        'key_dim': 8,
        'call_list': []
    }
    test_layer = nn_blocks.TransformerScaffold(
        attention_cls=ValidatedAttentionLayer,
        attention_cfg=attention_layer_cfg,
        num_attention_heads=10,
        inner_dim=2048,
        inner_activation='relu',
        max_attention_inference_parallelism=max_attention_inference_parallelism,
    )

    # Create a 3-dimensional input (the first dimension is implicit).
    data_tensor = tf_keras.Input(shape=(sequence_length, width))
    output_tensor = test_layer(data_tensor)
    # The default output of a transformer layer should be the same as the
    # input.
    self.assertEqual(data_tensor.shape.as_list(), output_tensor.shape.as_list())

    call_list = test_layer._attention_layer.get_config()['call_list']
    # If call_list[0] exists and is True, the passed layer class was
    # instantiated from the given config properly.
    self.assertNotEmpty(call_list)
    self.assertTrue(call_list[0], "The passed layer class wasn't instantiated.")

  def test_layer_creation_with_feedforward_cls(self):
    sequence_length = 21
    width = 80

    call_list = []
    attention_layer_cfg = {
        'num_heads': 10,
        'key_dim': 8,
        'call_list': call_list,
    }
    feedforward_call_list = []
    feedforward_layer_cfg = {
        'activation': 'relu',
        'call_list': feedforward_call_list,
    }
    test_layer = nn_blocks.TransformerScaffold(
        attention_cls=ValidatedAttentionLayer,
        attention_cfg=attention_layer_cfg,
        feedforward_cls=ValidatedFeedforwardLayer,
        feedforward_cfg=feedforward_layer_cfg,
        num_attention_heads=10,
        inner_dim=None,
        inner_activation=None)

    # Create a 3-dimensional input (the first dimension is implicit).
    data_tensor = tf_keras.Input(shape=(sequence_length, width))
    output_tensor = test_layer(data_tensor)
    # The default output of a transformer layer should be the same as the
    # input.
    self.assertEqual(data_tensor.shape.as_list(), output_tensor.shape.as_list())

    # If call_list[0] exists and is True, the passed layer class was
    # instantiated from the given config properly.
    self.assertNotEmpty(call_list)
    self.assertTrue(call_list[0], "The passed layer class wasn't instantiated.")
    self.assertNotEmpty(feedforward_call_list)
    self.assertTrue(feedforward_call_list[0],
                    "The passed layer class wasn't instantiated.")

  def test_layer_creation_with_mask(self):
    sequence_length = 21
    width = 80

    call_list = []
    attention_layer_cfg = {
        'num_heads': 10,
        'key_dim': 8,
        'call_list': call_list,
    }
    test_layer = nn_blocks.TransformerScaffold(
        attention_cls=ValidatedAttentionLayer,
        attention_cfg=attention_layer_cfg,
        num_attention_heads=10,
        inner_dim=2048,
        inner_activation='relu')

    # Create a 3-dimensional input (the first dimension is implicit).
    data_tensor = tf_keras.Input(shape=(sequence_length, width))
    # Create a 2-dimensional input (the first dimension is implicit).
    mask_tensor = tf_keras.Input(shape=(sequence_length, sequence_length))
    output_tensor = test_layer([data_tensor, mask_tensor])
    # The default output of a transformer layer should be the same as the
    # input.
    self.assertEqual(data_tensor.shape.as_list(), output_tensor.shape.as_list())

    # If call_list[0] exists and is True, the passed layer class was
    # instantiated from the given config properly.
    self.assertNotEmpty(call_list)
    self.assertTrue(call_list[0], "The passed layer class wasn't instantiated.")
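
  # NOTE: assumed values for max_attention_inference_parallelism, restored so
  # the parameterized test below is runnable; the upstream values may differ.
  @parameterized.parameters(None, 2)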
  def test_layer_invocation(self, max_attention_inference_parallelism):
    sequence_length = 21
    width = 80

    attention_layer_cfg = {
        'num_heads': 10,
        'key_dim': 8,
        'call_list': [],
    }
    test_layer = nn_blocks.TransformerScaffold(
        attention_cls=ValidatedAttentionLayer,
        attention_cfg=attention_layer_cfg,
        num_attention_heads=10,
        inner_dim=2048,
        inner_activation='relu',
        max_attention_inference_parallelism=max_attention_inference_parallelism)

    # Create a 3-dimensional input (the first dimension is implicit).
    data_tensor = tf_keras.Input(shape=(sequence_length, width))
    output_tensor = test_layer(data_tensor)

    # Create a model from the test layer.
    model = tf_keras.Model(data_tensor, output_tensor)

    # Invoke the model on test data. We can't validate the output data itself
    # (the NN is too complex) but this will rule out structural runtime errors.
    batch_size = 6
    input_data = 10 * np.random.random_sample(
        (batch_size, sequence_length, width))
    _ = model.predict(input_data)

    call_list = test_layer._attention_layer.get_config()['call_list']
    # If call_list[0] exists and is True, the passed layer class was
    # instantiated from the given config properly.
    self.assertNotEmpty(call_list)
    self.assertTrue(call_list[0], "The passed layer class wasn't instantiated.")

  def test_layer_invocation_with_feedforward_cls(self):
    sequence_length = 21
    width = 80

    call_list = []
    attention_layer_cfg = {
        'num_heads': 10,
        'key_dim': 8,
        'call_list': call_list,
    }
    feedforward_call_list = []
    feedforward_layer_cfg = {
        'activation': 'relu',
        'call_list': feedforward_call_list,
    }
    feedforward_layer = ValidatedFeedforwardLayer(**feedforward_layer_cfg)
    test_layer = nn_blocks.TransformerScaffold(
        attention_cls=ValidatedAttentionLayer,
        attention_cfg=attention_layer_cfg,
        feedforward_cls=feedforward_layer,
        num_attention_heads=10,
        inner_dim=None,
        inner_activation=None)

    # Create a 3-dimensional input (the first dimension is implicit).
    data_tensor = tf_keras.Input(shape=(sequence_length, width))
    # Create a 2-dimensional input (the first dimension is implicit).
    mask_tensor = tf_keras.Input(shape=(sequence_length, sequence_length))
    output_tensor = test_layer([data_tensor, mask_tensor])

    # Create a model from the test layer.
    model = tf_keras.Model([data_tensor, mask_tensor], output_tensor)

    # Invoke the model on test data. We can't validate the output data itself
    # (the NN is too complex) but this will rule out structural runtime errors.
    batch_size = 6
    input_data = 10 * np.random.random_sample(
        (batch_size, sequence_length, width))
    # The attention mask should be of shape (batch, from_seq_len, to_seq_len),
    # which here is (batch, sequence_length, sequence_length).
    mask_data = np.random.randint(
        2, size=(batch_size, sequence_length, sequence_length))
    _ = model.predict([input_data, mask_data])

    # If call_list[0] exists and is True, the passed layer class was
    # instantiated from the given config properly.
    self.assertNotEmpty(call_list)
    self.assertTrue(call_list[0], "The passed layer class wasn't instantiated.")
    self.assertNotEmpty(feedforward_call_list)
    self.assertTrue(feedforward_call_list[0],
                    "The passed layer class wasn't instantiated.")

  def test_layer_invocation_with_mask(self):
    sequence_length = 21
    width = 80

    call_list = []
    attention_layer_cfg = {
        'num_heads': 10,
        'key_dim': 8,
        'call_list': call_list,
    }
    test_layer = nn_blocks.TransformerScaffold(
        attention_cls=ValidatedAttentionLayer,
        attention_cfg=attention_layer_cfg,
        num_attention_heads=10,
        inner_dim=2048,
        inner_activation='relu')

    # Create a 3-dimensional input (the first dimension is implicit).
    data_tensor = tf_keras.Input(shape=(sequence_length, width))
    # Create a 2-dimensional input (the first dimension is implicit).
    mask_tensor = tf_keras.Input(shape=(sequence_length, sequence_length))
    output_tensor = test_layer([data_tensor, mask_tensor])

    # Create a model from the test layer.
    model = tf_keras.Model([data_tensor, mask_tensor], output_tensor)

    # Invoke the model on test data. We can't validate the output data itself
    # (the NN is too complex) but this will rule out structural runtime errors.
    batch_size = 6
    input_data = 10 * np.random.random_sample(
        (batch_size, sequence_length, width))
    # The attention mask should be of shape (batch, from_seq_len, to_seq_len),
    # which here is (batch, sequence_length, sequence_length).
    mask_data = np.random.randint(
        2, size=(batch_size, sequence_length, sequence_length))
    _ = model.predict([input_data, mask_data])

    # If call_list[0] exists and is True, the passed layer class was
    # instantiated from the given config properly.
    self.assertNotEmpty(call_list)
    self.assertTrue(call_list[0], "The passed layer class wasn't instantiated.")

  def test_layer_invocation_with_float16_dtype(self):
    tf_keras.mixed_precision.set_global_policy('mixed_float16')
    sequence_length = 21
    width = 80

    call_list = []
    attention_layer_cfg = {
        'num_heads': 10,
        'key_dim': 8,
        'call_list': call_list,
    }
    test_layer = nn_blocks.TransformerScaffold(
        attention_cls=ValidatedAttentionLayer,
        attention_cfg=attention_layer_cfg,
        num_attention_heads=10,
        inner_dim=2048,
        inner_activation='relu')

    # Create a 3-dimensional input (the first dimension is implicit).
    data_tensor = tf_keras.Input(shape=(sequence_length, width))
    # Create a 2-dimensional input (the first dimension is implicit).
    mask_tensor = tf_keras.Input(shape=(sequence_length, sequence_length))
    output_tensor = test_layer([data_tensor, mask_tensor])

    # Create a model from the test layer.
    model = tf_keras.Model([data_tensor, mask_tensor], output_tensor)

    # Invoke the model on test data. We can't validate the output data itself
    # (the NN is too complex) but this will rule out structural runtime errors.
    batch_size = 6
    input_data = (10 * np.random.random_sample(
        (batch_size, sequence_length, width)))
    # The attention mask should be of shape (batch, from_seq_len, to_seq_len),
    # which here is (batch, sequence_length, sequence_length).
    mask_data = np.random.randint(
        2, size=(batch_size, sequence_length, sequence_length))
    _ = model.predict([input_data, mask_data])

    # If call_list[0] exists and is True, the passed layer class was
    # instantiated from the given config properly.
    self.assertNotEmpty(call_list)
    self.assertTrue(call_list[0], "The passed layer class wasn't instantiated.")

  def test_transform_with_initializer(self):
    sequence_length = 21
    width = 80

    call_list = []
    attention_layer_cfg = {
        'num_heads': 10,
        'key_dim': 8,
        'call_list': call_list,
    }
    test_layer = nn_blocks.TransformerScaffold(
        attention_cls=ValidatedAttentionLayer,
        attention_cfg=attention_layer_cfg,
        num_attention_heads=10,
        inner_dim=2048,
        inner_activation='relu',
        kernel_initializer=tf_keras.initializers.TruncatedNormal(stddev=0.02))

    # Create a 3-dimensional input (the first dimension is implicit).
    data_tensor = tf_keras.Input(shape=(sequence_length, width))
    output = test_layer(data_tensor)
    # The default output of a transformer layer should be the same as the
    # input.
    self.assertEqual(data_tensor.shape.as_list(), output.shape.as_list())

    # If call_list[0] exists and is True, the passed layer class was
    # instantiated from the given config properly.
    self.assertNotEmpty(call_list)
    self.assertTrue(call_list[0])

  def test_layer_restoration_from_config(self):
    sequence_length = 21
    width = 80

    call_list = []
    attention_layer_cfg = {
        'num_heads': 10,
        'key_dim': 8,
        'call_list': call_list,
        'name': 'test_layer',
    }
    test_layer = nn_blocks.TransformerScaffold(
        attention_cls=ValidatedAttentionLayer,
        attention_cfg=attention_layer_cfg,
        num_attention_heads=10,
        inner_dim=2048,
        inner_activation='relu')

    # Create a 3-dimensional input (the first dimension is implicit).
    data_tensor = tf_keras.Input(shape=(sequence_length, width))
    # Create a 2-dimensional input (the first dimension is implicit).
    mask_tensor = tf_keras.Input(shape=(sequence_length, sequence_length))
    output_tensor = test_layer([data_tensor, mask_tensor])

    # Create a model from the test layer.
    model = tf_keras.Model([data_tensor, mask_tensor], output_tensor)

    # Invoke the model on test data. We can't validate the output data itself
    # (the NN is too complex) but this will rule out structural runtime errors.
    batch_size = 6
    input_data = 10 * np.random.random_sample(
        (batch_size, sequence_length, width))
    # The attention mask should be of shape (batch, from_seq_len, to_seq_len),
    # which here is (batch, sequence_length, sequence_length).
    mask_data = np.random.randint(
        2, size=(batch_size, sequence_length, sequence_length))
    pre_serialization_output = model.predict([input_data, mask_data])

    # Serialize the model config. Pass the serialized data through json to
    # ensure that we can serialize this layer to disk.
    serialized_data = model.get_config()

    # Create a new model from the old config, and copy the weights. These
    # models should have identical outputs.
    new_model = tf_keras.Model.from_config(serialized_data)
    new_model.set_weights(model.get_weights())
    output = new_model.predict([input_data, mask_data])

    self.assertAllClose(pre_serialization_output, output)

    # If the layer was configured correctly, it should have a list attribute
    # (since it should have the custom class and config passed to it).
    new_model.summary()
    new_call_list = new_model.get_layer(
        name='transformer_scaffold')._attention_layer.list
    self.assertNotEmpty(new_call_list)
    self.assertTrue(new_call_list[0],
                    "The passed layer class wasn't instantiated.")

  def test_layer_with_feedforward_cls_restoration_from_config(self):
    sequence_length = 21
    width = 80

    call_list = []
    attention_layer_cfg = {
        'num_heads': 10,
        'key_dim': 8,
        'call_list': call_list,
        'name': 'test_layer',
    }
    feedforward_call_list = []
    feedforward_layer_cfg = {
        'activation': 'relu',
        'call_list': feedforward_call_list,
    }
    test_layer = nn_blocks.TransformerScaffold(
        attention_cls=ValidatedAttentionLayer,
        attention_cfg=attention_layer_cfg,
        feedforward_cls=ValidatedFeedforwardLayer,
        feedforward_cfg=feedforward_layer_cfg,
        num_attention_heads=10,
        inner_dim=None,
        inner_activation=None)

    # Create a 3-dimensional input (the first dimension is implicit).
    data_tensor = tf_keras.Input(shape=(sequence_length, width))
    # Create a 2-dimensional input (the first dimension is implicit).
    mask_tensor = tf_keras.Input(shape=(sequence_length, sequence_length))
    output_tensor = test_layer([data_tensor, mask_tensor])

    # Create a model from the test layer.
    model = tf_keras.Model([data_tensor, mask_tensor], output_tensor)

    # Invoke the model on test data. We can't validate the output data itself
    # (the NN is too complex) but this will rule out structural runtime errors.
    batch_size = 6
    input_data = 10 * np.random.random_sample(
        (batch_size, sequence_length, width))
    # The attention mask should be of shape (batch, from_seq_len, to_seq_len),
    # which here is (batch, sequence_length, sequence_length).
    mask_data = np.random.randint(
        2, size=(batch_size, sequence_length, sequence_length))
    pre_serialization_output = model.predict([input_data, mask_data])

    serialized_data = model.get_config()
    # Create a new model from the old config, and copy the weights. These
    # models should have identical outputs.
    new_model = tf_keras.Model.from_config(serialized_data)
    new_model.set_weights(model.get_weights())
    output = new_model.predict([input_data, mask_data])

    self.assertAllClose(pre_serialization_output, output)

    # If the layer was configured correctly, it should have a list attribute
    # (since it should have the custom class and config passed to it).
    new_model.summary()
    new_call_list = new_model.get_layer(
        name='transformer_scaffold')._attention_layer.list
    self.assertNotEmpty(new_call_list)
    self.assertTrue(new_call_list[0],
                    "The passed layer class wasn't instantiated.")
    new_feedforward_call_list = new_model.get_layer(
        name='transformer_scaffold')._feedforward_block.list
    self.assertNotEmpty(new_feedforward_call_list)
    self.assertTrue(new_feedforward_call_list[0],
                    "The passed layer class wasn't instantiated.")


if __name__ == '__main__':
  tf.test.main()