# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions of NAS-FPN."""
from typing import Any, List, Mapping, Optional, Tuple

# Import libraries
from absl import logging
import tensorflow as tf, tf_keras

from official.modeling import hyperparams
from official.modeling import tf_utils
from official.vision.modeling.decoders import factory
from official.vision.ops import spatial_transform_ops

# The fixed NAS-FPN architecture discovered by NAS.
# Each element represents a specification of a building block:
# (block_level, combine_fn, (input_offset0, input_offset1), is_output).
NASFPN_BLOCK_SPECS = [
(4, 'attention', (1, 3), False),
(4, 'sum', (1, 5), False),
(3, 'sum', (0, 6), True),
(4, 'sum', (6, 7), True),
(5, 'attention', (7, 8), True),
(7, 'attention', (6, 9), True),
(6, 'attention', (9, 10), True),
]
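# For example, with the default min_level=3, the first spec
# (4, 'attention', (1, 3), False) builds an intermediate level-4 node that
# combines the initial level-4 and level-6 features with global attention.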


class BlockSpec:
  """A container class that specifies the block configuration for NAS-FPN."""

  def __init__(self, level: int, combine_fn: str,
               input_offsets: Tuple[int, int], is_output: bool):
self.level = level
self.combine_fn = combine_fn
self.input_offsets = input_offsets
self.is_output = is_output


def build_block_specs(
    block_specs: Optional[List[Tuple[Any, ...]]] = None) -> List[BlockSpec]:
  """Builds the list of BlockSpec objects for NAS-FPN."""
if not block_specs:
block_specs = NASFPN_BLOCK_SPECS
logging.info('Building NAS-FPN block specs: %s', block_specs)
return [BlockSpec(*b) for b in block_specs]


@tf_keras.utils.register_keras_serializable(package='Vision')
class NASFPN(tf_keras.Model):
"""Creates a NAS-FPN model.
This implements the paper:
Golnaz Ghiasi, Tsung-Yi Lin, Ruoming Pang, Quoc V. Le.
NAS-FPN: Learning Scalable Feature Pyramid Architecture for Object Detection.
(https://arxiv.org/abs/1904.07392)
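
  Example (a minimal usage sketch; the backbone levels and shapes below are
  illustrative assumptions, not taken from any particular config):

    input_specs = {
        '3': tf.TensorShape([None, 64, 64, 256]),
        '4': tf.TensorShape([None, 32, 32, 512]),
        '5': tf.TensorShape([None, 16, 16, 1024]),
    }
    model = NASFPN(input_specs=input_specs, min_level=3, max_level=7)
    # model.output_specs maps levels '3'..'7' to 256-channel feature shapes.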
"""

  def __init__(
self,
input_specs: Mapping[str, tf.TensorShape],
min_level: int = 3,
max_level: int = 7,
block_specs: Optional[List[BlockSpec]] = None,
num_filters: int = 256,
num_repeats: int = 5,
use_separable_conv: bool = False,
activation: str = 'relu',
use_sync_bn: bool = False,
norm_momentum: float = 0.99,
norm_epsilon: float = 0.001,
kernel_initializer: str = 'VarianceScaling',
kernel_regularizer: Optional[tf_keras.regularizers.Regularizer] = None,
bias_regularizer: Optional[tf_keras.regularizers.Regularizer] = None,
**kwargs):
"""Initializes a NAS-FPN model.
Args:
input_specs: A `dict` of input specifications. A dictionary consists of
{level: TensorShape} from a backbone.
min_level: An `int` of minimum level in FPN output feature maps.
max_level: An `int` of maximum level in FPN output feature maps.
block_specs: a list of BlockSpec objects that specifies the NAS-FPN
network topology. By default, the previously discovered architecture is
used.
num_filters: An `int` number of filters in FPN layers.
      num_repeats: An `int` number of repeats of the NAS-FPN module.
use_separable_conv: A `bool`. If True use separable convolution for
convolution in FPN layers.
activation: A `str` name of the activation function.
use_sync_bn: A `bool`. If True, use synchronized batch normalization.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A `float` added to variance to avoid dividing by zero.
kernel_initializer: A `str` name of kernel_initializer for convolutional
layers.
kernel_regularizer: A `tf_keras.regularizers.Regularizer` object for
Conv2D. Default is None.
bias_regularizer: A `tf_keras.regularizers.Regularizer` object for Conv2D.
**kwargs: Additional keyword arguments to be passed.
"""
self._config_dict = {
'input_specs': input_specs,
'min_level': min_level,
'max_level': max_level,
'num_filters': num_filters,
'num_repeats': num_repeats,
'use_separable_conv': use_separable_conv,
'activation': activation,
'use_sync_bn': use_sync_bn,
'norm_momentum': norm_momentum,
'norm_epsilon': norm_epsilon,
'kernel_initializer': kernel_initializer,
'kernel_regularizer': kernel_regularizer,
'bias_regularizer': bias_regularizer,
}
self._min_level = min_level
self._max_level = max_level
self._block_specs = (
build_block_specs() if block_specs is None else block_specs
)
self._num_repeats = num_repeats
self._conv_op = (tf_keras.layers.SeparableConv2D
if self._config_dict['use_separable_conv']
else tf_keras.layers.Conv2D)
self._norm_op = tf_keras.layers.BatchNormalization
if tf_keras.backend.image_data_format() == 'channels_last':
self._bn_axis = -1
else:
self._bn_axis = 1
self._norm_kwargs = {
'axis': self._bn_axis,
'momentum': self._config_dict['norm_momentum'],
'epsilon': self._config_dict['norm_epsilon'],
'synchronized': self._config_dict['use_sync_bn'],
}
self._activation = tf_utils.get_activation(activation)
# Gets input feature pyramid from backbone.
inputs = self._build_input_pyramid(input_specs, min_level)
# Projects the input features.
feats = []
for level in range(self._min_level, self._max_level + 1):
if str(level) in inputs.keys():
feats.append(self._resample_feature_map(
inputs[str(level)], level, level, self._config_dict['num_filters']))
else:
feats.append(self._resample_feature_map(
feats[-1], level - 1, level, self._config_dict['num_filters']))
    # Repeatedly builds the NAS-FPN modules.
for _ in range(self._num_repeats):
output_feats = self._build_feature_pyramid(feats)
feats = [output_feats[level]
for level in range(self._min_level, self._max_level + 1)]
self._output_specs = {
str(level): output_feats[level].get_shape()
for level in range(min_level, max_level + 1)
}
output_feats = {str(level): output_feats[level]
for level in output_feats.keys()}
super(NASFPN, self).__init__(inputs=inputs, outputs=output_feats, **kwargs)

  def _build_input_pyramid(self, input_specs: Mapping[str, tf.TensorShape],
                           min_level: int):
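    """Creates Keras input placeholders for the backbone feature levels."""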
assert isinstance(input_specs, dict)
    # Compare levels numerically; the string keys would compare
    # lexicographically and break for two-digit levels.
    if min(int(level) for level in input_specs.keys()) > min_level:
      raise ValueError(
          'Backbone min level should be less than or equal to FPN min level')
inputs = {}
for level, spec in input_specs.items():
inputs[level] = tf_keras.Input(shape=spec[1:])
return inputs

  def _resample_feature_map(self,
inputs,
input_level,
target_level,
target_num_filters=256):
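    """Projects and resamples a feature map to a target level.

    Projects `inputs` to `target_num_filters` channels if needed, then
    max-pools by 2**(target_level - input_level) to downsample, or
    nearest-neighbor upsamples by 2**(input_level - target_level). For
    example, a level-3 [1, 64, 64, 256] feature resampled to level 5
    becomes [1, 16, 16, 256].
    """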
x = inputs
_, _, _, input_num_filters = x.get_shape().as_list()
if input_num_filters != target_num_filters:
x = self._conv_op(
filters=target_num_filters,
kernel_size=1,
padding='same',
**self._conv_kwargs)(x)
x = self._norm_op(**self._norm_kwargs)(x)
if input_level < target_level:
stride = int(2 ** (target_level - input_level))
return tf_keras.layers.MaxPool2D(
pool_size=stride, strides=stride, padding='same')(x)
if input_level > target_level:
scale = int(2 ** (input_level - target_level))
return spatial_transform_ops.nearest_upsampling(x, scale=scale)
# Force output x to be the same dtype as mixed precision policy. This avoids
# dtype mismatch when one input (by default float32 dtype) does not meet all
# the above conditions and is output unchanged, while other inputs are
# processed to have different dtype, e.g., using bfloat16 on TPU.
compute_dtype = tf_keras.layers.Layer().dtype_policy.compute_dtype
if (compute_dtype is not None) and (x.dtype != compute_dtype):
return tf.cast(x, dtype=compute_dtype)
else:
return x

  @property
def _conv_kwargs(self):
if self._config_dict['use_separable_conv']:
return {
'depthwise_initializer': tf_keras.initializers.VarianceScaling(
scale=2, mode='fan_out', distribution='untruncated_normal'),
'pointwise_initializer': tf_keras.initializers.VarianceScaling(
scale=2, mode='fan_out', distribution='untruncated_normal'),
'bias_initializer': tf.zeros_initializer(),
'depthwise_regularizer': self._config_dict['kernel_regularizer'],
'pointwise_regularizer': self._config_dict['kernel_regularizer'],
'bias_regularizer': self._config_dict['bias_regularizer'],
}
else:
return {
'kernel_initializer': tf_keras.initializers.VarianceScaling(
scale=2, mode='fan_out', distribution='untruncated_normal'),
'bias_initializer': tf.zeros_initializer(),
'kernel_regularizer': self._config_dict['kernel_regularizer'],
'bias_regularizer': self._config_dict['bias_regularizer'],
}

  def _global_attention(self, feat0, feat1):
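    """Combines two features with global attention.

    Reweights `feat1` by a sigmoid gate computed from the spatial global
    max of `feat0`, i.e. returns feat0 + feat1 * sigmoid(max_hw(feat0)).
    """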
m = tf.math.reduce_max(feat0, axis=[1, 2], keepdims=True)
m = tf.math.sigmoid(m)
return feat0 + feat1 * m

  def _build_feature_pyramid(self, feats):
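    """Runs one NAS-FPN pass over `feats` following the block specs.

    Returns a {level: feature} dict holding the last `num_output_levels`
    nodes, keyed by pyramid level.
    """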
num_output_connections = [0] * len(feats)
num_output_levels = self._max_level - self._min_level + 1
feat_levels = list(range(self._min_level, self._max_level + 1))
for i, block_spec in enumerate(self._block_specs):
new_level = block_spec.level
# Checks the range of input_offsets.
for input_offset in block_spec.input_offsets:
if input_offset >= len(feats):
raise ValueError(
              'input_offset ({}) is larger than num feats ({})'.format(
input_offset, len(feats)))
input0 = block_spec.input_offsets[0]
input1 = block_spec.input_offsets[1]
# Update graph with inputs.
node0 = feats[input0]
node0_level = feat_levels[input0]
num_output_connections[input0] += 1
node0 = self._resample_feature_map(node0, node0_level, new_level)
node1 = feats[input1]
node1_level = feat_levels[input1]
num_output_connections[input1] += 1
node1 = self._resample_feature_map(node1, node1_level, new_level)
# Combine node0 and node1 to create new feat.
if block_spec.combine_fn == 'sum':
new_node = node0 + node1
elif block_spec.combine_fn == 'attention':
if node0_level >= node1_level:
new_node = self._global_attention(node0, node1)
else:
new_node = self._global_attention(node1, node0)
else:
raise ValueError('unknown combine_fn `{}`.'
.format(block_spec.combine_fn))
      # Fold same-level intermediate nodes with no output connections into
      # this output node, so no computed feature is left dangling.
if block_spec.is_output:
for j, (feat, feat_level, num_output) in enumerate(
zip(feats, feat_levels, num_output_connections)):
if num_output == 0 and feat_level == new_level:
num_output_connections[j] += 1
feat_ = self._resample_feature_map(feat, feat_level, new_level)
new_node += feat_
new_node = self._activation(new_node)
new_node = self._conv_op(
filters=self._config_dict['num_filters'],
kernel_size=(3, 3),
padding='same',
**self._conv_kwargs)(new_node)
new_node = self._norm_op(**self._norm_kwargs)(new_node)
feats.append(new_node)
feat_levels.append(new_level)
num_output_connections.append(0)
output_feats = {}
for i in range(len(feats) - num_output_levels, len(feats)):
level = feat_levels[i]
output_feats[level] = feats[i]
logging.info('Output feature pyramid: %s', output_feats)
return output_feats

  def get_config(self) -> Mapping[str, Any]:
return self._config_dict

  @classmethod
def from_config(cls, config, custom_objects=None):
return cls(**config)

  @property
def output_specs(self) -> Mapping[str, tf.TensorShape]:
"""A dict of {level: TensorShape} pairs for the model output."""
return self._output_specs


@factory.register_decoder_builder('nasfpn')
def build_nasfpn_decoder(
input_specs: Mapping[str, tf.TensorShape],
model_config: hyperparams.Config,
l2_regularizer: Optional[tf_keras.regularizers.Regularizer] = None
) -> tf_keras.Model:
"""Builds NASFPN decoder from a config.
Args:
input_specs: A `dict` of input specifications. A dictionary consists of
{level: TensorShape} from a backbone.
model_config: A OneOfConfig. Model config.
l2_regularizer: A `tf_keras.regularizers.Regularizer` instance. Default to
None.
Returns:
A `tf_keras.Model` instance of the NASFPN decoder.
Raises:
ValueError: If the model_config.decoder.type is not `nasfpn`.
"""
decoder_type = model_config.decoder.type
decoder_cfg = model_config.decoder.get()
if decoder_type != 'nasfpn':
    raise ValueError(f'Inconsistent decoder type {decoder_type}. '
                     'Expected `nasfpn`.')
norm_activation_config = model_config.norm_activation
return NASFPN(
input_specs=input_specs,
min_level=model_config.min_level,
max_level=model_config.max_level,
num_filters=decoder_cfg.num_filters,
num_repeats=decoder_cfg.num_repeats,
use_separable_conv=decoder_cfg.use_separable_conv,
activation=norm_activation_config.activation,
use_sync_bn=norm_activation_config.use_sync_bn,
norm_momentum=norm_activation_config.norm_momentum,
norm_epsilon=norm_activation_config.norm_epsilon,
kernel_regularizer=l2_regularizer)
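

# A minimal smoke-test sketch (not part of the original module). The backbone
# levels and shapes below are illustrative assumptions; it builds the decoder
# on fake features and prints the output feature shapes.
if __name__ == '__main__':
  backbone_specs = {
      '3': tf.TensorShape([None, 64, 64, 256]),
      '4': tf.TensorShape([None, 32, 32, 512]),
      '5': tf.TensorShape([None, 16, 16, 1024]),
  }
  decoder = NASFPN(
      input_specs=backbone_specs, min_level=3, max_level=7, num_repeats=1)
  features = {
      '3': tf.ones([1, 64, 64, 256]),
      '4': tf.ones([1, 32, 32, 512]),
      '5': tf.ones([1, 16, 16, 1024]),
  }
  # Expect levels '3'..'7', each with 256 filters.
  for level, feature in sorted(decoder(features).items()):
    print(level, feature.shape)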