r"""Xception model. |
|
|
|
"Xception: Deep Learning with Depthwise Separable Convolutions" |
|
François Chollet
|
https://arxiv.org/abs/1610.02357 |
|
|
|
We implement the modified version by Jifeng Dai et al. for their COCO 2017 |
|
detection challenge submission, where the model is made deeper and has aligned |
|
features for dense prediction tasks. See their slides for details: |
|
|
|
"Deformable Convolutional Networks -- COCO Detection and Segmentation Challenge |
|
2017 Entry" |
|
Haozhi Qi, Zheng Zhang, Bin Xiao, Han Hu, Bowen Cheng, Yichen Wei and Jifeng Dai |
|
ICCV 2017 COCO Challenge workshop |
|
http://presentations.cocodataset.org/COCO17-Detect-MSRA.pdf |
|
|
|
We made a few more changes on top of MSRA's modifications: |
|
1. Fully convolutional: All the max-pooling layers are replaced with separable |
|
conv2d with stride = 2. This allows us to use atrous convolution to extract |
|
feature maps at any resolution. |
|
|
|
2. We support adding ReLU and BatchNorm after depthwise convolution, motivated |
|
by the design of MobileNet v1.
|
|
|
"MobileNets: Efficient Convolutional Neural Networks for Mobile Vision |
|
Applications" |
|
Andrew G. Howard, Menglong Zhu, Bo Chen, Dmitry Kalenichenko, Weijun Wang, |
|
Tobias Weyand, Marco Andreetto, Hartwig Adam |
|
https://arxiv.org/abs/1704.04861 |
|
""" |
|
from __future__ import absolute_import |
|
from __future__ import division |
|
from __future__ import print_function |
|
|
|
import collections |
|
from six.moves import range |
|
import tensorflow as tf |
|
from tensorflow.contrib import slim as contrib_slim |
|
|
|
from deeplab.core import utils |
|
from tensorflow.contrib.slim.nets import resnet_utils |
|
from nets.mobilenet import conv_blocks as mobilenet_v3_ops |
|
|
|
slim = contrib_slim |
|
|
|
|
|
# Default unit_rate_list: no multi-grid, i.e. every separable conv in a
# module uses the block's atrous rate unchanged.
_DEFAULT_MULTI_GRID = [1, 1, 1]
|
|
|
# Cap used by the bounded-activation path; matches the upper bound of
# tf.nn.relu6.
_CLIP_CAP = 6
|
|
|
|
|
class Block(collections.namedtuple('Block', ['scope', 'unit_fn', 'args'])): |
|
"""A named tuple describing an Xception block. |
|
|
|
Its parts are: |
|
scope: The scope of the block. |
|
unit_fn: The Xception unit function which takes as input a tensor and |
|
returns another tensor with the output of the Xception unit. |
|
args: A list of length equal to the number of units in the block. The list |
|
contains one dictionary for each unit in the block to serve as argument to |
|
unit_fn. |
|
""" |
|
|
|
|
|
def fixed_padding(inputs, kernel_size, rate=1): |
|
"""Pads the input along the spatial dimensions independently of input size. |
|
|
|
Args: |
|
inputs: A tensor of size [batch, height_in, width_in, channels]. |
|
    kernel_size: The kernel size to be used in the conv2d or max_pool2d
|
Should be a positive integer. |
|
rate: An integer, rate for atrous convolution. |
|
|
|
Returns: |
|
output: A tensor of size [batch, height_out, width_out, channels] with the |
|
input, either intact (if kernel_size == 1) or padded (if kernel_size > 1). |
|
""" |
|
kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1) |
|
pad_total = kernel_size_effective - 1 |
|
pad_beg = pad_total // 2 |
|
pad_end = pad_total - pad_beg |
|
padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg, pad_end], |
|
[pad_beg, pad_end], [0, 0]]) |
|
return padded_inputs |
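
# Worked example: with kernel_size=3 and rate=2, the effective kernel extent
# is 3 + (3 - 1) * (2 - 1) = 5, so pad_total = 4 and the input is padded by
# pad_beg = 2 and pad_end = 2 pixels on each spatial dimension, independently
# of the input size.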
|
|
|
|
|
@slim.add_arg_scope |
|
def separable_conv2d_same(inputs, |
|
num_outputs, |
|
kernel_size, |
|
depth_multiplier, |
|
stride, |
|
rate=1, |
|
use_explicit_padding=True, |
|
regularize_depthwise=False, |
|
scope=None, |
|
**kwargs): |
|
"""Strided 2-D separable convolution with 'SAME' padding. |
|
|
|
If stride > 1 and use_explicit_padding is True, then we do explicit zero- |
|
padding, followed by conv2d with 'VALID' padding. |
|
|
|
Note that |
|
|
|
net = separable_conv2d_same(inputs, num_outputs, 3, |
|
depth_multiplier=1, stride=stride) |
|
|
|
is equivalent to |
|
|
|
net = slim.separable_conv2d(inputs, num_outputs, 3, |
|
depth_multiplier=1, stride=1, padding='SAME') |
|
net = resnet_utils.subsample(net, factor=stride) |
|
|
|
whereas |
|
|
|
net = slim.separable_conv2d(inputs, num_outputs, 3, stride=stride, |
|
depth_multiplier=1, padding='SAME') |
|
|
|
is different when the input's height or width is even, which is why we add the |
|
current function. |
|
|
|
Consequently, if the input feature map has even height or width, setting |
|
`use_explicit_padding=False` will result in feature misalignment by one pixel |
|
along the corresponding dimension. |
|
|
|
Args: |
|
inputs: A 4-D tensor of size [batch, height_in, width_in, channels]. |
|
num_outputs: An integer, the number of output filters. |
|
    kernel_size: An integer, the kernel size of the filters.
|
depth_multiplier: The number of depthwise convolution output channels for |
|
each input channel. The total number of depthwise convolution output |
|
channels will be equal to `num_filters_in * depth_multiplier`. |
|
stride: An integer, the output stride. |
|
rate: An integer, rate for atrous convolution. |
|
use_explicit_padding: If True, use explicit padding to make the model fully |
|
compatible with the open source version, otherwise use the native |
|
      TensorFlow 'SAME' padding.
|
    regularize_depthwise: Whether or not to apply L2-norm regularization on the
|
depthwise convolution weights. |
|
scope: Scope. |
|
    **kwargs: Additional keyword arguments to pass to slim.separable_conv2d
      and slim.conv2d.
|
|
|
Returns: |
|
output: A 4-D tensor of size [batch, height_out, width_out, channels] with |
|
the convolution output. |
|
""" |
|
def _separable_conv2d(padding): |
|
"""Wrapper for separable conv2d.""" |
|
return slim.separable_conv2d(inputs, |
|
num_outputs, |
|
kernel_size, |
|
depth_multiplier=depth_multiplier, |
|
stride=stride, |
|
rate=rate, |
|
padding=padding, |
|
scope=scope, |
|
**kwargs) |
|
def _split_separable_conv2d(padding): |
|
"""Splits separable conv2d into depthwise and pointwise conv2d.""" |
|
outputs = slim.separable_conv2d(inputs, |
|
None, |
|
kernel_size, |
|
depth_multiplier=depth_multiplier, |
|
stride=stride, |
|
rate=rate, |
|
padding=padding, |
|
scope=scope + '_depthwise', |
|
**kwargs) |
|
return slim.conv2d(outputs, |
|
num_outputs, |
|
1, |
|
scope=scope + '_pointwise', |
|
**kwargs) |
|
if stride == 1 or not use_explicit_padding: |
|
if regularize_depthwise: |
|
outputs = _separable_conv2d(padding='SAME') |
|
else: |
|
outputs = _split_separable_conv2d(padding='SAME') |
|
else: |
|
inputs = fixed_padding(inputs, kernel_size, rate) |
|
if regularize_depthwise: |
|
outputs = _separable_conv2d(padding='VALID') |
|
else: |
|
outputs = _split_separable_conv2d(padding='VALID') |
|
return outputs |
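
# A minimal usage sketch (shapes and the scope name are illustrative, not
# part of this module): a stride-2 3x3 separable conv whose output grid stays
# aligned with the input regardless of whether height/width are even.
#
#   net = separable_conv2d_same(net, num_outputs=128, kernel_size=3,
#                               depth_multiplier=1, stride=2, rate=1,
#                               scope='example_sep_conv')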
|
|
|
|
|
@slim.add_arg_scope |
|
def xception_module(inputs, |
|
depth_list, |
|
skip_connection_type, |
|
stride, |
|
kernel_size=3, |
|
unit_rate_list=None, |
|
rate=1, |
|
activation_fn_in_separable_conv=False, |
|
regularize_depthwise=False, |
|
outputs_collections=None, |
|
scope=None, |
|
use_bounded_activation=False, |
|
use_explicit_padding=True, |
|
use_squeeze_excite=False, |
|
se_pool_size=None): |
|
"""An Xception module. |
|
|
|
The output of one Xception module is equal to the sum of `residual` and |
|
  `shortcut`, where `residual` is the feature computed by three separable
  convolutions. The `shortcut` is the feature computed by a 1x1 convolution,
  with or without striding. In some cases the `shortcut` path is a simple
  identity function or absent (i.e., no shortcut).
|
|
|
Note that we replace the max pooling operations in the Xception module with |
|
another separable convolution with striding, since atrous rate is not properly |
|
  supported by the current TensorFlow max-pooling implementation.
|
|
|
Args: |
|
inputs: A tensor of size [batch, height, width, channels]. |
|
depth_list: A list of three integers specifying the depth values of one |
|
Xception module. |
|
skip_connection_type: Skip connection type for the residual path. Only |
|
supports 'conv', 'sum', or 'none'. |
|
stride: The block unit's stride. Determines the amount of downsampling of |
|
      the unit's output compared to its input.
|
kernel_size: Integer, convolution kernel size. |
|
unit_rate_list: A list of three integers, determining the unit rate for |
|
each separable convolution in the xception module. |
|
rate: An integer, rate for atrous convolution. |
|
activation_fn_in_separable_conv: Includes activation function in the |
|
separable convolution or not. |
|
    regularize_depthwise: Whether or not to apply L2-norm regularization on the
|
depthwise convolution weights. |
|
outputs_collections: Collection to add the Xception unit output. |
|
scope: Optional variable_scope. |
|
use_bounded_activation: Whether or not to use bounded activations. Bounded |
|
activations better lend themselves to quantized inference. |
|
use_explicit_padding: If True, use explicit padding to make the model fully |
|
compatible with the open source version, otherwise use the native |
|
      TensorFlow 'SAME' padding.
|
use_squeeze_excite: Boolean, use squeeze-and-excitation or not. |
|
se_pool_size: None or integer specifying the pooling size used in SE module. |
|
|
|
Returns: |
|
The Xception module's output. |
|
|
|
Raises: |
|
ValueError: If depth_list and unit_rate_list do not contain three elements, |
|
or if stride != 1 for the third separable convolution operation in the |
|
residual path, or unsupported skip connection type. |
|
""" |
|
if len(depth_list) != 3: |
|
raise ValueError('Expect three elements in depth_list.') |
|
  if unit_rate_list is None:
    # Default to no multi-grid so that rate * unit_rate_list[i] below is
    # always well defined.
    unit_rate_list = _DEFAULT_MULTI_GRID
  elif len(unit_rate_list) != 3:
    raise ValueError('Expect three elements in unit_rate_list.')
|
|
|
with tf.variable_scope(scope, 'xception_module', [inputs]) as sc: |
|
residual = inputs |
|
|
|
def _separable_conv(features, depth, kernel_size, depth_multiplier, |
|
regularize_depthwise, rate, stride, scope): |
|
"""Separable conv block.""" |
|
if activation_fn_in_separable_conv: |
|
activation_fn = tf.nn.relu6 if use_bounded_activation else tf.nn.relu |
|
else: |
|
if use_bounded_activation: |
|
          # When use_bounded_activation is True, clip the output of the
          # separable conv to [-_CLIP_CAP, _CLIP_CAP] and feed relu6-activated
          # features into it, keeping all activations bounded.
activation_fn = lambda x: tf.clip_by_value(x, -_CLIP_CAP, _CLIP_CAP) |
|
features = tf.nn.relu6(features) |
|
else: |
|
          # Original network design: plain ReLU on the input features and no
          # activation after the separable conv.
activation_fn = None |
|
features = tf.nn.relu(features) |
|
return separable_conv2d_same(features, |
|
depth, |
|
kernel_size, |
|
depth_multiplier=depth_multiplier, |
|
stride=stride, |
|
rate=rate, |
|
activation_fn=activation_fn, |
|
use_explicit_padding=use_explicit_padding, |
|
regularize_depthwise=regularize_depthwise, |
|
scope=scope) |
|
for i in range(3): |
|
residual = _separable_conv(residual, |
|
depth_list[i], |
|
kernel_size=kernel_size, |
|
depth_multiplier=1, |
|
regularize_depthwise=regularize_depthwise, |
|
rate=rate*unit_rate_list[i], |
|
stride=stride if i == 2 else 1, |
|
scope='separable_conv' + str(i+1)) |
|
if use_squeeze_excite: |
|
residual = mobilenet_v3_ops.squeeze_excite( |
|
input_tensor=residual, |
|
squeeze_factor=16, |
|
inner_activation_fn=tf.nn.relu, |
|
          gating_fn=lambda x: tf.nn.relu6(x + 3) * 0.16667,  # hard sigmoid
|
pool=se_pool_size) |
|
|
|
if skip_connection_type == 'conv': |
|
shortcut = slim.conv2d(inputs, |
|
depth_list[-1], |
|
[1, 1], |
|
stride=stride, |
|
activation_fn=None, |
|
scope='shortcut') |
|
if use_bounded_activation: |
|
residual = tf.clip_by_value(residual, -_CLIP_CAP, _CLIP_CAP) |
|
shortcut = tf.clip_by_value(shortcut, -_CLIP_CAP, _CLIP_CAP) |
|
outputs = residual + shortcut |
|
if use_bounded_activation: |
|
outputs = tf.nn.relu6(outputs) |
|
elif skip_connection_type == 'sum': |
|
if use_bounded_activation: |
|
residual = tf.clip_by_value(residual, -_CLIP_CAP, _CLIP_CAP) |
|
inputs = tf.clip_by_value(inputs, -_CLIP_CAP, _CLIP_CAP) |
|
outputs = residual + inputs |
|
if use_bounded_activation: |
|
outputs = tf.nn.relu6(outputs) |
|
elif skip_connection_type == 'none': |
|
outputs = residual |
|
else: |
|
raise ValueError('Unsupported skip connection type.') |
|
|
|
return slim.utils.collect_named_outputs(outputs_collections, |
|
sc.name, |
|
outputs) |
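
# A sketch of a single middle-flow-style call (hypothetical values, for
# illustration; in practice modules are instantiated via Block/xception_block):
#
#   net = xception_module(net, depth_list=[728, 728, 728],
#                         skip_connection_type='sum', stride=1,
#                         rate=2, unit_rate_list=[1, 1, 1],
#                         scope='example_module')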
|
|
|
|
|
@slim.add_arg_scope |
|
def stack_blocks_dense(net, |
|
blocks, |
|
output_stride=None, |
|
outputs_collections=None): |
|
"""Stacks Xception blocks and controls output feature density. |
|
|
|
  First, this function creates scopes for the Xception blocks in the form of
|
'block_name/unit_1', 'block_name/unit_2', etc. |
|
|
|
Second, this function allows the user to explicitly control the output |
|
stride, which is the ratio of the input to output spatial resolution. This |
|
is useful for dense prediction tasks such as semantic segmentation or |
|
object detection. |
|
|
|
Control of the output feature density is implemented by atrous convolution. |
|
|
|
Args: |
|
net: A tensor of size [batch, height, width, channels]. |
|
blocks: A list of length equal to the number of Xception blocks. Each |
|
element is an Xception Block object describing the units in the block. |
|
output_stride: If None, then the output will be computed at the nominal |
|
network stride. If output_stride is not None, it specifies the requested |
|
ratio of input to output spatial resolution, which needs to be equal to |
|
the product of unit strides from the start up to some level of Xception. |
|
For example, if the Xception employs units with strides 1, 2, 1, 3, 4, 1, |
|
then valid values for the output_stride are 1, 2, 6, 24 or None (which |
|
is equivalent to output_stride=24). |
|
outputs_collections: Collection to add the Xception block outputs. |
|
|
|
Returns: |
|
net: Output tensor with stride equal to the specified output_stride. |
|
|
|
Raises: |
|
ValueError: If the target output_stride is not valid. |
|
""" |
|
  # current_stride tracks the effective stride of the activations, i.e. the
  # running product of unit strides applied so far. It lets us switch to
  # atrous convolution as soon as applying the next unit with its nominal
  # stride would exceed the target output_stride.
current_stride = 1 |
|

  # The atrous convolution rate parameter.
rate = 1 |
|
|
|
for block in blocks: |
|
with tf.variable_scope(block.scope, 'block', [net]) as sc: |
|
for i, unit in enumerate(block.args): |
|
if output_stride is not None and current_stride > output_stride: |
|
raise ValueError('The target output_stride cannot be reached.') |
|
with tf.variable_scope('unit_%d' % (i + 1), values=[net]): |
|
          # If we have reached the target output_stride, then we need to
          # employ atrous convolution with stride=1 and multiply the atrous
          # rate by the current unit's stride for use in subsequent layers.
if output_stride is not None and current_stride == output_stride: |
|
net = block.unit_fn(net, rate=rate, **dict(unit, stride=1)) |
|
rate *= unit.get('stride', 1) |
|
else: |
|
net = block.unit_fn(net, rate=1, **unit) |
|
current_stride *= unit.get('stride', 1) |
|
      # Collect the activations at the block's end.
net = slim.utils.collect_named_outputs(outputs_collections, sc.name, net) |
|
|
|
if output_stride is not None and current_stride != output_stride: |
|
raise ValueError('The target output_stride cannot be reached.') |
|
|
|
return net |
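
# Worked example: suppose the blocks contain stride-2 units whose strides
# multiply to 32 and output_stride=8 is requested. The first three stride-2
# units run normally (current_stride: 2 -> 4 -> 8); every later stride-2 unit
# is then invoked with stride=1 while rate doubles (2, then 4), so resolution
# is preserved via atrous convolution instead of further subsampling.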
|
|
|
|
|
def xception(inputs, |
|
blocks, |
|
num_classes=None, |
|
is_training=True, |
|
global_pool=True, |
|
keep_prob=0.5, |
|
output_stride=None, |
|
reuse=None, |
|
scope=None, |
|
sync_batch_norm_method='None'): |
|
"""Generator for Xception models. |
|
|
|
This function generates a family of Xception models. See the xception_*() |
|
methods for specific model instantiations, obtained by selecting different |
|
block instantiations that produce Xception of various depths. |
|
|
|
Args: |
|
inputs: A tensor of size [batch, height_in, width_in, channels]. Must be |
|
floating point. If a pretrained checkpoint is used, pixel values should be |
|
the same as during training (see go/slim-classification-models for |
|
specifics). |
|
blocks: A list of length equal to the number of Xception blocks. Each |
|
element is an Xception Block object describing the units in the block. |
|
num_classes: Number of predicted classes for classification tasks. |
|
If 0 or None, we return the features before the logit layer. |
|
    is_training: Whether batch_norm layers are in training mode.
|
global_pool: If True, we perform global average pooling before computing the |
|
logits. Set to True for image classification, False for dense prediction. |
|
keep_prob: Keep probability used in the pre-logits dropout layer. |
|
output_stride: If None, then the output will be computed at the nominal |
|
network stride. If output_stride is not None, it specifies the requested |
|
ratio of input to output spatial resolution. |
|
    reuse: Whether or not the network and its variables should be reused. To
      be able to reuse, 'scope' must be given.
|
scope: Optional variable_scope. |
|
sync_batch_norm_method: String, sync batchnorm method. Currently only |
|
      supports `None`.
|
|
|
Returns: |
|
net: A rank-4 tensor of size [batch, height_out, width_out, channels_out]. |
|
If global_pool is False, then height_out and width_out are reduced by a |
|
factor of output_stride compared to the respective height_in and width_in, |
|
else both height_out and width_out equal one. If num_classes is 0 or None, |
|
then net is the output of the last Xception block, potentially after |
|
global average pooling. If num_classes is a non-zero integer, net contains |
|
the pre-softmax activations. |
|
end_points: A dictionary from components of the network to the corresponding |
|
activation. |
|
|
|
Raises: |
|
ValueError: If the target output_stride is not valid. |
|
""" |
|
with tf.variable_scope( |
|
scope, 'xception', [inputs], reuse=reuse) as sc: |
|
end_points_collection = sc.original_name_scope + 'end_points' |
|
batch_norm = utils.get_batch_norm_fn(sync_batch_norm_method) |
|
with slim.arg_scope([slim.conv2d, |
|
slim.separable_conv2d, |
|
xception_module, |
|
stack_blocks_dense], |
|
outputs_collections=end_points_collection): |
|
with slim.arg_scope([batch_norm], is_training=is_training): |
|
net = inputs |
|
if output_stride is not None: |
|
if output_stride % 2 != 0: |
|
raise ValueError('The output_stride needs to be a multiple of 2.') |
|
output_stride //= 2 |
|
        # Root block: two 3x3 convolutions, the first with stride 2.
net = resnet_utils.conv2d_same(net, 32, 3, stride=2, |
|
scope='entry_flow/conv1_1') |
|
net = resnet_utils.conv2d_same(net, 64, 3, stride=1, |
|
scope='entry_flow/conv1_2') |
|
|
|
        # Stack the Xception blocks at the requested output stride.
net = stack_blocks_dense(net, blocks, output_stride) |
|
|
|
        # Convert end_points_collection into a dictionary of end_points.
end_points = slim.utils.convert_collection_to_dict( |
|
end_points_collection, clear_collection=True) |
|
|
|
if global_pool: |
|
          # Global average pooling.
net = tf.reduce_mean(net, [1, 2], name='global_pool', keepdims=True) |
|
end_points['global_pool'] = net |
|
if num_classes: |
|
net = slim.dropout(net, keep_prob=keep_prob, is_training=is_training, |
|
scope='prelogits_dropout') |
|
net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, |
|
normalizer_fn=None, scope='logits') |
|
end_points[sc.name + '/logits'] = net |
|
end_points['predictions'] = slim.softmax(net, scope='predictions') |
|
return net, end_points |
|
|
|
|
|
def xception_block(scope, |
|
depth_list, |
|
skip_connection_type, |
|
activation_fn_in_separable_conv, |
|
regularize_depthwise, |
|
num_units, |
|
stride, |
|
kernel_size=3, |
|
unit_rate_list=None, |
|
use_squeeze_excite=False, |
|
se_pool_size=None): |
|
"""Helper function for creating a Xception block. |
|
|
|
Args: |
|
scope: The scope of the block. |
|
    depth_list: A list of three integers specifying the depths of the three
      separable convolutions in each unit.
|
skip_connection_type: Skip connection type for the residual path. Only |
|
supports 'conv', 'sum', or 'none'. |
|
activation_fn_in_separable_conv: Includes activation function in the |
|
separable convolution or not. |
|
    regularize_depthwise: Whether or not to apply L2-norm regularization on the
|
depthwise convolution weights. |
|
num_units: The number of units in the block. |
|
stride: The stride of the block, implemented as a stride in the last unit. |
|
All other units have stride=1. |
|
kernel_size: Integer, convolution kernel size. |
|
unit_rate_list: A list of three integers, determining the unit rate in the |
|
corresponding xception block. |
|
use_squeeze_excite: Boolean, use squeeze-and-excitation or not. |
|
se_pool_size: None or integer specifying the pooling size used in SE module. |
|
|
|
Returns: |
|
An Xception block. |
|
""" |
|
if unit_rate_list is None: |
|
unit_rate_list = _DEFAULT_MULTI_GRID |
|
return Block(scope, xception_module, [{ |
|
'depth_list': depth_list, |
|
'skip_connection_type': skip_connection_type, |
|
'activation_fn_in_separable_conv': activation_fn_in_separable_conv, |
|
'regularize_depthwise': regularize_depthwise, |
|
'stride': stride, |
|
'kernel_size': kernel_size, |
|
'unit_rate_list': unit_rate_list, |
|
'use_squeeze_excite': use_squeeze_excite, |
|
'se_pool_size': se_pool_size, |
|
}] * num_units) |
|
|
|
|
|
def xception_41(inputs, |
|
num_classes=None, |
|
is_training=True, |
|
global_pool=True, |
|
keep_prob=0.5, |
|
output_stride=None, |
|
regularize_depthwise=False, |
|
multi_grid=None, |
|
reuse=None, |
|
scope='xception_41', |
|
sync_batch_norm_method='None'): |
|
"""Xception-41 model.""" |
|
blocks = [ |
|
xception_block('entry_flow/block1', |
|
depth_list=[128, 128, 128], |
|
skip_connection_type='conv', |
|
activation_fn_in_separable_conv=False, |
|
regularize_depthwise=regularize_depthwise, |
|
num_units=1, |
|
stride=2), |
|
xception_block('entry_flow/block2', |
|
depth_list=[256, 256, 256], |
|
skip_connection_type='conv', |
|
activation_fn_in_separable_conv=False, |
|
regularize_depthwise=regularize_depthwise, |
|
num_units=1, |
|
stride=2), |
|
xception_block('entry_flow/block3', |
|
depth_list=[728, 728, 728], |
|
skip_connection_type='conv', |
|
activation_fn_in_separable_conv=False, |
|
regularize_depthwise=regularize_depthwise, |
|
num_units=1, |
|
stride=2), |
|
xception_block('middle_flow/block1', |
|
depth_list=[728, 728, 728], |
|
skip_connection_type='sum', |
|
activation_fn_in_separable_conv=False, |
|
regularize_depthwise=regularize_depthwise, |
|
num_units=8, |
|
stride=1), |
|
xception_block('exit_flow/block1', |
|
depth_list=[728, 1024, 1024], |
|
skip_connection_type='conv', |
|
activation_fn_in_separable_conv=False, |
|
regularize_depthwise=regularize_depthwise, |
|
num_units=1, |
|
stride=2), |
|
xception_block('exit_flow/block2', |
|
depth_list=[1536, 1536, 2048], |
|
skip_connection_type='none', |
|
activation_fn_in_separable_conv=True, |
|
regularize_depthwise=regularize_depthwise, |
|
num_units=1, |
|
stride=1, |
|
unit_rate_list=multi_grid), |
|
] |
|
return xception(inputs, |
|
blocks=blocks, |
|
num_classes=num_classes, |
|
is_training=is_training, |
|
global_pool=global_pool, |
|
keep_prob=keep_prob, |
|
output_stride=output_stride, |
|
reuse=reuse, |
|
scope=scope, |
|
sync_batch_norm_method=sync_batch_norm_method) |
|
|
|
|
|
def xception_65_factory(inputs, |
|
num_classes=None, |
|
is_training=True, |
|
global_pool=True, |
|
keep_prob=0.5, |
|
output_stride=None, |
|
regularize_depthwise=False, |
|
kernel_size=3, |
|
multi_grid=None, |
|
reuse=None, |
|
use_squeeze_excite=False, |
|
se_pool_size=None, |
|
scope='xception_65', |
|
sync_batch_norm_method='None'): |
|
"""Xception-65 model factory.""" |
|
blocks = [ |
|
xception_block('entry_flow/block1', |
|
depth_list=[128, 128, 128], |
|
skip_connection_type='conv', |
|
activation_fn_in_separable_conv=False, |
|
regularize_depthwise=regularize_depthwise, |
|
num_units=1, |
|
stride=2, |
|
kernel_size=kernel_size, |
|
use_squeeze_excite=False, |
|
se_pool_size=se_pool_size), |
|
xception_block('entry_flow/block2', |
|
depth_list=[256, 256, 256], |
|
skip_connection_type='conv', |
|
activation_fn_in_separable_conv=False, |
|
regularize_depthwise=regularize_depthwise, |
|
num_units=1, |
|
stride=2, |
|
kernel_size=kernel_size, |
|
use_squeeze_excite=False, |
|
se_pool_size=se_pool_size), |
|
xception_block('entry_flow/block3', |
|
depth_list=[728, 728, 728], |
|
skip_connection_type='conv', |
|
activation_fn_in_separable_conv=False, |
|
regularize_depthwise=regularize_depthwise, |
|
num_units=1, |
|
stride=2, |
|
kernel_size=kernel_size, |
|
use_squeeze_excite=use_squeeze_excite, |
|
se_pool_size=se_pool_size), |
|
xception_block('middle_flow/block1', |
|
depth_list=[728, 728, 728], |
|
skip_connection_type='sum', |
|
activation_fn_in_separable_conv=False, |
|
regularize_depthwise=regularize_depthwise, |
|
num_units=16, |
|
stride=1, |
|
kernel_size=kernel_size, |
|
use_squeeze_excite=use_squeeze_excite, |
|
se_pool_size=se_pool_size), |
|
xception_block('exit_flow/block1', |
|
depth_list=[728, 1024, 1024], |
|
skip_connection_type='conv', |
|
activation_fn_in_separable_conv=False, |
|
regularize_depthwise=regularize_depthwise, |
|
num_units=1, |
|
stride=2, |
|
kernel_size=kernel_size, |
|
use_squeeze_excite=use_squeeze_excite, |
|
se_pool_size=se_pool_size), |
|
xception_block('exit_flow/block2', |
|
depth_list=[1536, 1536, 2048], |
|
skip_connection_type='none', |
|
activation_fn_in_separable_conv=True, |
|
regularize_depthwise=regularize_depthwise, |
|
num_units=1, |
|
stride=1, |
|
kernel_size=kernel_size, |
|
unit_rate_list=multi_grid, |
|
use_squeeze_excite=False, |
|
se_pool_size=se_pool_size), |
|
] |
|
return xception(inputs, |
|
blocks=blocks, |
|
num_classes=num_classes, |
|
is_training=is_training, |
|
global_pool=global_pool, |
|
keep_prob=keep_prob, |
|
output_stride=output_stride, |
|
reuse=reuse, |
|
scope=scope, |
|
sync_batch_norm_method=sync_batch_norm_method) |
|
|
|
|
|
def xception_65(inputs, |
|
num_classes=None, |
|
is_training=True, |
|
global_pool=True, |
|
keep_prob=0.5, |
|
output_stride=None, |
|
regularize_depthwise=False, |
|
multi_grid=None, |
|
reuse=None, |
|
scope='xception_65', |
|
sync_batch_norm_method='None'): |
|
"""Xception-65 model.""" |
|
return xception_65_factory( |
|
inputs=inputs, |
|
num_classes=num_classes, |
|
is_training=is_training, |
|
global_pool=global_pool, |
|
keep_prob=keep_prob, |
|
output_stride=output_stride, |
|
regularize_depthwise=regularize_depthwise, |
|
multi_grid=multi_grid, |
|
reuse=reuse, |
|
scope=scope, |
|
use_squeeze_excite=False, |
|
se_pool_size=None, |
|
sync_batch_norm_method=sync_batch_norm_method) |
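
# A minimal usage sketch (TF1-style graph; the placeholder shape below is
# illustrative): dense-prediction features at output_stride=16, as used by
# encoder-decoder segmentation models.
#
#   images = tf.placeholder(tf.float32, [None, 513, 513, 3])
#   with slim.arg_scope(xception_arg_scope()):
#     features, end_points = xception_65(images, num_classes=None,
#                                        global_pool=False, output_stride=16)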
|
|
|
|
|
def xception_71_factory(inputs, |
|
num_classes=None, |
|
is_training=True, |
|
global_pool=True, |
|
keep_prob=0.5, |
|
output_stride=None, |
|
regularize_depthwise=False, |
|
kernel_size=3, |
|
multi_grid=None, |
|
reuse=None, |
|
scope='xception_71', |
|
use_squeeze_excite=False, |
|
se_pool_size=None, |
|
sync_batch_norm_method='None'): |
|
"""Xception-71 model factory.""" |
|
blocks = [ |
|
xception_block('entry_flow/block1', |
|
depth_list=[128, 128, 128], |
|
skip_connection_type='conv', |
|
activation_fn_in_separable_conv=False, |
|
regularize_depthwise=regularize_depthwise, |
|
num_units=1, |
|
stride=2, |
|
kernel_size=kernel_size, |
|
use_squeeze_excite=False, |
|
se_pool_size=se_pool_size), |
|
xception_block('entry_flow/block2', |
|
depth_list=[256, 256, 256], |
|
skip_connection_type='conv', |
|
activation_fn_in_separable_conv=False, |
|
regularize_depthwise=regularize_depthwise, |
|
num_units=1, |
|
stride=1, |
|
kernel_size=kernel_size, |
|
use_squeeze_excite=False, |
|
se_pool_size=se_pool_size), |
|
xception_block('entry_flow/block3', |
|
depth_list=[256, 256, 256], |
|
skip_connection_type='conv', |
|
activation_fn_in_separable_conv=False, |
|
regularize_depthwise=regularize_depthwise, |
|
num_units=1, |
|
stride=2, |
|
kernel_size=kernel_size, |
|
use_squeeze_excite=False, |
|
se_pool_size=se_pool_size), |
|
xception_block('entry_flow/block4', |
|
depth_list=[728, 728, 728], |
|
skip_connection_type='conv', |
|
activation_fn_in_separable_conv=False, |
|
regularize_depthwise=regularize_depthwise, |
|
num_units=1, |
|
stride=1, |
|
kernel_size=kernel_size, |
|
use_squeeze_excite=use_squeeze_excite, |
|
se_pool_size=se_pool_size), |
|
xception_block('entry_flow/block5', |
|
depth_list=[728, 728, 728], |
|
skip_connection_type='conv', |
|
activation_fn_in_separable_conv=False, |
|
regularize_depthwise=regularize_depthwise, |
|
num_units=1, |
|
stride=2, |
|
kernel_size=kernel_size, |
|
use_squeeze_excite=use_squeeze_excite, |
|
se_pool_size=se_pool_size), |
|
xception_block('middle_flow/block1', |
|
depth_list=[728, 728, 728], |
|
skip_connection_type='sum', |
|
activation_fn_in_separable_conv=False, |
|
regularize_depthwise=regularize_depthwise, |
|
num_units=16, |
|
stride=1, |
|
kernel_size=kernel_size, |
|
use_squeeze_excite=use_squeeze_excite, |
|
se_pool_size=se_pool_size), |
|
xception_block('exit_flow/block1', |
|
depth_list=[728, 1024, 1024], |
|
skip_connection_type='conv', |
|
activation_fn_in_separable_conv=False, |
|
regularize_depthwise=regularize_depthwise, |
|
num_units=1, |
|
stride=2, |
|
kernel_size=kernel_size, |
|
use_squeeze_excite=use_squeeze_excite, |
|
se_pool_size=se_pool_size), |
|
xception_block('exit_flow/block2', |
|
depth_list=[1536, 1536, 2048], |
|
skip_connection_type='none', |
|
activation_fn_in_separable_conv=True, |
|
regularize_depthwise=regularize_depthwise, |
|
num_units=1, |
|
stride=1, |
|
kernel_size=kernel_size, |
|
unit_rate_list=multi_grid, |
|
use_squeeze_excite=False, |
|
se_pool_size=se_pool_size), |
|
] |
|
return xception(inputs, |
|
blocks=blocks, |
|
num_classes=num_classes, |
|
is_training=is_training, |
|
global_pool=global_pool, |
|
keep_prob=keep_prob, |
|
output_stride=output_stride, |
|
reuse=reuse, |
|
scope=scope, |
|
sync_batch_norm_method=sync_batch_norm_method) |
|
|
|
|
|
def xception_71(inputs, |
|
num_classes=None, |
|
is_training=True, |
|
global_pool=True, |
|
keep_prob=0.5, |
|
output_stride=None, |
|
regularize_depthwise=False, |
|
multi_grid=None, |
|
reuse=None, |
|
scope='xception_71', |
|
sync_batch_norm_method='None'): |
|
"""Xception-71 model.""" |
|
return xception_71_factory( |
|
inputs=inputs, |
|
num_classes=num_classes, |
|
is_training=is_training, |
|
global_pool=global_pool, |
|
keep_prob=keep_prob, |
|
output_stride=output_stride, |
|
regularize_depthwise=regularize_depthwise, |
|
multi_grid=multi_grid, |
|
reuse=reuse, |
|
scope=scope, |
|
use_squeeze_excite=False, |
|
se_pool_size=None, |
|
sync_batch_norm_method=sync_batch_norm_method) |
|
|
|
|
|
def xception_arg_scope(weight_decay=0.00004, |
|
batch_norm_decay=0.9997, |
|
batch_norm_epsilon=0.001, |
|
batch_norm_scale=True, |
|
weights_initializer_stddev=0.09, |
|
regularize_depthwise=False, |
|
use_batch_norm=True, |
|
use_bounded_activation=False, |
|
sync_batch_norm_method='None'): |
|
"""Defines the default Xception arg scope. |
|
|
|
Args: |
|
weight_decay: The weight decay to use for regularizing the model. |
|
batch_norm_decay: The moving average decay when estimating layer activation |
|
statistics in batch normalization. |
|
batch_norm_epsilon: Small constant to prevent division by zero when |
|
normalizing activations by their variance in batch normalization. |
|
batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale the |
|
activations in the batch normalization layer. |
|
    weights_initializer_stddev: The standard deviation of the truncated normal
|
weight initializer. |
|
    regularize_depthwise: Whether or not to apply L2-norm regularization on the
|
depthwise convolution weights. |
|
use_batch_norm: Whether or not to use batch normalization. |
|
use_bounded_activation: Whether or not to use bounded activations. Bounded |
|
activations better lend themselves to quantized inference. |
|
sync_batch_norm_method: String, sync batchnorm method. Currently only |
|
      supports `None`. Also, it is only effective for Xception.
|
|
|
Returns: |
|
An `arg_scope` to use for the Xception models. |
|
""" |
|
batch_norm_params = { |
|
'decay': batch_norm_decay, |
|
'epsilon': batch_norm_epsilon, |
|
'scale': batch_norm_scale, |
|
} |
|
if regularize_depthwise: |
|
depthwise_regularizer = slim.l2_regularizer(weight_decay) |
|
else: |
|
depthwise_regularizer = None |
|
activation_fn = tf.nn.relu6 if use_bounded_activation else tf.nn.relu |
|
batch_norm = utils.get_batch_norm_fn(sync_batch_norm_method) |
|
with slim.arg_scope( |
|
[slim.conv2d, slim.separable_conv2d], |
|
weights_initializer=tf.truncated_normal_initializer( |
|
stddev=weights_initializer_stddev), |
|
activation_fn=activation_fn, |
|
normalizer_fn=batch_norm if use_batch_norm else None): |
|
with slim.arg_scope([batch_norm], **batch_norm_params): |
|
with slim.arg_scope( |
|
[slim.conv2d], |
|
weights_regularizer=slim.l2_regularizer(weight_decay)): |
|
with slim.arg_scope( |
|
[slim.separable_conv2d], |
|
weights_regularizer=depthwise_regularizer): |
|
with slim.arg_scope( |
|
[xception_module], |
|
use_bounded_activation=use_bounded_activation, |
|
use_explicit_padding=not use_bounded_activation) as arg_sc: |
|
return arg_sc |
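

if __name__ == '__main__':
  # Minimal smoke test, a sketch rather than part of the original module:
  # build Xception-41 as a classifier on a dummy batch and print shapes.
  # Assumes a TF1 environment where tf.contrib.slim is available.
  dummy_images = tf.zeros([1, 224, 224, 3])
  with slim.arg_scope(xception_arg_scope()):
    logits, end_points = xception_41(dummy_images, num_classes=10,
                                     is_training=False)
  print('logits shape:', logits.shape)  # (1, 1, 1, 10): pre-squeeze logits.
  print('number of end points:', len(end_points))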
|
|