# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions of EfficientNet Networks.""" | |
import math | |
from typing import Any, List, Tuple | |
# Import libraries | |
import tensorflow as tf, tf_keras | |
from official.modeling import hyperparams | |
from official.modeling import tf_utils | |
from official.vision.modeling.backbones import factory | |
from official.vision.modeling.layers import nn_blocks | |
from official.vision.modeling.layers import nn_layers | |
layers = tf_keras.layers | |

# The fixed EfficientNet-B0 architecture discovered by NAS.
# Each element represents a specification of a building block:
# (block_fn, block_repeats, kernel_size, strides, expand_ratio, in_filters,
#  out_filters, is_output)
EN_B0_BLOCK_SPECS = [
    ('mbconv', 1, 3, 1, 1, 32, 16, False),
    ('mbconv', 2, 3, 2, 6, 16, 24, True),
    ('mbconv', 2, 5, 2, 6, 24, 40, True),
    ('mbconv', 3, 3, 2, 6, 40, 80, False),
    ('mbconv', 3, 5, 1, 6, 80, 112, True),
    ('mbconv', 4, 5, 2, 6, 112, 192, False),
    ('mbconv', 1, 3, 1, 6, 192, 320, True),
]

SCALING_MAP = {
    'b0': dict(width_scale=1.0, depth_scale=1.0),
    'b1': dict(width_scale=1.0, depth_scale=1.1),
    'b2': dict(width_scale=1.1, depth_scale=1.2),
    'b3': dict(width_scale=1.2, depth_scale=1.4),
    'b4': dict(width_scale=1.4, depth_scale=1.8),
    'b5': dict(width_scale=1.6, depth_scale=2.2),
    'b6': dict(width_scale=1.8, depth_scale=2.6),
    'b7': dict(width_scale=2.0, depth_scale=3.1),
}
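# A quick sanity check of the compound-scaling coefficients (illustrative
# values computed by hand, not part of the library): with 'b7', the stem width
# doubles, and nn_layers.round_filters(32, 2.0) == 64 since 64 is already a
# multiple of 8; stage depths roughly triple via round_repeats below.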


class BlockSpec():
  """Specifies the block configuration for EfficientNet."""

  def __init__(self, block_fn: str, block_repeats: int, kernel_size: int,
               strides: int, expand_ratio: float, in_filters: int,
               out_filters: int, is_output: bool, width_scale: float,
               depth_scale: float):
    self.block_fn = block_fn
    self.block_repeats = round_repeats(block_repeats, depth_scale)
    self.kernel_size = kernel_size
    self.strides = strides
    self.expand_ratio = expand_ratio
    self.in_filters = nn_layers.round_filters(in_filters, width_scale)
    self.out_filters = nn_layers.round_filters(out_filters, width_scale)
    self.is_output = is_output


def round_repeats(repeats: int, multiplier: float, skip: bool = False) -> int:
  """Returns rounded number of block repeats based on the depth multiplier."""
  if skip or not multiplier:
    return repeats
  return int(math.ceil(multiplier * repeats))
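# Illustrative values (hypothetical usage, not part of the library): with
# EfficientNet-B4's depth_scale of 1.8, a stage spec of 3 repeats becomes
# math.ceil(1.8 * 3) == 6 repeated blocks.
#
#   round_repeats(3, 1.8)             # -> 6
#   round_repeats(3, 1.8, skip=True)  # -> 3 (scaling disabled)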


def block_spec_decoder(specs: List[Tuple[Any, ...]], width_scale: float,
                       depth_scale: float) -> List[BlockSpec]:
  """Decodes and returns specs for a block."""
  decoded_specs = []
  for s in specs:
    s = s + (
        width_scale,
        depth_scale,
    )
    decoded_specs.append(BlockSpec(*s))
  return decoded_specs
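# Usage sketch (a hypothetical snippet, not executed at import time): decoding
# the fixed B0 specs with the 'b4' coefficients widens every stage with
# round_filters and deepens it with round_repeats.
#
#   b4 = SCALING_MAP['b4']
#   decoded = block_spec_decoder(
#       EN_B0_BLOCK_SPECS, b4['width_scale'], b4['depth_scale'])
#   # e.g. the fourth stage grows from 3 to math.ceil(1.8 * 3) == 6 repeats.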


class EfficientNet(tf_keras.Model):
  """Creates an EfficientNet family model.

  This implements the EfficientNet model from:
    Mingxing Tan, Quoc V. Le.
    EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks.
    (https://arxiv.org/pdf/1905.11946)
  """

  def __init__(self,
               model_id: str,
               input_specs: tf_keras.layers.InputSpec = layers.InputSpec(
                   shape=[None, None, None, 3]),
               se_ratio: float = 0.0,
               stochastic_depth_drop_rate: float = 0.0,
               kernel_initializer: str = 'VarianceScaling',
               kernel_regularizer: tf_keras.regularizers.Regularizer = None,
               bias_regularizer: tf_keras.regularizers.Regularizer = None,
               activation: str = 'relu',
               se_inner_activation: str = 'relu',
               use_sync_bn: bool = False,
               norm_momentum: float = 0.99,
               norm_epsilon: float = 0.001,  # pytype: disable=annotation-type-mismatch  # typed-keras
               **kwargs):
"""Initializes an EfficientNet model. | |
Args: | |
model_id: A `str` of model ID of EfficientNet. | |
input_specs: A `tf_keras.layers.InputSpec` of the input tensor. | |
se_ratio: A `float` of squeeze and excitation ratio for inverted | |
bottleneck blocks. | |
stochastic_depth_drop_rate: A `float` of drop rate for drop connect layer. | |
kernel_initializer: A `str` for kernel initializer of convolutional | |
layers. | |
kernel_regularizer: A `tf_keras.regularizers.Regularizer` object for | |
Conv2D. Default to None. | |
bias_regularizer: A `tf_keras.regularizers.Regularizer` object for Conv2D. | |
Default to None. | |
activation: A `str` of name of the activation function. | |
se_inner_activation: A `str` of name of the activation function used in | |
Sequeeze and Excitation layer. | |
use_sync_bn: If True, use synchronized batch normalization. | |
norm_momentum: A `float` of normalization momentum for the moving average. | |
norm_epsilon: A `float` added to variance to avoid dividing by zero. | |
**kwargs: Additional keyword arguments to be passed. | |
""" | |
    self._model_id = model_id
    self._input_specs = input_specs
    self._se_ratio = se_ratio
    self._stochastic_depth_drop_rate = stochastic_depth_drop_rate
    self._use_sync_bn = use_sync_bn
    self._activation = activation
    self._se_inner_activation = se_inner_activation
    self._kernel_initializer = kernel_initializer
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    self._norm = layers.BatchNormalization

    if tf_keras.backend.image_data_format() == 'channels_last':
      bn_axis = -1
    else:
      bn_axis = 1

    # Build EfficientNet.
    inputs = tf_keras.Input(shape=input_specs.shape[1:])
    width_scale = SCALING_MAP[model_id]['width_scale']
    depth_scale = SCALING_MAP[model_id]['depth_scale']

    # Build stem.
    x = layers.Conv2D(
        filters=nn_layers.round_filters(32, width_scale),
        kernel_size=3,
        strides=2,
        use_bias=False,
        padding='same',
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)(
            inputs)
    x = self._norm(
        axis=bn_axis,
        momentum=norm_momentum,
        epsilon=norm_epsilon,
        synchronized=use_sync_bn)(
            x)
    x = tf_utils.get_activation(activation)(x)

    # Build intermediate blocks.
    endpoints = {}
    endpoint_level = 2
    decoded_specs = block_spec_decoder(EN_B0_BLOCK_SPECS, width_scale,
                                       depth_scale)

    for i, specs in enumerate(decoded_specs):
      x = self._block_group(
          inputs=x, specs=specs, name='block_group_{}'.format(i))
      if specs.is_output:
        endpoints[str(endpoint_level)] = x
        endpoint_level += 1

    # Build output specs for downstream tasks.
    self._output_specs = {l: endpoints[l].get_shape() for l in endpoints}

    # Build the final conv for classification.
    x = layers.Conv2D(
        filters=nn_layers.round_filters(1280, width_scale),
        kernel_size=1,
        strides=1,
        use_bias=False,
        padding='same',
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)(
            x)
    x = self._norm(
        axis=bn_axis,
        momentum=norm_momentum,
        epsilon=norm_epsilon,
        synchronized=use_sync_bn)(
            x)
    endpoints[str(endpoint_level)] = tf_utils.get_activation(activation)(x)

    super(EfficientNet, self).__init__(
        inputs=inputs, outputs=endpoints, **kwargs)

  def _block_group(self,
                   inputs: tf.Tensor,
                   specs: BlockSpec,
                   name: str = 'block_group'):
    """Creates one group of blocks for the EfficientNet model.

    Args:
      inputs: A `tf.Tensor` of size `[batch, height, width, channels]`.
      specs: The specifications for one inverted bottleneck block group.
      name: A `str` name for the block.

    Returns:
      The output `tf.Tensor` of the block layer.
    """
    if specs.block_fn == 'mbconv':
      block_fn = nn_blocks.InvertedBottleneckBlock
    else:
      raise ValueError('Block func {} not supported.'.format(specs.block_fn))

    x = block_fn(
        in_filters=specs.in_filters,
        out_filters=specs.out_filters,
        expand_ratio=specs.expand_ratio,
        strides=specs.strides,
        kernel_size=specs.kernel_size,
        se_ratio=self._se_ratio,
        stochastic_depth_drop_rate=self._stochastic_depth_drop_rate,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer,
        activation=self._activation,
        se_inner_activation=self._se_inner_activation,
        use_sync_bn=self._use_sync_bn,
        norm_momentum=self._norm_momentum,
        norm_epsilon=self._norm_epsilon)(
            inputs)

    for _ in range(1, specs.block_repeats):
      x = block_fn(
          in_filters=specs.out_filters,  # Set 'in_filters' to 'out_filters'.
          out_filters=specs.out_filters,
          expand_ratio=specs.expand_ratio,
          strides=1,  # Fix strides to 1.
          kernel_size=specs.kernel_size,
          se_ratio=self._se_ratio,
          stochastic_depth_drop_rate=self._stochastic_depth_drop_rate,
          kernel_initializer=self._kernel_initializer,
          kernel_regularizer=self._kernel_regularizer,
          bias_regularizer=self._bias_regularizer,
          activation=self._activation,
          se_inner_activation=self._se_inner_activation,
          use_sync_bn=self._use_sync_bn,
          norm_momentum=self._norm_momentum,
          norm_epsilon=self._norm_epsilon)(
              x)

    return tf.identity(x, name=name)

  def get_config(self):
    config_dict = {
        'model_id': self._model_id,
        'se_ratio': self._se_ratio,
        'stochastic_depth_drop_rate': self._stochastic_depth_drop_rate,
        'kernel_initializer': self._kernel_initializer,
        'kernel_regularizer': self._kernel_regularizer,
        'bias_regularizer': self._bias_regularizer,
        'activation': self._activation,
        'se_inner_activation': self._se_inner_activation,
        'use_sync_bn': self._use_sync_bn,
        'norm_momentum': self._norm_momentum,
        'norm_epsilon': self._norm_epsilon,
    }
    return config_dict

  @classmethod
  def from_config(cls, config, custom_objects=None):
    return cls(**config)

  @property
  def output_specs(self):
    """A dict of {level: TensorShape} pairs for the model output."""
    return self._output_specs
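
# Minimal usage sketch (a hypothetical snippet; stride values follow from the
# stem and per-stage strides above for a 224x224 input):
#
#   backbone = EfficientNet(model_id='b0')
#   endpoints = backbone(tf_keras.Input(shape=(224, 224, 3)))
#   # `backbone.output_specs` maps levels '2'..'5' to multi-scale features at
#   # strides 4, 8, 16 and 32 for FPN-style decoders; the model outputs also
#   # include the final 1x1-conv endpoint used by classification heads.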


@factory.register_backbone_builder('efficientnet')
def build_efficientnet(
    input_specs: tf_keras.layers.InputSpec,
    backbone_config: hyperparams.Config,
    norm_activation_config: hyperparams.Config,
    l2_regularizer: tf_keras.regularizers.Regularizer = None,
    se_inner_activation: str = 'relu') -> tf_keras.Model:  # pytype: disable=annotation-type-mismatch  # typed-keras
  """Builds an EfficientNet backbone from a config."""
  backbone_type = backbone_config.type
  backbone_cfg = backbone_config.get()
  assert backbone_type == 'efficientnet', (f'Inconsistent backbone type '
                                           f'{backbone_type}')

  return EfficientNet(
      model_id=backbone_cfg.model_id,
      input_specs=input_specs,
      stochastic_depth_drop_rate=backbone_cfg.stochastic_depth_drop_rate,
      se_ratio=backbone_cfg.se_ratio,
      activation=norm_activation_config.activation,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
      norm_epsilon=norm_activation_config.norm_epsilon,
      kernel_regularizer=l2_regularizer,
      se_inner_activation=se_inner_activation)
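
# Config-driven usage sketch. The config classes referenced below are assumed
# to be the ones in official.vision.configs (verify the import paths in your
# checkout):
#
#   from official.vision.configs import backbones as backbones_cfg
#   from official.vision.configs import common as common_cfg
#
#   backbone = build_efficientnet(
#       input_specs=tf_keras.layers.InputSpec(shape=[None, 224, 224, 3]),
#       backbone_config=backbones_cfg.Backbone(
#           type='efficientnet',
#           efficientnet=backbones_cfg.EfficientNet(model_id='b0')),
#       norm_activation_config=common_cfg.NormActivation(activation='swish'))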