""" | |
This implementation of ResNet is a bit modification version of `https://github.com/rwightman/pytorch-image-models.git` | |
""" | |
from typing import List, Callable, Optional, Tuple, Type, Dict, Union | |
import math | |
import numpy as np | |
import torch | |
import torch.nn as nn | |
import torch.nn.functional as F | |
from .nn_module import Flatten | |
def to_2tuple(item: int) -> tuple:
    """
    Overview:
        Convert a scalar to a 2-tuple or return the item if it's not a scalar.
    Arguments:
        - item (:obj:`int`): An item to be converted to a 2-tuple.
    Returns:
        - (:obj:`tuple`): A 2-tuple of the item.
    """
    if np.isscalar(item):
        return (item, item)
    else:
        return item
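# Usage sketch for ``to_2tuple`` (illustrative only): scalars are duplicated, non-scalars pass through.
#   to_2tuple(3)      -> (3, 3)
#   to_2tuple((3, 5)) -> (3, 5)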
# Calculate asymmetric TensorFlow-like 'SAME' padding for a convolution
def get_same_padding(x: int, k: int, s: int, d: int) -> int:
    """
    Overview:
        Calculate asymmetric TensorFlow-like 'SAME' padding for a convolution.
    Arguments:
        - x (:obj:`int`): The size of the input.
        - k (:obj:`int`): The size of the kernel.
        - s (:obj:`int`): The stride of the convolution.
        - d (:obj:`int`): The dilation of the convolution.
    Returns:
        - (:obj:`int`): The size of the padding.
    """
    return max((math.ceil(x / s) - 1) * s + (k - 1) * d + 1 - x, 0)
# Dynamically pad input x with 'SAME' padding for conv with specified args
def pad_same(x, k: List[int], s: List[int], d: List[int] = (1, 1), value: float = 0):
    """
    Overview:
        Dynamically pad input x with 'SAME' padding for conv with specified args.
    Arguments:
        - x (:obj:`Tensor`): The input tensor.
        - k (:obj:`List[int]`): The size of the kernel.
        - s (:obj:`List[int]`): The stride of the convolution.
        - d (:obj:`List[int]`): The dilation of the convolution.
        - value (:obj:`float`): Value to fill the padding.
    Returns:
        - (:obj:`Tensor`): The padded tensor.
    """
    ih, iw = x.size()[-2:]
    pad_h, pad_w = get_same_padding(ih, k[0], s[0], d[0]), get_same_padding(iw, k[1], s[1], d[1])
    if pad_h > 0 or pad_w > 0:
        x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2], value=value)
    return x
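# Usage sketch for ``pad_same`` (illustrative only): with a 3x3 kernel and stride 2 on a 7x7 input,
# the asymmetric padding makes a subsequent valid conv produce ceil(7 / 2) = 4 outputs per side.
#   x = torch.randn(1, 8, 7, 7)
#   y = pad_same(x, k=[3, 3], s=[2, 2])                    # y.shape == (1, 8, 9, 9)
#   out = F.conv2d(y, torch.randn(4, 8, 3, 3), stride=2)   # out.shape == (1, 4, 4, 4)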
def avg_pool2d_same(
        x,
        kernel_size: List[int],
        stride: List[int],
        padding: List[int] = (0, 0),
        ceil_mode: bool = False,
        count_include_pad: bool = True
):
    """
    Overview:
        Apply average pooling with 'SAME' padding on the input tensor.
    Arguments:
        - x (:obj:`Tensor`): The input tensor.
        - kernel_size (:obj:`List[int]`): The size of the kernel.
        - stride (:obj:`List[int]`): The stride of the convolution.
        - padding (:obj:`List[int]`): The size of the padding.
        - ceil_mode (:obj:`bool`): When True, will use ceil instead of floor to compute the output shape.
        - count_include_pad (:obj:`bool`): When True, will include the zero-padding in the averaging calculation.
    Returns:
        - (:obj:`Tensor`): The tensor after average pooling.
    """
    # FIXME how to deal with count_include_pad vs not for external padding?
    x = pad_same(x, kernel_size, stride)
    return F.avg_pool2d(x, kernel_size, stride, (0, 0), ceil_mode, count_include_pad)
class AvgPool2dSame(nn.AvgPool2d):
    """
    Overview:
        Tensorflow-like 'SAME' wrapper for 2D average pooling.
    Interfaces:
        ``__init__``, ``forward``
    """

    def __init__(
            self,
            kernel_size: int,
            stride: Optional[Tuple[int, int]] = None,
            padding: int = 0,
            ceil_mode: bool = False,
            count_include_pad: bool = True
    ) -> None:
        """
        Overview:
            Initialize the AvgPool2dSame with given arguments.
        Arguments:
            - kernel_size (:obj:`int`): The size of the window to take an average over.
            - stride (:obj:`Optional[Tuple[int, int]]`): The stride of the window. If None, default to kernel_size.
            - padding (:obj:`int`): Implicit zero padding to be added on both sides.
            - ceil_mode (:obj:`bool`): When True, will use `ceil` instead of `floor` to compute the output shape.
            - count_include_pad (:obj:`bool`): When True, will include the zero-padding in the averaging calculation.
        """
        kernel_size = to_2tuple(kernel_size)
        stride = to_2tuple(stride)
        super(AvgPool2dSame, self).__init__(kernel_size, stride, (0, 0), ceil_mode, count_include_pad)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Overview:
            Forward pass of the AvgPool2dSame.
        Argument:
            - x (:obj:`torch.Tensor`): Input tensor.
        Returns:
            - (:obj:`torch.Tensor`): Output tensor after average pooling.
        """
        x = pad_same(x, self.kernel_size, self.stride)
        return F.avg_pool2d(x, self.kernel_size, self.stride, self.padding, self.ceil_mode, self.count_include_pad)
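# Usage sketch for ``AvgPool2dSame`` (illustrative only): unlike ``nn.AvgPool2d``, the 'SAME' variant
# pads dynamically so that the output size is ceil(input / stride) for any input size.
#   pool = AvgPool2dSame(kernel_size=3, stride=2)
#   pool(torch.randn(1, 4, 7, 7)).shape   # torch.Size([1, 4, 4, 4])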
def _create_pool(num_features: int,
                 num_classes: int,
                 pool_type: str = 'avg',
                 use_conv: bool = False) -> Tuple[nn.Module, int]:
    """
    Overview:
        Create a global pooling layer based on the given arguments.
    Arguments:
        - num_features (:obj:`int`): Number of input features.
        - num_classes (:obj:`int`): Number of output classes.
        - pool_type (:obj:`str`): Type of the pooling operation. Defaults to 'avg'.
        - use_conv (:obj:`bool`): Whether to use convolutional layer after pooling. Defaults to False.
    Returns:
        - (:obj:`Tuple[nn.Module, int]`): The created global pooling layer and the number of pooled features.
    """
    flatten_in_pool = not use_conv  # flatten when we use a Linear layer after pooling
    if not pool_type:
        assert num_classes == 0 or use_conv, \
            'Pooling can only be disabled if classifier is also removed or conv classifier is used'
        flatten_in_pool = False  # disable flattening if pooling is pass-through (no pooling)
    assert flatten_in_pool
    global_pool = nn.AdaptiveAvgPool2d(1)
    num_pooled_features = num_features * 1
    return global_pool, num_pooled_features
def _create_fc(num_features: int, num_classes: int, use_conv: bool = False) -> nn.Module:
    """
    Overview:
        Create a fully connected layer based on the given arguments.
    Arguments:
        - num_features (:obj:`int`): Number of input features.
        - num_classes (:obj:`int`): Number of output classes.
        - use_conv (:obj:`bool`): Whether to use convolutional layer. Defaults to False.
    Returns:
        - (:obj:`nn.Module`): The created fully connected layer.
    """
    if num_classes <= 0:
        fc = nn.Identity()  # pass-through (no classifier)
    elif use_conv:
        fc = nn.Conv2d(num_features, num_classes, 1, bias=True)
    else:
        # use nn.Linear for simplification
        fc = nn.Linear(num_features, num_classes, bias=True)
    return fc
def create_classifier(num_features: int,
                      num_classes: int,
                      pool_type: str = 'avg',
                      use_conv: bool = False) -> Tuple[nn.Module, nn.Module]:
    """
    Overview:
        Create a classifier with global pooling layer and fully connected layer.
    Arguments:
        - num_features (:obj:`int`): The number of features.
        - num_classes (:obj:`int`): The number of classes for the final classification.
        - pool_type (:obj:`str`): The type of pooling to use; 'avg' for Average Pooling.
        - use_conv (:obj:`bool`): Whether to use convolution or not.
    Returns:
        - global_pool (:obj:`nn.Module`): The created global pooling layer.
        - fc (:obj:`nn.Module`): The created fully connected layer.
    """
    assert pool_type == 'avg'
    global_pool, num_pooled_features = _create_pool(num_features, num_classes, pool_type, use_conv=use_conv)
    fc = _create_fc(num_pooled_features, num_classes, use_conv=use_conv)
    return global_pool, fc
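# Usage sketch for ``create_classifier`` (illustrative only): build a (pool, fc) pair for a
# 512-channel feature map and apply it manually.
#   global_pool, fc = create_classifier(num_features=512, num_classes=10)
#   feat = global_pool(torch.randn(2, 512, 7, 7))    # (2, 512, 1, 1)
#   logits = fc(feat.view(feat.shape[0], -1))        # (2, 10)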
class ClassifierHead(nn.Module):
    """
    Overview:
        Classifier head with configurable global pooling and dropout.
    Interfaces:
        ``__init__``, ``forward``
    """

    def __init__(
            self,
            in_chs: int,
            num_classes: int,
            pool_type: str = 'avg',
            drop_rate: float = 0.,
            use_conv: bool = False
    ) -> None:
        """
        Overview:
            Initialize the ClassifierHead with given arguments.
        Arguments:
            - in_chs (:obj:`int`): Number of input channels.
            - num_classes (:obj:`int`): Number of classes for the final classification.
            - pool_type (:obj:`str`): The type of pooling to use; 'avg' for Average Pooling.
            - drop_rate (:obj:`float`): The dropout rate.
            - use_conv (:obj:`bool`): Whether to use convolution or not.
        """
        super(ClassifierHead, self).__init__()
        self.drop_rate = drop_rate
        self.global_pool, num_pooled_features = _create_pool(in_chs, num_classes, pool_type, use_conv=use_conv)
        self.fc = _create_fc(num_pooled_features, num_classes, use_conv=use_conv)
        # The global pool above keeps the (B, C, 1, 1) layout, so a Linear classifier needs the pooled
        # features flattened first; a conv classifier consumes the 4D tensor directly.
        self.flatten = nn.Identity() if use_conv else Flatten(1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Overview:
            Forward pass of the ClassifierHead.
        Argument:
            - x (:obj:`torch.Tensor`): Input tensor.
        Returns:
            - (:obj:`torch.Tensor`): Output tensor after classification.
        """
        x = self.global_pool(x)
        x = self.flatten(x)
        if self.drop_rate:
            x = F.dropout(x, p=float(self.drop_rate), training=self.training)
        x = self.fc(x)
        return x
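# Usage sketch for ``ClassifierHead`` (illustrative only): pool, flatten and classify a feature map.
#   head = ClassifierHead(in_chs=512, num_classes=10, drop_rate=0.1)
#   head(torch.randn(2, 512, 7, 7)).shape   # torch.Size([2, 10])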
def create_attn(layer: nn.Module, plane: int) -> None:
    """
    Overview:
        Create an attention mechanism. Attention layers are not supported in this simplified
        implementation, so this function always returns ``None``.
    Arguments:
        - layer (:obj:`nn.Module`): The layer where the attention is to be applied.
        - plane (:obj:`int`): The plane on which the attention is to be applied.
    Returns:
        - None
    """
    return None
def get_padding(kernel_size: int, stride: int, dilation: int = 1) -> int:
    """
    Overview:
        Compute the padding based on the kernel size, stride and dilation.
    Arguments:
        - kernel_size (:obj:`int`): The size of the kernel.
        - stride (:obj:`int`): The stride of the convolution.
        - dilation (:obj:`int`): The dilation factor.
    Returns:
        - padding (:obj:`int`): The computed padding.
    """
    padding = ((stride - 1) + dilation * (kernel_size - 1)) // 2
    return padding
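# Usage sketch for ``get_padding`` (illustrative only):
#   get_padding(kernel_size=3, stride=1)   # 1 -> a stride-1 3x3 conv keeps the spatial size
#   get_padding(kernel_size=7, stride=2)   # 3 -> the padding used by the default ResNet stem conv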
class BasicBlock(nn.Module):
    """
    Overview:
        The basic building block for models like ResNet. This class extends pytorch's Module class.
        It represents a standard block of layers including two convolutions, batch normalization,
        an optional attention mechanism, and activation functions.
    Interfaces:
        ``__init__``, ``forward``, ``zero_init_last_bn``
    Properties:
        - expansion (:obj:`int`): Specifies the expansion factor for the planes of the conv layers.
    """
    expansion = 1

    def __init__(
            self,
            inplanes: int,
            planes: int,
            stride: int = 1,
            downsample: Callable = None,
            cardinality: int = 1,
            base_width: int = 64,
            reduce_first: int = 1,
            dilation: int = 1,
            first_dilation: int = None,
            act_layer: Callable = nn.ReLU,
            norm_layer: Callable = nn.BatchNorm2d,
            attn_layer: Callable = None,
            aa_layer: Callable = None,
            drop_block: Callable = None,
            drop_path: Callable = None
    ) -> None:
        """
        Overview:
            Initialize the BasicBlock with given parameters.
        Arguments:
            - inplanes (:obj:`int`): Number of input channels.
            - planes (:obj:`int`): Number of output channels.
            - stride (:obj:`int`): The stride of the convolutional layer.
            - downsample (:obj:`Callable`): Function for downsampling the inputs.
            - cardinality (:obj:`int`): Group size for grouped convolution.
            - base_width (:obj:`int`): Base width of the convolutions.
            - reduce_first (:obj:`int`): Reduction factor for first convolution of each block.
            - dilation (:obj:`int`): Spacing between kernel points.
            - first_dilation (:obj:`int`): First dilation value.
            - act_layer (:obj:`Callable`): Function for activation layer.
            - norm_layer (:obj:`Callable`): Function for normalization layer.
            - attn_layer (:obj:`Callable`): Function for attention layer.
            - aa_layer (:obj:`Callable`): Function for anti-aliasing layer.
            - drop_block (:obj:`Callable`): Method for dropping block.
            - drop_path (:obj:`Callable`): Method for dropping path.
        """
        super(BasicBlock, self).__init__()

        assert cardinality == 1, 'BasicBlock only supports cardinality of 1'
        assert base_width == 64, 'BasicBlock does not support changing base width'

        first_planes = planes // reduce_first
        outplanes = planes * self.expansion
        first_dilation = first_dilation or dilation
        use_aa = aa_layer is not None and (stride == 2 or first_dilation != dilation)

        self.conv1 = nn.Conv2d(
            inplanes,
            first_planes,
            kernel_size=3,
            stride=1 if use_aa else stride,
            padding=first_dilation,
            dilation=first_dilation,
            bias=False
        )
        self.bn1 = norm_layer(first_planes)
        self.act1 = act_layer(inplace=True)
        self.aa = aa_layer(channels=first_planes, stride=stride) if use_aa else None

        self.conv2 = nn.Conv2d(first_planes, outplanes, kernel_size=3, padding=dilation, dilation=dilation, bias=False)
        self.bn2 = norm_layer(outplanes)

        self.se = create_attn(attn_layer, outplanes)

        self.act2 = act_layer(inplace=True)
        self.downsample = downsample
        self.stride = stride
        self.dilation = dilation
        self.drop_block = drop_block
        self.drop_path = drop_path

    def zero_init_last_bn(self) -> None:
        """
        Overview:
            Initialize the last batch normalization layer with zeros.
        """
        nn.init.zeros_(self.bn2.weight)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Overview:
            Defines the computation performed at every call.
        Arguments:
            - x (:obj:`torch.Tensor`): The input tensor.
        Returns:
            - output (:obj:`torch.Tensor`): The output tensor after passing through the BasicBlock.
        """
        shortcut = x

        x = self.conv1(x)
        x = self.bn1(x)
        if self.drop_block is not None:
            x = self.drop_block(x)
        x = self.act1(x)
        if self.aa is not None:
            x = self.aa(x)

        x = self.conv2(x)
        x = self.bn2(x)
        if self.drop_block is not None:
            x = self.drop_block(x)

        if self.se is not None:
            x = self.se(x)

        if self.drop_path is not None:
            x = self.drop_path(x)

        if self.downsample is not None:
            shortcut = self.downsample(shortcut)
        x += shortcut
        x = self.act2(x)

        return x
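# Usage sketch for ``BasicBlock`` (illustrative only): a stride-2 block needs a matching downsample
# branch (see ``downsample_conv`` below) so that the shortcut and residual shapes agree.
#   down = downsample_conv(64, 128, kernel_size=1, stride=2)
#   block = BasicBlock(inplanes=64, planes=128, stride=2, downsample=down)
#   block(torch.randn(2, 64, 32, 32)).shape   # torch.Size([2, 128, 16, 16])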
class Bottleneck(nn.Module):
    """
    Overview:
        The bottleneck building block used to construct deeper ResNet networks, following PyTorch's
        ResNet implementation. The block stacks a 1x1 reduction convolution, a 3x3 (optionally grouped)
        convolution and a 1x1 expansion convolution, each followed by normalization and activation,
        with optional attention, anti-aliasing and drop layers.
    Interfaces:
        ``__init__``, ``forward``, ``zero_init_last_bn``
    Properties:
        - expansion (:obj:`int`): Specifies the expansion factor for the planes of the conv layers.
    """
    expansion = 4

    def __init__(
            self,
            inplanes: int,
            planes: int,
            stride: int = 1,
            downsample: Optional[nn.Module] = None,
            cardinality: int = 1,
            base_width: int = 64,
            reduce_first: int = 1,
            dilation: int = 1,
            first_dilation: Optional[int] = None,
            act_layer: Type[nn.Module] = nn.ReLU,
            norm_layer: Type[nn.Module] = nn.BatchNorm2d,
            attn_layer: Optional[Type[nn.Module]] = None,
            aa_layer: Optional[Type[nn.Module]] = None,
            drop_block: Callable = None,
            drop_path: Callable = None
    ) -> None:
        """
        Overview:
            Initialize the Bottleneck class with various parameters.
        Arguments:
            - inplanes (:obj:`int`): The number of input planes.
            - planes (:obj:`int`): The number of output planes.
            - stride (:obj:`int`, optional): The stride size, defaults to 1.
            - downsample (:obj:`nn.Module`, optional): The downsample method, defaults to None.
            - cardinality (:obj:`int`, optional): The size of the group convolutions, defaults to 1.
            - base_width (:obj:`int`, optional): The base width, defaults to 64.
            - reduce_first (:obj:`int`, optional): The first reduction factor, defaults to 1.
            - dilation (:obj:`int`, optional): The dilation factor, defaults to 1.
            - first_dilation (:obj:`int`, optional): The first dilation factor, defaults to None.
            - act_layer (:obj:`Type[nn.Module]`, optional): The activation layer type, defaults to nn.ReLU.
            - norm_layer (:obj:`Type[nn.Module]`, optional): The normalization layer type, defaults to nn.BatchNorm2d.
            - attn_layer (:obj:`Type[nn.Module]`, optional): The attention layer type, defaults to None.
            - aa_layer (:obj:`Type[nn.Module]`, optional): The anti-aliasing layer type, defaults to None.
            - drop_block (:obj:`Callable`): The dropout block, defaults to None.
            - drop_path (:obj:`Callable`): The drop path, defaults to None.
        """
        super(Bottleneck, self).__init__()

        width = int(math.floor(planes * (base_width / 64)) * cardinality)
        first_planes = width // reduce_first
        outplanes = planes * self.expansion
        first_dilation = first_dilation or dilation
        use_aa = aa_layer is not None and (stride == 2 or first_dilation != dilation)

        self.conv1 = nn.Conv2d(inplanes, first_planes, kernel_size=1, bias=False)
        self.bn1 = norm_layer(first_planes)
        self.act1 = act_layer(inplace=True)

        self.conv2 = nn.Conv2d(
            first_planes,
            width,
            kernel_size=3,
            stride=1 if use_aa else stride,
            padding=first_dilation,
            dilation=first_dilation,
            groups=cardinality,
            bias=False
        )
        self.bn2 = norm_layer(width)
        self.act2 = act_layer(inplace=True)
        self.aa = aa_layer(channels=width, stride=stride) if use_aa else None

        self.conv3 = nn.Conv2d(width, outplanes, kernel_size=1, bias=False)
        self.bn3 = norm_layer(outplanes)

        self.se = create_attn(attn_layer, outplanes)

        self.act3 = act_layer(inplace=True)
        self.downsample = downsample
        self.stride = stride
        self.dilation = dilation
        self.drop_block = drop_block
        self.drop_path = drop_path

    def zero_init_last_bn(self) -> None:
        """
        Overview:
            Initialize the last batch normalization layer with zeros.
        """
        nn.init.zeros_(self.bn3.weight)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Overview:
            Defines the computation performed at every call.
        Arguments:
            - x (:obj:`Tensor`): The input tensor.
        Returns:
            - x (:obj:`Tensor`): The output tensor resulting from the computation.
        """
        shortcut = x

        x = self.conv1(x)
        x = self.bn1(x)
        if self.drop_block is not None:
            x = self.drop_block(x)
        x = self.act1(x)

        x = self.conv2(x)
        x = self.bn2(x)
        if self.drop_block is not None:
            x = self.drop_block(x)
        x = self.act2(x)
        if self.aa is not None:
            x = self.aa(x)

        x = self.conv3(x)
        x = self.bn3(x)
        if self.drop_block is not None:
            x = self.drop_block(x)

        if self.se is not None:
            x = self.se(x)

        if self.drop_path is not None:
            x = self.drop_path(x)

        if self.downsample is not None:
            shortcut = self.downsample(shortcut)
        x += shortcut
        x = self.act3(x)

        return x
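# Usage sketch for ``Bottleneck`` (illustrative only): the block expands its output channels by
# ``expansion`` (4), so planes=64 produces 256 output channels.
#   down = downsample_conv(64, 64 * Bottleneck.expansion, kernel_size=1, stride=1)
#   block = Bottleneck(inplanes=64, planes=64, stride=1, downsample=down)
#   block(torch.randn(2, 64, 56, 56)).shape   # torch.Size([2, 256, 56, 56])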
def downsample_conv(
        in_channels: int,
        out_channels: int,
        kernel_size: int,
        stride: int = 1,
        dilation: int = 1,
        first_dilation: int = None,
        norm_layer: Type[nn.Module] = None
) -> nn.Sequential:
    """
    Overview:
        Create a sequential module for downsampling that includes a convolution layer and a normalization layer.
    Arguments:
        - in_channels (:obj:`int`): The number of input channels.
        - out_channels (:obj:`int`): The number of output channels.
        - kernel_size (:obj:`int`): The size of the kernel.
        - stride (:obj:`int`, optional): The stride size, defaults to 1.
        - dilation (:obj:`int`, optional): The dilation factor, defaults to 1.
        - first_dilation (:obj:`int`, optional): The first dilation factor, defaults to None.
        - norm_layer (:obj:`Type[nn.Module]`, optional): The normalization layer type, defaults to nn.BatchNorm2d.
    Returns:
        - (:obj:`nn.Sequential`): A sequence of layers performing downsampling through convolution.
    """
    norm_layer = norm_layer or nn.BatchNorm2d
    kernel_size = 1 if stride == 1 and dilation == 1 else kernel_size
    first_dilation = (first_dilation or dilation) if kernel_size > 1 else 1
    p = get_padding(kernel_size, stride, first_dilation)

    return nn.Sequential(
        *[
            nn.Conv2d(
                in_channels, out_channels, kernel_size, stride=stride, padding=p, dilation=first_dilation, bias=False
            ),
            norm_layer(out_channels)
        ]
    )
def downsample_avg(
        in_channels: int,
        out_channels: int,
        kernel_size: int,
        stride: int = 1,
        dilation: int = 1,
        first_dilation: int = None,
        norm_layer: Type[nn.Module] = None
) -> nn.Sequential:
    """
    Overview:
        Create a sequential module for downsampling that includes an average pooling layer, a convolution layer,
        and a normalization layer.
    Arguments:
        - in_channels (:obj:`int`): The number of input channels.
        - out_channels (:obj:`int`): The number of output channels.
        - kernel_size (:obj:`int`): The size of the kernel.
        - stride (:obj:`int`, optional): The stride size, defaults to 1.
        - dilation (:obj:`int`, optional): The dilation factor, defaults to 1.
        - first_dilation (:obj:`int`, optional): The first dilation factor, defaults to None.
        - norm_layer (:obj:`Type[nn.Module]`, optional): The normalization layer type, defaults to nn.BatchNorm2d.
    Returns:
        - (:obj:`nn.Sequential`): A sequence of layers performing downsampling through average pooling.
    """
    norm_layer = norm_layer or nn.BatchNorm2d
    avg_stride = stride if dilation == 1 else 1
    if stride == 1 and dilation == 1:
        pool = nn.Identity()
    else:
        avg_pool_fn = AvgPool2dSame if avg_stride == 1 and dilation > 1 else nn.AvgPool2d
        pool = avg_pool_fn(2, avg_stride, ceil_mode=True, count_include_pad=False)

    return nn.Sequential(
        *[pool,
          nn.Conv2d(in_channels, out_channels, 1, stride=1, padding=0, bias=False),
          norm_layer(out_channels)]
    )
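# Usage sketch for ``downsample_avg`` (illustrative only): the ResNet-D style shortcut pools first and
# then projects with a 1x1 conv, instead of a strided 1x1 conv that discards 3 of every 4 positions.
#   down = downsample_avg(in_channels=64, out_channels=128, kernel_size=1, stride=2)
#   down(torch.randn(2, 64, 32, 32)).shape   # torch.Size([2, 128, 16, 16])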
def drop_blocks(drop_block_rate: float = 0.) -> List[None]:
    """
    Overview:
        Generate the per-stage drop-block modules. DropBlock is not supported in this simplified
        implementation, so the rate must be 0 and a list of four ``None`` placeholders is returned.
    Arguments:
        - drop_block_rate (:obj:`float`, optional): The drop block rate, defaults to 0.
    Returns:
        - (:obj:`List[None]`): A list of four None values, one per stage.
    """
    assert drop_block_rate == 0., drop_block_rate
    return [None for _ in range(4)]
def make_blocks(
        block_fn: Type[nn.Module],
        channels: List[int],
        block_repeats: List[int],
        inplanes: int,
        reduce_first: int = 1,
        output_stride: int = 32,
        down_kernel_size: int = 1,
        avg_down: bool = False,
        drop_block_rate: float = 0.,
        drop_path_rate: float = 0.,
        **kwargs
) -> Tuple[List[Tuple[str, nn.Module]], List[Dict[str, Union[int, str]]]]:
    """
    Overview:
        Create a list of blocks for the network, with each block having a given number of repeats. Also, create a
        feature info list that contains information about the output of each block.
    Arguments:
        - block_fn (:obj:`Type[nn.Module]`): The type of block to use.
        - channels (:obj:`List[int]`): The list of output channels for each block.
        - block_repeats (:obj:`List[int]`): The list of number of repeats for each block.
        - inplanes (:obj:`int`): The number of input planes.
        - reduce_first (:obj:`int`, optional): The first reduction factor, defaults to 1.
        - output_stride (:obj:`int`, optional): The total stride of the network, defaults to 32.
        - down_kernel_size (:obj:`int`, optional): The size of the downsample kernel, defaults to 1.
        - avg_down (:obj:`bool`, optional): Whether to use average pooling for downsampling, defaults to False.
        - drop_block_rate (:obj:`float`, optional): The drop block rate, defaults to 0.
        - drop_path_rate (:obj:`float`, optional): The drop path rate, defaults to 0.
        - kwargs (:obj:`Dict`): Additional keyword arguments passed to each block.
    Returns:
        - (:obj:`Tuple[List[Tuple[str, nn.Module]], List[Dict[str, Union[int, str]]]]`): \
            A tuple that includes a list of named stages for the network and a feature info list.
    """
    stages = []
    feature_info = []
    net_num_blocks = sum(block_repeats)
    net_block_idx = 0
    net_stride = 4
    dilation = prev_dilation = 1
    for stage_idx, (planes, num_blocks, db) in enumerate(zip(channels, block_repeats, drop_blocks(drop_block_rate))):
        stage_name = f'layer{stage_idx + 1}'  # never liked this name, but weight compat requires it
        stride = 1 if stage_idx == 0 else 2
        if net_stride >= output_stride:
            dilation *= stride
            stride = 1
        else:
            net_stride *= stride

        downsample = None
        if stride != 1 or inplanes != planes * block_fn.expansion:
            down_kwargs = dict(
                in_channels=inplanes,
                out_channels=planes * block_fn.expansion,
                kernel_size=down_kernel_size,
                stride=stride,
                dilation=dilation,
                first_dilation=prev_dilation,
                norm_layer=kwargs.get('norm_layer')
            )
            downsample = downsample_avg(**down_kwargs) if avg_down else downsample_conv(**down_kwargs)

        block_kwargs = dict(reduce_first=reduce_first, dilation=dilation, drop_block=db, **kwargs)
        blocks = []
        for block_idx in range(num_blocks):
            downsample = downsample if block_idx == 0 else None
            stride = stride if block_idx == 0 else 1
            block_dpr = drop_path_rate * net_block_idx / (net_num_blocks - 1)  # stochastic depth linear decay rule
            blocks.append(
                block_fn(
                    inplanes, planes, stride, downsample, first_dilation=prev_dilation, drop_path=None, **block_kwargs
                )
            )
            prev_dilation = dilation
            inplanes = planes * block_fn.expansion
            net_block_idx += 1

        stages.append((stage_name, nn.Sequential(*blocks)))
        feature_info.append(dict(num_chs=inplanes, reduction=net_stride, module=stage_name))

    return stages, feature_info
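# Usage sketch for ``make_blocks`` (illustrative only): the four ResNet-18 stages built directly.
#   stages, info = make_blocks(BasicBlock, channels=[64, 128, 256, 512], block_repeats=[2, 2, 2, 2], inplanes=64)
#   [name for name, _ in stages]       # ['layer1', 'layer2', 'layer3', 'layer4']
#   [f['reduction'] for f in info]     # [4, 8, 16, 32]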
class ResNet(nn.Module):
    """
    Overview:
        Implements ResNet, ResNeXt, SE-ResNeXt, and SENet models. This implementation supports various modifications
        based on the v1c, v1d, v1e, and v1s variants included in the MXNet Gluon ResNetV1b model. For more details
        about the variants and options, please refer to the 'Bag of Tricks' paper: https://arxiv.org/pdf/1812.01187.
    Interfaces:
        ``__init__``, ``init_weights``, ``get_classifier``, ``reset_classifier``, ``forward_features``, ``forward``
    """

    def __init__(
            self,
            block: nn.Module,
            layers: List[int],
            num_classes: int = 1000,
            in_chans: int = 3,
            cardinality: int = 1,
            base_width: int = 64,
            stem_width: int = 64,
            stem_type: str = '',
            replace_stem_pool: bool = False,
            output_stride: int = 32,
            block_reduce_first: int = 1,
            down_kernel_size: int = 1,
            avg_down: bool = False,
            act_layer: nn.Module = nn.ReLU,
            norm_layer: nn.Module = nn.BatchNorm2d,
            aa_layer: Optional[nn.Module] = None,
            drop_rate: float = 0.0,
            drop_path_rate: float = 0.0,
            drop_block_rate: float = 0.0,
            global_pool: str = 'avg',
            zero_init_last_bn: bool = True,
            block_args: Optional[dict] = None
    ) -> None:
        """
        Overview:
            Initialize the ResNet model with given block, layers and other configuration options.
        Arguments:
            - block (:obj:`nn.Module`): Class for the residual block.
            - layers (:obj:`List[int]`): Number of blocks in each stage.
            - num_classes (:obj:`int`, optional): Number of classification classes. Default is 1000.
            - in_chans (:obj:`int`, optional): Number of input (color) channels. Default is 3.
            - cardinality (:obj:`int`, optional): Number of convolution groups for 3x3 conv in Bottleneck. \
                Default is 1.
            - base_width (:obj:`int`, optional): Factor determining bottleneck channels. Default is 64.
            - stem_width (:obj:`int`, optional): Number of channels in stem convolutions. Default is 64.
            - stem_type (:obj:`str`, optional): The type of stem. Default is ''.
            - replace_stem_pool (:obj:`bool`, optional): Whether to replace the stem max-pooling with a \
                convolution. Default is False.
            - output_stride (:obj:`int`, optional): Output stride of the network. Default is 32.
            - block_reduce_first (:obj:`int`, optional): Reduction factor for first convolution output width of \
                residual blocks. Default is 1.
            - down_kernel_size (:obj:`int`, optional): Kernel size of residual block downsampling path. Default is 1.
            - avg_down (:obj:`bool`, optional): Whether to use average pooling for projection skip connection \
                between stages/downsample. Default is False.
            - act_layer (:obj:`nn.Module`, optional): Activation layer. Default is nn.ReLU.
            - norm_layer (:obj:`nn.Module`, optional): Normalization layer. Default is nn.BatchNorm2d.
            - aa_layer (:obj:`Optional[nn.Module]`, optional): Anti-aliasing layer. Default is None.
            - drop_rate (:obj:`float`, optional): Dropout probability before classifier, for training. \
                Default is 0.0.
            - drop_path_rate (:obj:`float`, optional): Drop path rate. Default is 0.0.
            - drop_block_rate (:obj:`float`, optional): Drop block rate. Default is 0.0.
            - global_pool (:obj:`str`, optional): Global pooling type. Default is 'avg'.
            - zero_init_last_bn (:obj:`bool`, optional): Whether to initialize last batch normalization with zero. \
                Default is True.
            - block_args (:obj:`Optional[dict]`, optional): Additional arguments for block. Default is None.
        """
        block_args = block_args or dict()
        assert output_stride in (8, 16, 32)
        self.num_classes = num_classes
        self.drop_rate = drop_rate
        super(ResNet, self).__init__()

        # Stem
        deep_stem = 'deep' in stem_type
        inplanes = stem_width * 2 if deep_stem else 64
        if deep_stem:
            stem_chs = (stem_width, stem_width)
            if 'tiered' in stem_type:
                stem_chs = (3 * (stem_width // 4), stem_width)
            self.conv1 = nn.Sequential(
                *[
                    nn.Conv2d(in_chans, stem_chs[0], 3, stride=2, padding=1, bias=False),
                    norm_layer(stem_chs[0]),
                    act_layer(inplace=True),
                    nn.Conv2d(stem_chs[0], stem_chs[1], 3, stride=1, padding=1, bias=False),
                    norm_layer(stem_chs[1]),
                    act_layer(inplace=True),
                    nn.Conv2d(stem_chs[1], inplanes, 3, stride=1, padding=1, bias=False)
                ]
            )
        else:
            self.conv1 = nn.Conv2d(in_chans, inplanes, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = norm_layer(inplanes)
        self.act1 = act_layer(inplace=True)
        self.feature_info = [dict(num_chs=inplanes, reduction=2, module='act1')]

        # Stem Pooling
        if replace_stem_pool:
            self.maxpool = nn.Sequential(
                *filter(
                    None, [
                        nn.Conv2d(inplanes, inplanes, 3, stride=1 if aa_layer else 2, padding=1, bias=False),
                        aa_layer(channels=inplanes, stride=2) if aa_layer else None,
                        norm_layer(inplanes),
                        act_layer(inplace=True)
                    ]
                )
            )
        else:
            if aa_layer is not None:
                self.maxpool = nn.Sequential(
                    *[nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
                      aa_layer(channels=inplanes, stride=2)]
                )
            else:
                self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Feature Blocks
        channels = [64, 128, 256, 512]
        stage_modules, stage_feature_info = make_blocks(
            block,
            channels,
            layers,
            inplanes,
            cardinality=cardinality,
            base_width=base_width,
            output_stride=output_stride,
            reduce_first=block_reduce_first,
            avg_down=avg_down,
            down_kernel_size=down_kernel_size,
            act_layer=act_layer,
            norm_layer=norm_layer,
            aa_layer=aa_layer,
            drop_block_rate=drop_block_rate,
            drop_path_rate=drop_path_rate,
            **block_args
        )
        for stage in stage_modules:
            self.add_module(*stage)  # layer1, layer2, etc
        self.feature_info.extend(stage_feature_info)

        # Head (Pooling and Classifier)
        self.num_features = 512 * block.expansion
        self.global_pool, self.fc = create_classifier(self.num_features, self.num_classes, pool_type=global_pool)

        self.init_weights(zero_init_last_bn=zero_init_last_bn)
    def init_weights(self, zero_init_last_bn: bool = True) -> None:
        """
        Overview:
            Initialize the weights in the model.
        Arguments:
            - zero_init_last_bn (:obj:`bool`, optional): Whether to initialize last batch normalization with zero. \
                Default is True.
        """
        for n, m in self.named_modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
        if zero_init_last_bn:
            for m in self.modules():
                if hasattr(m, 'zero_init_last_bn'):
                    m.zero_init_last_bn()

    def get_classifier(self) -> nn.Module:
        """
        Overview:
            Get the classifier module from the model.
        Returns:
            - classifier (:obj:`nn.Module`): The classifier module in the model.
        """
        return self.fc
    def reset_classifier(self, num_classes: int, global_pool: str = 'avg') -> None:
        """
        Overview:
            Reset the classifier with a new number of classes and pooling type.
        Arguments:
            - num_classes (:obj:`int`): New number of classification classes.
            - global_pool (:obj:`str`, optional): New global pooling type. Default is 'avg'.
        """
        self.num_classes = num_classes
        self.global_pool, self.fc = create_classifier(self.num_features, self.num_classes, pool_type=global_pool)

    def forward_features(self, x: torch.Tensor) -> torch.Tensor:
        """
        Overview:
            Forward pass through the feature layers of the model.
        Arguments:
            - x (:obj:`torch.Tensor`): The input tensor.
        Returns:
            - x (:obj:`torch.Tensor`): The output tensor after passing through feature layers.
        """
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.act1(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        return x

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Overview:
            Full forward pass through the model.
        Arguments:
            - x (:obj:`torch.Tensor`): The input tensor.
        Returns:
            - x (:obj:`torch.Tensor`): The output tensor after passing through the model.
        """
        x = self.forward_features(x)
        x = self.global_pool(x)
        x = x.view(x.shape[0], -1)
        if self.drop_rate:
            x = F.dropout(x, p=float(self.drop_rate), training=self.training)
        x = self.fc(x)
        return x
def resnet18() -> nn.Module:
    """
    Overview:
        Create a ResNet-18 model.
    Returns:
        - model (:obj:`nn.Module`): ResNet-18 model.
    """
    return ResNet(block=BasicBlock, layers=[2, 2, 2, 2])
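if __name__ == '__main__':
    # Minimal sanity check (illustrative only): a default ResNet-18 maps a batch of 224x224 RGB
    # images to 1000-way logits.
    model = resnet18()
    logits = model(torch.randn(4, 3, 224, 224))
    assert logits.shape == (4, 1000)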