# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" | |
This code is refer from: | |
https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.3/ppdet/modeling/necks/fpn.py | |
""" | |
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.nn.initializer import XavierUniform
from paddle.nn.initializer import Normal
from paddle.regularizer import L2Decay

__all__ = ['FCEFPN']


class ConvNormLayer(nn.Layer):
    def __init__(self,
                 ch_in,
                 ch_out,
                 filter_size,
                 stride,
                 groups=1,
                 norm_type='bn',
                 norm_decay=0.,
                 norm_groups=32,
                 lr_scale=1.,
                 freeze_norm=False,
                 initializer=Normal(
                     mean=0., std=0.01)):
        super(ConvNormLayer, self).__init__()
        assert norm_type in ['bn', 'sync_bn', 'gn']

        bias_attr = False

        self.conv = nn.Conv2D(
            in_channels=ch_in,
            out_channels=ch_out,
            kernel_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(
                initializer=initializer, learning_rate=1.),
            bias_attr=bias_attr)

        norm_lr = 0. if freeze_norm else 1.
        param_attr = ParamAttr(
            learning_rate=norm_lr,
            regularizer=L2Decay(norm_decay) if norm_decay is not None else None)
        bias_attr = ParamAttr(
            learning_rate=norm_lr,
            regularizer=L2Decay(norm_decay) if norm_decay is not None else None)
        if norm_type == 'bn':
            self.norm = nn.BatchNorm2D(
                ch_out, weight_attr=param_attr, bias_attr=bias_attr)
        elif norm_type == 'sync_bn':
            self.norm = nn.SyncBatchNorm(
                ch_out, weight_attr=param_attr, bias_attr=bias_attr)
        elif norm_type == 'gn':
            self.norm = nn.GroupNorm(
                num_groups=norm_groups,
                num_channels=ch_out,
                weight_attr=param_attr,
                bias_attr=bias_attr)

    def forward(self, inputs):
        out = self.conv(inputs)
        out = self.norm(out)
        return out

class FCEFPN(nn.Layer):
    """
    Feature Pyramid Network, see https://arxiv.org/abs/1612.03144

    Args:
        in_channels (list[int]): input channels of each level, which can be
            derived from the output shape of the backbone by from_config
        out_channels (int): output channels of each level
        spatial_scales (list[float]): the spatial scales between the input
            feature maps and the original input image, which can be derived
            from the output shape of the backbone by from_config
        has_extra_convs (bool): whether to add extra convs to the last level.
            default False
        extra_stage (int): the number of extra stages added to the last level.
            default 1
        use_c5 (bool): whether to use c5 as the input of the extra stage,
            otherwise p5 is used. default True
        norm_type (string|None): the normalization type in the FPN module. If
            norm_type is None, no norm is applied after conv; otherwise
            'bn', 'gn' and 'sync_bn' are available. default None
        norm_decay (float): weight decay for normalization layer weights.
            default 0.
        freeze_norm (bool): whether to freeze the normalization layers.
            default False
        relu_before_extra_convs (bool): whether to apply relu before the extra
            convs. default True

    A minimal construction example is sketched at the bottom of this file.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 spatial_scales=[0.25, 0.125, 0.0625, 0.03125],
                 has_extra_convs=False,
                 extra_stage=1,
                 use_c5=True,
                 norm_type=None,
                 norm_decay=0.,
                 freeze_norm=False,
                 relu_before_extra_convs=True):
        super(FCEFPN, self).__init__()
        self.out_channels = out_channels
        for s in range(extra_stage):
            spatial_scales = spatial_scales + [spatial_scales[-1] / 2.]
        self.spatial_scales = spatial_scales
        self.has_extra_convs = has_extra_convs
        self.extra_stage = extra_stage
        self.use_c5 = use_c5
        self.relu_before_extra_convs = relu_before_extra_convs
        self.norm_type = norm_type
        self.norm_decay = norm_decay
        self.freeze_norm = freeze_norm

        self.lateral_convs = []
        self.fpn_convs = []
        fan = out_channels * 3 * 3

        # stage index 0,1,2,3 stands for res2,res3,res4,res5 on ResNet Backbone
        # 0 <= st_stage < ed_stage <= 3
        st_stage = 4 - len(in_channels)
        ed_stage = st_stage + len(in_channels) - 1
        for i in range(st_stage, ed_stage + 1):
            if i == 3:
                lateral_name = 'fpn_inner_res5_sum'
            else:
                lateral_name = 'fpn_inner_res{}_sum_lateral'.format(i + 2)
            in_c = in_channels[i - st_stage]
            if self.norm_type is not None:
                lateral = self.add_sublayer(
                    lateral_name,
                    ConvNormLayer(
                        ch_in=in_c,
                        ch_out=out_channels,
                        filter_size=1,
                        stride=1,
                        norm_type=self.norm_type,
                        norm_decay=self.norm_decay,
                        freeze_norm=self.freeze_norm,
                        initializer=XavierUniform(fan_out=in_c)))
            else:
                lateral = self.add_sublayer(
                    lateral_name,
                    nn.Conv2D(
                        in_channels=in_c,
                        out_channels=out_channels,
                        kernel_size=1,
                        weight_attr=ParamAttr(
                            initializer=XavierUniform(fan_out=in_c))))
            self.lateral_convs.append(lateral)

        for i in range(st_stage, ed_stage + 1):
            fpn_name = 'fpn_res{}_sum'.format(i + 2)
            if self.norm_type is not None:
                fpn_conv = self.add_sublayer(
                    fpn_name,
                    ConvNormLayer(
                        ch_in=out_channels,
                        ch_out=out_channels,
                        filter_size=3,
                        stride=1,
                        norm_type=self.norm_type,
                        norm_decay=self.norm_decay,
                        freeze_norm=self.freeze_norm,
                        initializer=XavierUniform(fan_out=fan)))
            else:
                fpn_conv = self.add_sublayer(
                    fpn_name,
                    nn.Conv2D(
                        in_channels=out_channels,
                        out_channels=out_channels,
                        kernel_size=3,
                        padding=1,
                        weight_attr=ParamAttr(
                            initializer=XavierUniform(fan_out=fan))))
            self.fpn_convs.append(fpn_conv)

        # add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5)
        if self.has_extra_convs:
            for i in range(self.extra_stage):
                lvl = ed_stage + 1 + i
                if i == 0 and self.use_c5:
                    in_c = in_channels[-1]
                else:
                    in_c = out_channels
                extra_fpn_name = 'fpn_{}'.format(lvl + 2)
                if self.norm_type is not None:
                    extra_fpn_conv = self.add_sublayer(
                        extra_fpn_name,
                        ConvNormLayer(
                            ch_in=in_c,
                            ch_out=out_channels,
                            filter_size=3,
                            stride=2,
                            norm_type=self.norm_type,
                            norm_decay=self.norm_decay,
                            freeze_norm=self.freeze_norm,
                            initializer=XavierUniform(fan_out=fan)))
                else:
                    extra_fpn_conv = self.add_sublayer(
                        extra_fpn_name,
                        nn.Conv2D(
                            in_channels=in_c,
                            out_channels=out_channels,
                            kernel_size=3,
                            stride=2,
                            padding=1,
                            weight_attr=ParamAttr(
                                initializer=XavierUniform(fan_out=fan))))
                self.fpn_convs.append(extra_fpn_conv)

    @classmethod
    def from_config(cls, cfg, input_shape):
        return {
            'in_channels': [i.channels for i in input_shape],
            'spatial_scales': [1.0 / i.stride for i in input_shape],
        }

    def forward(self, body_feats):
        laterals = []
        num_levels = len(body_feats)

        # 1x1 lateral convs map every backbone level to the FPN channel width
        for i in range(num_levels):
            laterals.append(self.lateral_convs[i](body_feats[i]))

        # top-down pathway: upsample the coarser level and add it to the finer one
        for i in range(1, num_levels):
            lvl = num_levels - i
            upsample = F.interpolate(
                laterals[lvl],
                scale_factor=2.,
                mode='nearest', )
            laterals[lvl - 1] += upsample

        # 3x3 output convs smooth the merged feature maps
        fpn_output = []
        for lvl in range(num_levels):
            fpn_output.append(self.fpn_convs[lvl](laterals[lvl]))

        if self.extra_stage > 0:
            # use max pool to get more levels on top of outputs (Faster R-CNN, Mask R-CNN)
            if not self.has_extra_convs:
                assert self.extra_stage == 1, 'extra_stage should be 1 if FPN has no extra convs'
                fpn_output.append(F.max_pool2d(fpn_output[-1], 1, stride=2))
            # add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5)
            else:
                if self.use_c5:
                    extra_source = body_feats[-1]
                else:
                    extra_source = fpn_output[-1]
                fpn_output.append(self.fpn_convs[num_levels](extra_source))

                for i in range(1, self.extra_stage):
                    if self.relu_before_extra_convs:
                        fpn_output.append(self.fpn_convs[num_levels + i](F.relu(
                            fpn_output[-1])))
                    else:
                        fpn_output.append(self.fpn_convs[num_levels + i](
                            fpn_output[-1]))
        return fpn_output
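

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the upstream module): the channel widths
# and input size below are assumptions chosen to mimic the C3-C5 outputs of a
# ResNet50 backbone at strides 8/16/32. They only illustrate how the neck is
# wired up and are not a shipped configuration.
if __name__ == '__main__':
    import paddle

    in_channels = [512, 1024, 2048]  # assumed C3, C4, C5 channel widths
    neck = FCEFPN(
        in_channels=in_channels,
        out_channels=256,
        spatial_scales=[1 / 8, 1 / 16, 1 / 32],
        has_extra_convs=False,
        extra_stage=0)

    # fake backbone features for a 640x640 image at strides 8, 16 and 32
    body_feats = [
        paddle.rand([1, c, 640 // s, 640 // s])
        for c, s in zip(in_channels, [8, 16, 32])
    ]
    outs = neck(body_feats)
    for out in outs:
        print(out.shape)  # every pyramid level comes out with 256 channels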