# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from functools import partial

import paddle
import paddle.nn as nn
import paddle.nn.functional as F

from paddleseg import utils
from paddleseg.models import layers
from paddleseg.cvlibs import manager


@manager.MODELS.add_component
class LRASPP(nn.Layer):
    """
    Semantic segmentation model with a light R-ASPP head.

    The original article refers to
    Howard, Andrew, et al. "Searching for MobileNetV3"
    (https://arxiv.org/pdf/1905.02244.pdf).

    Args:
        num_classes (int): The number of target classes.
        backbone (nn.Layer): Backbone network, such as stdc1net and resnet18.
            The backbone must have a `feat_channels` attribute with length 5.
        backbone_indices (List[int], optional): The indices of the backbone outputs
            used as the inputs of the LR-ASPP head. Default: [0, 1, 3].
        lraspp_head_inter_chs (List[int], optional): The intermediate channels of
            the LR-ASPP head. Default: [32, 64].
        lraspp_head_out_ch (int, optional): The output channels of each ASPP branch
            in the LR-ASPP head. Default: 128.
        resize_mode (str, optional): The resize mode for the upsampling operations
            in the LR-ASPP head. Default: 'bilinear'.
        use_gap (bool, optional): If True, use global average pooling in the
            LR-ASPP head; otherwise, use average pooling with a 49x49 kernel.
            Default: True.
        pretrained (str, optional): The path or URL of the pretrained model.
            Default: None.
    """

    def __init__(self,
                 num_classes,
                 backbone,
                 backbone_indices=[0, 1, 3],
                 lraspp_head_inter_chs=[32, 64],
                 lraspp_head_out_ch=128,
                 resize_mode='bilinear',
                 use_gap=True,
                 pretrained=None):
        super().__init__()

        # backbone
        assert hasattr(backbone, 'feat_channels'), \
            "The backbone should have feat_channels."
        assert len(backbone.feat_channels) >= len(backbone_indices), \
            f"The length of input backbone_indices ({len(backbone_indices)}) should not be " \
            f"greater than the length of feat_channels ({len(backbone.feat_channels)})."
        assert len(backbone.feat_channels) > max(backbone_indices), \
            f"The max value ({max(backbone_indices)}) of backbone_indices should be " \
            f"less than the length of feat_channels ({len(backbone.feat_channels)})."
        self.backbone = backbone

        assert len(backbone_indices) >= 1, \
            "The length of backbone_indices should not be less than 1."

        # head
        assert len(backbone_indices) == len(lraspp_head_inter_chs) + 1, \
            "The length of backbone_indices should be 1 greater than the length " \
            "of lraspp_head_inter_chs."
        self.backbone_indices = backbone_indices
        self.lraspp_head = LRASPPHead(backbone_indices, backbone.feat_channels,
                                      lraspp_head_inter_chs, lraspp_head_out_ch,
                                      num_classes, resize_mode, use_gap)

        # pretrained
        self.pretrained = pretrained
        self.init_weight()

    def forward(self, x):
        x_hw = paddle.shape(x)[2:]

        feats_backbone = self.backbone(x)
        assert len(feats_backbone) >= len(self.backbone_indices), \
            f"The number of backbone feats ({len(feats_backbone)}) should not be " \
            f"less than the number of backbone_indices ({len(self.backbone_indices)})."

        y = self.lraspp_head(feats_backbone)
        y = F.interpolate(y, x_hw, mode='bilinear', align_corners=False)
        logit_list = [y]

        return logit_list

    def init_weight(self):
        if self.pretrained is not None:
            utils.load_entire_model(self, self.pretrained)
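

# A sketch of what LRASPPHead computes, following the LR-ASPP design in the
# MobileNetV3 paper (the notation below is illustrative, not from the source):
#
#     x   = deepest backbone feature
#     out = ConvBNReLU_1x1(x) * upsample(sigmoid(Conv_1x1(pool(x))))
#
# `out` is then fused with progressively shallower features: upsample, concat
# with a 1x1 projection of the shallow feature, then a 1x1 ConvBNReLU.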
class LRASPPHead(nn.Layer):
    def __init__(self,
                 indices,
                 in_chs,
                 mid_chs,
                 out_ch,
                 n_classes,
                 resize_mode,
                 use_gap,
                 align_corners=False):
        super().__init__()

        # The deepest feature feeds the ASPP-like branches; the remaining
        # (shallower) features are fused from deep to shallow.
        self.indices = indices[-2::-1]
        self.in_chs = [in_chs[i] for i in indices[::-1]]
        self.mid_chs = mid_chs[::-1]

        self.convs = nn.LayerList()
        self.conv_ups = nn.LayerList()
        for in_ch, mid_ch in zip(self.in_chs[1:], self.mid_chs):
            self.convs.append(
                nn.Conv2D(
                    in_ch, mid_ch, kernel_size=1, bias_attr=False))
            self.conv_ups.append(layers.ConvBNReLU(out_ch + mid_ch, out_ch, 1))

        # Attention branch: pool -> 1x1 conv -> sigmoid.
        self.conv_w = nn.Sequential(
            nn.AvgPool2D(
                kernel_size=(49, 49), stride=(16, 20))
            if not use_gap else nn.AdaptiveAvgPool2D(1),
            nn.Conv2D(
                self.in_chs[0], out_ch, 1, bias_attr=False),
            nn.Sigmoid())
        self.conv_v = layers.ConvBNReLU(self.in_chs[0], out_ch, 1)
        self.conv_t = nn.Conv2D(out_ch, out_ch, kernel_size=1, bias_attr=False)
        self.conv_out = nn.Conv2D(
            out_ch, n_classes, kernel_size=1, bias_attr=False)

        self.interp = partial(
            F.interpolate, mode=resize_mode, align_corners=align_corners)

    def forward(self, in_feat_list):
        x = in_feat_list[-1]

        # Gate the 1x1-projected features with the upsampled attention weights.
        x = self.conv_v(x) * self.interp(self.conv_w(x), paddle.shape(x)[2:])
        y = self.conv_t(x)

        for idx, conv, conv_up in zip(self.indices, self.convs, self.conv_ups):
            feat = in_feat_list[idx]
            y = self.interp(y, paddle.shape(feat)[2:])
            y = paddle.concat([y, conv(feat)], axis=1)
            y = conv_up(y)

        y = self.conv_out(y)
        return y
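

# ---------------------------------------------------------------------------
# Minimal smoke test, runnable as a script. `_MockBackbone` is a hypothetical
# stand-in (not part of PaddleSeg): any backbone exposing `feat_channels` and
# returning one feature map per entry works the same way.
# ---------------------------------------------------------------------------
if __name__ == '__main__':

    class _MockBackbone(nn.Layer):
        # Four feature maps at strides 4/8/16/32; channel counts are arbitrary.
        feat_channels = [16, 24, 32, 96]

        def forward(self, x):
            n, _, h, w = x.shape
            return [
                paddle.rand([n, ch, h // (4 * 2**i), w // (4 * 2**i)])
                for i, ch in enumerate(self.feat_channels)
            ]

    model = LRASPP(num_classes=19, backbone=_MockBackbone())
    logits = model(paddle.rand([1, 3, 512, 512]))[0]
    print(logits.shape)  # expect [1, 19, 512, 512]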