# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle
import paddle.nn as nn
import paddle.nn.functional as F

from paddleseg.cvlibs import manager
from paddleseg.models import layers
from paddleseg.utils import utils


@manager.MODELS.add_component
class BiseNetV1(nn.Layer):
"""
The BiSeNetV1 implementation based on PaddlePaddle.
The original article refers to
Yu, Changqian, et al. "BiSeNet V2: Bilateral Network with Guided Aggregation for Real-time Semantic Segmentation"
(https://paperswithcode.com/paper/bisenet-bilateral-segmentation-network-for)
Args:
num_classes (int): The unique number of target classes.
backbone (paddle.nn.Layer): Backbone network, currently support Resnet18_vd/Resnet34_vd/Resnet50_vd/Resnet101_vd.
pretrained (str, optional): The path or url of pretrained model. Default: None.
"""
def __init__(self, num_classes, backbone, conv_channel=128,
pretrained=None):
super().__init__()
self.backbone = backbone
self.spatial_path = SpatialPath(3, 128)
self.global_context = nn.Sequential(
nn.AdaptiveAvgPool2D(1),
layers.ConvBNReLU(
512, conv_channel, 1, bias_attr=False), )
self.arms = nn.LayerList([
AttentionRefinement(512, conv_channel),
AttentionRefinement(256, conv_channel),
])
self.refines = nn.LayerList([
layers.ConvBNReLU(
conv_channel,
conv_channel,
3,
stride=1,
padding=1,
bias_attr=False),
layers.ConvBNReLU(
conv_channel,
conv_channel,
3,
stride=1,
padding=1,
bias_attr=False),
])
self.heads = nn.LayerList([
BiSeNetHead(conv_channel, num_classes, 8, True),
BiSeNetHead(conv_channel, num_classes, 8, True),
BiSeNetHead(conv_channel * 2, num_classes, 8, False),
])
        self.ffm = FeatureFusion(conv_channel * 2, conv_channel * 2, 1)

        self.pretrained = pretrained
        self.init_weight()

def init_weight(self):
if self.pretrained is not None:
            utils.load_entire_model(self, self.pretrained)

def forward(self, x):
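        # Spatial path: three stride-2 convolutions keep detail at 1/8 input resolution.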
spatial_out = self.spatial_path(x)
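        # Context path: multi-scale backbone features, reversed so the deepest stage comes first.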
context_blocks = self.backbone(x)
context_blocks.reverse()
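        # Image-level context from global average pooling, broadcast back over the deepest feature map.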
global_context = self.global_context(context_blocks[0])
global_context = F.interpolate(
global_context,
size=paddle.shape(context_blocks[0])[2:],
mode='bilinear',
align_corners=True)
last_fm = global_context
pred_out = []
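        # Top-down refinement: each attention refinement module (ARM) reweights a
        # backbone stage, and the running feature is upsampled to the next (finer)
        # stage before being refined again.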
for i, (
fm, arm, refine
) in enumerate(zip(context_blocks[:2], self.arms, self.refines)):
fm = arm(fm)
fm += last_fm
last_fm = F.interpolate(
fm,
size=paddle.shape(context_blocks[i + 1])[2:],
mode='bilinear',
align_corners=True)
last_fm = refine(last_fm)
pred_out.append(last_fm)
context_out = last_fm
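        # Fuse the detail branch with the context branch (both at 1/8 resolution).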
concate_fm = self.ffm(spatial_out, context_out)
pred_out.append(concate_fm)
output = []
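        # Training: all three heads provide supervision (two auxiliary, one main).
        # Inference: only the main head on the fused features is evaluated.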
if self.training:
for i, head in enumerate(self.heads):
out = head(pred_out[i])
output.append(out)
else:
out = self.heads[-1](pred_out[-1])
output.append(out)
        return output


class SpatialPath(nn.Layer):
"""
SpatialPath module of BiseNetV1 model
Args:
in_channels (int): The number of input channels in spatial path module.
out_channels (int): The number of output channels in spatial path module.
"""
def __init__(self, in_channels, out_channels, inner_channel=64):
super().__init__()
self.conv_7x7 = layers.ConvBNReLU(
in_channels, inner_channel, 7, stride=2, padding=3, bias_attr=False)
self.conv_3x3_1 = layers.ConvBNReLU(
inner_channel,
inner_channel,
3,
stride=2,
padding=1,
bias_attr=False)
self.conv_3x3_2 = layers.ConvBNReLU(
inner_channel,
inner_channel,
3,
stride=2,
padding=1,
bias_attr=False)
self.conv_1x1 = layers.ConvBNReLU(
            inner_channel, out_channels, 1, bias_attr=False)

def forward(self, x):
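        # A 7x7 stride-2 conv followed by two 3x3 stride-2 convs: overall stride 8.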
x = self.conv_7x7(x)
x = self.conv_3x3_1(x)
x = self.conv_3x3_2(x)
x = self.conv_1x1(x)
        return x


class BiSeNetHead(nn.Layer):
"""
BiSeNet head of BiseNetV1 model
Args:
in_channels (int): The number of input channels in spatial path module.
out_channels (int): The number of output channels in spatial path module.
scale (int, float): The scale factor of interpolation.
"""
def __init__(self, in_channels, out_channels, scale, is_aux=False):
super().__init__()
inner_channel = 128 if is_aux else 64
self.conv_3x3 = layers.ConvBNReLU(
in_channels, inner_channel, 3, stride=1, padding=1, bias_attr=False)
self.conv_1x1 = nn.Conv2D(inner_channel, out_channels, 1)
        self.scale = scale

def forward(self, x):
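        # Refine with a 3x3 conv, project to class logits with a 1x1 conv,
        # then upsample the logits by `scale` when scale > 1.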
x = self.conv_3x3(x)
x = self.conv_1x1(x)
if self.scale > 1:
x = F.interpolate(
x, scale_factor=self.scale, mode='bilinear', align_corners=True)
        return x


class AttentionRefinement(nn.Layer):
"""
AttentionRefinement module of BiseNetV1 model
Args:
in_channels (int): The number of input channels in spatial path module.
out_channels (int): The number of output channels in spatial path module.
"""
def __init__(self, in_channels, out_channels):
super().__init__()
self.conv_3x3 = layers.ConvBNReLU(
in_channels, out_channels, 3, stride=1, padding=1, bias_attr=False)
self.channel_attention = nn.Sequential(
nn.AdaptiveAvgPool2D(1),
layers.ConvBNReLU(
out_channels, out_channels, 1, bias_attr=False),
            nn.Sigmoid(), )

def forward(self, x):
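        # SE-style gate: global average pooling -> 1x1 ConvBNReLU -> sigmoid,
        # then reweight the 3x3 conv features channel-wise.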
x = self.conv_3x3(x)
se = self.channel_attention(x)
x = x * se
        return x


class FeatureFusion(nn.Layer):
"""
AttentionRefinement module of BiseNetV1 model
Args:
in_channels (int): The number of input channels in spatial path module.
out_channels (int): The number of output channels in spatial path module.
reduction (int): A factor shrinks convolutional channels. Default: 1.
"""
def __init__(self, in_channels, out_channels, reduction=1):
super().__init__()
self.conv_1x1 = layers.ConvBNReLU(
in_channels, out_channels, 1, bias_attr=False)
self.channel_attention = nn.Sequential(
nn.AdaptiveAvgPool2D(1),
layers.ConvBNReLU(
out_channels, out_channels // reduction, 1, bias_attr=False),
layers.ConvBNReLU(
out_channels // reduction, out_channels, 1, bias_attr=False),
            nn.Sigmoid(), )

def forward(self, x1, x2):
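        # Concatenate both paths, project with a 1x1 conv, then add a
        # channel-attention-weighted residual of the fused features.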
fm = paddle.concat([x1, x2], axis=1)
fm = self.conv_1x1(fm)
fm_se = self.channel_attention(fm)
output = fm + fm * fm_se
return output
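

# --------------------------------------------------------------------------
# Minimal usage sketch (an illustration, not part of the original module).
# It assumes the ResNet18_vd backbone shipped with PaddleSeg, whose two
# deepest stages yield 512 and 256 channels, matching the ARMs above; any
# backbone returning such a feature list should behave the same way.
# --------------------------------------------------------------------------
if __name__ == '__main__':
    from paddleseg.models.backbones import ResNet18_vd

    model = BiseNetV1(num_classes=19, backbone=ResNet18_vd())
    model.eval()

    x = paddle.rand([1, 3, 512, 1024])  # NCHW input
    with paddle.no_grad():
        logits = model(x)[0]  # eval mode returns only the main head output
    print(logits.shape)  # expected: [1, 19, 512, 1024]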