# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle
import paddle.nn as nn
import paddle.nn.functional as F

from paddleseg.models import layers
from paddleseg.models.layers import tensor_fusion_helper as helper


class UAFM(nn.Layer):
"""
The base of Unified Attention Fusion Module.
Args:
        x_ch (int): The number of channels of the x tensor, which is the low level feature.
        y_ch (int): The number of channels of the y tensor, which is the high level feature.
        out_ch (int): The number of channels of the output tensor.
        ksize (int, optional): The kernel size of the conv applied to the x tensor. Default: 3.
        resize_mode (str, optional): The interpolation mode used to upsample the y tensor. Default: 'bilinear'.
    """

def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
super().__init__()
self.conv_x = layers.ConvBNReLU(
x_ch, y_ch, kernel_size=ksize, padding=ksize // 2, bias_attr=False)
self.conv_out = layers.ConvBNReLU(
y_ch, out_ch, kernel_size=3, padding=1, bias_attr=False)
self.resize_mode = resize_mode

    def check(self, x, y):
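        # Both inputs must be NCHW; x (low level) must be at least as large
        # spatially as y (high level), since y is upsampled to x's size.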
assert x.ndim == 4 and y.ndim == 4
x_h, x_w = x.shape[2:]
y_h, y_w = y.shape[2:]
assert x_h >= y_h and x_w >= y_w

    def prepare(self, x, y):
x = self.prepare_x(x, y)
y = self.prepare_y(x, y)
return x, y

    def prepare_x(self, x, y):
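        # Project x to y_ch channels so the two tensors can be fused element-wise.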
x = self.conv_x(x)
return x

    def prepare_y(self, x, y):
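        # Upsample y to x's spatial size; the resize mode defaults to bilinear.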
y_up = F.interpolate(y, paddle.shape(x)[2:], mode=self.resize_mode)
return y_up

    def fuse(self, x, y):
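        # Base fusion: plain element-wise addition followed by conv_out.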
out = x + y
out = self.conv_out(out)
return out

    def forward(self, x, y):
"""
Args:
x (Tensor): The low level feature.
y (Tensor): The high level feature.
"""
self.check(x, y)
x, y = self.prepare(x, y)
out = self.fuse(x, y)
return out


class UAFM_ChAtten(UAFM):
"""
The UAFM with channel attention, which uses mean and max values.
Args:
        x_ch (int): The number of channels of the x tensor, which is the low level feature.
        y_ch (int): The number of channels of the y tensor, which is the high level feature.
        out_ch (int): The number of channels of the output tensor.
        ksize (int, optional): The kernel size of the conv applied to the x tensor. Default: 3.
        resize_mode (str, optional): The interpolation mode used to upsample the y tensor. Default: 'bilinear'.
    """

def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)
self.conv_xy_atten = nn.Sequential(
layers.ConvBNAct(
4 * y_ch,
y_ch // 2,
kernel_size=1,
bias_attr=False,
act_type="leakyrelu"),
layers.ConvBN(
y_ch // 2, y_ch, kernel_size=1, bias_attr=False))

    def fuse(self, x, y):
"""
Args:
x (Tensor): The low level feature.
y (Tensor): The high level feature.
"""
atten = helper.avg_max_reduce_hw([x, y], self.training)
atten = F.sigmoid(self.conv_xy_atten(atten))
out = x * atten + y * (1 - atten)
out = self.conv_out(out)
return out


class UAFM_ChAtten_S(UAFM):
"""
The UAFM with channel attention, which uses mean values.
Args:
        x_ch (int): The number of channels of the x tensor, which is the low level feature.
        y_ch (int): The number of channels of the y tensor, which is the high level feature.
        out_ch (int): The number of channels of the output tensor.
        ksize (int, optional): The kernel size of the conv applied to the x tensor. Default: 3.
        resize_mode (str, optional): The interpolation mode used to upsample the y tensor. Default: 'bilinear'.
    """

def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)
self.conv_xy_atten = nn.Sequential(
layers.ConvBNAct(
2 * y_ch,
y_ch // 2,
kernel_size=1,
bias_attr=False,
act_type="leakyrelu"),
layers.ConvBN(
y_ch // 2, y_ch, kernel_size=1, bias_attr=False))

    def fuse(self, x, y):
"""
Args:
x (Tensor): The low level feature.
y (Tensor): The high level feature.
"""
atten = helper.avg_reduce_hw([x, y])
atten = F.sigmoid(self.conv_xy_atten(atten))
out = x * atten + y * (1 - atten)
out = self.conv_out(out)
return out


class UAFM_SpAtten(UAFM):
"""
The UAFM with spatial attention, which uses mean and max values.
Args:
        x_ch (int): The number of channels of the x tensor, which is the low level feature.
        y_ch (int): The number of channels of the y tensor, which is the high level feature.
        out_ch (int): The number of channels of the output tensor.
        ksize (int, optional): The kernel size of the conv applied to the x tensor. Default: 3.
        resize_mode (str, optional): The interpolation mode used to upsample the y tensor. Default: 'bilinear'.
    """

def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)
self.conv_xy_atten = nn.Sequential(
layers.ConvBNReLU(
4, 2, kernel_size=3, padding=1, bias_attr=False),
layers.ConvBN(
2, 1, kernel_size=3, padding=1, bias_attr=False))

    def fuse(self, x, y):
"""
Args:
x (Tensor): The low level feature.
y (Tensor): The high level feature.
"""
atten = helper.avg_max_reduce_channel([x, y])
atten = F.sigmoid(self.conv_xy_atten(atten))
out = x * atten + y * (1 - atten)
out = self.conv_out(out)
return out


class UAFM_SpAtten_S(UAFM):
"""
The UAFM with spatial attention, which uses mean values.
Args:
        x_ch (int): The number of channels of the x tensor, which is the low level feature.
        y_ch (int): The number of channels of the y tensor, which is the high level feature.
        out_ch (int): The number of channels of the output tensor.
        ksize (int, optional): The kernel size of the conv applied to the x tensor. Default: 3.
        resize_mode (str, optional): The interpolation mode used to upsample the y tensor. Default: 'bilinear'.
    """

def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)
self.conv_xy_atten = nn.Sequential(
layers.ConvBNReLU(
2, 2, kernel_size=3, padding=1, bias_attr=False),
layers.ConvBN(
2, 1, kernel_size=3, padding=1, bias_attr=False))

    def fuse(self, x, y):
"""
Args:
x (Tensor): The low level feature.
y (Tensor): The high level feature.
"""
atten = helper.avg_reduce_channel([x, y])
atten = F.sigmoid(self.conv_xy_atten(atten))
out = x * atten + y * (1 - atten)
out = self.conv_out(out)
return out


class UAFMMobile(UAFM):
"""
Unified Attention Fusion Module for mobile.
Args:
        x_ch (int): The number of channels of the x tensor, which is the low level feature.
        y_ch (int): The number of channels of the y tensor, which is the high level feature.
        out_ch (int): The number of channels of the output tensor.
        ksize (int, optional): The kernel size of the conv applied to the x tensor. Default: 3.
        resize_mode (str, optional): The interpolation mode used to upsample the y tensor. Default: 'bilinear'.
    """

def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)
self.conv_x = layers.SeparableConvBNReLU(
x_ch, y_ch, kernel_size=ksize, padding=ksize // 2, bias_attr=False)
self.conv_out = layers.SeparableConvBNReLU(
y_ch, out_ch, kernel_size=3, padding=1, bias_attr=False)


class UAFMMobile_SpAtten(UAFM):
"""
Unified Attention Fusion Module with spatial attention for mobile.
Args:
        x_ch (int): The number of channels of the x tensor, which is the low level feature.
        y_ch (int): The number of channels of the y tensor, which is the high level feature.
        out_ch (int): The number of channels of the output tensor.
        ksize (int, optional): The kernel size of the conv applied to the x tensor. Default: 3.
        resize_mode (str, optional): The interpolation mode used to upsample the y tensor. Default: 'bilinear'.
    """

def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)
self.conv_x = layers.SeparableConvBNReLU(
x_ch, y_ch, kernel_size=ksize, padding=ksize // 2, bias_attr=False)
self.conv_out = layers.SeparableConvBNReLU(
y_ch, out_ch, kernel_size=3, padding=1, bias_attr=False)
self.conv_xy_atten = nn.Sequential(
layers.ConvBNReLU(
4, 2, kernel_size=3, padding=1, bias_attr=False),
layers.ConvBN(
2, 1, kernel_size=3, padding=1, bias_attr=False))

    def fuse(self, x, y):
"""
Args:
x (Tensor): The low level feature.
y (Tensor): The high level feature.
"""
atten = helper.avg_max_reduce_channel([x, y])
atten = F.sigmoid(self.conv_xy_atten(atten))
out = x * atten + y * (1 - atten)
out = self.conv_out(out)
return out
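

if __name__ == '__main__':
    # Minimal smoke-test sketch, not part of the upstream module: the shapes
    # and channel counts below are illustrative assumptions. A low level
    # 64-channel feature map is fused with a high level 128-channel feature
    # map at half its resolution; the output takes x's spatial size.
    fusion = UAFM_SpAtten(x_ch=64, y_ch=128, out_ch=128)
    x = paddle.rand([2, 64, 64, 64])   # low level feature (N, C, H, W)
    y = paddle.rand([2, 128, 32, 32])  # high level feature, lower resolution
    out = fusion(x, y)
    print(out.shape)                   # expected: [2, 128, 64, 64]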