# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle
import paddle.nn as nn
import paddle.nn.functional as F

from paddleseg.models import layers
from paddleseg.cvlibs import manager
from paddleseg.utils import utils


@manager.MODELS.add_component
class ESPNetV1(nn.Layer):
    """
    The ESPNetV1 implementation based on PaddlePaddle.

    The original article refers to
    Sachin Mehta, Mohammad Rastegari, Anat Caspi, Linda Shapiro, and
    Hannaneh Hajishirzi. "ESPNet: Efficient Spatial Pyramid of Dilated
    Convolutions for Semantic Segmentation"
    (https://arxiv.org/abs/1803.06815).

    Args:
        num_classes (int): The number of target classes.
        in_channels (int, optional): Number of input channels. Default: 3.
        level2_depth (int, optional): Depth of DilatedResidualBlock. Default: 2.
        level3_depth (int, optional): Depth of DilatedResidualBlock. Default: 3.
        pretrained (str, optional): The path or url of pretrained model. Default: None.
    """

    def __init__(self,
                 num_classes,
                 in_channels=3,
                 level2_depth=2,
                 level3_depth=3,
                 pretrained=None):
        super().__init__()
        self.encoder = ESPNetEncoder(num_classes, in_channels, level2_depth,
                                     level3_depth)
        self.level3_up = nn.Conv2DTranspose(
            num_classes,
            num_classes,
            2,
            stride=2,
            padding=0,
            output_padding=0,
            bias_attr=False)
        self.br3 = layers.SyncBatchNorm(num_classes)
        self.level2_proj = nn.Conv2D(
            in_channels + 128, num_classes, 1, bias_attr=False)
        self.combine_l2_l3 = nn.Sequential(
            BNPReLU(2 * num_classes),
            DilatedResidualBlock(
                2 * num_classes, num_classes, residual=False), )
        self.level2_up = nn.Sequential(
            nn.Conv2DTranspose(
                num_classes,
                num_classes,
                2,
                stride=2,
                padding=0,
                output_padding=0,
                bias_attr=False),
            BNPReLU(num_classes), )
        self.out_proj = layers.ConvBNPReLU(
            16 + in_channels + num_classes,
            num_classes,
            3,
            padding='same',
            stride=1)
        self.out_up = nn.Conv2DTranspose(
            num_classes,
            num_classes,
            2,
            stride=2,
            padding=0,
            output_padding=0,
            bias_attr=False)
        self.pretrained = pretrained
        self.init_weight()

    def init_weight(self):
        if self.pretrained is not None:
            utils.load_entire_model(self, self.pretrained)

    def forward(self, x):
        p1, p2, p3 = self.encoder(x)
        # Decode coarse-to-fine: upsample the level-3 logits, fuse with the
        # level-2 map, upsample again, fuse with the level-1 map, and restore
        # the input resolution with a final deconvolution.
        up_p3 = self.level3_up(p3)
        combine = self.combine_l2_l3(paddle.concat([up_p3, p2], axis=1))
        up_p2 = self.level2_up(combine)
        combine = self.out_proj(paddle.concat([up_p2, p1], axis=1))
        out = self.out_up(combine)
        return [out]
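

# Decoder shape walk-through (illustrative note, not part of the original
# file): for a 3-channel input, p3 arrives at 1/8 resolution, level3_up
# brings it to 1/4 to match p2, level2_up brings the fused map to 1/2 to
# match p1, and out_up returns logits at the full input resolution.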


class BNPReLU(nn.Layer):
    def __init__(self, channels):
        super().__init__()
        self.bn = layers.SyncBatchNorm(channels)
        self.act = nn.PReLU(channels)

    def forward(self, x):
        x = self.bn(x)
        x = self.act(x)
        return x


class DownSampler(nn.Layer):
    """
    Down sampler.

    Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        branch_channels = out_channels // 5
        remain_channels = out_channels - branch_channels * 4
        self.conv1 = nn.Conv2D(
            in_channels,
            branch_channels,
            3,
            stride=2,
            padding=1,
            bias_attr=False)
        self.d_conv1 = nn.Conv2D(
            branch_channels, remain_channels, 3, padding=1, bias_attr=False)
        self.d_conv2 = nn.Conv2D(
            branch_channels,
            branch_channels,
            3,
            padding=2,
            dilation=2,
            bias_attr=False)
        self.d_conv4 = nn.Conv2D(
            branch_channels,
            branch_channels,
            3,
            padding=4,
            dilation=4,
            bias_attr=False)
        self.d_conv8 = nn.Conv2D(
            branch_channels,
            branch_channels,
            3,
            padding=8,
            dilation=8,
            bias_attr=False)
        self.d_conv16 = nn.Conv2D(
            branch_channels,
            branch_channels,
            3,
            padding=16,
            dilation=16,
            bias_attr=False)
        self.bn = layers.SyncBatchNorm(out_channels)
        self.act = nn.PReLU(out_channels)

    def forward(self, x):
        x = self.conv1(x)
        d1 = self.d_conv1(x)
        d2 = self.d_conv2(x)
        d4 = self.d_conv4(x)
        d8 = self.d_conv8(x)
        d16 = self.d_conv16(x)
        # Hierarchical feature fusion: each dilated branch is summed with the
        # previous ones before concatenation to suppress gridding artifacts.
        feat1 = d2
        feat2 = feat1 + d4
        feat3 = feat2 + d8
        feat4 = feat3 + d16
        feat = paddle.concat([d1, feat1, feat2, feat3, feat4], axis=1)
        out = self.bn(feat)
        out = self.act(out)
        return out
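

# Channel bookkeeping for the split above (worked example, not part of the
# original file): DownSampler(19, 64), as built by ESPNetEncoder when
# in_channels=3, gives branch_channels = 64 // 5 = 12 and
# remain_channels = 64 - 4 * 12 = 16, so concatenating
# [d1, feat1, feat2, feat3, feat4] yields 16 + 4 * 12 = 64 = out_channels.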


class DilatedResidualBlock(nn.Layer):
    '''
    ESP block, principle: reduce -> split -> transform -> merge.

    Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
        residual (bool, optional): Add a residual connection through identity operation. Default: True.
    '''

    def __init__(self, in_channels, out_channels, residual=True):
        super().__init__()
        branch_channels = out_channels // 5
        remain_channels = out_channels - branch_channels * 4
        # Reduce: a 1x1 convolution projects the input down to the branch width.
        self.conv1 = nn.Conv2D(in_channels, branch_channels, 1, bias_attr=False)
        self.d_conv1 = nn.Conv2D(
            branch_channels, remain_channels, 3, padding=1, bias_attr=False)
        self.d_conv2 = nn.Conv2D(
            branch_channels,
            branch_channels,
            3,
            padding=2,
            dilation=2,
            bias_attr=False)
        self.d_conv4 = nn.Conv2D(
            branch_channels,
            branch_channels,
            3,
            padding=4,
            dilation=4,
            bias_attr=False)
        self.d_conv8 = nn.Conv2D(
            branch_channels,
            branch_channels,
            3,
            padding=8,
            dilation=8,
            bias_attr=False)
        self.d_conv16 = nn.Conv2D(
            branch_channels,
            branch_channels,
            3,
            padding=16,
            dilation=16,
            bias_attr=False)
        self.bn = BNPReLU(out_channels)
        self.residual = residual

    def forward(self, x):
        x_proj = self.conv1(x)
        # Split & transform: parallel 3x3 convolutions with dilations 1..16.
        d1 = self.d_conv1(x_proj)
        d2 = self.d_conv2(x_proj)
        d4 = self.d_conv4(x_proj)
        d8 = self.d_conv8(x_proj)
        d16 = self.d_conv16(x_proj)
        # Merge with hierarchical feature fusion, as in DownSampler.
        feat1 = d2
        feat2 = feat1 + d4
        feat3 = feat2 + d8
        feat4 = feat3 + d16
        feat = paddle.concat([d1, feat1, feat2, feat3, feat4], axis=1)
        if self.residual:
            feat = feat + x
        out = self.bn(feat)
        return out
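

# Note on the residual flag (observation on the block above): `feat + x`
# requires in_channels == out_channels, which holds for the encoder stacks
# (64 -> 64 and 128 -> 128); the decoder's 2*num_classes -> num_classes
# instance therefore passes residual=False.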


class ESPNetEncoder(nn.Layer):
    '''
    The ESPNet-C implementation based on PaddlePaddle.

    Args:
        num_classes (int): The number of target classes.
        in_channels (int, optional): Number of input channels. Default: 3.
        level2_depth (int, optional): Depth of DilatedResidualBlock. Default: 5.
        level3_depth (int, optional): Depth of DilatedResidualBlock. Default: 3.
    '''

    def __init__(self,
                 num_classes,
                 in_channels=3,
                 level2_depth=5,
                 level3_depth=3):
        super().__init__()
        self.level1 = layers.ConvBNPReLU(
            in_channels, 16, 3, padding='same', stride=2)
        self.br1 = BNPReLU(in_channels + 16)
        self.proj1 = layers.ConvBNPReLU(in_channels + 16, num_classes, 1)
        self.level2_0 = DownSampler(in_channels + 16, 64)
        self.level2 = nn.Sequential(
            *[DilatedResidualBlock(64, 64) for _ in range(level2_depth)])
        self.br2 = BNPReLU(in_channels + 128)
        self.proj2 = layers.ConvBNPReLU(in_channels + 128, num_classes, 1)
        self.level3_0 = DownSampler(in_channels + 128, 128)
        self.level3 = nn.Sequential(
            *[DilatedResidualBlock(128, 128) for _ in range(level3_depth)])
        self.br3 = BNPReLU(256)
        self.proj3 = layers.ConvBNPReLU(256, num_classes, 1)

    def forward(self, x):
        f1 = self.level1(x)
        # The raw input is average-pooled to each stage's resolution and
        # concatenated back in, so every stage sees the original image.
        down2 = F.adaptive_avg_pool2d(x, output_size=f1.shape[2:])
        feat1 = paddle.concat([f1, down2], axis=1)
        feat1 = self.br1(feat1)
        p1 = self.proj1(feat1)

        f2_res = self.level2_0(feat1)
        f2 = self.level2(f2_res)
        down4 = F.adaptive_avg_pool2d(x, output_size=f2.shape[2:])
        feat2 = paddle.concat([f2, f2_res, down4], axis=1)
        feat2 = self.br2(feat2)
        p2 = self.proj2(feat2)

        f3_res = self.level3_0(feat2)
        f3 = self.level3(f3_res)
        feat3 = paddle.concat([f3, f3_res], axis=1)
        feat3 = self.br3(feat3)
        p3 = self.proj3(feat3)
        return p1, p2, p3
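

# A minimal smoke test (illustrative sketch, not part of the original file;
# 19 classes and the 256x512 input size are arbitrary assumptions).
if __name__ == '__main__':
    model = ESPNetV1(num_classes=19)
    model.eval()
    x = paddle.rand([1, 3, 256, 512])
    with paddle.no_grad():
        logits = model(x)[0]
    # The decoder restores the input resolution: [1, 19, 256, 512].
    print(logits.shape)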