IDM-VTON-demo

Paused

IDM-VTON-demo / densepose /modeling /roi_heads /deeplab.py

IDM-VTON

update IDM-VTON Demo

938e515 7 months ago

8.63 kB

	# Copyright (c) Facebook, Inc. and its affiliates.

	import fvcore.nn.weight_init as weight_init
	import torch
	from torch import nn
	from torch.nn import functional as F

	from detectron2.config import CfgNode
	from detectron2.layers import Conv2d

	from .registry import ROI_DENSEPOSE_HEAD_REGISTRY


	@ROI_DENSEPOSE_HEAD_REGISTRY.register()
	class DensePoseDeepLabHead(nn.Module):
	"""
	DensePose head using DeepLabV3 model from
	"Rethinking Atrous Convolution for Semantic Image Segmentation"
	<https://arxiv.org/abs/1706.05587>.
	"""

	def __init__(self, cfg: CfgNode, input_channels: int):
	super(DensePoseDeepLabHead, self).__init__()
	# fmt: off
	hidden_dim = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM
	kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL
	norm = cfg.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NORM
	self.n_stacked_convs = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS
	self.use_nonlocal = cfg.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NONLOCAL_ON
	# fmt: on
	pad_size = kernel_size // 2
	n_channels = input_channels

	self.ASPP = ASPP(input_channels, [6, 12, 56], n_channels) # 6, 12, 56
	self.add_module("ASPP", self.ASPP)

	if self.use_nonlocal:
	self.NLBlock = NONLocalBlock2D(input_channels, bn_layer=True)
	self.add_module("NLBlock", self.NLBlock)
	# weight_init.c2_msra_fill(self.ASPP)

	for i in range(self.n_stacked_convs):
	norm_module = nn.GroupNorm(32, hidden_dim) if norm == "GN" else None
	layer = Conv2d(
	n_channels,
	hidden_dim,
	kernel_size,
	stride=1,
	padding=pad_size,
	bias=not norm,
	norm=norm_module,
	)
	weight_init.c2_msra_fill(layer)
	n_channels = hidden_dim
	layer_name = self._get_layer_name(i)
	self.add_module(layer_name, layer)
	self.n_out_channels = hidden_dim
	# initialize_module_params(self)

	def forward(self, features):
	x0 = features
	x = self.ASPP(x0)
	if self.use_nonlocal:
	x = self.NLBlock(x)
	output = x
	for i in range(self.n_stacked_convs):
	layer_name = self._get_layer_name(i)
	x = getattr(self, layer_name)(x)
	x = F.relu(x)
	output = x
	return output

	def _get_layer_name(self, i: int):
	layer_name = "body_conv_fcn{}".format(i + 1)
	return layer_name


	# Copied from
	# https://github.com/pytorch/vision/blob/master/torchvision/models/segmentation/deeplabv3.py
	# See https://arxiv.org/pdf/1706.05587.pdf for details
	class ASPPConv(nn.Sequential):
	def __init__(self, in_channels, out_channels, dilation):
	modules = [
	nn.Conv2d(
	in_channels, out_channels, 3, padding=dilation, dilation=dilation, bias=False
	),
	nn.GroupNorm(32, out_channels),
	nn.ReLU(),
	]
	super(ASPPConv, self).__init__(*modules)


	class ASPPPooling(nn.Sequential):
	def __init__(self, in_channels, out_channels):
	super(ASPPPooling, self).__init__(
	nn.AdaptiveAvgPool2d(1),
	nn.Conv2d(in_channels, out_channels, 1, bias=False),
	nn.GroupNorm(32, out_channels),
	nn.ReLU(),
	)

	def forward(self, x):
	size = x.shape[-2:]
	x = super(ASPPPooling, self).forward(x)
	return F.interpolate(x, size=size, mode="bilinear", align_corners=False)


	class ASPP(nn.Module):
	def __init__(self, in_channels, atrous_rates, out_channels):
	super(ASPP, self).__init__()
	modules = []
	modules.append(
	nn.Sequential(
	nn.Conv2d(in_channels, out_channels, 1, bias=False),
	nn.GroupNorm(32, out_channels),
	nn.ReLU(),
	)
	)

	rate1, rate2, rate3 = tuple(atrous_rates)
	modules.append(ASPPConv(in_channels, out_channels, rate1))
	modules.append(ASPPConv(in_channels, out_channels, rate2))
	modules.append(ASPPConv(in_channels, out_channels, rate3))
	modules.append(ASPPPooling(in_channels, out_channels))

	self.convs = nn.ModuleList(modules)

	self.project = nn.Sequential(
	nn.Conv2d(5 * out_channels, out_channels, 1, bias=False),
	# nn.BatchNorm2d(out_channels),
	nn.ReLU()
	# nn.Dropout(0.5)
	)

	def forward(self, x):
	res = []
	for conv in self.convs:
	res.append(conv(x))
	res = torch.cat(res, dim=1)
	return self.project(res)


	# copied from
	# https://github.com/AlexHex7/Non-local_pytorch/blob/master/lib/non_local_embedded_gaussian.py
	# See https://arxiv.org/abs/1711.07971 for details
	class _NonLocalBlockND(nn.Module):
	def __init__(
	self, in_channels, inter_channels=None, dimension=3, sub_sample=True, bn_layer=True
	):
	super(_NonLocalBlockND, self).__init__()

	assert dimension in [1, 2, 3]

	self.dimension = dimension
	self.sub_sample = sub_sample

	self.in_channels = in_channels
	self.inter_channels = inter_channels

	if self.inter_channels is None:
	self.inter_channels = in_channels // 2
	if self.inter_channels == 0:
	self.inter_channels = 1

	if dimension == 3:
	conv_nd = nn.Conv3d
	max_pool_layer = nn.MaxPool3d(kernel_size=(1, 2, 2))
	bn = nn.GroupNorm # (32, hidden_dim) #nn.BatchNorm3d
	elif dimension == 2:
	conv_nd = nn.Conv2d
	max_pool_layer = nn.MaxPool2d(kernel_size=(2, 2))
	bn = nn.GroupNorm # (32, hidden_dim)nn.BatchNorm2d
	else:
	conv_nd = nn.Conv1d
	max_pool_layer = nn.MaxPool1d(kernel_size=2)
	bn = nn.GroupNorm # (32, hidden_dim)nn.BatchNorm1d

	self.g = conv_nd(
	in_channels=self.in_channels,
	out_channels=self.inter_channels,
	kernel_size=1,
	stride=1,
	padding=0,
	)

	if bn_layer:
	self.W = nn.Sequential(
	conv_nd(
	in_channels=self.inter_channels,
	out_channels=self.in_channels,
	kernel_size=1,
	stride=1,
	padding=0,
	),
	bn(32, self.in_channels),
	)
	nn.init.constant_(self.W[1].weight, 0)
	nn.init.constant_(self.W[1].bias, 0)
	else:
	self.W = conv_nd(
	in_channels=self.inter_channels,
	out_channels=self.in_channels,
	kernel_size=1,
	stride=1,
	padding=0,
	)
	nn.init.constant_(self.W.weight, 0)
	nn.init.constant_(self.W.bias, 0)

	self.theta = conv_nd(
	in_channels=self.in_channels,
	out_channels=self.inter_channels,
	kernel_size=1,
	stride=1,
	padding=0,
	)
	self.phi = conv_nd(
	in_channels=self.in_channels,
	out_channels=self.inter_channels,
	kernel_size=1,
	stride=1,
	padding=0,
	)

	if sub_sample:
	self.g = nn.Sequential(self.g, max_pool_layer)
	self.phi = nn.Sequential(self.phi, max_pool_layer)

	def forward(self, x):
	"""
	:param x: (b, c, t, h, w)
	:return:
	"""

	batch_size = x.size(0)

	g_x = self.g(x).view(batch_size, self.inter_channels, -1)
	g_x = g_x.permute(0, 2, 1)

	theta_x = self.theta(x).view(batch_size, self.inter_channels, -1)
	theta_x = theta_x.permute(0, 2, 1)
	phi_x = self.phi(x).view(batch_size, self.inter_channels, -1)
	f = torch.matmul(theta_x, phi_x)
	f_div_C = F.softmax(f, dim=-1)

	y = torch.matmul(f_div_C, g_x)
	y = y.permute(0, 2, 1).contiguous()
	y = y.view(batch_size, self.inter_channels, *x.size()[2:])
	W_y = self.W(y)
	z = W_y + x

	return z


	class NONLocalBlock2D(_NonLocalBlockND):
	def __init__(self, in_channels, inter_channels=None, sub_sample=True, bn_layer=True):
	super(NONLocalBlock2D, self).__init__(
	in_channels,
	inter_channels=inter_channels,
	dimension=2,
	sub_sample=sub_sample,
	bn_layer=bn_layer,
	)