test2 / mmseg /models /decode_heads /vit_mla_head.py

Upload 660 files

b13b124 over 1 year ago

3.69 kB

	import torch
	import torch.nn as nn
	import torch.nn.functional as F
	from functools import partial
	import math

	from .helpers import load_pretrained
	from .layers import DropPath, to_2tuple, trunc_normal_

	from ..builder import HEADS
	from .decode_head import BaseDecodeHead
	from ..backbones.vit import Block

	from mmcv.cnn import build_norm_layer


	class MLAHead(nn.Module):
	    """Refine four feature maps with separate branches and fuse them.

	    Every input goes through its own
	    conv3x3 -> norm -> ReLU -> conv3x3 -> norm -> ReLU branch, is upsampled
	    bilinearly to four times its height and width, and the four results are
	    concatenated along the channel axis (``dim=1``).

	    Args:
	        mla_channels: Number of input channels expected by each branch.
	        mlahead_channels: Number of output channels of each branch.
	        norm_cfg: Config handed unchanged to ``build_norm_layer``.
	            NOTE(review): the default ``None`` is forwarded as-is;
	            presumably callers always supply a config -- confirm.
	    """

	    def __init__(self, mla_channels=256, mlahead_channels=128, norm_cfg=None):
	        super(MLAHead, self).__init__()
	        # Attribute names and the layer order inside each branch are kept
	        # as before so that state_dict keys (head2.0, head2.1, ...) do not
	        # change.
	        self.head2 = self._make_branch(mla_channels, mlahead_channels, norm_cfg)
	        self.head3 = self._make_branch(mla_channels, mlahead_channels, norm_cfg)
	        self.head4 = self._make_branch(mla_channels, mlahead_channels, norm_cfg)
	        self.head5 = self._make_branch(mla_channels, mlahead_channels, norm_cfg)

	    @staticmethod
	    def _make_branch(in_channels, out_channels, norm_cfg):
	        """Build one conv-norm-ReLU-conv-norm-ReLU branch."""
	        return nn.Sequential(
	            nn.Conv2d(in_channels, out_channels, 3, padding=1, bias=False),
	            build_norm_layer(norm_cfg, out_channels)[1],
	            nn.ReLU(),
	            nn.Conv2d(out_channels, out_channels, 3, padding=1, bias=False),
	            build_norm_layer(norm_cfg, out_channels)[1],
	            nn.ReLU(),
	        )

	    @staticmethod
	    def _upsample4x(feat):
	        """Bilinearly resize ``feat`` to 4x its height and 4x its width."""
	        height, width = feat.shape[-2:]
	        # Pass both dimensions explicitly: a single int would be applied to
	        # height and width alike and squash non-square inputs to a square.
	        return F.interpolate(
	            feat,
	            size=(4 * height, 4 * width),
	            mode='bilinear',
	            align_corners=True,
	        )

	    def forward(self, mla_p2, mla_p3, mla_p4, mla_p5):
	        """Run each input through its branch, upsample 4x and concatenate.

	        Returns:
	            The four upsampled branch outputs concatenated on ``dim=1``.
	        """
	        head2 = self._upsample4x(self.head2(mla_p2))
	        head3 = self._upsample4x(self.head3(mla_p3))
	        head4 = self._upsample4x(self.head4(mla_p4))
	        head5 = self._upsample4x(self.head5(mla_p5))
	        return torch.cat([head2, head3, head4, head5], dim=1)



	@HEADS.register_module()
	class VIT_MLAHead(BaseDecodeHead):
	    """Decode head that fuses four feature maps with ``MLAHead``.

	    The fused features are passed through a 3x3 classification convolution
	    and resized bilinearly to ``img_size``.

	    Args:
	        img_size: Target size handed to ``F.interpolate`` for the output.
	        mla_channels: Input channel count forwarded to ``MLAHead``.
	        mlahead_channels: Per-branch output channel count of ``MLAHead``.
	        norm_layer: Stored on ``self.BatchNorm``; not used elsewhere in
	            this class.
	        norm_cfg: Normalisation config forwarded to ``MLAHead``.
	        **kwargs: Passed on to ``BaseDecodeHead.__init__``.
	    """

	    def __init__(self, img_size=768, mla_channels=256, mlahead_channels=128,
	                 norm_layer=nn.BatchNorm2d, norm_cfg=None, **kwargs):
	        super().__init__(**kwargs)

	        # Plain configuration values kept on the instance.
	        self.img_size = img_size
	        self.mla_channels = mla_channels
	        self.mlahead_channels = mlahead_channels
	        self.norm_cfg = norm_cfg
	        self.BatchNorm = norm_layer

	        # Sub-modules: the four-branch fusion, then the classifier.
	        self.mlahead = MLAHead(
	            mla_channels=self.mla_channels,
	            mlahead_channels=self.mlahead_channels,
	            norm_cfg=self.norm_cfg,
	        )
	        # The fusion concatenates four branches, hence 4 * mlahead_channels.
	        # self.num_classes is not set in this class; presumably it is
	        # provided by BaseDecodeHead -- confirm.
	        fused_channels = 4 * self.mlahead_channels
	        self.cls = nn.Conv2d(fused_channels, self.num_classes, 3, padding=1)

	    def forward(self, inputs):
	        """Fuse the first four entries of ``inputs`` and classify them.

	        Returns:
	            The classifier output resized to ``self.img_size``.
	        """
	        p2, p3, p4, p5 = inputs[0], inputs[1], inputs[2], inputs[3]
	        fused = self.mlahead(p2, p3, p4, p5)
	        logits = self.cls(fused)
	        # self.align_corners is not set in this class; presumably it is
	        # provided by BaseDecodeHead -- confirm.
	        return F.interpolate(
	            logits,
	            size=self.img_size,
	            mode='bilinear',
	            align_corners=self.align_corners,
	        )