Spaces:

ZhengPeng7
/

GCoNet_plus_demo

Running on Zero

App Files Files Community

ZhengPeng7 committed on Mar 31, 2023

Commit

7febe9c

1 Parent(s): 1a1cf3c

Initialization.

Browse files

Files changed (5) hide show

.gitignore +134 -0
app.py +73 -0
config.py +107 -0
models/GCoNet.py +248 -0
models/modules.py +516 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,134 @@

+# Custom
+.vscode
+*.pth
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+.python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/

app.py ADDED Viewed

	@@ -0,0 +1,73 @@

+import os
+from glob import glob
+import cv2
+import numpy as np
+from PIL import Image
+import matplotlib.pyplot as plt
+import torch
+from torchvision import transforms
+import gradio as gr
+from models.GCoNet import GCoNet
+# Inference device: index 0 selects 'cpu'; change the index to 1 to select 'cuda'.
+device = ['cpu', 'cuda'][0]
+class ImagePreprocessor():
+    def __init__(self) -> None:
+        self.transform_image = transforms.Compose([
+            transforms.Resize((256, 256)),
+            transforms.ToTensor(),
+            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
+        ])
+    def proc(self, image):
+        image = self.transform_image(image)
+        return image
+model = GCoNet(bb_pretrained=False).to(device)
+state_dict = './ultimate_duts_cocoseg (The best one).pth'
+if os.path.exists(state_dict):
+    gconet_dict = torch.load(state_dict, map_location=device)
+    model.load_state_dict(gconet_dict)
+model.eval()
+def pred_maps(dr):
+    images = [cv2.imread(image_path) for image_path in glob(os.path.join(dr, '*'))]
+    image_shapes = [image.shape[:2] for image in images]
+    images = [Image.fromarray(image) for image in images]
+    images_proc = []
+    image_preprocessor = ImagePreprocessor()
+    for image in images:
+        images_proc.append(image_preprocessor.proc(image))
+    images_proc = torch.cat([image_proc.unsqueeze(0) for image_proc in images_proc])
+    with torch.no_grad():
+        scaled_preds_tensor = model(images_proc.to(device))[-1]
+    preds = []
+    for image_shape, pred_tensor in zip(image_shapes, scaled_preds_tensor):
+        if device == 'cuda':
+            pred_tensor = pred_tensor.cpu()
+        preds.append(torch.nn.functional.interpolate(pred_tensor.unsqueeze(0), size=image_shape, mode='bilinear', align_corners=True).squeeze().numpy())
+    image_preds = []
+    for image, pred in zip(images, preds):
+        image_preds.append(
+            cv2.cvtColor(
+                np.hstack([np.array(image.convert('RGB')), cv2.cvtColor((pred*255).astype(np.uint8), cv2.COLOR_GRAY2RGB)]),
+                cv2.COLOR_BGR2RGB
+        ))
+    # for image_pred in image_preds:
+    #     cv2.imwrite('a.png', cv2.cvtColor(image_pred, cv2.COLOR_RGB2BGR))
+    return image_preds[:]
+# Gradio UI: the text typed by the user is passed to pred_maps as its directory argument.
+# NOTE(review): five image outputs are declared while pred_maps returns one array per
+# image found in the directory — presumably the directory must hold exactly five images; confirm.
+demo = gr.Interface(
+    fn=pred_maps,
+    inputs='text',
+    outputs=['image', 'image', 'image', 'image', 'image'],
+    css=".output_image, .input_image {height: 300px !important}",
+)
+# Starts the app as soon as this module is executed.
+demo.launch(debug=True)

config.py ADDED Viewed

	@@ -0,0 +1,107 @@

+import os
+class Config():
+    def __init__(self) -> None:
+        # Backbone
+        self.bb = ['vgg16', 'vgg16bn', 'resnet50'][1]
+        # BN
+        self.use_bn = 'bn' in self.bb or 'resnet' in self.bb
+        # Augmentation
+        self.preproc_methods = ['flip', 'enhance', 'rotate', 'crop', 'pepper'][:3]
+        # Mask
+        losses = ['sal', 'cls', 'contrast', 'cls_mask']
+        self.loss = losses[:]
+        self.cls_mask_operation = ['x', '+', 'c'][0]
+        # Loss + Triplet Loss
+        self.lambdas_sal_last = {
+            # not 0 means opening this loss
+            # original rate -- 1 : 30 : 1.5 : 0.2, bce x 30
+            'bce': 30 * 1,          # high performance
+            'iou': 0.5 * 1,         # 0 / 255
+            'ssim': 1 * 0,          # help contours
+            'mse': 150 * 0,         # can smooth the saliency map
+            'reg': 100 * 0,
+            'triplet': 3 * 1 * ('cls' in self.loss),
+        }
+        # DB
+        self.db_output_decoder = True
+        self.db_k = 300
+        self.db_k_alpha = 1
+        self.split_mask = True and 'cls_mask' in self.loss
+        self.db_mask = False and self.split_mask
+        # Triplet Loss
+        self.triplet = ['_x5', 'mask'][:1]
+        self.triplet_loss_margin = 0.1
+        # Adv
+        self.lambda_adv = 0.        # turn to 0 to avoid adv training
+        # Refiner
+        self.refine = [0, 1, 4][0]         # 0 -- no refinement, 1 -- only output mask for refinement, 4 -- but also raw input.
+        if self.refine:
+            self.batch_size = 16
+        else:
+            if self.bb != 'vgg16':
+                self.batch_size = 26
+            else:
+                self.batch_size = 48
+        self.db_output_refiner = False and self.refine
+        # Intermediate Layers
+        self.lambdas_sal_others = {
+            'bce': 0,
+            'iou': 0.,
+            'ssim': 0,
+            'mse': 0,
+            'reg': 0,
+            'triplet': 0,
+        }
+        self.output_number = 1
+        self.loss_sal_layers = 4              # used to be last 4 layers
+        self.loss_cls_mask_last_layers = 1         # used to be last 4 layers
+        if 'keep in range':
+            self.loss_sal_layers = min(self.output_number, self.loss_sal_layers)
+            self.loss_cls_mask_last_layers = min(self.output_number, self.loss_cls_mask_last_layers)
+            self.output_number = min(self.output_number, max(self.loss_sal_layers, self.loss_cls_mask_last_layers))
+            if self.output_number == 1:
+                for cri in self.lambdas_sal_others:
+                    self.lambdas_sal_others[cri] = 0
+        self.conv_after_itp = False
+        self.complex_lateral_connection = False
+        # to control the quantitive level of each single loss by number of output branches.
+        self.loss_cls_mask_ratio_by_last_layers = 4 / self.loss_cls_mask_last_layers
+        for loss_sal in self.lambdas_sal_last.keys():
+            loss_sal_ratio_by_last_layers = 4 / (int(bool(self.lambdas_sal_others[loss_sal])) * (self.loss_sal_layers - 1) + 1)
+            self.lambdas_sal_last[loss_sal] *= loss_sal_ratio_by_last_layers
+            self.lambdas_sal_others[loss_sal] *= loss_sal_ratio_by_last_layers
+        self.lambda_cls_mask = 2.5 * self.loss_cls_mask_ratio_by_last_layers
+        self.lambda_cls = 3.
+        self.lambda_contrast = 250.
+        # Performance of GCoNet
+        self.val_measures = {
+            'Emax': {'CoCA': 0.760, 'CoSOD3k': 0.860, 'CoSal2015': 0.887},
+            'Smeasure': {'CoCA': 0.673, 'CoSOD3k': 0.802, 'CoSal2015': 0.845},
+            'Fmax': {'CoCA': 0.544, 'CoSOD3k': 0.777, 'CoSal2015': 0.847},
+        }
+        # others
+        self.GAM = True
+        if not self.GAM and 'contrast' in self.loss:
+            self.loss.remove('contrast')
+        self.lr = 1e-4 * (self.batch_size / 16)
+        self.relation_module = ['GAM', 'ICE', 'NonLocal', 'MHA'][0]
+        self.self_supervision = False
+        self.label_smoothing = False
+        self.freeze = True
+        self.validation = False
+        self.decay_step_size = 3000
+        self.rand_seed = 7
+        run_sh_file = [f for f in os.listdir('.') if 'gco' in f and '.sh' in f] + [os.path.join('..', f) for f in os.listdir('..') if 'gco' in f and '.sh' in f]
+        # with open(run_sh_file[0], 'r') as f:
+        #     self.val_last = int([l.strip() for l in f.readlines() if 'val_last=' in l][0].split('=')[-1])

models/GCoNet.py ADDED Viewed

	@@ -0,0 +1,248 @@

+from collections import OrderedDict
+import torch
+from torch.functional import norm
+import torch.nn as nn
+import torch.nn.functional as F
+from torchvision.models import vgg16, vgg16_bn
+import fvcore.nn.weight_init as weight_init
+from torchvision.models import resnet50
+from models.modules import ResBlk, DSLayer, half_DSLayer, CoAttLayer, RefUnet, DBHead
+from config import Config
+class GCoNet(nn.Module):
+    def __init__(self, bb_pretrained=True):
+        super(GCoNet, self).__init__()
+        self.config = Config()
+        bb = self.config.bb
+        if bb == 'vgg16':
+            bb_net = list(vgg16(pretrained=bb_pretrained).children())[0]
+            bb_convs = OrderedDict({
+                'conv1': bb_net[:4],
+                'conv2': bb_net[4:9],
+                'conv3': bb_net[9:16],
+                'conv4': bb_net[16:23],
+                'conv5': bb_net[23:30]
+            })
+            channel_scale = 1
+        elif bb == 'resnet50':
+            bb_net = list(resnet50(pretrained=bb_pretrained).children())
+            bb_convs = OrderedDict({
+                'conv1': nn.Sequential(*bb_net[0:3]),
+                'conv2': bb_net[4],
+                'conv3': bb_net[5],
+                'conv4': bb_net[6],
+                'conv5': bb_net[7]
+            })
+            channel_scale = 4
+        elif bb == 'vgg16bn':
+            bb_net = list(vgg16_bn(pretrained=bb_pretrained).children())[0]
+            bb_convs = OrderedDict({
+                'conv1': bb_net[:6],
+                'conv2': bb_net[6:13],
+                'conv3': bb_net[13:23],
+                'conv4': bb_net[23:33],
+                'conv5': bb_net[33:43]
+            })
+            channel_scale = 1
+        self.bb = nn.Sequential(bb_convs)
+        lateral_channels_in = [512, 512, 256, 128, 64] if 'vgg16' in bb else [2048, 1024, 512, 256, 64]
+        # channel_scale_latlayer = channel_scale // 2 if bb == 'resnet50' else 1
+        # channel_last = 32
+        ch_decoder = lateral_channels_in[0]//2//channel_scale
+        self.top_layer = ResBlk(lateral_channels_in[0], ch_decoder)
+        self.enlayer5 = ResBlk(ch_decoder, ch_decoder)
+        if self.config.conv_after_itp:
+            self.dslayer5 = DSLayer(ch_decoder, ch_decoder)
+        self.latlayer5 = ResBlk(lateral_channels_in[1], ch_decoder) if self.config.complex_lateral_connection else nn.Conv2d(lateral_channels_in[1], ch_decoder, 1, 1, 0)
+        ch_decoder //= 2
+        self.enlayer4 = ResBlk(ch_decoder*2, ch_decoder)
+        if self.config.conv_after_itp:
+            self.dslayer4 = DSLayer(ch_decoder, ch_decoder)
+        self.latlayer4 = ResBlk(lateral_channels_in[2], ch_decoder) if self.config.complex_lateral_connection else nn.Conv2d(lateral_channels_in[2], ch_decoder, 1, 1, 0)
+        if self.config.output_number >= 4:
+            self.conv_out4 = nn.Sequential(nn.Conv2d(ch_decoder, 32, 1, 1, 0), nn.ReLU(inplace=True), nn.Conv2d(32, 1, 1, 1, 0))
+        ch_decoder //= 2
+        self.enlayer3 = ResBlk(ch_decoder*2, ch_decoder)
+        if self.config.conv_after_itp:
+            self.dslayer3 = DSLayer(ch_decoder, ch_decoder)
+        self.latlayer3 = ResBlk(lateral_channels_in[3], ch_decoder) if self.config.complex_lateral_connection else nn.Conv2d(lateral_channels_in[3], ch_decoder, 1, 1, 0)
+        if self.config.output_number >= 3:
+            self.conv_out3 = nn.Sequential(nn.Conv2d(ch_decoder, 32, 1, 1, 0), nn.ReLU(inplace=True), nn.Conv2d(32, 1, 1, 1, 0))
+        ch_decoder //= 2
+        self.enlayer2 = ResBlk(ch_decoder*2, ch_decoder)
+        if self.config.conv_after_itp:
+            self.dslayer2 = DSLayer(ch_decoder, ch_decoder)
+        self.latlayer2 = ResBlk(lateral_channels_in[4], ch_decoder) if self.config.complex_lateral_connection else nn.Conv2d(lateral_channels_in[4], ch_decoder, 1, 1, 0)
+        if self.config.output_number >= 2:
+            self.conv_out2 = nn.Sequential(nn.Conv2d(ch_decoder, 32, 1, 1, 0), nn.ReLU(inplace=True), nn.Conv2d(32, 1, 1, 1, 0))
+        self.enlayer1 = ResBlk(ch_decoder, ch_decoder)
+        self.conv_out1 = nn.Sequential(nn.Conv2d(ch_decoder, 1, 1, 1, 0))
+        if self.config.GAM:
+            self.co_x5 = CoAttLayer(channel_in=lateral_channels_in[0])
+        if 'contrast' in self.config.loss:
+            self.pred_layer = half_DSLayer(lateral_channels_in[0])
+        if {'cls', 'cls_mask'} & set(self.config.loss):
+            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
+            self.classifier = nn.Linear(lateral_channels_in[0], 291)       # DUTS_class has 291 classes
+            for layer in [self.classifier]:
+                weight_init.c2_msra_fill(layer)
+        if self.config.split_mask:
+            self.sgm = nn.Sigmoid()
+        if self.config.refine:
+            self.refiner = nn.Sequential(RefUnet(self.config.refine, 64))
+        if self.config.split_mask:
+            self.conv_out_mask = nn.Sequential(nn.Conv2d(ch_decoder, 1, 1, 1, 0))
+        if self.config.db_mask:
+            self.db_mask = DBHead(32)
+        if self.config.db_output_decoder:
+            self.db_output_decoder = DBHead(32)
+        if self.config.cls_mask_operation == 'c':
+            self.conv_cat_mask = nn.Conv2d(4, 3, 1, 1, 0)
+    def forward(self, x):
+        ########## Encoder ##########
+        [N, _, H, W] = x.size()
+        x1 = self.bb.conv1(x)
+        x2 = self.bb.conv2(x1)
+        x3 = self.bb.conv3(x2)
+        x4 = self.bb.conv4(x3)
+        x5 = self.bb.conv5(x4)
+        if 'cls' in self.config.loss:
+            _x5 = self.avgpool(x5)
+            _x5 = _x5.view(_x5.size(0), -1)
+            pred_cls = self.classifier(_x5)
+        if self.config.GAM:
+            weighted_x5, neg_x5 = self.co_x5(x5)
+            if 'contrast' in self.config.loss:
+                if self.training:
+                    ########## contrastive branch #########
+                    cat_x5 = torch.cat([weighted_x5, neg_x5], dim=0)
+                    pred_contrast = self.pred_layer(cat_x5)
+                    pred_contrast = F.interpolate(pred_contrast, size=(H, W), mode='bilinear', align_corners=True)
+            p5 = self.top_layer(weighted_x5)
+        else:
+            p5 = self.top_layer(x5)
+        ########## Decoder ##########
+        scaled_preds = []
+        p5 = self.enlayer5(p5)
+        p5 = F.interpolate(p5, size=x4.shape[2:], mode='bilinear', align_corners=True)
+        if self.config.conv_after_itp:
+            p5 = self.dslayer5(p5)
+        p4 = p5 + self.latlayer5(x4)
+        p4 = self.enlayer4(p4)
+        p4 = F.interpolate(p4, size=x3.shape[2:], mode='bilinear', align_corners=True)
+        if self.config.conv_after_itp:
+            p4 = self.dslayer4(p4)
+        if self.config.output_number >= 4:
+            p4_out = self.conv_out4(p4)
+            scaled_preds.append(p4_out)
+        p3 = p4 + self.latlayer4(x3)
+        p3 = self.enlayer3(p3)
+        p3 = F.interpolate(p3, size=x2.shape[2:], mode='bilinear', align_corners=True)
+        if self.config.conv_after_itp:
+            p3 = self.dslayer3(p3)
+        if self.config.output_number >= 3:
+            p3_out = self.conv_out3(p3)
+            scaled_preds.append(p3_out)
+        p2 = p3 + self.latlayer3(x2)
+        p2 = self.enlayer2(p2)
+        p2 = F.interpolate(p2, size=x1.shape[2:], mode='bilinear', align_corners=True)
+        if self.config.conv_after_itp:
+            p2 = self.dslayer2(p2)
+        if self.config.output_number >= 2:
+            p2_out = self.conv_out2(p2)
+            scaled_preds.append(p2_out)
+        p1 = p2 + self.latlayer2(x1)
+        p1 = self.enlayer1(p1)
+        p1 = F.interpolate(p1, size=x.shape[2:], mode='bilinear', align_corners=True)
+        if self.config.db_output_decoder:
+            p1_out = self.db_output_decoder(p1)
+        else:
+            p1_out = self.conv_out1(p1)
+        scaled_preds.append(p1_out)
+        if self.config.refine == 1:
+            scaled_preds.append(self.refiner(p1_out))
+        elif self.config.refine == 4:
+            scaled_preds.append(self.refiner(torch.cat([x, p1_out], dim=1)))
+        if 'cls_mask' in self.config.loss:
+            pred_cls_masks = []
+            norm_features_mask = []
+            input_features = [x, x1, x2, x3][:self.config.loss_cls_mask_last_layers]
+            bb_lst = [self.bb.conv1, self.bb.conv2, self.bb.conv3, self.bb.conv4, self.bb.conv5]
+            for idx_out in range(self.config.loss_cls_mask_last_layers):
+                if idx_out:
+                    mask_output = scaled_preds[-(idx_out+1+int(bool(self.config.refine)))]
+                else:
+                    if self.config.split_mask:
+                        if self.config.db_mask:
+                            mask_output = self.db_mask(p1)
+                        else:
+                            mask_output = self.sgm(self.conv_out_mask(p1))
+                if self.config.cls_mask_operation == 'x':
+                    masked_features = input_features[idx_out] * mask_output
+                elif self.config.cls_mask_operation == '+':
+                    masked_features = input_features[idx_out] + mask_output
+                elif self.config.cls_mask_operation == 'c':
+                    masked_features = self.conv_cat_mask(torch.cat((input_features[idx_out], mask_output), dim=1))
+                norm_feature_mask = self.avgpool(
+                    nn.Sequential(*bb_lst[idx_out:])(
+                        masked_features
+                    )
+                ).view(N, -1)
+                norm_features_mask.append(norm_feature_mask)
+                pred_cls_masks.append(
+                    self.classifier(
+                        norm_feature_mask
+                    )
+                )
+        if self.training:
+            return_values = []
+            if {'sal', 'cls', 'contrast', 'cls_mask'} == set(self.config.loss):
+                return_values = [scaled_preds, pred_cls, pred_contrast, pred_cls_masks]
+            elif {'sal', 'cls', 'contrast'} == set(self.config.loss):
+                return_values = [scaled_preds, pred_cls, pred_contrast]
+            elif {'sal', 'cls', 'cls_mask'} == set(self.config.loss):
+                return_values = [scaled_preds, pred_cls, pred_cls_masks]
+            elif {'sal', 'cls'} == set(self.config.loss):
+                return_values = [scaled_preds, pred_cls]
+            elif {'sal', 'contrast'} == set(self.config.loss):
+                return_values = [scaled_preds, pred_contrast]
+            elif {'sal', 'cls_mask'} == set(self.config.loss):
+                return_values = [scaled_preds, pred_cls_masks]
+            else:
+                return_values = [scaled_preds]
+            if self.config.lambdas_sal_last['triplet']:
+                norm_features = []
+                if '_x5' in self.config.triplet:
+                    norm_features.append(_x5)
+                if 'mask' in self.config.triplet:
+                    norm_features.append(norm_features_mask[0])
+                return_values.append(norm_features)
+            return return_values
+        else:
+            return scaled_preds

models/modules.py ADDED Viewed

	@@ -0,0 +1,516 @@

+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import fvcore.nn.weight_init as weight_init
+from config import Config
+# Module-level settings read by the layer classes below (e.g. `use_bn`, `db_k`).
+config = Config()
+class ResBlk(nn.Module):
+    def __init__(self, channel_in=64, channel_out=64):
+        super(ResBlk, self).__init__()
+        self.conv_in = nn.Conv2d(channel_in, 64, 3, 1, 1)
+        self.relu_in = nn.ReLU(inplace=True)
+        self.conv_out = nn.Conv2d(64, channel_out, 3, 1, 1)
+        if config.use_bn:
+            self.bn_in = nn.BatchNorm2d(64)
+            self.bn_out = nn.BatchNorm2d(channel_out)
+    def forward(self, x):
+        x = self.conv_in(x)
+        if config.use_bn:
+            x = self.bn_in(x)
+        x = self.relu_in(x)
+        x = self.conv_out(x)
+        if config.use_bn:
+            x = self.bn_out(x)
+        return x
+class DSLayer(nn.Module):
+    def __init__(self, channel_in=64, channel_out=1, activation_out='relu'):
+        super(DSLayer, self).__init__()
+        self.activation_out = activation_out
+        self.conv1 = nn.Conv2d(channel_in, 64, kernel_size=3, stride=1, padding=1)
+        self.relu1 = nn.ReLU(inplace=True)
+        self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)
+        self.relu2 = nn.ReLU(inplace=True)
+        if activation_out:
+            self.pred_conv = nn.Conv2d(64, channel_out, kernel_size=1, stride=1, padding=0)
+            self.pred_relu = nn.ReLU(inplace=True)
+        else:
+            self.pred_conv = nn.Conv2d(64, channel_out, kernel_size=1, stride=1, padding=0)
+        if config.use_bn:
+            self.bn1 = nn.BatchNorm2d(64)
+            self.bn2 = nn.BatchNorm2d(64)
+            self.pred_bn = nn.BatchNorm2d(channel_out)
+    def forward(self, x):
+        x = self.conv1(x)
+        if config.use_bn:
+            x = self.bn1(x)
+        x = self.relu1(x)
+        x = self.conv2(x)
+        if config.use_bn:
+            x = self.bn2(x)
+        x = self.relu2(x)
+        x = self.pred_conv(x)
+        if config.use_bn:
+            x = self.pred_bn(x)
+        if self.activation_out:
+            x = self.pred_relu(x)
+        return x
+class half_DSLayer(nn.Module):
+    def __init__(self, channel_in=512):
+        super(half_DSLayer, self).__init__()
+        self.enlayer = nn.Sequential(
+            nn.Conv2d(channel_in, int(channel_in//4), kernel_size=3, stride=1, padding=1),
+            nn.ReLU(inplace=True)
+        )
+        self.predlayer = nn.Sequential(
+            nn.Conv2d(int(channel_in//4), 1, kernel_size=1, stride=1, padding=0),
+        )
+    def forward(self, x):
+        x = self.enlayer(x)
+        x = self.predlayer(x)
+        return x
+class CoAttLayer(nn.Module):
+    def __init__(self, channel_in=512):
+        super(CoAttLayer, self).__init__()
+        self.all_attention = eval(Config().relation_module + '(channel_in)')
+        self.conv_output = nn.Conv2d(channel_in, channel_in, kernel_size=1, stride=1, padding=0)
+        self.conv_transform = nn.Conv2d(channel_in, channel_in, kernel_size=1, stride=1, padding=0)
+        self.fc_transform = nn.Linear(channel_in, channel_in)
+        for layer in [self.conv_output, self.conv_transform, self.fc_transform]:
+            weight_init.c2_msra_fill(layer)
+    def forward(self, x5):
+        if self.training:
+            f_begin = 0
+            f_end = int(x5.shape[0] / 2)
+            s_begin = f_end
+            s_end = int(x5.shape[0])
+            x5_1 = x5[f_begin: f_end]
+            x5_2 = x5[s_begin: s_end]
+            x5_new_1 = self.all_attention(x5_1)
+            x5_new_2 = self.all_attention(x5_2)
+            x5_1_proto = torch.mean(x5_new_1, (0, 2, 3), True).view(1, -1)
+            x5_1_proto = x5_1_proto.unsqueeze(-1).unsqueeze(-1) # 1, C, 1, 1
+            x5_2_proto = torch.mean(x5_new_2, (0, 2, 3), True).view(1, -1)
+            x5_2_proto = x5_2_proto.unsqueeze(-1).unsqueeze(-1) # 1, C, 1, 1
+            x5_11 = x5_1 * x5_1_proto
+            x5_22 = x5_2 * x5_2_proto
+            weighted_x5 = torch.cat([x5_11, x5_22], dim=0)
+            x5_12 = x5_1 * x5_2_proto
+            x5_21 = x5_2 * x5_1_proto
+            neg_x5 = torch.cat([x5_12, x5_21], dim=0)
+        else:
+            x5_new = self.all_attention(x5)
+            x5_proto = torch.mean(x5_new, (0, 2, 3), True).view(1, -1)
+            x5_proto = x5_proto.unsqueeze(-1).unsqueeze(-1) # 1, C, 1, 1
+            weighted_x5 = x5 * x5_proto #* cweight
+            neg_x5 = None
+        return weighted_x5, neg_x5
+class ICE(nn.Module):
+    # The Integrity Channel Enhancement (ICE) module
+    # _X means in X-th column
+    def __init__(self, channel_in=512):
+        super(ICE, self).__init__()
+        self.conv_1 = nn.Conv2d(channel_in, channel_in, 3, 1, 1)
+        self.conv_2 = nn.Conv1d(channel_in, channel_in, 3, 1, 1)
+        self.conv_3 = nn.Conv2d(channel_in*3, channel_in, 3, 1, 1)
+        self.fc_2 = nn.Linear(channel_in, channel_in)
+        self.fc_3 = nn.Linear(channel_in, channel_in)
+    def forward(self, x):
+        x_1, x_2, x_3 = x, x, x
+        x_1 = x_1 * x_2 * x_3
+        x_2 = x_1 + x_2 + x_3
+        x_3 = torch.cat((x_1, x_2, x_3), dim=1)
+        V = self.conv_1(x_1)
+        bs, c, h, w = x_2.shape
+        K = self.conv_2(x_2.view(bs, c, h*w))
+        Q_prime = self.conv_3(x_3)
+        Q_prime = torch.norm(Q_prime, dim=(-2, -1)).view(bs, c, 1, 1)
+        Q_prime = Q_prime.view(bs, -1)
+        Q_prime = self.fc_3(Q_prime)
+        Q_prime = torch.softmax(Q_prime, dim=-1)
+        Q_prime = Q_prime.unsqueeze(1)
+        Q = torch.matmul(Q_prime, K)
+        x_2 = torch.nn.functional.cosine_similarity(K, Q, dim=-1)
+        x_2 = torch.sigmoid(x_2)
+        x_2 = self.fc_2(x_2)
+        x_2 = x_2.unsqueeze(-1).unsqueeze(-1)
+        x_1 = V * x_2 + V
+        return x_1
+class GAM(nn.Module):
+    def __init__(self, channel_in=512):
+        super(GAM, self).__init__()
+        self.query_transform = nn.Conv2d(channel_in, channel_in, kernel_size=1, stride=1, padding=0)
+        self.key_transform = nn.Conv2d(channel_in, channel_in, kernel_size=1, stride=1, padding=0)
+        self.scale = 1.0 / (channel_in ** 0.5)
+        self.conv6 = nn.Conv2d(channel_in, channel_in, kernel_size=1, stride=1, padding=0)
+        for layer in [self.query_transform, self.key_transform, self.conv6]:
+            weight_init.c2_msra_fill(layer)
+    def forward(self, x5):
+        # x: B,C,H,W
+        # x_query: B,C,HW
+        B, C, H5, W5 = x5.size()
+        x_query = self.query_transform(x5).view(B, C, -1)
+        # x_query: B,HW,C
+        x_query = torch.transpose(x_query, 1, 2).contiguous().view(-1, C) # BHW, C
+        # x_key: B,C,HW
+        x_key = self.key_transform(x5).view(B, C, -1)
+        x_key = torch.transpose(x_key, 0, 1).contiguous().view(C, -1) # C, BHW
+        # W = Q^T K: B,HW,HW
+        x_w = torch.matmul(x_query, x_key) #* self.scale # BHW, BHW
+        x_w = x_w.view(B*H5*W5, B, H5*W5)
+        x_w = torch.max(x_w, -1).values # BHW, B
+        x_w = x_w.mean(-1)
+        #x_w = torch.mean(x_w, -1).values # BHW
+        x_w = x_w.view(B, -1) * self.scale # B, HW
+        x_w = F.softmax(x_w, dim=-1) # B, HW
+        x_w = x_w.view(B, H5, W5).unsqueeze(1) # B, 1, H, W
+        x5 = x5 * x_w
+        x5 = self.conv6(x5)
+        return x5
+class MHA(nn.Module):
+    '''
+    Scaled dot-product attention
+    '''
+    def __init__(self, d_model=512, d_k=512, d_v=512, h=8, dropout=.1, channel_in=512):
+        '''
+        :param d_model: Output dimensionality of the model
+        :param d_k: Dimensionality of queries and keys
+        :param d_v: Dimensionality of values
+        :param h: Number of heads
+        '''
+        super(MHA, self).__init__()
+        self.query_transform = nn.Conv2d(channel_in, channel_in, kernel_size=1, stride=1, padding=0)
+        self.key_transform = nn.Conv2d(channel_in, channel_in, kernel_size=1, stride=1, padding=0)
+        self.value_transform = nn.Conv2d(channel_in, channel_in, kernel_size=1, stride=1, padding=0)
+        self.fc_q = nn.Linear(d_model, h * d_k)
+        self.fc_k = nn.Linear(d_model, h * d_k)
+        self.fc_v = nn.Linear(d_model, h * d_v)
+        self.fc_o = nn.Linear(h * d_v, d_model)
+        self.dropout = nn.Dropout(dropout)
+        self.d_model = d_model
+        self.d_k = d_k
+        self.d_v = d_v
+        self.h = h
+        self.init_weights()
+    def init_weights(self):
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight, mode='fan_out')
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+            elif isinstance(m, nn.BatchNorm2d):
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+            elif isinstance(m, nn.Linear):
+                nn.init.normal_(m.weight, std=0.001)
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+    def forward(self, x, attention_mask=None, attention_weights=None):
+        '''
+        Computes
+        :param queries: Queries (b_s, nq, d_model)
+        :param keys: Keys (b_s, nk, d_model)
+        :param values: Values (b_s, nk, d_model)
+        :param attention_mask: Mask over attention values (b_s, h, nq, nk). True indicates masking.
+        :param attention_weights: Multiplicative weights for attention values (b_s, h, nq, nk).
+        :return:
+        '''
+        B, C, H, W = x.size()
+        queries = self.query_transform(x).view(B, -1, C)
+        keys = self.query_transform(x).view(B, -1, C)
+        values = self.query_transform(x).view(B, -1, C)
+        b_s, nq = queries.shape[:2]
+        nk = keys.shape[1]
+        q = self.fc_q(queries).view(b_s, nq, self.h, self.d_k).permute(0, 2, 1, 3)  # (b_s, h, nq, d_k)
+        k = self.fc_k(keys).view(b_s, nk, self.h, self.d_k).permute(0, 2, 3, 1)  # (b_s, h, d_k, nk)
+        v = self.fc_v(values).view(b_s, nk, self.h, self.d_v).permute(0, 2, 1, 3)  # (b_s, h, nk, d_v)
+        att = torch.matmul(q, k) / np.sqrt(self.d_k)  # (b_s, h, nq, nk)
+        if attention_weights is not None:
+            att = att * attention_weights
+        if attention_mask is not None:
+            att = att.masked_fill(attention_mask, -np.inf)
+        att = torch.softmax(att, -1)
+        att = self.dropout(att)
+        out = torch.matmul(att, v).permute(0, 2, 1, 3).contiguous().view(b_s, nq, self.h * self.d_v)  # (b_s, nq, h*d_v)
+        out = self.fc_o(out).view(B, C, H, W)  # (b_s, nq, d_model)
+        return out
class NonLocal(nn.Module):
    """Non-local (self-attention) block with a residual connection.

    Every position of the input attends to every (optionally sub-sampled)
    position through a softmax over dot products of the ``theta`` and ``phi``
    embeddings; the attended ``g`` embedding is projected back to
    ``channel_in`` channels by ``W`` and added to the input.

    The last learnable layer of ``W`` is zero-initialised, so a freshly
    constructed block returns its input unchanged.

    :param channel_in: Number of channels of the input (and of the output).
    :param inter_channels: Channels of the internal embeddings. Defaults to
        ``channel_in // 2`` (at least 1).
    :param dimension: Number of spatial axes of the input: 1, 2 or 3.
    :param sub_sample: If True, max-pool the ``phi`` and ``g`` embeddings by 2
        (for ``dimension=3`` only over the last two axes) to shrink the
        attention map.
    :param bn_layer: If True, follow the output projection with batch norm.
    :raises ValueError: If ``dimension`` is not 1, 2 or 3.
    """

    def __init__(self, channel_in=512, inter_channels=None, dimension=2, sub_sample=True, bn_layer=True):
        super(NonLocal, self).__init__()
        if dimension not in (1, 2, 3):
            raise ValueError('dimension must be 1, 2 or 3, got {!r}'.format(dimension))
        self.dimension = dimension
        self.sub_sample = sub_sample
        self.channel_in = channel_in
        self.inter_channels = inter_channels
        if self.inter_channels is None:
            # Halve the channels for the embeddings, but never drop to zero.
            self.inter_channels = channel_in // 2
            if self.inter_channels == 0:
                self.inter_channels = 1

        # Layer types matching the number of spatial axes. The 2-D entry is
        # exactly what this block used before `dimension` was honoured.
        conv_nd, bn_nd, pool_nd, pool_kernel = {
            1: (nn.Conv1d, nn.BatchNorm1d, nn.MaxPool1d, 2),
            2: (nn.Conv2d, nn.BatchNorm2d, nn.MaxPool2d, (2, 2)),
            3: (nn.Conv3d, nn.BatchNorm3d, nn.MaxPool3d, (1, 2, 2)),
        }[dimension]

        # NOTE: layers are created in the order g, W, theta, phi so that
        # parameter registration (and random initialisation) order is stable.
        self.g = conv_nd(self.channel_in, self.inter_channels, kernel_size=1, stride=1, padding=0)
        if bn_layer:
            self.W = nn.Sequential(
                conv_nd(self.inter_channels, self.channel_in, kernel_size=1, stride=1, padding=0),
                bn_nd(self.channel_in)
            )
            # Zero scale/shift => W(y) == 0 => the block starts as identity.
            nn.init.constant_(self.W[1].weight, 0)
            nn.init.constant_(self.W[1].bias, 0)
        else:
            self.W = conv_nd(self.inter_channels, self.channel_in, kernel_size=1, stride=1, padding=0)
            nn.init.constant_(self.W.weight, 0)
            nn.init.constant_(self.W.bias, 0)
        self.theta = conv_nd(self.channel_in, self.inter_channels, kernel_size=1, stride=1, padding=0)
        self.phi = conv_nd(self.channel_in, self.inter_channels, kernel_size=1, stride=1, padding=0)
        if sub_sample:
            # Only keys (phi) and values (g) are pooled; queries (theta) keep
            # full resolution so the output has the input's spatial size.
            self.g = nn.Sequential(self.g, pool_nd(kernel_size=pool_kernel))
            self.phi = nn.Sequential(self.phi, pool_nd(kernel_size=pool_kernel))

    def forward(self, x, return_nl_map=False):
        """
        :param x: Tensor of shape (b, c, *spatial) with ``dimension`` spatial
            axes, e.g. (b, c, h, w) for the default ``dimension=2``.
        :param return_nl_map: if True return z, nl_map, else only return z.
        :return: ``z`` with the same shape as ``x``; when ``return_nl_map`` is
            True also the softmax attention map of shape
            (b, n_query_positions, n_key_positions).
        """
        batch_size = x.size(0)

        # Values: (b, n_key, inter_channels)
        g_x = self.g(x).view(batch_size, self.inter_channels, -1)
        g_x = g_x.permute(0, 2, 1)

        # Queries: (b, n_query, inter_channels)
        theta_x = self.theta(x).view(batch_size, self.inter_channels, -1)
        theta_x = theta_x.permute(0, 2, 1)

        # Keys: (b, inter_channels, n_key)
        phi_x = self.phi(x).view(batch_size, self.inter_channels, -1)

        # Pairwise affinities, normalised over the key positions.
        f = torch.matmul(theta_x, phi_x)
        f_div_C = F.softmax(f, dim=-1)

        # Aggregate values and restore the (b, inter_channels, *spatial) layout.
        y = torch.matmul(f_div_C, g_x)
        y = y.permute(0, 2, 1).contiguous()
        y = y.view(batch_size, self.inter_channels, *x.size()[2:])

        # Project back to channel_in channels and add the residual.
        W_y = self.W(y)
        z = W_y + x
        if return_nl_map:
            return z, f_div_C
        return z
class DBHead(nn.Module):
    """Differentiable-binarization head.

    Two structurally identical convolutional branches predict a shrink map
    (``binarize``) and a threshold map (``thresh``), both squashed by a
    sigmoid. They are combined by a steep soft step
    ``1 / (1 + exp(-k * (shrink - thresh)))``.

    :param channel_in: Channels of the input feature map.
    :param channel_out: Channels of each predicted map.
    :param k: Steepness of the soft step; defaults to ``config.db_k``.
    """

    def __init__(self, channel_in=32, channel_out=1, k=config.db_k):
        super().__init__()
        self.k = k
        # Build order (binarize first, then thresh) is kept so parameter
        # registration order stays the same.
        self.binarize = self._make_branch(channel_in, channel_out)
        self.thresh = self._make_branch(channel_in, channel_out)

    @staticmethod
    def _make_branch(channel_in, channel_out):
        """Build one conv3x3 -> conv3x3 -> conv1x1 -> sigmoid branch."""
        def norm_act():
            # Always return a list: the previous
            # `*[bn, relu] if use_bn else relu` unpacked the whole conditional
            # expression and therefore tried to iterate over an nn.ReLU module
            # (TypeError) whenever config.use_bn was False.
            if config.use_bn:
                return [nn.BatchNorm2d(channel_in), nn.ReLU(inplace=True)]
            return [nn.ReLU(inplace=True)]

        return nn.Sequential(
            nn.Conv2d(channel_in, channel_in, 3, 1, 1),
            *norm_act(),
            nn.Conv2d(channel_in, channel_in, 3, 1, 1),
            *norm_act(),
            nn.Conv2d(channel_in, channel_out, 1, 1, 0),
            nn.Sigmoid()
        )

    def forward(self, x):
        """
        :param x: Feature map fed to both branches.
        :return: Soft-binarized map produced by ``step_function``.
        """
        shrink_maps = self.binarize(x)
        threshold_maps = self.thresh(x)
        binary_maps = self.step_function(shrink_maps, threshold_maps)
        return binary_maps

    def step_function(self, x, y):
        """Soft step ``1 / (1 + exp(-k * g(x - y)))``.

        ``g`` is the identity when ``config.db_k_alpha == 1``; otherwise it is
        the sign-preserving root ``sign(z) * |z| ** (1 / db_k_alpha)``.

        :param x: Shrink map.
        :param y: Threshold map.
        :return: Tensor shaped like ``x - y``.
        """
        z = x - y
        if config.db_k_alpha != 1:
            # +1 where z >= 0, -1 where z < 0.
            mask_neg_inv = 1 - 2 * (z < 0)
            # The exponent reads the same `db_k_alpha` option that the guard
            # above checks (it previously read `config.k_alpha`).
            # The 1e-16 keeps the base of the fractional power strictly positive.
            a = torch.exp(-self.k * (torch.pow(z * mask_neg_inv + 1e-16, 1 / config.db_k_alpha) * mask_neg_inv))
        else:
            a = torch.exp(-self.k * z)
        if torch.isinf(a).any():
            # exp overflowed for the configured k: fall back to a gentler slope.
            a = torch.exp(-50 * z)
        return torch.reciprocal(1 + a)
class RefUnet(nn.Module):
    """Small U-Net used as a refinement module.

    Encoder: ``conv0`` followed by four conv/(bn)/relu stages, each followed
    by a 2x max-pool (``ceil_mode=True``), then a bottleneck stage.
    Decoder: four stages; each upsamples the running feature map, concatenates
    it with the matching encoder output and applies conv/(bn)/relu.

    Output: if ``config.db_output_refiner`` is set, the decoder features go
    through a ``DBHead``; otherwise a 1-channel residual predicted by
    ``conv_d0`` is added to the input.

    Batch norm layers exist and are applied only when ``config.use_bn`` is set.

    :param in_ch: Channels of the input map.
    :param inc_ch: Channels produced by the first convolution ``conv0``.
    """

    def __init__(self, in_ch, inc_ch):
        super(RefUnet, self).__init__()
        # Attribute names and registration order are kept stable so existing
        # checkpoints / state-dict key order keep matching.
        self.conv0 = nn.Conv2d(in_ch, inc_ch, 3, padding=1)

        # Encoder stages 1..4: conv -> (bn) -> relu -> pool.
        for idx in range(1, 5):
            stage_in = inc_ch if idx == 1 else 64
            setattr(self, 'conv{}'.format(idx), nn.Conv2d(stage_in, 64, 3, padding=1))
            if config.use_bn:
                setattr(self, 'bn{}'.format(idx), nn.BatchNorm2d(64))
            setattr(self, 'relu{}'.format(idx), nn.ReLU(inplace=True))
            setattr(self, 'pool{}'.format(idx), nn.MaxPool2d(2, 2, ceil_mode=True))

        # Bottleneck stage 5 (no pooling afterwards).
        self.conv5 = nn.Conv2d(64, 64, 3, padding=1)
        if config.use_bn:
            self.bn5 = nn.BatchNorm2d(64)
        self.relu5 = nn.ReLU(inplace=True)

        # Decoder stages d4..d1: 128 = 64 upsampled + 64 skip channels.
        for idx in range(4, 0, -1):
            setattr(self, 'conv_d{}'.format(idx), nn.Conv2d(128, 64, 3, padding=1))
            if config.use_bn:
                setattr(self, 'bn_d{}'.format(idx), nn.BatchNorm2d(64))
            setattr(self, 'relu_d{}'.format(idx), nn.ReLU(inplace=True))

        self.conv_d0 = nn.Conv2d(64, 1, 3, padding=1)
        # Kept as an attribute for backward compatibility; forward() resizes
        # to the skip tensor's size instead (see _upsample_like).
        self.upscore2 = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
        if config.db_output_refiner:
            self.db_output_refiner = DBHead(64)

    def _conv_block(self, hx, tag):
        """Apply ``conv<tag>`` -> (``bn<tag>`` if config.use_bn) -> ``relu<tag>``."""
        hx = getattr(self, 'conv' + tag)(hx)
        if config.use_bn:
            hx = getattr(self, 'bn' + tag)(hx)
        return getattr(self, 'relu' + tag)(hx)

    @staticmethod
    def _upsample_like(src, ref):
        """Bilinearly resize ``src`` to the spatial size of ``ref``.

        Because the pools use ``ceil_mode=True``, a fixed x2 upsample produces
        a tensor one pixel larger than the skip connection whenever a pooled
        size was odd, which breaks the concatenation. Resizing to the skip's
        size avoids that. With ``align_corners=True`` the sampling grid
        depends only on the input/output sizes, so when the sizes are exact
        doubles this gives the same result as the x2 upsample.
        """
        return F.interpolate(src, size=ref.shape[2:], mode='bilinear', align_corners=True)

    def forward(self, x):
        """
        :param x: Input map with ``in_ch`` channels.
        :return: ``DBHead`` output on the decoder features when
            ``config.db_output_refiner`` is set, otherwise ``x`` plus the
            predicted 1-channel residual.
        """
        # Encoder.
        hx1 = self._conv_block(self.conv0(x), '1')
        hx2 = self._conv_block(self.pool1(hx1), '2')
        hx3 = self._conv_block(self.pool2(hx2), '3')
        hx4 = self._conv_block(self.pool3(hx3), '4')
        hx5 = self._conv_block(self.pool4(hx4), '5')

        # Decoder: upsample, fuse with the matching encoder output, refine.
        d = hx5
        for tag, skip in (('_d4', hx4), ('_d3', hx3), ('_d2', hx2), ('_d1', hx1)):
            up = self._upsample_like(d, skip)
            d = self._conv_block(torch.cat((up, skip), 1), tag)

        if config.db_output_refiner:
            return self.db_output_refiner(d)
        residual = self.conv_d0(d)
        return x + residual