NeuralFalcon committed
Commit 712b45c · verified · 1 parent: 2b2eb3f

Upload 7 files
deepfillv2/LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2020 Qiang Wen
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
deepfillv2/__init__.py ADDED
@@ -0,0 +1 @@
+
deepfillv2/network.py ADDED
@@ -0,0 +1,666 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.init as init
4
+ import torchvision
5
+
6
+ from deepfillv2.network_module import *
7
+
8
+
9
+ def weights_init(net, init_type="kaiming", init_gain=0.02):
10
+ """Initialize network weights.
11
+ Parameters:
12
+ net (network) -- network to be initialized
13
+ init_type (str) -- the name of an initialization method: normal | xavier | kaiming | orthogonal
14
+ init_gain (float) -- scaling factor for normal, xavier and orthogonal.
15
+ """
16
+
17
+ def init_func(m):
18
+ classname = m.__class__.__name__
19
+ if hasattr(m, "weight") and classname.find("Conv") != -1:
20
+ if init_type == "normal":
21
+ init.normal_(m.weight.data, 0.0, init_gain)
22
+ elif init_type == "xavier":
23
+ init.xavier_normal_(m.weight.data, gain=init_gain)
24
+ elif init_type == "kaiming":
25
+ init.kaiming_normal_(m.weight.data, a=0, mode="fan_in")
26
+ elif init_type == "orthogonal":
27
+ init.orthogonal_(m.weight.data, gain=init_gain)
28
+ else:
29
+ raise NotImplementedError(
30
+ "initialization method [%s] is not implemented" % init_type
31
+ )
32
+ elif classname.find("BatchNorm2d") != -1:
33
+ init.normal_(m.weight.data, 1.0, 0.02)
34
+ init.constant_(m.bias.data, 0.0)
35
+ elif classname.find("Linear") != -1:
36
+ init.normal_(m.weight, 0, 0.01)
37
+ init.constant_(m.bias, 0)
38
+
39
+ # Apply the initialization function <init_func>
40
+ net.apply(init_func)
41
+
42
+
43
+ # -----------------------------------------------
44
+ # Generator
45
+ # -----------------------------------------------
46
+ # Input: masked image + mask
47
+ # Output: filled image
48
+ class GatedGenerator(nn.Module):
49
+ def __init__(self, opt):
50
+ super(GatedGenerator, self).__init__()
51
+ self.coarse = nn.Sequential(
52
+ # encoder
53
+ GatedConv2d(
54
+ opt.in_channels,
55
+ opt.latent_channels,
56
+ 5,
57
+ 1,
58
+ 2,
59
+ pad_type=opt.pad_type,
60
+ activation=opt.activation,
61
+ norm=opt.norm,
62
+ ),
63
+ GatedConv2d(
64
+ opt.latent_channels,
65
+ opt.latent_channels * 2,
66
+ 3,
67
+ 2,
68
+ 1,
69
+ pad_type=opt.pad_type,
70
+ activation=opt.activation,
71
+ norm=opt.norm,
72
+ ),
73
+ GatedConv2d(
74
+ opt.latent_channels * 2,
75
+ opt.latent_channels * 2,
76
+ 3,
77
+ 1,
78
+ 1,
79
+ pad_type=opt.pad_type,
80
+ activation=opt.activation,
81
+ norm=opt.norm,
82
+ ),
83
+ GatedConv2d(
84
+ opt.latent_channels * 2,
85
+ opt.latent_channels * 4,
86
+ 3,
87
+ 2,
88
+ 1,
89
+ pad_type=opt.pad_type,
90
+ activation=opt.activation,
91
+ norm=opt.norm,
92
+ ),
93
+ # Bottleneck
94
+ GatedConv2d(
95
+ opt.latent_channels * 4,
96
+ opt.latent_channels * 4,
97
+ 3,
98
+ 1,
99
+ 1,
100
+ pad_type=opt.pad_type,
101
+ activation=opt.activation,
102
+ norm=opt.norm,
103
+ ),
104
+ GatedConv2d(
105
+ opt.latent_channels * 4,
106
+ opt.latent_channels * 4,
107
+ 3,
108
+ 1,
109
+ 1,
110
+ pad_type=opt.pad_type,
111
+ activation=opt.activation,
112
+ norm=opt.norm,
113
+ ),
114
+ GatedConv2d(
115
+ opt.latent_channels * 4,
116
+ opt.latent_channels * 4,
117
+ 3,
118
+ 1,
119
+ 2,
120
+ dilation=2,
121
+ pad_type=opt.pad_type,
122
+ activation=opt.activation,
123
+ norm=opt.norm,
124
+ ),
125
+ GatedConv2d(
126
+ opt.latent_channels * 4,
127
+ opt.latent_channels * 4,
128
+ 3,
129
+ 1,
130
+ 4,
131
+ dilation=4,
132
+ pad_type=opt.pad_type,
133
+ activation=opt.activation,
134
+ norm=opt.norm,
135
+ ),
136
+ GatedConv2d(
137
+ opt.latent_channels * 4,
138
+ opt.latent_channels * 4,
139
+ 3,
140
+ 1,
141
+ 8,
142
+ dilation=8,
143
+ pad_type=opt.pad_type,
144
+ activation=opt.activation,
145
+ norm=opt.norm,
146
+ ),
147
+ GatedConv2d(
148
+ opt.latent_channels * 4,
149
+ opt.latent_channels * 4,
150
+ 3,
151
+ 1,
152
+ 16,
153
+ dilation=16,
154
+ pad_type=opt.pad_type,
155
+ activation=opt.activation,
156
+ norm=opt.norm,
157
+ ),
158
+ GatedConv2d(
159
+ opt.latent_channels * 4,
160
+ opt.latent_channels * 4,
161
+ 3,
162
+ 1,
163
+ 1,
164
+ pad_type=opt.pad_type,
165
+ activation=opt.activation,
166
+ norm=opt.norm,
167
+ ),
168
+ GatedConv2d(
169
+ opt.latent_channels * 4,
170
+ opt.latent_channels * 4,
171
+ 3,
172
+ 1,
173
+ 1,
174
+ pad_type=opt.pad_type,
175
+ activation=opt.activation,
176
+ norm=opt.norm,
177
+ ),
178
+ # decoder
179
+ TransposeGatedConv2d(
180
+ opt.latent_channels * 4,
181
+ opt.latent_channels * 2,
182
+ 3,
183
+ 1,
184
+ 1,
185
+ pad_type=opt.pad_type,
186
+ activation=opt.activation,
187
+ norm=opt.norm,
188
+ ),
189
+ GatedConv2d(
190
+ opt.latent_channels * 2,
191
+ opt.latent_channels * 2,
192
+ 3,
193
+ 1,
194
+ 1,
195
+ pad_type=opt.pad_type,
196
+ activation=opt.activation,
197
+ norm=opt.norm,
198
+ ),
199
+ TransposeGatedConv2d(
200
+ opt.latent_channels * 2,
201
+ opt.latent_channels,
202
+ 3,
203
+ 1,
204
+ 1,
205
+ pad_type=opt.pad_type,
206
+ activation=opt.activation,
207
+ norm=opt.norm,
208
+ ),
209
+ GatedConv2d(
210
+ opt.latent_channels,
211
+ opt.latent_channels // 2,
212
+ 3,
213
+ 1,
214
+ 1,
215
+ pad_type=opt.pad_type,
216
+ activation=opt.activation,
217
+ norm=opt.norm,
218
+ ),
219
+ GatedConv2d(
220
+ opt.latent_channels // 2,
221
+ opt.out_channels,
222
+ 3,
223
+ 1,
224
+ 1,
225
+ pad_type=opt.pad_type,
226
+ activation="none",
227
+ norm=opt.norm,
228
+ ),
229
+ nn.Tanh(),
230
+ )
231
+
232
+ self.refine_conv = nn.Sequential(
233
+ GatedConv2d(
234
+ opt.in_channels,
235
+ opt.latent_channels,
236
+ 5,
237
+ 1,
238
+ 2,
239
+ pad_type=opt.pad_type,
240
+ activation=opt.activation,
241
+ norm=opt.norm,
242
+ ),
243
+ GatedConv2d(
244
+ opt.latent_channels,
245
+ opt.latent_channels,
246
+ 3,
247
+ 2,
248
+ 1,
249
+ pad_type=opt.pad_type,
250
+ activation=opt.activation,
251
+ norm=opt.norm,
252
+ ),
253
+ GatedConv2d(
254
+ opt.latent_channels,
255
+ opt.latent_channels * 2,
256
+ 3,
257
+ 1,
258
+ 1,
259
+ pad_type=opt.pad_type,
260
+ activation=opt.activation,
261
+ norm=opt.norm,
262
+ ),
263
+ GatedConv2d(
264
+ opt.latent_channels * 2,
265
+ opt.latent_channels * 2,
266
+ 3,
267
+ 2,
268
+ 1,
269
+ pad_type=opt.pad_type,
270
+ activation=opt.activation,
271
+ norm=opt.norm,
272
+ ),
273
+ GatedConv2d(
274
+ opt.latent_channels * 2,
275
+ opt.latent_channels * 4,
276
+ 3,
277
+ 1,
278
+ 1,
279
+ pad_type=opt.pad_type,
280
+ activation=opt.activation,
281
+ norm=opt.norm,
282
+ ),
283
+ GatedConv2d(
284
+ opt.latent_channels * 4,
285
+ opt.latent_channels * 4,
286
+ 3,
287
+ 1,
288
+ 1,
289
+ pad_type=opt.pad_type,
290
+ activation=opt.activation,
291
+ norm=opt.norm,
292
+ ),
293
+ GatedConv2d(
294
+ opt.latent_channels * 4,
295
+ opt.latent_channels * 4,
296
+ 3,
297
+ 1,
298
+ 2,
299
+ dilation=2,
300
+ pad_type=opt.pad_type,
301
+ activation=opt.activation,
302
+ norm=opt.norm,
303
+ ),
304
+ GatedConv2d(
305
+ opt.latent_channels * 4,
306
+ opt.latent_channels * 4,
307
+ 3,
308
+ 1,
309
+ 4,
310
+ dilation=4,
311
+ pad_type=opt.pad_type,
312
+ activation=opt.activation,
313
+ norm=opt.norm,
314
+ ),
315
+ GatedConv2d(
316
+ opt.latent_channels * 4,
317
+ opt.latent_channels * 4,
318
+ 3,
319
+ 1,
320
+ 8,
321
+ dilation=8,
322
+ pad_type=opt.pad_type,
323
+ activation=opt.activation,
324
+ norm=opt.norm,
325
+ ),
326
+ GatedConv2d(
327
+ opt.latent_channels * 4,
328
+ opt.latent_channels * 4,
329
+ 3,
330
+ 1,
331
+ 16,
332
+ dilation=16,
333
+ pad_type=opt.pad_type,
334
+ activation=opt.activation,
335
+ norm=opt.norm,
336
+ ),
337
+ )
338
+ self.refine_atten_1 = nn.Sequential(
339
+ GatedConv2d(
340
+ opt.in_channels,
341
+ opt.latent_channels,
342
+ 5,
343
+ 1,
344
+ 2,
345
+ pad_type=opt.pad_type,
346
+ activation=opt.activation,
347
+ norm=opt.norm,
348
+ ),
349
+ GatedConv2d(
350
+ opt.latent_channels,
351
+ opt.latent_channels,
352
+ 3,
353
+ 2,
354
+ 1,
355
+ pad_type=opt.pad_type,
356
+ activation=opt.activation,
357
+ norm=opt.norm,
358
+ ),
359
+ GatedConv2d(
360
+ opt.latent_channels,
361
+ opt.latent_channels * 2,
362
+ 3,
363
+ 1,
364
+ 1,
365
+ pad_type=opt.pad_type,
366
+ activation=opt.activation,
367
+ norm=opt.norm,
368
+ ),
369
+ GatedConv2d(
370
+ opt.latent_channels * 2,
371
+ opt.latent_channels * 4,
372
+ 3,
373
+ 2,
374
+ 1,
375
+ pad_type=opt.pad_type,
376
+ activation=opt.activation,
377
+ norm=opt.norm,
378
+ ),
379
+ GatedConv2d(
380
+ opt.latent_channels * 4,
381
+ opt.latent_channels * 4,
382
+ 3,
383
+ 1,
384
+ 1,
385
+ pad_type=opt.pad_type,
386
+ activation=opt.activation,
387
+ norm=opt.norm,
388
+ ),
389
+ GatedConv2d(
390
+ opt.latent_channels * 4,
391
+ opt.latent_channels * 4,
392
+ 3,
393
+ 1,
394
+ 1,
395
+ pad_type=opt.pad_type,
396
+ activation="relu",
397
+ norm=opt.norm,
398
+ ),
399
+ )
400
+ self.refine_atten_2 = nn.Sequential(
401
+ GatedConv2d(
402
+ opt.latent_channels * 4,
403
+ opt.latent_channels * 4,
404
+ 3,
405
+ 1,
406
+ 1,
407
+ pad_type=opt.pad_type,
408
+ activation=opt.activation,
409
+ norm=opt.norm,
410
+ ),
411
+ GatedConv2d(
412
+ opt.latent_channels * 4,
413
+ opt.latent_channels * 4,
414
+ 3,
415
+ 1,
416
+ 1,
417
+ pad_type=opt.pad_type,
418
+ activation=opt.activation,
419
+ norm=opt.norm,
420
+ ),
421
+ )
422
+ self.refine_combine = nn.Sequential(
423
+ GatedConv2d(
424
+ opt.latent_channels * 8,
425
+ opt.latent_channels * 4,
426
+ 3,
427
+ 1,
428
+ 1,
429
+ pad_type=opt.pad_type,
430
+ activation=opt.activation,
431
+ norm=opt.norm,
432
+ ),
433
+ GatedConv2d(
434
+ opt.latent_channels * 4,
435
+ opt.latent_channels * 4,
436
+ 3,
437
+ 1,
438
+ 1,
439
+ pad_type=opt.pad_type,
440
+ activation=opt.activation,
441
+ norm=opt.norm,
442
+ ),
443
+ TransposeGatedConv2d(
444
+ opt.latent_channels * 4,
445
+ opt.latent_channels * 2,
446
+ 3,
447
+ 1,
448
+ 1,
449
+ pad_type=opt.pad_type,
450
+ activation=opt.activation,
451
+ norm=opt.norm,
452
+ ),
453
+ GatedConv2d(
454
+ opt.latent_channels * 2,
455
+ opt.latent_channels * 2,
456
+ 3,
457
+ 1,
458
+ 1,
459
+ pad_type=opt.pad_type,
460
+ activation=opt.activation,
461
+ norm=opt.norm,
462
+ ),
463
+ TransposeGatedConv2d(
464
+ opt.latent_channels * 2,
465
+ opt.latent_channels,
466
+ 3,
467
+ 1,
468
+ 1,
469
+ pad_type=opt.pad_type,
470
+ activation=opt.activation,
471
+ norm=opt.norm,
472
+ ),
473
+ GatedConv2d(
474
+ opt.latent_channels,
475
+ opt.latent_channels // 2,
476
+ 3,
477
+ 1,
478
+ 1,
479
+ pad_type=opt.pad_type,
480
+ activation=opt.activation,
481
+ norm=opt.norm,
482
+ ),
483
+ GatedConv2d(
484
+ opt.latent_channels // 2,
485
+ opt.out_channels,
486
+ 3,
487
+ 1,
488
+ 1,
489
+ pad_type=opt.pad_type,
490
+ activation="none",
491
+ norm=opt.norm,
492
+ ),
493
+ nn.Tanh(),
494
+ )
495
+
496
+ use_cuda = opt.use_cuda
497
+
498
+ self.context_attention = ContextualAttention(
499
+ ksize=3,
500
+ stride=1,
501
+ rate=2,
502
+ fuse_k=3,
503
+ softmax_scale=10,
504
+ fuse=True,
505
+ use_cuda=use_cuda,
506
+ )
507
+
508
+ def forward(self, img, mask):
509
+ # img: entire img
510
+ # mask: 1 for mask region; 0 for unmask region
511
+ # Coarse
512
+ first_masked_img = img * (1 - mask) + mask
513
+ first_in = torch.cat(
514
+ (first_masked_img, mask), dim=1
515
+ ) # in: [B, 4, H, W]
516
+ first_out = self.coarse(first_in) # out: [B, 3, H, W]
517
+ first_out = nn.functional.interpolate(
518
+ first_out,
519
+ (img.shape[2], img.shape[3]),
520
+ recompute_scale_factor=False,
521
+ )
522
+ # Refinement
523
+ second_masked_img = img * (1 - mask) + first_out * mask
524
+ second_in = torch.cat([second_masked_img, mask], dim=1)
525
+ refine_conv = self.refine_conv(second_in)
526
+ refine_atten = self.refine_atten_1(second_in)
527
+ mask_s = nn.functional.interpolate(
528
+ mask,
529
+ (refine_atten.shape[2], refine_atten.shape[3]),
530
+ recompute_scale_factor=False,
531
+ )
532
+ refine_atten = self.context_attention(
533
+ refine_atten, refine_atten, mask_s
534
+ )
535
+ refine_atten = self.refine_atten_2(refine_atten)
536
+ second_out = torch.cat([refine_conv, refine_atten], dim=1)
537
+ second_out = self.refine_combine(second_out)
538
+ second_out = nn.functional.interpolate(
539
+ second_out,
540
+ (img.shape[2], img.shape[3]),
541
+ recompute_scale_factor=False,
542
+ )
543
+ return first_out, second_out
544
+
545
+
546
+ # -----------------------------------------------
547
+ # Discriminator
548
+ # -----------------------------------------------
549
+ # Input: generated image / ground truth and mask
550
+ # Output: a patch-based score map (spatial size depends on the input resolution; [B, 1, 8, 8] for a 256 x 256 input)
551
+ class PatchDiscriminator(nn.Module):
552
+ def __init__(self, opt):
553
+ super(PatchDiscriminator, self).__init__()
554
+ # Down sampling
555
+ self.block1 = Conv2dLayer(
556
+ opt.in_channels,
557
+ opt.latent_channels,
558
+ 7,
559
+ 1,
560
+ 3,
561
+ pad_type=opt.pad_type,
562
+ activation=opt.activation,
563
+ norm=opt.norm,
564
+ sn=True,
565
+ )
566
+ self.block2 = Conv2dLayer(
567
+ opt.latent_channels,
568
+ opt.latent_channels * 2,
569
+ 4,
570
+ 2,
571
+ 1,
572
+ pad_type=opt.pad_type,
573
+ activation=opt.activation,
574
+ norm=opt.norm,
575
+ sn=True,
576
+ )
577
+ self.block3 = Conv2dLayer(
578
+ opt.latent_channels * 2,
579
+ opt.latent_channels * 4,
580
+ 4,
581
+ 2,
582
+ 1,
583
+ pad_type=opt.pad_type,
584
+ activation=opt.activation,
585
+ norm=opt.norm,
586
+ sn=True,
587
+ )
588
+ self.block4 = Conv2dLayer(
589
+ opt.latent_channels * 4,
590
+ opt.latent_channels * 4,
591
+ 4,
592
+ 2,
593
+ 1,
594
+ pad_type=opt.pad_type,
595
+ activation=opt.activation,
596
+ norm=opt.norm,
597
+ sn=True,
598
+ )
599
+ self.block5 = Conv2dLayer(
600
+ opt.latent_channels * 4,
601
+ opt.latent_channels * 4,
602
+ 4,
603
+ 2,
604
+ 1,
605
+ pad_type=opt.pad_type,
606
+ activation=opt.activation,
607
+ norm=opt.norm,
608
+ sn=True,
609
+ )
610
+ self.block6 = Conv2dLayer(
611
+ opt.latent_channels * 4,
612
+ 1,
613
+ 4,
614
+ 2,
615
+ 1,
616
+ pad_type=opt.pad_type,
617
+ activation="none",
618
+ norm="none",
619
+ sn=True,
620
+ )
621
+
622
+ def forward(self, img, mask):
623
+ # the input x has 4 channels: the reconstructed (or real) image concatenated with the mask
624
+ x = torch.cat((img, mask), 1)
625
+ x = self.block1(x) # out: [B, 64, 256, 256]
626
+ x = self.block2(x) # out: [B, 128, 128, 128]
627
+ x = self.block3(x) # out: [B, 256, 64, 64]
628
+ x = self.block4(x) # out: [B, 256, 32, 32]
629
+ x = self.block5(x) # out: [B, 256, 16, 16]
630
+ x = self.block6(x) # out: [B, 1, 8, 8]
631
+ return x
632
+
633
+
634
+ # ----------------------------------------
635
+ # Perceptual Network
636
+ # ----------------------------------------
637
+ # VGG-16 conv3_3 features
638
+ class PerceptualNet(nn.Module):
639
+ def __init__(self):
640
+ super(PerceptualNet, self).__init__()
641
+ block = [
642
+ torchvision.models.vgg16(pretrained=True).features[:15].eval()
643
+ ]
644
+ for p in block[0]:
645
+ p.requires_grad = False
646
+ self.block = torch.nn.ModuleList(block)
647
+ self.transform = torch.nn.functional.interpolate
648
+ self.register_buffer(
649
+ "mean", torch.FloatTensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
650
+ )
651
+ self.register_buffer(
652
+ "std", torch.FloatTensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)
653
+ )
654
+
655
+ def forward(self, x):
656
+ x = (x - self.mean) / self.std
657
+ x = self.transform(
658
+ x,
659
+ mode="bilinear",
660
+ size=(224, 224),
661
+ align_corners=False,
662
+ recompute_scale_factor=False,
663
+ )
664
+ for block in self.block:
665
+ x = block(x)
666
+ return x
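
For reference, a minimal usage sketch of the generator defined above. It is not part of the uploaded files: the attribute names on `opt` mirror what `GatedGenerator` actually reads, while the concrete values (48 latent channels, a 256 x 256 input) are illustrative assumptions.

from types import SimpleNamespace
import torch
from deepfillv2 import network

# Hypothetical options object; only the attributes the network reads are set.
opt = SimpleNamespace(
    in_channels=4,       # masked RGB image + 1-channel mask
    out_channels=3,
    latent_channels=48,  # assumed base channel width
    pad_type="zero",
    activation="elu",
    norm="none",
    use_cuda=False,
)
generator = network.GatedGenerator(opt).eval()

img = torch.rand(1, 3, 256, 256)                    # image in [0, 1]
mask = (torch.rand(1, 1, 256, 256) > 0.9).float()   # 1 = hole, 0 = known pixel
with torch.no_grad():
    coarse, refined = generator(img, mask)          # both [1, 3, 256, 256]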
deepfillv2/network_module.py ADDED
@@ -0,0 +1,596 @@
1
+ import torch
2
+ from torch import nn
3
+ from torch.nn import functional as F
4
+ from torch.nn import Parameter
5
+
6
+ from deepfillv2.network_utils import *
7
+
8
+
9
+ # -----------------------------------------------
10
+ # Normal ConvBlock
11
+ # -----------------------------------------------
12
+ class Conv2dLayer(nn.Module):
13
+ def __init__(
14
+ self,
15
+ in_channels,
16
+ out_channels,
17
+ kernel_size,
18
+ stride=1,
19
+ padding=0,
20
+ dilation=1,
21
+ pad_type="zero",
22
+ activation="elu",
23
+ norm="none",
24
+ sn=False,
25
+ ):
26
+ super(Conv2dLayer, self).__init__()
27
+ # Initialize the padding scheme
28
+ if pad_type == "reflect":
29
+ self.pad = nn.ReflectionPad2d(padding)
30
+ elif pad_type == "replicate":
31
+ self.pad = nn.ReplicationPad2d(padding)
32
+ elif pad_type == "zero":
33
+ self.pad = nn.ZeroPad2d(padding)
34
+ else:
35
+ assert 0, "Unsupported padding type: {}".format(pad_type)
36
+
37
+ # Initialize the normalization type
38
+ if norm == "bn":
39
+ self.norm = nn.BatchNorm2d(out_channels)
40
+ elif norm == "in":
41
+ self.norm = nn.InstanceNorm2d(out_channels)
42
+ elif norm == "ln":
43
+ self.norm = LayerNorm(out_channels)
44
+ elif norm == "none":
45
+ self.norm = None
46
+ else:
47
+ assert 0, "Unsupported normalization: {}".format(norm)
48
+
49
+ # Initialize the activation function
50
+ if activation == "relu":
51
+ self.activation = nn.ReLU(inplace=True)
52
+ elif activation == "lrelu":
53
+ self.activation = nn.LeakyReLU(0.2, inplace=True)
54
+ elif activation == "elu":
55
+ self.activation = nn.ELU(inplace=True)
56
+ elif activation == "selu":
57
+ self.activation = nn.SELU(inplace=True)
58
+ elif activation == "tanh":
59
+ self.activation = nn.Tanh()
60
+ elif activation == "sigmoid":
61
+ self.activation = nn.Sigmoid()
62
+ elif activation == "none":
63
+ self.activation = None
64
+ else:
65
+ assert 0, "Unsupported activation: {}".format(activation)
66
+
67
+ # Initialize the convolution layers
68
+ if sn:
69
+ self.conv2d = SpectralNorm(
70
+ nn.Conv2d(
71
+ in_channels,
72
+ out_channels,
73
+ kernel_size,
74
+ stride,
75
+ padding=0,
76
+ dilation=dilation,
77
+ )
78
+ )
79
+ else:
80
+ self.conv2d = nn.Conv2d(
81
+ in_channels,
82
+ out_channels,
83
+ kernel_size,
84
+ stride,
85
+ padding=0,
86
+ dilation=dilation,
87
+ )
88
+
89
+ def forward(self, x):
90
+ x = self.pad(x)
91
+ x = self.conv2d(x)
92
+ if self.norm:
93
+ x = self.norm(x)
94
+ if self.activation:
95
+ x = self.activation(x)
96
+ return x
97
+
98
+
99
+ class TransposeConv2dLayer(nn.Module):
100
+ def __init__(
101
+ self,
102
+ in_channels,
103
+ out_channels,
104
+ kernel_size,
105
+ stride=1,
106
+ padding=0,
107
+ dilation=1,
108
+ pad_type="zero",
109
+ activation="lrelu",
110
+ norm="none",
111
+ sn=False,
112
+ scale_factor=2,
113
+ ):
114
+ super(TransposeConv2dLayer, self).__init__()
115
+ # Initialize the conv scheme
116
+ self.scale_factor = scale_factor
117
+ self.conv2d = Conv2dLayer(
118
+ in_channels,
119
+ out_channels,
120
+ kernel_size,
121
+ stride,
122
+ padding,
123
+ dilation,
124
+ pad_type,
125
+ activation,
126
+ norm,
127
+ sn,
128
+ )
129
+
130
+ def forward(self, x):
131
+ x = F.interpolate(
132
+ x,
133
+ scale_factor=self.scale_factor,
134
+ mode="nearest",
135
+ recompute_scale_factor=False,
136
+ )
137
+ x = self.conv2d(x)
138
+ return x
139
+
140
+
141
+ # -----------------------------------------------
142
+ # Gated ConvBlock
143
+ # -----------------------------------------------
144
+ class GatedConv2d(nn.Module):
145
+ def __init__(
146
+ self,
147
+ in_channels,
148
+ out_channels,
149
+ kernel_size,
150
+ stride=1,
151
+ padding=0,
152
+ dilation=1,
153
+ pad_type="reflect",
154
+ activation="elu",
155
+ norm="none",
156
+ sn=False,
157
+ ):
158
+ super(GatedConv2d, self).__init__()
159
+ # Initialize the padding scheme
160
+ if pad_type == "reflect":
161
+ self.pad = nn.ReflectionPad2d(padding)
162
+ elif pad_type == "replicate":
163
+ self.pad = nn.ReplicationPad2d(padding)
164
+ elif pad_type == "zero":
165
+ self.pad = nn.ZeroPad2d(padding)
166
+ else:
167
+ assert 0, "Unsupported padding type: {}".format(pad_type)
168
+
169
+ # Initialize the normalization type
170
+ if norm == "bn":
171
+ self.norm = nn.BatchNorm2d(out_channels)
172
+ elif norm == "in":
173
+ self.norm = nn.InstanceNorm2d(out_channels)
174
+ elif norm == "ln":
175
+ self.norm = LayerNorm(out_channels)
176
+ elif norm == "none":
177
+ self.norm = None
178
+ else:
179
+ assert 0, "Unsupported normalization: {}".format(norm)
180
+
181
+ # Initialize the activation function
182
+ if activation == "relu":
183
+ self.activation = nn.ReLU(inplace=True)
184
+ elif activation == "lrelu":
185
+ self.activation = nn.LeakyReLU(0.2, inplace=True)
186
+ elif activation == "elu":
187
+ self.activation = nn.ELU()
188
+ elif activation == "selu":
189
+ self.activation = nn.SELU(inplace=True)
190
+ elif activation == "tanh":
191
+ self.activation = nn.Tanh()
192
+ elif activation == "sigmoid":
193
+ self.activation = nn.Sigmoid()
194
+ elif activation == "none":
195
+ self.activation = None
196
+ else:
197
+ assert 0, "Unsupported activation: {}".format(activation)
198
+
199
+ # Initialize the convolution layers
200
+ if sn:
201
+ self.conv2d = SpectralNorm(
202
+ nn.Conv2d(
203
+ in_channels,
204
+ out_channels,
205
+ kernel_size,
206
+ stride,
207
+ padding=0,
208
+ dilation=dilation,
209
+ )
210
+ )
211
+ self.mask_conv2d = SpectralNorm(
212
+ nn.Conv2d(
213
+ in_channels,
214
+ out_channels,
215
+ kernel_size,
216
+ stride,
217
+ padding=0,
218
+ dilation=dilation,
219
+ )
220
+ )
221
+ else:
222
+ self.conv2d = nn.Conv2d(
223
+ in_channels,
224
+ out_channels,
225
+ kernel_size,
226
+ stride,
227
+ padding=0,
228
+ dilation=dilation,
229
+ )
230
+ self.mask_conv2d = nn.Conv2d(
231
+ in_channels,
232
+ out_channels,
233
+ kernel_size,
234
+ stride,
235
+ padding=0,
236
+ dilation=dilation,
237
+ )
238
+ self.sigmoid = torch.nn.Sigmoid()
239
+
240
+ def forward(self, x):
241
+ x = self.pad(x)
242
+ conv = self.conv2d(x)
243
+ mask = self.mask_conv2d(x)
244
+ gated_mask = self.sigmoid(mask)
245
+ if self.activation:
246
+ conv = self.activation(conv)
247
+ x = conv * gated_mask
248
+ return x
249
+
250
+
251
+ class TransposeGatedConv2d(nn.Module):
252
+ def __init__(
253
+ self,
254
+ in_channels,
255
+ out_channels,
256
+ kernel_size,
257
+ stride=1,
258
+ padding=0,
259
+ dilation=1,
260
+ pad_type="zero",
261
+ activation="lrelu",
262
+ norm="none",
263
+ sn=True,
264
+ scale_factor=2,
265
+ ):
266
+ super(TransposeGatedConv2d, self).__init__()
267
+ # Initialize the conv scheme
268
+ self.scale_factor = scale_factor
269
+ self.gated_conv2d = GatedConv2d(
270
+ in_channels,
271
+ out_channels,
272
+ kernel_size,
273
+ stride,
274
+ padding,
275
+ dilation,
276
+ pad_type,
277
+ activation,
278
+ norm,
279
+ sn,
280
+ )
281
+
282
+ def forward(self, x):
283
+ x = F.interpolate(
284
+ x,
285
+ scale_factor=self.scale_factor,
286
+ mode="nearest",
287
+ recompute_scale_factor=False,
288
+ )
289
+ x = self.gated_conv2d(x)
290
+ return x
291
+
292
+
293
+ # ----------------------------------------
294
+ # Layer Norm
295
+ # ----------------------------------------
296
+ class LayerNorm(nn.Module):
297
+ def __init__(self, num_features, eps=1e-8, affine=True):
298
+ super(LayerNorm, self).__init__()
299
+ self.num_features = num_features
300
+ self.affine = affine
301
+ self.eps = eps
302
+
303
+ if self.affine:
304
+ self.gamma = Parameter(torch.Tensor(num_features).uniform_())
305
+ self.beta = Parameter(torch.zeros(num_features))
306
+
307
+ def forward(self, x):
308
+ # layer norm
309
+ shape = [-1] + [1] * (x.dim() - 1) # for 4d input: [-1, 1, 1, 1]
310
+ if x.size(0) == 1:
311
+ # These two lines run much faster in pytorch 0.4 than the two lines listed below.
312
+ mean = x.view(-1).mean().view(*shape)
313
+ std = x.view(-1).std().view(*shape)
314
+ else:
315
+ mean = x.view(x.size(0), -1).mean(1).view(*shape)
316
+ std = x.view(x.size(0), -1).std(1).view(*shape)
317
+ x = (x - mean) / (std + self.eps)
318
+ # if it is learnable
319
+ if self.affine:
320
+ shape = [1, -1] + [1] * (
321
+ x.dim() - 2
322
+ ) # for 4d input: [1, -1, 1, 1]
323
+ x = x * self.gamma.view(*shape) + self.beta.view(*shape)
324
+ return x
325
+
326
+
327
+ # -----------------------------------------------
328
+ # SpectralNorm
329
+ # -----------------------------------------------
330
+ def l2normalize(v, eps=1e-12):
331
+ return v / (v.norm() + eps)
332
+
333
+
334
+ class SpectralNorm(nn.Module):
335
+ def __init__(self, module, name="weight", power_iterations=1):
336
+ super(SpectralNorm, self).__init__()
337
+ self.module = module
338
+ self.name = name
339
+ self.power_iterations = power_iterations
340
+ if not self._made_params():
341
+ self._make_params()
342
+
343
+ def _update_u_v(self):
344
+ u = getattr(self.module, self.name + "_u")
345
+ v = getattr(self.module, self.name + "_v")
346
+ w = getattr(self.module, self.name + "_bar")
347
+
348
+ height = w.data.shape[0]
349
+ for _ in range(self.power_iterations):
350
+ v.data = l2normalize(
351
+ torch.mv(torch.t(w.view(height, -1).data), u.data)
352
+ )
353
+ u.data = l2normalize(torch.mv(w.view(height, -1).data, v.data))
354
+
355
+ # sigma = torch.dot(u.data, torch.mv(w.view(height,-1).data, v.data))
356
+ sigma = u.dot(w.view(height, -1).mv(v))
357
+ setattr(self.module, self.name, w / sigma.expand_as(w))
358
+
359
+ def _made_params(self):
360
+ try:
361
+ u = getattr(self.module, self.name + "_u")
362
+ v = getattr(self.module, self.name + "_v")
363
+ w = getattr(self.module, self.name + "_bar")
364
+ return True
365
+ except AttributeError:
366
+ return False
367
+
368
+ def _make_params(self):
369
+ w = getattr(self.module, self.name)
370
+
371
+ height = w.data.shape[0]
372
+ width = w.view(height, -1).data.shape[1]
373
+
374
+ u = Parameter(w.data.new(height).normal_(0, 1), requires_grad=False)
375
+ v = Parameter(w.data.new(width).normal_(0, 1), requires_grad=False)
376
+ u.data = l2normalize(u.data)
377
+ v.data = l2normalize(v.data)
378
+ w_bar = Parameter(w.data)
379
+
380
+ del self.module._parameters[self.name]
381
+
382
+ self.module.register_parameter(self.name + "_u", u)
383
+ self.module.register_parameter(self.name + "_v", v)
384
+ self.module.register_parameter(self.name + "_bar", w_bar)
385
+
386
+ def forward(self, *args):
387
+ self._update_u_v()
388
+ return self.module.forward(*args)
389
+
390
+
391
+ class ContextualAttention(nn.Module):
392
+ def __init__(
393
+ self,
394
+ ksize=3,
395
+ stride=1,
396
+ rate=1,
397
+ fuse_k=3,
398
+ softmax_scale=10,
399
+ fuse=True,
400
+ use_cuda=True,
401
+ device_ids=None,
402
+ ):
403
+ super(ContextualAttention, self).__init__()
404
+ self.ksize = ksize
405
+ self.stride = stride
406
+ self.rate = rate
407
+ self.fuse_k = fuse_k
408
+ self.softmax_scale = softmax_scale
409
+ self.fuse = fuse
410
+ self.use_cuda = use_cuda
411
+ self.device_ids = device_ids
412
+
413
+ def forward(self, f, b, mask=None):
414
+ """Contextual attention layer implementation.
415
+ Contextual attention is first introduced in publication:
416
+ Generative Image Inpainting with Contextual Attention, Yu et al.
417
+ Args:
418
+ f: Input feature to match (foreground).
419
+ b: Input feature for match (background).
420
+ mask: Input mask for b, indicating patches not available.
421
+ ksize: Kernel size for contextual attention.
422
+ stride: Stride for extracting patches from b.
423
+ rate: Dilation for matching.
424
+ softmax_scale: Scaled softmax for attention.
425
+ Returns:
426
+ torch.tensor: output
427
+ """
428
+ # get shapes
429
+ raw_int_fs = list(f.size()) # b*c*h*w
430
+ raw_int_bs = list(b.size()) # b*c*h*w
431
+
432
+ # extract patches from background with stride and rate
433
+ kernel = 2 * self.rate
434
+ # raw_w is extracted for reconstruction
435
+ raw_w = extract_image_patches(
436
+ b,
437
+ ksizes=[kernel, kernel],
438
+ strides=[self.rate * self.stride, self.rate * self.stride],
439
+ rates=[1, 1],
440
+ padding="same",
441
+ ) # [N, C*k*k, L]
442
+ # raw_shape: [N, C, k, k, L] [4, 192, 4, 4, 1024]
443
+ raw_w = raw_w.view(raw_int_bs[0], raw_int_bs[1], kernel, kernel, -1)
444
+ raw_w = raw_w.permute(0, 4, 1, 2, 3) # raw_shape: [N, L, C, k, k]
445
+ raw_w_groups = torch.split(raw_w, 1, dim=0)
446
+
447
+ # downscaling foreground option: downscaling both foreground and
448
+ # background for matching and use original background for reconstruction.
449
+ f = F.interpolate(
450
+ f,
451
+ scale_factor=1.0 / self.rate,
452
+ mode="nearest",
453
+ recompute_scale_factor=False,
454
+ )
455
+ b = F.interpolate(
456
+ b,
457
+ scale_factor=1.0 / self.rate,
458
+ mode="nearest",
459
+ recompute_scale_factor=False,
460
+ )
461
+ int_fs = list(f.size()) # b*c*h*w
462
+ int_bs = list(b.size())
463
+ f_groups = torch.split(
464
+ f, 1, dim=0
465
+ ) # split tensors along the batch dimension
466
+ # w shape: [N, C*k*k, L]
467
+ w = extract_image_patches(
468
+ b,
469
+ ksizes=[self.ksize, self.ksize],
470
+ strides=[self.stride, self.stride],
471
+ rates=[1, 1],
472
+ padding="same",
473
+ )
474
+ # w shape: [N, C, k, k, L]
475
+ w = w.view(int_bs[0], int_bs[1], self.ksize, self.ksize, -1)
476
+ w = w.permute(0, 4, 1, 2, 3) # w shape: [N, L, C, k, k]
477
+ w_groups = torch.split(w, 1, dim=0)
478
+
479
+ # process mask
480
+ mask = F.interpolate(
481
+ mask,
482
+ scale_factor=1.0 / self.rate,
483
+ mode="nearest",
484
+ recompute_scale_factor=False,
485
+ )
486
+ int_ms = list(mask.size())
487
+ # m shape: [N, C*k*k, L]
488
+ m = extract_image_patches(
489
+ mask,
490
+ ksizes=[self.ksize, self.ksize],
491
+ strides=[self.stride, self.stride],
492
+ rates=[1, 1],
493
+ padding="same",
494
+ )
495
+
496
+ # m shape: [N, C, k, k, L]
497
+ m = m.view(int_ms[0], int_ms[1], self.ksize, self.ksize, -1)
498
+ m = m.permute(0, 4, 1, 2, 3) # m shape: [N, L, C, k, k]
499
+ m = m[0] # m shape: [L, C, k, k]
500
+ # mm shape: [L, 1, 1, 1]
501
+ mm = (reduce_mean(m, axis=[1, 2, 3], keepdim=True) == 0.0).to(
502
+ torch.float32
503
+ )
504
+ mm = mm.permute(1, 0, 2, 3) # mm shape: [1, L, 1, 1]
505
+
506
+ y = []
507
+ offsets = []
508
+ k = self.fuse_k
509
+ scale = (
510
+ self.softmax_scale
511
+ ) # to fit the PyTorch tensor image value range
512
+ fuse_weight = torch.eye(k).view(1, 1, k, k) # 1*1*k*k
513
+ if self.use_cuda:
514
+ fuse_weight = fuse_weight.cuda()
515
+
516
+ for xi, wi, raw_wi in zip(f_groups, w_groups, raw_w_groups):
517
+ """
518
+ O => output channel as a conv filter
519
+ I => input channel as a conv filter
520
+ xi : separated tensor along batch dimension of front; (B=1, C=128, H=32, W=32)
521
+ wi : separated patch tensor along batch dimension of back; (B=1, O=32*32, I=128, KH=3, KW=3)
522
+ raw_wi : separated tensor along batch dimension of back; (B=1, I=32*32, O=128, KH=4, KW=4)
523
+ """
524
+ # conv for compare
525
+ escape_NaN = torch.FloatTensor([1e-4])
526
+ if self.use_cuda:
527
+ escape_NaN = escape_NaN.cuda()
528
+ wi = wi[0] # [L, C, k, k]
529
+ max_wi = torch.sqrt(
530
+ reduce_sum(
531
+ torch.pow(wi, 2) + escape_NaN, axis=[1, 2, 3], keepdim=True
532
+ )
533
+ )
534
+ wi_normed = wi / max_wi
535
+ # xi shape: [1, C, H, W], yi shape: [1, L, H, W]
536
+ xi = same_padding(
537
+ xi, [self.ksize, self.ksize], [1, 1], [1, 1]
538
+ ) # xi: 1*c*H*W
539
+ yi = F.conv2d(xi, wi_normed, stride=1) # [1, L, H, W]
540
+ # conv implementation for fuse scores to encourage large patches
541
+ if self.fuse:
542
+ # make all of depth to spatial resolution
543
+ yi = yi.view(
544
+ 1, 1, int_bs[2] * int_bs[3], int_fs[2] * int_fs[3]
545
+ ) # (B=1, I=1, H=32*32, W=32*32)
546
+ yi = same_padding(yi, [k, k], [1, 1], [1, 1])
547
+ yi = F.conv2d(
548
+ yi, fuse_weight, stride=1
549
+ ) # (B=1, C=1, H=32*32, W=32*32)
550
+ yi = yi.contiguous().view(
551
+ 1, int_bs[2], int_bs[3], int_fs[2], int_fs[3]
552
+ ) # (B=1, 32, 32, 32, 32)
553
+ yi = yi.permute(0, 2, 1, 4, 3)
554
+ yi = yi.contiguous().view(
555
+ 1, 1, int_bs[2] * int_bs[3], int_fs[2] * int_fs[3]
556
+ )
557
+ yi = same_padding(yi, [k, k], [1, 1], [1, 1])
558
+ yi = F.conv2d(yi, fuse_weight, stride=1)
559
+ yi = yi.contiguous().view(
560
+ 1, int_bs[3], int_bs[2], int_fs[3], int_fs[2]
561
+ )
562
+ yi = yi.permute(0, 2, 1, 4, 3).contiguous()
563
+ yi = yi.view(
564
+ 1, int_bs[2] * int_bs[3], int_fs[2], int_fs[3]
565
+ ) # (B=1, C=32*32, H=32, W=32)
566
+ # softmax to match
567
+ yi = yi * mm
568
+ yi = F.softmax(yi * scale, dim=1)
569
+ yi = yi * mm # [1, L, H, W]
570
+
571
+ offset = torch.argmax(yi, dim=1, keepdim=True) # 1*1*H*W
572
+
573
+ if int_bs != int_fs:
574
+ # Normalize the offset value to match foreground dimension
575
+ times = float(int_fs[2] * int_fs[3]) / float(
576
+ int_bs[2] * int_bs[3]
577
+ )
578
+ offset = ((offset + 1).float() * times - 1).to(torch.int64)
579
+ offset = torch.cat(
580
+ [offset // int_fs[3], offset % int_fs[3]], dim=1
581
+ ) # 1*2*H*W
582
+
583
+ # deconv for patch pasting
584
+ wi_center = raw_wi[0]
585
+ # yi = F.pad(yi, [0, 1, 0, 1]) # here may need conv_transpose same padding
586
+ yi = (
587
+ F.conv_transpose2d(yi, wi_center, stride=self.rate, padding=1)
588
+ / 4.0
589
+ ) # (B=1, C=128, H=64, W=64)
590
+ y.append(yi)
591
+ offsets.append(offset)
592
+
593
+ y = torch.cat(y, dim=0) # back to the mini-batch
594
+ y = y.contiguous().view(raw_int_fs)  # reshape back to the original foreground feature size
595
+
596
+ return y
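
As a quick illustration of the gating used throughout this file (a standalone sketch, not part of the upload): `GatedConv2d` runs two parallel convolutions over the padded input and multiplies the activated feature branch by a sigmoid gate, so the gate can learn to suppress features coming from hole regions.

import torch
from deepfillv2.network_module import GatedConv2d

# 3 -> 8 channels, 3x3 kernel, stride 1, padding 1, reflect padding by default
layer = GatedConv2d(3, 8, 3, 1, 1)
x = torch.rand(2, 3, 16, 16)
y = layer(x)        # y = ELU(conv(x)) * sigmoid(mask_conv(x))
print(y.shape)      # torch.Size([2, 8, 16, 16])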
deepfillv2/network_utils.py ADDED
@@ -0,0 +1,79 @@
1
+ # for contextual attention
2
+ import torch
3
+
4
+
5
+ def extract_image_patches(images, ksizes, strides, rates, padding="same"):
6
+ """
7
+ Extract patches from images and put them in the C output dimension.
8
+ :param padding:
9
+ :param images: [batch, channels, in_rows, in_cols]. A 4-D Tensor with shape
10
+ :param ksizes: [ksize_rows, ksize_cols]. The size of the sliding window for
11
+ each dimension of images
12
+ :param strides: [stride_rows, stride_cols]
13
+ :param rates: [dilation_rows, dilation_cols]
14
+ :return: A Tensor
15
+ """
16
+ assert len(images.size()) == 4
17
+ assert padding in ["same", "valid"]
18
+ batch_size, channel, height, width = images.size()
19
+
20
+ if padding == "same":
21
+ images = same_padding(images, ksizes, strides, rates)
22
+ elif padding == "valid":
23
+ pass
24
+ else:
25
+ raise NotImplementedError(
26
+ 'Unsupported padding type: {}.\
27
+ Only "same" or "valid" are supported.'.format(
28
+ padding
29
+ )
30
+ )
31
+
32
+ unfold = torch.nn.Unfold(
33
+ kernel_size=ksizes, dilation=rates, padding=0, stride=strides
34
+ )
35
+ patches = unfold(images)
36
+ return patches # [N, C*k*k, L], L is the total number of such blocks
37
+
38
+
39
+ def same_padding(images, ksizes, strides, rates):
40
+ assert len(images.size()) == 4
41
+ batch_size, channel, rows, cols = images.size()
42
+ out_rows = (rows + strides[0] - 1) // strides[0]
43
+ out_cols = (cols + strides[1] - 1) // strides[1]
44
+ effective_k_row = (ksizes[0] - 1) * rates[0] + 1
45
+ effective_k_col = (ksizes[1] - 1) * rates[1] + 1
46
+ padding_rows = max(0, (out_rows - 1) * strides[0] + effective_k_row - rows)
47
+ padding_cols = max(0, (out_cols - 1) * strides[1] + effective_k_col - cols)
48
+ # Pad the input
49
+ padding_top = int(padding_rows / 2.0)
50
+ padding_left = int(padding_cols / 2.0)
51
+ padding_bottom = padding_rows - padding_top
52
+ padding_right = padding_cols - padding_left
53
+ paddings = (padding_left, padding_right, padding_top, padding_bottom)
54
+ images = torch.nn.ZeroPad2d(paddings)(images)
55
+ return images
56
+
57
+
58
+ def reduce_mean(x, axis=None, keepdim=False):
59
+ if not axis:
60
+ axis = range(len(x.shape))
61
+ for i in sorted(axis, reverse=True):
62
+ x = torch.mean(x, dim=i, keepdim=keepdim)
63
+ return x
64
+
65
+
66
+ def reduce_std(x, axis=None, keepdim=False):
67
+ if not axis:
68
+ axis = range(len(x.shape))
69
+ for i in sorted(axis, reverse=True):
70
+ x = torch.std(x, dim=i, keepdim=keepdim)
71
+ return x
72
+
73
+
74
+ def reduce_sum(x, axis=None, keepdim=False):
75
+ if not axis:
76
+ axis = range(len(x.shape))
77
+ for i in sorted(axis, reverse=True):
78
+ x = torch.sum(x, dim=i, keepdim=keepdim)
79
+ return x
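
A small shape check for the helpers above (illustrative only): with "same" padding, `extract_image_patches` returns one column per output location, each holding a flattened C x k x k patch.

import torch
from deepfillv2.network_utils import extract_image_patches

x = torch.rand(1, 3, 8, 8)
patches = extract_image_patches(
    x, ksizes=[3, 3], strides=[1, 1], rates=[1, 1], padding="same"
)
print(patches.shape)  # torch.Size([1, 27, 64]): C*k*k = 27 values per patch, L = 8*8 = 64 patches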
deepfillv2/test_dataset.py ADDED
@@ -0,0 +1,47 @@
1
+ import cv2
2
+ import numpy as np
3
+ import torch
4
+ from torch.utils.data import Dataset
5
+
6
+ from config import *
7
+
8
+
9
+ class InpaintDataset(Dataset):
10
+ def __init__(self):
11
+ self.imglist = [INIMAGE]
12
+ self.masklist = [MASKIMAGE]
13
+ self.setsize = RESIZE_TO
14
+
15
+ def __len__(self):
16
+ return len(self.imglist)
17
+
18
+ def __getitem__(self, index):
19
+ # image
20
+ img = cv2.imread(self.imglist[index])
21
+ mask = cv2.imread(self.masklist[index])[:, :, 0]
22
+ ## COMMENTING FOR NOW
23
+ # h, w = mask.shape
24
+ # # img = cv2.resize(img, (w, h))
25
+ img = cv2.resize(img, self.setsize)
26
+ mask = cv2.resize(mask, self.setsize)
27
+ ##
28
+ # find the Minimum bounding rectangle in the mask
29
+ """
30
+ contours, hier = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
31
+ for cidx, cnt in enumerate(contours):
32
+ (x, y, w, h) = cv2.boundingRect(cnt)
33
+ mask[y:y+h, x:x+w] = 255
34
+ """
35
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
36
+
37
+ img = (
38
+ torch.from_numpy(img.astype(np.float32) / 255.0)
39
+ .permute(2, 0, 1)
40
+ .contiguous()
41
+ )
42
+ mask = (
43
+ torch.from_numpy(mask.astype(np.float32) / 255.0)
44
+ .unsqueeze(0)
45
+ .contiguous()
46
+ )
47
+ return img, mask
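
A hedged usage sketch for this dataset: it assumes the project-level `config` module defines `INIMAGE` and `MASKIMAGE` (paths to the input image and its binary mask) and `RESIZE_TO` (a `(width, height)` tuple), since those names are referenced here but not defined in this commit.

from torch.utils.data import DataLoader
from deepfillv2.test_dataset import InpaintDataset

loader = DataLoader(InpaintDataset(), batch_size=1, shuffle=False)
img, mask = next(iter(loader))  # img: [1, 3, H, W] in [0, 1]; mask: [1, 1, H, W] with 1 = hole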
deepfillv2/utils.py ADDED
@@ -0,0 +1,145 @@
1
+ import os
2
+ import numpy as np
3
+ import cv2
4
+ import torch
5
+ from deepfillv2 import network
6
+ import skimage
7
+
8
+ from config import GPU_DEVICE
9
+
10
+
11
+ # ----------------------------------------
12
+ # Network
13
+ # ----------------------------------------
14
+ def create_generator(opt):
15
+ # Initialize the networks
16
+ generator = network.GatedGenerator(opt)
17
+ print("-- Generator is created! --")
18
+ network.weights_init(
19
+ generator, init_type=opt.init_type, init_gain=opt.init_gain
20
+ )
21
+ print("-- Initialized generator with %s type --" % opt.init_type)
22
+ return generator
23
+
24
+
25
+ def create_discriminator(opt):
26
+ # Initialize the networks
27
+ discriminator = network.PatchDiscriminator(opt)
28
+ print("-- Discriminator is created! --")
29
+ network.weights_init(
30
+ discriminator, init_type=opt.init_type, init_gain=opt.init_gain
31
+ )
32
+ print("-- Initialized discriminator with %s type --" % opt.init_type)
33
+ return discriminator
34
+
35
+
36
+ def create_perceptualnet():
37
+ # Get the first 15 layers of vgg16, which is conv3_3
38
+ perceptualnet = network.PerceptualNet()
39
+ print("-- Perceptual network is created! --")
40
+ return perceptualnet
41
+
42
+
43
+ # ----------------------------------------
44
+ # PATH processing
45
+ # ----------------------------------------
46
+ def text_readlines(filename):
47
+ # Try to read a txt file and return its lines as a list; return [] if the file cannot be opened.
48
+ try:
49
+ file = open(filename, "r")
50
+ except IOError:
51
+ error = []
52
+ return error
53
+ content = file.readlines()
54
+ # Strip the trailing newline character from each line
55
+ for i in range(len(content)):
56
+ content[i] = content[i][: len(content[i]) - 1]
57
+ file.close()
58
+ return content
59
+
60
+
61
+ def savetxt(name, loss_log):
62
+ np_loss_log = np.array(loss_log)
63
+ np.savetxt(name, np_loss_log)
64
+
65
+
66
+ def get_files(path, mask=False):
67
+ # read a folder, return the complete path
68
+ ret = []
69
+ for root, dirs, files in os.walk(path):
70
+ for filespath in files:
71
+ if filespath == ".DS_Store":  # skip macOS metadata files
72
+ continue
73
+ ret.append(os.path.join(root, filespath))
74
+ return ret
75
+
76
+
77
+ def get_names(path):
78
+ # read a folder, return the image name
79
+ ret = []
80
+ for root, dirs, files in os.walk(path):
81
+ for filespath in files:
82
+ ret.append(filespath)
83
+ return ret
84
+
85
+
86
+ def text_save(content, filename, mode="a"):
87
+ # save a list to a txt
88
+ # Try to save a list variable in txt file.
89
+ file = open(filename, mode)
90
+ for i in range(len(content)):
91
+ file.write(str(content[i]) + "\n")
92
+ file.close()
93
+
94
+
95
+ def check_path(path):
96
+ if not os.path.exists(path):
97
+ os.makedirs(path)
98
+
99
+
100
+ # ----------------------------------------
101
+ # Validation and Sample at training
102
+ # ----------------------------------------
103
+ def save_sample_png(
104
+ sample_folder, sample_name, img_list, name_list, pixel_max_cnt=255
105
+ ):
106
+ # Save image one-by-one
107
+ for i in range(len(img_list)):
108
+ img = img_list[i]
109
+ # Recover normalization: scale from [0, 1] back to the [0, 255] pixel range
110
+ img = img * 255
111
+ # Process img_copy and do not destroy the data of img
112
+ img_copy = (
113
+ img.clone().data.permute(0, 2, 3, 1)[0, :, :, :].to("cpu").numpy()
114
+ )
115
+ img_copy = np.clip(img_copy, 0, pixel_max_cnt)
116
+ img_copy = img_copy.astype(np.uint8)
117
+ img_copy = cv2.cvtColor(img_copy, cv2.COLOR_RGB2BGR)
118
+ # Save to certain path
119
+ save_img_path = os.path.join(sample_folder, sample_name)
120
+ cv2.imwrite(save_img_path, img_copy)
121
+
122
+
123
+ def psnr(pred, target, pixel_max_cnt=255):
124
+ mse = torch.mul(target - pred, target - pred)
125
+ rmse_avg = (torch.mean(mse).item()) ** 0.5
126
+ p = 20 * np.log10(pixel_max_cnt / rmse_avg)
127
+ return p
128
+
129
+
130
+ def grey_psnr(pred, target, pixel_max_cnt=255):
131
+ pred = torch.sum(pred, dim=0)
132
+ target = torch.sum(target, dim=0)
133
+ mse = torch.mul(target - pred, target - pred)
134
+ rmse_avg = (torch.mean(mse).item()) ** 0.5
135
+ p = 20 * np.log10(pixel_max_cnt * 3 / rmse_avg)
136
+ return p
137
+
138
+
139
+ def ssim(pred, target):
140
+ pred = pred.clone().data.permute(0, 2, 3, 1).to(GPU_DEVICE).numpy()
141
+ target = target.clone().data.permute(0, 2, 3, 1).to(GPU_DEVICE).numpy()
142
+ target = target[0]
143
+ pred = pred[0]
144
+ ssim = skimage.measure.compare_ssim(target, pred, multichannel=True)
145
+ return ssim
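
Finally, a rough end-to-end sketch of how these helpers could fit together at inference time. Assumptions: the `config` module provides `GPU_DEVICE` (imported above), the `opt` fields below match the ones the networks read, and "pretrained.pth" is only a placeholder for whatever checkpoint you actually have.

import torch
from types import SimpleNamespace
from torch.utils.data import DataLoader
from deepfillv2 import utils
from deepfillv2.test_dataset import InpaintDataset

opt = SimpleNamespace(
    in_channels=4, out_channels=3, latent_channels=48,   # assumed widths
    pad_type="zero", activation="elu", norm="none",
    use_cuda=False, init_type="kaiming", init_gain=0.02,
)
generator = utils.create_generator(opt).eval()
# generator.load_state_dict(torch.load("pretrained.pth", map_location="cpu"))  # placeholder path

img, mask = next(iter(DataLoader(InpaintDataset(), batch_size=1)))
with torch.no_grad():
    _, refined = generator(img, mask)
out = img * (1 - mask) + refined * mask   # keep known pixels, paste the fill into the hole
utils.check_path("results")               # make sure the output folder exists
utils.save_sample_png("results", "inpainted.png", [out], ["inpainted"])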