File size: 4,121 Bytes
4d0b7ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
"""
    reference: https://github.com/xuebinqin/DIS
"""

import PIL.Image
import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image
from torch import nn
from torch.autograd import Variable
from torchvision import transforms
from torchvision.transforms.functional import normalize

from .models import ISNetDIS

# Helpers
# Inference device shared by the module: the GPU when CUDA is usable,
# otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'


class GOSNormalize(object):
    """
    Callable transform that normalizes an image with the module-level
    ``normalize`` function (imported from ``torchvision.transforms.functional``).

    Args:
        mean: per-channel means; defaults to ``[0.485, 0.456, 0.406]``.
        std: per-channel standard deviations; defaults to
            ``[0.229, 0.224, 0.225]``.
    """

    def __init__(self, mean=None, std=None):
        # Build the default lists per instance: a list literal used as a
        # default argument is shared by every instance, so mutating
        # ``obj.mean`` would silently change the defaults of later instances.
        self.mean = [0.485, 0.456, 0.406] if mean is None else mean
        self.std = [0.229, 0.224, 0.225] if std is None else std

    def __call__(self, image):
        """Return ``image`` normalized with the configured mean and std."""
        return normalize(image, self.mean, self.std)


def im_preprocess(im, size):
    """Turn a channels-last image array into a channels-first tensor.

    Args:
        im: numpy image array, either 2-D (``H x W``) or 3-D (``H x W x C``).
            A 2-D or single-channel image is replicated to three channels.
        size: target ``(height, width)``. When it has fewer than two entries
            the image is not resized.

    Returns:
        A ``(tensor, (height, width))`` tuple where the second item is the
        spatial size of the input array.  With resizing the tensor is
        ``C x size[0] x size[1]`` of dtype ``uint8`` (the bilinear result is
        truncated by the cast); without resizing it is the ``C x H x W``
        ``float32`` tensor.
    """
    if len(im.shape) < 3:
        im = im[:, :, np.newaxis]
    if im.shape[2] == 1:
        im = np.repeat(im, 3, axis=2)
    original_hw = im.shape[0:2]

    # H x W x C -> C x H x W in one step.
    im_tensor = torch.tensor(im.copy(), dtype=torch.float32).permute(2, 0, 1)
    if len(size) < 2:
        return im_tensor, original_hw

    # F.upsample is deprecated and warns on every call; F.interpolate with
    # align_corners=False is the same computation (it is what the default
    # resolves to for bilinear mode).
    resized = F.interpolate(
        im_tensor.unsqueeze(0), size, mode="bilinear", align_corners=False
    ).squeeze(0)
    return resized.type(torch.uint8), original_hw


class IsNetPipeLine:
    """Mask-prediction pipeline built around an ``ISNetDIS`` network.

    Calling an instance with a PIL image returns ``[rgba_image, mask]``: the
    input with the predicted mask as alpha channel, and the mask itself.
    """

    def __init__(self, model_path=None, model_digit="full"):
        """Create the network and prepare it for inference.

        Args:
            model_path: path of a state-dict checkpoint to load; ``None``
                keeps the freshly constructed weights.
            model_digit: ``"full"`` feeds float32 inputs to the network.
                ``"half"`` converts the network to float16 (BatchNorm2d
                layers are kept in float32).  Any value other than ``"full"``
                makes ``predict`` feed float16 inputs.
        """
        self.model_digit = model_digit
        self.model = ISNetDIS()
        # Device the weights are moved to; predict() sends its inputs there.
        self.device = device
        # Spatial size every image is resized to before entering the network.
        self.cache_size = [1024, 1024]
        self.transform = transforms.Compose([
            GOSNormalize([0.5, 0.5, 0.5], [1.0, 1.0, 1.0])
        ])

        # Build Model
        self.build_model(model_path)

    def load_image(self, image: "Image.Image"):
        """Convert a PIL image into a network input batch.

        Returns:
            ``(batch, shape)``: the normalized image tensor with a leading
            batch axis, and a ``1 x 2`` tensor holding the original
            ``(height, width)``.
        """
        rgb = np.array(image.convert("RGB"))
        resized, original_hw = im_preprocess(rgb, self.cache_size)
        scaled = torch.divide(resized, 255.0)  # uint8 values -> [0, 1]
        shape = torch.from_numpy(np.array(original_hw))
        # make a batch of image, shape
        return self.transform(scaled).unsqueeze(0), shape.unsqueeze(0)

    def build_model(self, model_path=None):
        """Load weights (if given), apply the precision mode, switch to eval."""
        if model_path is not None:
            # NOTE(review): torch.load unpickles the file; only load
            # checkpoints from a trusted source.
            self.model.load_state_dict(
                torch.load(model_path, map_location=self.device))

        # convert to half precision
        if self.model_digit == "half":
            self.model.half()
            for layer in self.model.modules():
                if isinstance(layer, nn.BatchNorm2d):
                    layer.float()
        self.model.to(self.device)
        self.model.eval()

    def __call__(self, image: "Image.Image"):
        """Predict the mask of ``image``.

        Returns:
            ``[rgba_image, mask]`` where ``mask`` is an ``"L"`` mode PIL image
            and ``rgba_image`` is the RGB input with ``mask`` as alpha channel.
        """
        image_tensor, orig_size = self.load_image(image)
        mask = self.predict(image_tensor, orig_size)

        pil_mask = Image.fromarray(mask).convert('L')
        im_rgb = image.convert("RGB")

        im_rgba = im_rgb.copy()
        im_rgba.putalpha(pil_mask)

        return [im_rgba, pil_mask]

    def predict(self, inputs_val: torch.Tensor, shapes_val):
        """
        Given an image batch, predict the mask of its first image.

        Args:
            inputs_val: input batch for the network (``B x C x H x W``).
            shapes_val: per-image original sizes; ``shapes_val[0][:2]`` is the
                ``(height, width)`` the mask is resized to.

        Returns:
            ``height x width`` ``uint8`` numpy array with values in 0..255.
        """
        dtype = torch.float32 if self.model_digit == "full" else torch.float16
        inputs = inputs_val.to(device=self.device, dtype=dtype)

        # Plain ints keep the resize independent of the container type used
        # for the shapes (tensor, list, tuple).
        height, width = (int(v) for v in shapes_val[0][:2])

        # Inference only: no autograd graph is needed, and building one keeps
        # every intermediate activation alive.
        with torch.no_grad():
            side_outputs = self.model(inputs)[0]  # list of results

            # B x 1 x H x W; the first result is the one used as prediction.
            # Slicing with 0:1 keeps the batch axis that interpolate needs.
            pred = side_outputs[0][0:1]

            # recover the prediction spatial size to the original image size
            # Index the batch/channel axes explicitly: a bare squeeze would
            # also drop a genuine height or width of 1.
            pred = F.interpolate(
                pred, size=(height, width), mode='bilinear',
                align_corners=False)[0, 0]

            lowest = torch.min(pred)
            span = torch.max(pred) - lowest
            if span > 0:
                pred = (pred - lowest) / span  # max = 1
            else:
                # A flat prediction has no range to stretch; dividing by zero
                # would fill the mask with NaN.
                pred = pred.clamp(0, 1)

        if str(self.device).startswith('cuda'):
            torch.cuda.empty_cache()
        # it is the mask we need
        return (pred.detach().cpu().numpy() * 255).astype(np.uint8)


# a = IsNetPipeLine(model_path="save_models/isnet.pth")
# input_image = Image.open("image_0mx.png")
# rgb, mask = a(input_image)
#
# rgb.save("rgb.png")
# mask.save("mask.png")