File size: 3,904 Bytes
82ea528
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import numpy as np
import torch
from PIL import Image
from .parsing_api import onnx_inference
from ..libs.utils import install_package

class HumanParsing:
    """Callable wrapper around a human-parsing ONNX model executed on the CPU.

    The onnxruntime session is not created in the constructor; it is built the
    first time the instance is called and then reused for later calls.
    """

    def __init__(self, model_path):
        # No session yet: it is created lazily by the first __call__.
        self.session = None
        self.model_path = model_path

    def _create_session(self):
        """Build the onnxruntime session for ``self.model_path``.

        ``install_package`` is a project helper invoked before the import
        (presumably it installs onnxruntime when missing -- confirm in
        ``libs.utils``).
        """
        install_package('onnxruntime')
        import onnxruntime as ort

        options = ort.SessionOptions()
        options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
        options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
        return ort.InferenceSession(
            self.model_path,
            sess_options=options,
            providers=['CPUExecutionProvider'],
        )

    def __call__(self, input_image, mask_components):
        """Run the parsing model and return ``(parsed_image, mask)``.

        Both arguments are forwarded unchanged to ``onnx_inference`` together
        with the cached session; its two results are returned as a tuple.
        """
        if self.session is None:
            self.session = self._create_session()

        parsed, component_mask = onnx_inference(self.session, input_image, mask_components)
        return parsed, component_mask


class HumanParts:
    """Callable wrapper that segments human body parts with an ONNX model.

    Calling an instance runs the model on one image and returns a binary mask
    covering the requested body-part classes. The onnxruntime session is
    created on the first call and reused afterwards.
    """

    def __init__(self, model_path):
        self.model_path = model_path
        self.session = None
        # Label ids emitted by the model. A ``mask_components`` entry is an
        # INDEX into this list, not a label id. Label names as recorded by the
        # original author (note that "face" comes second here):
        #   index 0  -> label 0   background
        #   index 1  -> label 13  face
        #   index 2  -> label 2   hair
        #   index 3  -> label 4   glasses
        #   index 4  -> label 5   top-clothes
        #   index 5  -> label 9   bottom-clothes
        #   index 6  -> label 10  torso-skin
        #   index 7  -> label 14  left-arm
        #   index 8  -> label 15  right-arm
        #   index 9  -> label 16  left-leg
        #   index 10 -> label 17  right-leg
        #   index 11 -> label 18  left-foot
        #   index 12 -> label 19  right-foot
        self.classes = [0, 13, 2, 4, 5, 9, 10, 14, 15, 16, 17, 18, 19]

    def __call__(self, input_image, mask_components):
        """Return the mask tensor for ``mask_components`` in ``input_image``.

        Args:
            input_image: image tensor passed straight to :meth:`get_mask`.
            mask_components: iterable of indices into ``self.classes``.

        Returns:
            The uint8 mask tensor produced by :meth:`get_mask` (no rotation).
        """
        if self.session is None:
            # Project helper called before the import (presumably installs
            # onnxruntime when it is missing -- confirm in ``libs.utils``).
            install_package('onnxruntime')
            import onnxruntime as ort

            self.session = ort.InferenceSession(
                self.model_path,
                providers=['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider'],
            )

        mask, = self.get_mask(self.session, input_image, 0, mask_components)
        return mask

    def get_mask(self, model, image, rotation, mask_components):
        """Run ``model`` on ``image`` and build a binary mask of selected classes.

        Args:
            model: object exposing ``get_inputs()``, ``get_outputs()`` and
                ``run()`` like an onnxruntime ``InferenceSession``.
            image: torch tensor; assumed to be a single image shaped
                (1, H, W, C) with values in [0, 1] -- TODO confirm with callers.
            rotation: angle in degrees applied to the 512x512 model input; the
                mask is rotated back by the opposite angle. ``0`` disables it.
            mask_components: iterable of indices into ``self.classes``.

        Returns:
            A 1-tuple holding a uint8 tensor of shape (1, 1, H, W) at the
            original image size, with 1 where a selected class was detected.

        Raises:
            IndexError: if an entry of ``mask_components`` is out of range for
                ``self.classes``.
        """
        # detach()/cpu() make this work for CUDA tensors and tensors that
        # require grad; for plain CPU tensors they are no-ops.
        image_np = image.squeeze(0).detach().cpu().numpy() * 255
        # Clip before the cast so out-of-range values saturate instead of wrapping.
        image_np = np.clip(image_np, 0, 255).astype(np.uint8)

        pil_image = Image.fromarray(image_np)
        original_size = pil_image.size  # to resize the mask later
        # resize to 512x512 as the model expects
        pil_image = pil_image.resize((512, 512))
        center = (256, 256)

        if rotation != 0:
            pil_image = pil_image.rotate(rotation, center=center)

        # normalize pixel values from [0, 255] to [-1, 1] and add a batch axis
        model_input = np.array(pil_image).astype(np.float32) / 127.5 - 1
        model_input = np.expand_dims(model_input, axis=0)

        # use the onnx model to get per-pixel class scores
        input_name = model.get_inputs()[0].name
        output_name = model.get_outputs()[0].name
        scores = model.run([output_name], {input_name: model_input})
        # argmax over the last axis picks the winning label id for each pixel
        labels = np.array(scores[0]).argmax(axis=3).squeeze(0)

        # Translate component indices to label ids, then mark every pixel whose
        # label is one of them in a single vectorized pass.
        wanted = [self.classes[class_index] for class_index in mask_components]
        mask = np.isin(labels, wanted).astype(np.uint8) * 255

        # back to the original size (a 2-D uint8 array is read as mode "L")
        mask_image = Image.fromarray(mask)
        if rotation != 0:
            mask_image = mask_image.rotate(-rotation, center=center)

        mask_image = mask_image.resize(original_size)

        # and back to numpy, scaled to [0, 1]
        mask = np.array(mask_image).astype(np.float32) / 255

        # add 2 leading dimensions to match the expected (1, 1, H, W) output
        mask = mask[np.newaxis, np.newaxis, ...]

        # The uint8 cast makes the mask binary: only pixels that are exactly
        # 1.0 survive, anything lower (e.g. values blended while resizing)
        # truncates to 0.
        return (torch.from_numpy(mask.astype(np.uint8)),)