File size: 6,041 Bytes
938e515
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# Copyright (c) Facebook, Inc. and its affiliates.

from typing import Any
import torch
from torch.nn import functional as F

from detectron2.structures import BitMasks, Boxes, BoxMode

from .base import IntTupleBox, make_int_box
from .to_mask import ImageSizeType


def resample_coarse_segm_tensor_to_bbox(coarse_segm: torch.Tensor, box_xywh_abs: IntTupleBox):
    """
    Resample coarse segmentation tensor to the given
    bounding box and derive labels for each pixel of the bounding box

    Args:
        coarse_segm: float tensor of shape [1, K, Hout, Wout]
        box_xywh_abs (tuple of 4 int): bounding box given by its upper-left
            corner coordinates, width (W) and height (H)
    Return:
        Labels for each pixel of the bounding box, a long tensor of size [1, H, W]
    """
    x, y, w, h = box_xywh_abs
    w = max(int(w), 1)
    h = max(int(h), 1)
    labels = F.interpolate(coarse_segm, (h, w), mode="bilinear", align_corners=False).argmax(dim=1)
    return labels


def resample_fine_and_coarse_segm_tensors_to_bbox(
    fine_segm: torch.Tensor, coarse_segm: torch.Tensor, box_xywh_abs: IntTupleBox
):
    """
    Resample fine and coarse segmentation tensors to the given
    bounding box and derive labels for each pixel of the bounding box

    Args:
        fine_segm: float tensor of shape [1, C, Hout, Wout]
        coarse_segm: float tensor of shape [1, K, Hout, Wout]
        box_xywh_abs (tuple of 4 int): bounding box given by its upper-left
            corner coordinates, width (W) and height (H)
    Return:
        Labels for each pixel of the bounding box, a long tensor of size [1, H, W]
    """
    x, y, w, h = box_xywh_abs
    w = max(int(w), 1)
    h = max(int(h), 1)
    # coarse segmentation
    coarse_segm_bbox = F.interpolate(
        coarse_segm,
        (h, w),
        mode="bilinear",
        align_corners=False,
    ).argmax(dim=1)
    # combined coarse and fine segmentation
    labels = (
        F.interpolate(fine_segm, (h, w), mode="bilinear", align_corners=False).argmax(dim=1)
        * (coarse_segm_bbox > 0).long()
    )
    return labels


def resample_fine_and_coarse_segm_to_bbox(predictor_output: Any, box_xywh_abs: IntTupleBox):
    """
    Resample fine and coarse segmentation outputs from a predictor to the given
    bounding box and derive labels for each pixel of the bounding box

    Args:
        predictor_output: DensePose predictor output that contains segmentation
            results to be resampled
        box_xywh_abs (tuple of 4 int): bounding box given by its upper-left
            corner coordinates, width (W) and height (H)
    Return:
        Labels for each pixel of the bounding box, a long tensor of size [1, H, W]
    """
    return resample_fine_and_coarse_segm_tensors_to_bbox(
        predictor_output.fine_segm,
        predictor_output.coarse_segm,
        box_xywh_abs,
    )


def predictor_output_with_coarse_segm_to_mask(
    predictor_output: Any, boxes: Boxes, image_size_hw: ImageSizeType
) -> BitMasks:
    """
    Convert predictor output with coarse and fine segmentation to a mask.
    Assumes that predictor output has the following attributes:
     - coarse_segm (tensor of size [N, D, H, W]): coarse segmentation
         unnormalized scores for N instances; D is the number of coarse
         segmentation labels, H and W is the resolution of the estimate

    Args:
        predictor_output: DensePose predictor output to be converted to mask
        boxes (Boxes): bounding boxes that correspond to the DensePose
            predictor outputs
        image_size_hw (tuple [int, int]): image height Himg and width Wimg
    Return:
        BitMasks that contain a bool tensor of size [N, Himg, Wimg] with
        a mask of the size of the image for each instance
    """
    H, W = image_size_hw
    boxes_xyxy_abs = boxes.tensor.clone()
    boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    N = len(boxes_xywh_abs)
    masks = torch.zeros((N, H, W), dtype=torch.bool, device=boxes.tensor.device)
    for i in range(len(boxes_xywh_abs)):
        box_xywh = make_int_box(boxes_xywh_abs[i])
        box_mask = resample_coarse_segm_tensor_to_bbox(predictor_output[i].coarse_segm, box_xywh)
        x, y, w, h = box_xywh
        masks[i, y : y + h, x : x + w] = box_mask

    return BitMasks(masks)


def predictor_output_with_fine_and_coarse_segm_to_mask(
    predictor_output: Any, boxes: Boxes, image_size_hw: ImageSizeType
) -> BitMasks:
    """
    Convert predictor output with coarse and fine segmentation to a mask.
    Assumes that predictor output has the following attributes:
     - coarse_segm (tensor of size [N, D, H, W]): coarse segmentation
         unnormalized scores for N instances; D is the number of coarse
         segmentation labels, H and W is the resolution of the estimate
     - fine_segm (tensor of size [N, C, H, W]): fine segmentation
         unnormalized scores for N instances; C is the number of fine
         segmentation labels, H and W is the resolution of the estimate

    Args:
        predictor_output: DensePose predictor output to be converted to mask
        boxes (Boxes): bounding boxes that correspond to the DensePose
            predictor outputs
        image_size_hw (tuple [int, int]): image height Himg and width Wimg
    Return:
        BitMasks that contain a bool tensor of size [N, Himg, Wimg] with
        a mask of the size of the image for each instance
    """
    H, W = image_size_hw
    boxes_xyxy_abs = boxes.tensor.clone()
    boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    N = len(boxes_xywh_abs)
    masks = torch.zeros((N, H, W), dtype=torch.bool, device=boxes.tensor.device)
    for i in range(len(boxes_xywh_abs)):
        box_xywh = make_int_box(boxes_xywh_abs[i])
        labels_i = resample_fine_and_coarse_segm_to_bbox(predictor_output[i], box_xywh)
        x, y, w, h = box_xywh
        masks[i, y : y + h, x : x + w] = labels_i > 0
    return BitMasks(masks)