# Copyright (c) Facebook, Inc. and its affiliates.

from typing import Any
import torch
from torch.nn import functional as F

from detectron2.structures import BitMasks, Boxes, BoxMode

from .base import IntTupleBox, make_int_box
from .to_mask import ImageSizeType


def resample_coarse_segm_tensor_to_bbox(coarse_segm: torch.Tensor, box_xywh_abs: IntTupleBox):
"""
Resample coarse segmentation tensor to the given
bounding box and derive labels for each pixel of the bounding box
Args:
coarse_segm: float tensor of shape [1, K, Hout, Wout]
box_xywh_abs (tuple of 4 int): bounding box given by its upper-left
corner coordinates, width (W) and height (H)
Return:
Labels for each pixel of the bounding box, a long tensor of size [1, H, W]
"""
x, y, w, h = box_xywh_abs
w = max(int(w), 1)
h = max(int(h), 1)
labels = F.interpolate(coarse_segm, (h, w), mode="bilinear", align_corners=False).argmax(dim=1)
return labels
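

# Illustrative sketch (not part of the original DensePose module): a minimal check of the
# coarse resampling above with a synthetic [1, K, Hout, Wout] score tensor. The `_demo_*`
# name, the channel count K = 2 and the box values are assumptions made for illustration.
def _demo_resample_coarse_segm_to_bbox():
    coarse_segm = torch.rand(1, 2, 32, 32)  # K = 2 coarse channels (e.g. background / foreground)
    labels = resample_coarse_segm_tensor_to_bbox(coarse_segm, (10, 20, 3, 2))  # x, y, w, h
    assert labels.shape == (1, 2, 3)  # [1, H, W] with H = 2, W = 3
    return labels
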
def resample_fine_and_coarse_segm_tensors_to_bbox(
fine_segm: torch.Tensor, coarse_segm: torch.Tensor, box_xywh_abs: IntTupleBox
):
"""
Resample fine and coarse segmentation tensors to the given
bounding box and derive labels for each pixel of the bounding box
Args:
fine_segm: float tensor of shape [1, C, Hout, Wout]
coarse_segm: float tensor of shape [1, K, Hout, Wout]
box_xywh_abs (tuple of 4 int): bounding box given by its upper-left
corner coordinates, width (W) and height (H)
Return:
Labels for each pixel of the bounding box, a long tensor of size [1, H, W]
"""
x, y, w, h = box_xywh_abs
w = max(int(w), 1)
h = max(int(h), 1)
# coarse segmentation
coarse_segm_bbox = F.interpolate(
coarse_segm,
(h, w),
mode="bilinear",
align_corners=False,
).argmax(dim=1)
    # combined coarse and fine segmentation: keep fine segmentation labels only
    # where the coarse segmentation predicts foreground (coarse label > 0)
    labels = (
        F.interpolate(fine_segm, (h, w), mode="bilinear", align_corners=False).argmax(dim=1)
        * (coarse_segm_bbox > 0).long()
    )
return labels


def resample_fine_and_coarse_segm_to_bbox(predictor_output: Any, box_xywh_abs: IntTupleBox):
"""
Resample fine and coarse segmentation outputs from a predictor to the given
bounding box and derive labels for each pixel of the bounding box
Args:
predictor_output: DensePose predictor output that contains segmentation
results to be resampled
box_xywh_abs (tuple of 4 int): bounding box given by its upper-left
corner coordinates, width (W) and height (H)
Return:
Labels for each pixel of the bounding box, a long tensor of size [1, H, W]
"""
return resample_fine_and_coarse_segm_tensors_to_bbox(
predictor_output.fine_segm,
predictor_output.coarse_segm,
box_xywh_abs,
)
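

# Illustrative sketch (not part of the original DensePose module): this converter only
# needs `fine_segm` and `coarse_segm` attributes on the predictor output, so a
# SimpleNamespace with synthetic tensors is enough for a quick check. The channel counts
# (C = 25, K = 2), shapes and box values below are assumptions made for illustration.
def _demo_resample_fine_and_coarse_segm_to_bbox():
    from types import SimpleNamespace

    output = SimpleNamespace(
        fine_segm=torch.rand(1, 25, 32, 32),   # C = 25 fine segmentation channels
        coarse_segm=torch.rand(1, 2, 32, 32),  # K = 2 coarse segmentation channels
    )
    labels = resample_fine_and_coarse_segm_to_bbox(output, (0, 0, 4, 5))  # x, y, w, h
    assert labels.shape == (1, 5, 4)  # [1, H, W]; label 0 marks background pixels
    return labels
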
def predictor_output_with_coarse_segm_to_mask(
predictor_output: Any, boxes: Boxes, image_size_hw: ImageSizeType
) -> BitMasks:
"""
    Convert predictor output with coarse segmentation to a mask.
Assumes that predictor output has the following attributes:
- coarse_segm (tensor of size [N, D, H, W]): coarse segmentation
unnormalized scores for N instances; D is the number of coarse
         segmentation labels, H and W are the resolution of the estimate
Args:
predictor_output: DensePose predictor output to be converted to mask
boxes (Boxes): bounding boxes that correspond to the DensePose
predictor outputs
image_size_hw (tuple [int, int]): image height Himg and width Wimg
Return:
BitMasks that contain a bool tensor of size [N, Himg, Wimg] with
a mask of the size of the image for each instance
"""
H, W = image_size_hw
boxes_xyxy_abs = boxes.tensor.clone()
boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
N = len(boxes_xywh_abs)
masks = torch.zeros((N, H, W), dtype=torch.bool, device=boxes.tensor.device)
    for i in range(len(boxes_xywh_abs)):
        box_xywh = make_int_box(boxes_xywh_abs[i])
        box_mask = resample_coarse_segm_tensor_to_bbox(predictor_output[i].coarse_segm, box_xywh)
        x, y, w, h = box_xywh
        # paste per-box labels into the image-sized mask; nonzero labels become foreground (True)
        masks[i, y : y + h, x : x + w] = box_mask
return BitMasks(masks)


def predictor_output_with_fine_and_coarse_segm_to_mask(
predictor_output: Any, boxes: Boxes, image_size_hw: ImageSizeType
) -> BitMasks:
"""
Convert predictor output with coarse and fine segmentation to a mask.
Assumes that predictor output has the following attributes:
- coarse_segm (tensor of size [N, D, H, W]): coarse segmentation
unnormalized scores for N instances; D is the number of coarse
         segmentation labels, H and W are the resolution of the estimate
- fine_segm (tensor of size [N, C, H, W]): fine segmentation
unnormalized scores for N instances; C is the number of fine
         segmentation labels, H and W are the resolution of the estimate
Args:
predictor_output: DensePose predictor output to be converted to mask
boxes (Boxes): bounding boxes that correspond to the DensePose
predictor outputs
image_size_hw (tuple [int, int]): image height Himg and width Wimg
Return:
BitMasks that contain a bool tensor of size [N, Himg, Wimg] with
a mask of the size of the image for each instance
"""
H, W = image_size_hw
boxes_xyxy_abs = boxes.tensor.clone()
boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
N = len(boxes_xywh_abs)
masks = torch.zeros((N, H, W), dtype=torch.bool, device=boxes.tensor.device)
    for i in range(len(boxes_xywh_abs)):
        box_xywh = make_int_box(boxes_xywh_abs[i])
        labels_i = resample_fine_and_coarse_segm_to_bbox(predictor_output[i], box_xywh)
        x, y, w, h = box_xywh
        # pixels with a nonzero fine segmentation label are foreground for the i-th instance
        masks[i, y : y + h, x : x + w] = labels_i > 0
return BitMasks(masks)
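

# Illustrative sketch (not part of the original DensePose module): the mask converters
# above only require that `predictor_output` supports indexing and exposes per-instance
# `coarse_segm` / `fine_segm` tensors of shape [1, K, Hout, Wout] / [1, C, Hout, Wout],
# so a list of SimpleNamespace objects with random scores stands in for a real predictor
# output. All names, shapes and box coordinates below are assumptions for illustration.
def _demo_predictor_outputs_to_masks():
    from types import SimpleNamespace

    outputs = [
        SimpleNamespace(
            coarse_segm=torch.rand(1, 2, 32, 32),
            fine_segm=torch.rand(1, 25, 32, 32),
        )
        for _ in range(2)
    ]
    # two boxes in XYXY_ABS format on a 100 x 120 (H x W) image
    boxes = Boxes(torch.tensor([[10.0, 10.0, 50.0, 60.0], [0.0, 0.0, 30.0, 20.0]]))
    coarse_masks = predictor_output_with_coarse_segm_to_mask(outputs, boxes, (100, 120))
    fine_masks = predictor_output_with_fine_and_coarse_segm_to_mask(outputs, boxes, (100, 120))
    assert coarse_masks.tensor.shape == fine_masks.tensor.shape == (2, 100, 120)
    return coarse_masks, fine_masks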