Spaces:
Runtime error
Runtime error
File size: 7,726 Bytes
cc0dd3c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 |
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional, Tuple
import cv2
import numpy as np
from mmpose.registry import KEYPOINT_CODECS
from .base import BaseKeypointCodec
from .utils import (generate_offset_heatmap, generate_udp_gaussian_heatmaps,
get_heatmap_maximum, refine_keypoints_dark_udp)
@KEYPOINT_CODECS.register_module()
class UDPHeatmap(BaseKeypointCodec):
    r"""Generate keypoint heatmaps by Unbiased Data Processing (UDP).

    See the paper: `The Devil is in the Details: Delving into Unbiased Data
    Processing for Human Pose Estimation`_ by Huang et al (2020) for details.

    Note:

        - instance number: N
        - keypoint number: K
        - keypoint dimension: D
        - image size: [w, h]
        - heatmap size: [W, H]

    Encoded:

        - heatmaps (np.ndarray): The generated heatmaps in shape
            (C_out, H, W) where [W, H] is the `heatmap_size`, and the C_out
            is the output channel number which depends on the
            `heatmap_type`. If `heatmap_type=='gaussian'`, C_out equals to
            keypoint number K; if `heatmap_type=='combined'`, C_out equals
            to K*3 (for each keypoint: class label, x_offset and y_offset,
            which is the channel order :meth:`decode` reads)
        - keypoint_weights (np.ndarray): The target weights in shape (K,)

    Args:
        input_size (tuple): Image size in [w, h]
        heatmap_size (tuple): Heatmap size in [W, H]
        heatmap_type (str): The heatmap type to encode the keypoints.
            Options are:

            - ``'gaussian'``: Gaussian heatmap
            - ``'combined'``: Combination of a binary label map and offset
                maps for X and Y axes.

        sigma (float): The sigma value of the Gaussian heatmap when
            ``heatmap_type=='gaussian'``. Defaults to 2.0
        radius_factor (float): The radius factor of the binary label
            map when ``heatmap_type=='combined'``. The positive region is
            defined as the neighbor of the keypoint with the radius
            :math:`r=radius_factor*max(W, H)`. Defaults to 0.0546875
        blur_kernel_size (int): The Gaussian blur kernel size of the heatmap
            modulation in DarkPose. Defaults to 11

    Raises:
        ValueError: If ``heatmap_type`` is neither ``'gaussian'`` nor
            ``'combined'``.

    .. _`The Devil is in the Details: Delving into Unbiased Data Processing for
    Human Pose Estimation`: https://arxiv.org/abs/1911.07524
    """

    def __init__(self,
                 input_size: Tuple[int, int],
                 heatmap_size: Tuple[int, int],
                 heatmap_type: str = 'gaussian',
                 sigma: float = 2.,
                 radius_factor: float = 0.0546875,
                 blur_kernel_size: int = 11) -> None:
        super().__init__()
        self.input_size = input_size
        self.heatmap_size = heatmap_size
        self.sigma = sigma
        self.radius_factor = radius_factor
        self.heatmap_type = heatmap_type
        self.blur_kernel_size = blur_kernel_size
        # Per-axis ratio between the last pixel index of the input image and
        # the last pixel index of the heatmap; `encode` divides by it.
        self.scale_factor = ((np.array(input_size) - 1) /
                             (np.array(heatmap_size) - 1)).astype(np.float32)

        if self.heatmap_type not in {'gaussian', 'combined'}:
            raise self._invalid_heatmap_type_error()

    def _invalid_heatmap_type_error(self) -> ValueError:
        """Build the error raised for an unsupported ``heatmap_type``."""
        return ValueError(
            f'{self.__class__.__name__} got invalid `heatmap_type` value '
            f'{self.heatmap_type}. Should be one of '
            '{"gaussian", "combined"}')

    def encode(self,
               keypoints: np.ndarray,
               keypoints_visible: Optional[np.ndarray] = None) -> dict:
        """Encode keypoints into heatmaps. Note that the original keypoint
        coordinates should be in the input image space.

        Args:
            keypoints (np.ndarray): Keypoint coordinates in shape (N, K, D).
                Only single-instance input (N == 1) is supported.
            keypoints_visible (np.ndarray): Keypoint visibilities in shape
                (N, K). If ``None``, every keypoint is treated as visible.

        Returns:
            dict:
            - heatmaps (np.ndarray): The generated heatmaps in shape
                (C_out, H, W) where [W, H] is the `heatmap_size`, and the
                C_out is the output channel number which depends on the
                `heatmap_type`. If `heatmap_type=='gaussian'`, C_out equals
                to keypoint number K; if `heatmap_type=='combined'`, C_out
                equals to K*3
            - keypoint_weights (np.ndarray): The target weights in shape
                (K,)

        Raises:
            AssertionError: If more than one instance is given.
            ValueError: If ``self.heatmap_type`` is not a supported value.
        """
        assert keypoints.shape[0] == 1, (
            f'{self.__class__.__name__} only support single-instance '
            'keypoint encoding')

        if keypoints_visible is None:
            keypoints_visible = np.ones(keypoints.shape[:2], dtype=np.float32)

        if self.heatmap_type == 'gaussian':
            heatmaps, keypoint_weights = generate_udp_gaussian_heatmaps(
                heatmap_size=self.heatmap_size,
                keypoints=keypoints / self.scale_factor,
                keypoints_visible=keypoints_visible,
                sigma=self.sigma)
        elif self.heatmap_type == 'combined':
            heatmaps, keypoint_weights = generate_offset_heatmap(
                heatmap_size=self.heatmap_size,
                keypoints=keypoints / self.scale_factor,
                keypoints_visible=keypoints_visible,
                radius_factor=self.radius_factor)
        else:
            # Only reachable if `heatmap_type` was changed after __init__.
            raise self._invalid_heatmap_type_error()

        encoded = dict(heatmaps=heatmaps, keypoint_weights=keypoint_weights)

        return encoded

    def decode(self, encoded: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Decode keypoint coordinates from heatmaps. The decoded keypoint
        coordinates are in the input image space.

        Args:
            encoded (np.ndarray): Heatmaps in shape (C_out, H, W), where
                C_out is K for ``heatmap_type=='gaussian'`` and K*3 for
                ``heatmap_type=='combined'``. The input is copied, so it is
                not modified.

        Returns:
            tuple:
            - keypoints (np.ndarray): Decoded keypoint coordinates in shape
                (N, K, D)
            - scores (np.ndarray): The keypoint scores in shape (N, K). It
                usually represents the confidence of the keypoint prediction

        Raises:
            ValueError: If ``self.heatmap_type`` is not a supported value.
        """
        heatmaps = encoded.copy()

        if self.heatmap_type == 'gaussian':
            keypoints, scores = get_heatmap_maximum(heatmaps)
            # unsqueeze the instance dimension for single-instance results
            keypoints = keypoints[None]
            scores = scores[None]

            keypoints = refine_keypoints_dark_udp(
                keypoints, heatmaps, blur_kernel_size=self.blur_kernel_size)

        elif self.heatmap_type == 'combined':
            _K, H, W = heatmaps.shape
            K = _K // 3

            # Apply Gaussian blur on classification maps (every third
            # channel, starting at 0). The blur is written back in place
            # into the copied heatmaps.
            ks = 2 * self.blur_kernel_size + 1
            for cls_heatmap in heatmaps[::3]:
                cv2.GaussianBlur(cls_heatmap, (ks, ks), 0, cls_heatmap)

            # valid radius
            radius = self.radius_factor * max(W, H)

            # Offset maps are stored relative to the radius; rescale them and
            # flatten all K maps into one vector of length K * H * W.
            x_offset = heatmaps[1::3].flatten() * radius
            y_offset = heatmaps[2::3].flatten() * radius
            keypoints, scores = get_heatmap_maximum(heatmaps=heatmaps[::3])
            # Flat position (x + y * W) of each peak inside its own map ...
            index = (keypoints[..., 0] + keypoints[..., 1] * W).flatten()
            # ... shifted by the start of that keypoint's map in the
            # flattened offset vectors.
            index += W * H * np.arange(0, K)
            index = index.astype(int)
            keypoints += np.stack((x_offset[index], y_offset[index]), axis=-1)
            # unsqueeze the instance dimension for single-instance results
            keypoints = keypoints[None].astype(np.float32)
            scores = scores[None]

        else:
            # Mirrors `encode`: only reachable if `heatmap_type` was changed
            # after __init__. Without this branch the code below would fail
            # with an unhelpful UnboundLocalError.
            raise self._invalid_heatmap_type_error()

        # Map heatmap coordinates back to the input image space.
        # NOTE(review): this multiplies by input_size / (heatmap_size - 1),
        # while `encode` divides by (input_size - 1) / (heatmap_size - 1), so
        # the two are not exact inverses. Presumably intentional to match the
        # downstream coordinate convention; confirm before changing.
        W, H = self.heatmap_size
        keypoints = keypoints / [W - 1, H - 1] * self.input_size

        return keypoints, scores
|