# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional, Tuple
import cv2
import numpy as np
from mmpose.registry import KEYPOINT_CODECS
from .base import BaseKeypointCodec
from .utils import (generate_offset_heatmap, generate_udp_gaussian_heatmaps,
get_heatmap_maximum, refine_keypoints_dark_udp)
class UDPHeatmap(BaseKeypointCodec):
    r"""Generate keypoint heatmaps by Unbiased Data Processing (UDP).

    See the paper: `The Devil is in the Details: Delving into Unbiased Data
    Processing for Human Pose Estimation`_ by Huang et al (2020) for details.

    Note:

        - instance number: N
        - keypoint number: K
        - keypoint dimension: D
        - image size: [w, h]
        - heatmap size: [W, H]

    Encoded:

        - heatmap (np.ndarray): The generated heatmap in shape (C_out, H, W)
            where [W, H] is the `heatmap_size`, and the C_out is the output
            channel number which depends on the `heatmap_type`. If
            `heatmap_type=='gaussian'`, C_out equals to keypoint number K;
            if `heatmap_type=='combined'`, C_out equals to K*3
            (x_offset, y_offset and class label)
        - keypoint_weights (np.ndarray): The target weights in shape (K,)

    Args:
        input_size (tuple): Image size in [w, h]
        heatmap_size (tuple): Heatmap size in [W, H]
        heatmap_type (str): The heatmap type to encode the keypoints. Options
            are:

            - ``'gaussian'``: Gaussian heatmap
            - ``'combined'``: Combination of a binary label map and offset
                maps for X and Y axes.

        sigma (float): The sigma value of the Gaussian heatmap when
            ``heatmap_type=='gaussian'``. Defaults to 2.0
        radius_factor (float): The radius factor of the binary label
            map when ``heatmap_type=='combined'``. The positive region is
            defined as the neighbor of the keypoint with the radius
            :math:`r=radius_factor*max(W, H)`. Defaults to 0.0546875
        blur_kernel_size (int): The Gaussian blur kernel size of the heatmap
            modulation in DarkPose. Defaults to 11

    Raises:
        ValueError: If ``heatmap_type`` is neither ``'gaussian'`` nor
            ``'combined'``.

    .. _`The Devil is in the Details: Delving into Unbiased Data Processing for
        Human Pose Estimation`: https://arxiv.org/abs/1911.07524
    """

    def __init__(self,
                 input_size: Tuple[int, int],
                 heatmap_size: Tuple[int, int],
                 heatmap_type: str = 'gaussian',
                 sigma: float = 2.,
                 radius_factor: float = 0.0546875,
                 blur_kernel_size: int = 11) -> None:
        super().__init__()
        self.input_size = input_size
        self.heatmap_size = heatmap_size
        self.sigma = sigma
        self.radius_factor = radius_factor
        self.heatmap_type = heatmap_type
        self.blur_kernel_size = blur_kernel_size

        # Per-axis ratio between the last pixel index of the input image and
        # the last pixel index of the heatmap; `encode` divides input-space
        # keypoints by it to obtain heatmap-space coordinates.
        self.scale_factor = ((np.array(input_size) - 1) /
                             (np.array(heatmap_size) - 1)).astype(np.float32)

        if self.heatmap_type not in {'gaussian', 'combined'}:
            raise self._invalid_heatmap_type_error()

    def _invalid_heatmap_type_error(self) -> ValueError:
        """Build the error reported for an unsupported ``heatmap_type``."""
        return ValueError(
            f'{self.__class__.__name__} got invalid `heatmap_type` value '
            f'{self.heatmap_type}. Should be one of '
            '{"gaussian", "combined"}')

    def encode(self,
               keypoints: np.ndarray,
               keypoints_visible: Optional[np.ndarray] = None) -> dict:
        """Encode keypoints into heatmaps. Note that the original keypoint
        coordinates should be in the input image space.

        Args:
            keypoints (np.ndarray): Keypoint coordinates in shape (N, K, D)
            keypoints_visible (np.ndarray): Keypoint visibilities in shape
                (N, K). If ``None``, every keypoint is treated as visible.

        Returns:
            dict:
            - heatmap (np.ndarray): The generated heatmap in shape
                (C_out, H, W) where [W, H] is the `heatmap_size`, and the
                C_out is the output channel number which depends on the
                `heatmap_type`. If `heatmap_type=='gaussian'`, C_out equals to
                keypoint number K; if `heatmap_type=='combined'`, C_out
                equals to K*3 (x_offset, y_offset and class label)
            - keypoint_weights (np.ndarray): The target weights in shape
                (K,)

        Raises:
            AssertionError: If more than one instance is passed.
            ValueError: If ``self.heatmap_type`` is not supported.
        """
        assert keypoints.shape[0] == 1, (
            f'{self.__class__.__name__} only support single-instance '
            'keypoint encoding')

        if keypoints_visible is None:
            keypoints_visible = np.ones(keypoints.shape[:2], dtype=np.float32)

        # NOTE(review): in the reviewed text both helper calls were cut off
        # after the `keypoints=` argument. The remaining keyword arguments
        # below are restored from the helpers' expected signatures in
        # `.utils` - confirm against that module.
        if self.heatmap_type == 'gaussian':
            heatmaps, keypoint_weights = generate_udp_gaussian_heatmaps(
                heatmap_size=self.heatmap_size,
                keypoints=keypoints / self.scale_factor,
                keypoints_visible=keypoints_visible,
                sigma=self.sigma)
        elif self.heatmap_type == 'combined':
            heatmaps, keypoint_weights = generate_offset_heatmap(
                heatmap_size=self.heatmap_size,
                keypoints=keypoints / self.scale_factor,
                keypoints_visible=keypoints_visible,
                radius_factor=self.radius_factor)
        else:
            # Only reachable if `heatmap_type` was changed after construction.
            raise self._invalid_heatmap_type_error()

        encoded = dict(heatmaps=heatmaps, keypoint_weights=keypoint_weights)

        return encoded

    def decode(self, encoded: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Decode keypoint coordinates from heatmaps. The decoded keypoint
        coordinates are in the input image space.

        Args:
            encoded (np.ndarray): Heatmaps in shape (K, H, W)

        Returns:
            tuple:
            - keypoints (np.ndarray): Decoded keypoint coordinates in shape
                (N, K, D)
            - scores (np.ndarray): The keypoint scores in shape (N, K). It
                usually represents the confidence of the keypoint prediction

        Raises:
            ValueError: If ``self.heatmap_type`` is not supported.
        """
        # Work on a copy: the 'combined' branch blurs the maps in place.
        heatmaps = encoded.copy()

        if self.heatmap_type == 'gaussian':
            keypoints, scores = get_heatmap_maximum(heatmaps)
            # unsqueeze the instance dimension for single-instance results
            keypoints = keypoints[None]
            scores = scores[None]

            keypoints = refine_keypoints_dark_udp(
                keypoints, heatmaps, blur_kernel_size=self.blur_kernel_size)

        elif self.heatmap_type == 'combined':
            _K, H, W = heatmaps.shape
            # Channels come in triples; every third channel starting at 0 is
            # used as the classification map, 1 and 2 as the x/y offsets.
            K = _K // 3

            # Apply Gaussian blur on classification maps (in place). The
            # kernel size does not depend on the map, so compute it once.
            ks = 2 * self.blur_kernel_size + 1
            for cls_heatmap in heatmaps[::3]:
                cv2.GaussianBlur(cls_heatmap, (ks, ks), 0, cls_heatmap)

            # valid radius
            radius = self.radius_factor * max(W, H)

            x_offset = heatmaps[1::3].flatten() * radius
            y_offset = heatmaps[2::3].flatten() * radius
            keypoints, scores = get_heatmap_maximum(heatmaps=heatmaps[::3])

            # Flat index of each keypoint's peak inside the flattened
            # (K, H, W) offset arrays: x + y * W within its own map, plus
            # W * H per preceding map.
            index = (keypoints[..., 0] + keypoints[..., 1] * W).flatten()
            index += W * H * np.arange(0, K)
            index = index.astype(int)
            keypoints += np.stack((x_offset[index], y_offset[index]), axis=-1)
            # unsqueeze the instance dimension for single-instance results
            keypoints = keypoints[None].astype(np.float32)
            scores = scores[None]

        else:
            # Without this branch an unsupported type would surface below as
            # an UnboundLocalError on `keypoints`.
            raise self._invalid_heatmap_type_error()

        # NOTE(review): decoding rescales by `input_size`, whereas encoding
        # uses `(input_size - 1)` via `scale_factor`. Kept as in the original
        # - confirm this asymmetry is what the downstream transform expects.
        W, H = self.heatmap_size
        keypoints = keypoints / [W - 1, H - 1] * self.input_size

        return keypoints, scores