Spaces:
Runtime error
Runtime error
File size: 11,832 Bytes
cc0dd3c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 |
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional, Tuple
import numpy as np
from mmpose.codecs.utils import get_heatmap_maximum, get_simcc_maximum
def _calc_distances(preds: np.ndarray, gts: np.ndarray, mask: np.ndarray,
                    norm_factor: np.ndarray) -> np.ndarray:
    """Calculate the normalized distances between preds and target.

    Note:
        - instance number: N
        - keypoint number: K
        - keypoint dimension: D (normally, D=2 or D=3)

    Args:
        preds (np.ndarray[N, K, D]): Predicted keypoint location.
        gts (np.ndarray[N, K, D]): Groundtruth keypoint location.
        mask (np.ndarray[N, K]): Visibility of the target. False for invisible
            joints, and True for visible. Invisible joints will be ignored for
            accuracy calculation.
        norm_factor (np.ndarray[N, D]): Normalization factor.
            Typical value is heatmap_size. The array is left unmodified.

    Returns:
        np.ndarray[K, N]: The normalized distances.
            If target keypoints are missing, the distance is -1.
    """
    N, K, _ = preds.shape

    # Instances with a zero component in their normalization factor cannot
    # be scored, so all of their keypoints are treated as invisible.
    _mask = mask.copy()
    _mask[(norm_factor == 0).any(axis=1), :] = False

    # Replace non-positive factors on a private copy. Writing into the
    # caller's array would erase the zeros, so a later call with the same
    # array would stop masking those instances.
    safe_norm = norm_factor.copy()
    safe_norm[safe_norm <= 0] = 1e6

    # -1 marks keypoints that were not evaluated.
    distances = np.full((N, K), -1, dtype=np.float32)
    distances[_mask] = np.linalg.norm(
        ((preds - gts) / safe_norm[:, None, :])[_mask], axis=-1)
    return distances.T
def _distance_acc(distances: np.ndarray, thr: float = 0.5) -> float:
    """Compute the fraction of valid distances that fall below a threshold.

    Entries equal to -1 are treated as missing and are excluded.

    Note:
        - instance number: N

    Args:
        distances (np.ndarray[N, ]): The normalized distances.
        thr (float): Threshold of the distances.

    Returns:
        float: Fraction of valid distances strictly below ``thr``.
            If no distance is valid, -1 is returned.
    """
    is_valid = distances != -1
    n_valid = is_valid.sum()
    # Nothing to score: signal this with the -1 sentinel.
    if n_valid <= 0:
        return -1
    n_below = (distances[is_valid] < thr).sum()
    return n_below / n_valid
def keypoint_pck_accuracy(pred: np.ndarray, gt: np.ndarray, mask: np.ndarray,
                          thr: np.ndarray, norm_factor: np.ndarray) -> tuple:
    """Compute per-keypoint and mean PCK accuracy from keypoint coordinates.

    Note:
        PCK measures how accurately body joints are localized. The distance
        between each predicted position and its ground truth is typically
        normalized by the bounding box size, and a keypoint counts as
        correct when that normalized distance is below ``thr`` (commonly
        0.05, 0.1 or 0.2).

        - instance number: N
        - keypoint number: K

    Args:
        pred (np.ndarray[N, K, 2]): Predicted keypoint location.
        gt (np.ndarray[N, K, 2]): Groundtruth keypoint location.
        mask (np.ndarray[N, K]): Visibility of the target. False for invisible
            joints, and True for visible. Invisible joints will be ignored for
            accuracy calculation.
        thr (float): Threshold of PCK calculation.
        norm_factor (np.ndarray[N, 2]): Normalization factor for H&W.

    Returns:
        tuple: A tuple containing keypoint accuracy.

        - acc (np.ndarray[K]): Accuracy of each keypoint; -1 where a
          keypoint has no valid distance.
        - avg_acc (float): Averaged accuracy across all valid keypoints,
          or 0 when there is none.
        - cnt (int): Number of valid keypoints.
    """
    # One row of distances per keypoint, one column per instance.
    per_keypoint = _calc_distances(pred, gt, mask, norm_factor)

    scores = []
    for keypoint_distances in per_keypoint:
        scores.append(_distance_acc(keypoint_distances, thr))
    acc = np.array(scores)

    # Keypoints without any valid distance are reported as -1; leave them
    # out of the average.
    scored = acc[acc >= 0]
    cnt = len(scored)
    if cnt > 0:
        avg_acc = scored.mean()
    else:
        avg_acc = 0
    return acc, avg_acc, cnt
def keypoint_auc(pred: np.ndarray,
                 gt: np.ndarray,
                 mask: np.ndarray,
                 norm_factor: np.ndarray,
                 num_thrs: int = 20) -> float:
    """Compute the area under the PCK-accuracy-versus-threshold curve.

    The mean PCK accuracy is evaluated at ``num_thrs`` thresholds
    ``i / num_thrs`` for ``i = 0 .. num_thrs - 1``, and each value
    contributes with weight ``1 / num_thrs``.

    Note:
        - instance number: N
        - keypoint number: K

    Args:
        pred (np.ndarray[N, K, 2]): Predicted keypoint location.
        gt (np.ndarray[N, K, 2]): Groundtruth keypoint location.
        mask (np.ndarray[N, K]): Visibility of the target. False for invisible
            joints, and True for visible. Invisible joints will be ignored for
            accuracy calculation.
        norm_factor (float): Normalization factor, applied to both
            coordinates of every instance.
        num_thrs (int): number of thresholds to calculate auc.

    Returns:
        float: Area under curve (AUC) of keypoint PCK accuracy.
    """
    # Expand the single factor to one (factor, factor) row per instance.
    per_instance_norm = np.tile(
        np.array([[norm_factor, norm_factor]]), (pred.shape[0], 1))

    auc = 0
    for step in range(num_thrs):
        threshold = 1.0 * step / num_thrs
        _, mean_acc, _ = keypoint_pck_accuracy(pred, gt, mask, threshold,
                                               per_instance_norm)
        auc += 1.0 / num_thrs * mean_acc
    return auc
def keypoint_nme(pred: np.ndarray, gt: np.ndarray, mask: np.ndarray,
                 normalize_factor: np.ndarray) -> float:
    """Compute the normalized mean error (NME) over the valid keypoints.

    Note:
        - instance number: N
        - keypoint number: K

    Args:
        pred (np.ndarray[N, K, 2]): Predicted keypoint location.
        gt (np.ndarray[N, K, 2]): Groundtruth keypoint location.
        mask (np.ndarray[N, K]): Visibility of the target. False for invisible
            joints, and True for visible. Invisible joints will be ignored for
            accuracy calculation.
        normalize_factor (np.ndarray[N, 2]): Normalization factor.

    Returns:
        float: normalized mean error
    """
    all_distances = _calc_distances(pred, gt, mask, normalize_factor)
    # Entries equal to -1 mark keypoints that were not evaluated.
    kept = all_distances[all_distances != -1]
    # Dividing by at least 1 avoids a division by zero when nothing is kept.
    divisor = max(1, len(kept))
    return kept.sum() / divisor
def keypoint_epe(pred: np.ndarray, gt: np.ndarray, mask: np.ndarray) -> float:
    """Compute the average end-point error over the valid keypoints.

    The distances are computed with a normalization factor of one, so they
    stay in the units of the input coordinates.

    Note:
        - instance number: N
        - keypoint number: K

    Args:
        pred (np.ndarray[N, K, 2]): Predicted keypoint location.
        gt (np.ndarray[N, K, 2]): Groundtruth keypoint location.
        mask (np.ndarray[N, K]): Visibility of the target. False for invisible
            joints, and True for visible. Invisible joints will be ignored for
            accuracy calculation.

    Returns:
        float: Average end-point error.
    """
    num_instances, coord_dim = pred.shape[0], pred.shape[2]
    unit_norm = np.ones((num_instances, coord_dim), dtype=np.float32)

    all_distances = _calc_distances(pred, gt, mask, unit_norm)
    # Entries equal to -1 mark keypoints that were not evaluated.
    kept = all_distances[all_distances != -1]
    # Dividing by at least 1 avoids a division by zero when nothing is kept.
    divisor = max(1, len(kept))
    return kept.sum() / divisor
def pose_pck_accuracy(output: np.ndarray,
                      target: np.ndarray,
                      mask: np.ndarray,
                      thr: float = 0.05,
                      normalize: Optional[np.ndarray] = None) -> tuple:
    """Compute per-keypoint and mean PCK accuracy from heatmaps.

    Keypoint locations are taken from the maxima of the predicted and
    ground-truth heatmaps and then scored with ``keypoint_pck_accuracy``.

    Note:
        PCK measures how accurately body joints are localized. The distance
        between each predicted position and its ground truth is typically
        normalized by the bounding box size, and the threshold (thr) on the
        normalized distance is commonly 0.05, 0.1 or 0.2.

        - batch_size: N
        - num_keypoints: K
        - heatmap height: H
        - heatmap width: W

    Args:
        output (np.ndarray[N, K, H, W]): Model output heatmaps.
        target (np.ndarray[N, K, H, W]): Groundtruth heatmaps.
        mask (np.ndarray[N, K]): Visibility of the target. False for invisible
            joints, and True for visible. Invisible joints will be ignored for
            accuracy calculation.
        thr (float): Threshold of PCK calculation. Default 0.05.
        normalize (np.ndarray[N, 2]): Normalization factor for H&W.
            When omitted, ``[H, W]`` is used for every instance.

    Returns:
        tuple: A tuple containing keypoint accuracy.

        - np.ndarray[K]: Accuracy of each keypoint (``None`` when K is 0).
        - float: Averaged accuracy across all keypoints.
        - int: Number of valid keypoints.
    """
    batch_size, num_keypoints, height, width = output.shape

    # No keypoints means there is nothing to evaluate.
    if num_keypoints == 0:
        return None, 0, 0

    if normalize is None:
        heatmap_size = np.array([[height, width]])
        normalize = np.tile(heatmap_size, (batch_size, 1))

    pred, _ = get_heatmap_maximum(output)
    gt, _ = get_heatmap_maximum(target)
    return keypoint_pck_accuracy(pred, gt, mask, thr, normalize)
def simcc_pck_accuracy(output: Tuple[np.ndarray, np.ndarray],
                       target: Tuple[np.ndarray, np.ndarray],
                       simcc_split_ratio: float,
                       mask: np.ndarray,
                       thr: float = 0.05,
                       normalize: Optional[np.ndarray] = None) -> tuple:
    """Compute per-keypoint and mean PCK accuracy from SimCC predictions.

    Coordinates are decoded from the x/y SimCC vectors with
    ``get_simcc_maximum``, divided by ``simcc_split_ratio`` and then scored
    with ``keypoint_pck_accuracy``.

    Note:
        PCK measures how accurately body joints are localized. The distance
        between each predicted position and its ground truth is typically
        normalized by the bounding box size, and the threshold (thr) on the
        normalized distance is commonly 0.05, 0.1 or 0.2.

        - instance number: N
        - keypoint number: K

    Args:
        output (Tuple[np.ndarray, np.ndarray]): Model predicted SimCC.
        target (Tuple[np.ndarray, np.ndarray]): Groundtruth SimCC.
        simcc_split_ratio (float): Ratio by which the SimCC vector lengths
            and the decoded coordinates are divided.
        mask (np.ndarray[N, K]): Visibility of the target. False for invisible
            joints, and True for visible. Invisible joints will be ignored for
            accuracy calculation.
        thr (float): Threshold of PCK calculation. Default 0.05.
        normalize (np.ndarray[N, 2]): Normalization factor for H&W.
            When omitted, ``[H, W]`` derived from the SimCC vector lengths
            is used for every instance.

    Returns:
        tuple: A tuple containing keypoint accuracy.

        - np.ndarray[K]: Accuracy of each keypoint.
        - float: Averaged accuracy across all keypoints.
        - int: Number of valid keypoints.
    """
    pred_x, pred_y = output
    gt_x, gt_y = target

    num_instances, _, len_x = pred_x.shape
    _, _, len_y = pred_y.shape

    # Scale the SimCC vector lengths down by the split ratio.
    width = int(len_x / simcc_split_ratio)
    height = int(len_y / simcc_split_ratio)

    if normalize is None:
        size = np.array([[height, width]])
        normalize = np.tile(size, (num_instances, 1))

    # Decode coordinates and scale them down by the same ratio.
    pred_coords, _ = get_simcc_maximum(pred_x, pred_y)
    pred_coords /= simcc_split_ratio
    gt_coords, _ = get_simcc_maximum(gt_x, gt_y)
    gt_coords /= simcc_split_ratio

    return keypoint_pck_accuracy(pred_coords, gt_coords, mask, thr, normalize)
def multilabel_classification_accuracy(pred: np.ndarray,
                                       gt: np.ndarray,
                                       mask: np.ndarray,
                                       thr: float = 0.5) -> float:
    """Compute the multi-label classification accuracy.

    A sample is counted as correct only when every one of its labels falls
    on the same side of ``thr`` in both prediction and ground truth.

    Note:
        - batch size: N
        - label number: L

    Args:
        pred (np.ndarray[N, L, 2]): model predicted labels.
        gt (np.ndarray[N, L, 2]): ground-truth labels.
        mask (np.ndarray[N, 1] or np.ndarray[N, L] ): reliability of
            ground-truth labels.
        thr (float): Threshold for calculating accuracy.

    Returns:
        float: multi-label classification accuracy; 0.0 when no sample has
            reliable ground truth for all of its labels.
    """
    # Keep only samples whose ground truth is reliable for every label.
    reliable = mask > 0
    if mask.ndim == 2:
        reliable = reliable.min(axis=1)
    pred, gt = pred[reliable], gt[reliable]

    # With no usable sample the accuracy is defined as 0.
    if pred.shape[0] == 0:
        return 0.0

    # The product is positive exactly when prediction and ground truth lie
    # strictly on the same side of the threshold.
    same_side = ((pred - thr) * (gt - thr)) > 0
    return same_side.all(axis=1).mean()
|