Spaces:
Build error
Build error
# Copyright (c) OpenMMLab. All rights reserved. | |
import numpy as np | |
import torch.nn as nn | |
from mmcv.cnn import normal_init | |
from mmpose.core.evaluation import (keypoint_pck_accuracy, | |
keypoints_from_regression) | |
from mmpose.core.post_processing import fliplr_regression | |
from mmpose.models.builder import HEADS, build_loss | |
class DeepposeRegressionHead(nn.Module):
    """Deeppose regression head with a single fully connected layer.

    "DeepPose: Human Pose Estimation via Deep Neural Networks".

    Args:
        in_channels (int): Number of input channels.
        num_joints (int): Number of joints.
        loss_keypoint (dict): Config for keypoint loss; it is handed to
            ``build_loss``. Default: None.
        train_cfg (dict | None): Training config, stored as an empty dict
            when None. Default: None.
        test_cfg (dict | None): Testing config, stored as an empty dict
            when None. Default: None.
    """

    def __init__(self,
                 in_channels,
                 num_joints,
                 loss_keypoint=None,
                 train_cfg=None,
                 test_cfg=None):
        super().__init__()

        self.in_channels = in_channels
        self.num_joints = num_joints

        self.loss = build_loss(loss_keypoint)

        self.train_cfg = {} if train_cfg is None else train_cfg
        self.test_cfg = {} if test_cfg is None else test_cfg

        # One (x, y) pair is regressed per joint.
        self.fc = nn.Linear(self.in_channels, self.num_joints * 2)

    def forward(self, x):
        """Forward function.

        Args:
            x (torch.Tensor[N, in_channels]): Input features.

        Returns:
            torch.Tensor[N, K, 2]: Regressed keypoints, where K is half the
            width of the fully connected layer's output.
        """
        output = self.fc(x)
        # The unpacking requires the linear output to be exactly 2-D.
        N, C = output.shape
        return output.reshape([N, C // 2, 2])

    def get_loss(self, output, target, target_weight):
        """Calculate top-down keypoint loss.

        Note:
            - batch_size: N
            - num_keypoints: K

        Args:
            output (torch.Tensor[N, K, 2]): Output keypoints.
            target (torch.Tensor[N, K, 2]): Target keypoints.
            target_weight (torch.Tensor[N, K, 2]):
                Weights across different joint types.

        Returns:
            dict: The loss value stored under the key ``'reg_loss'``.
        """
        losses = dict()
        assert not isinstance(self.loss, nn.Sequential)
        assert target.dim() == 3 and target_weight.dim() == 3
        losses['reg_loss'] = self.loss(output, target, target_weight)
        return losses

    def get_accuracy(self, output, target, target_weight):
        """Calculate accuracy for top-down keypoint loss.

        Note:
            - batch_size: N
            - num_keypoints: K

        Args:
            output (torch.Tensor[N, K, 2]): Output keypoints.
            target (torch.Tensor[N, K, 2]): Target keypoints.
            target_weight (torch.Tensor[N, K, 2]):
                Weights across different joint types.

        Returns:
            dict: The average accuracy stored under the key ``'acc_pose'``.
        """
        accuracy = dict()
        N = output.shape[0]

        # A joint takes part in the metric when the first channel of its
        # weight is positive. The normalization factor is all ones, so the
        # 0.05 threshold applies directly to the coordinate values
        # (presumably already normalized -- confirm against the dataset
        # pipeline).
        _, avg_acc, _ = keypoint_pck_accuracy(
            output.detach().cpu().numpy(),
            target.detach().cpu().numpy(),
            target_weight[:, :, 0].detach().cpu().numpy() > 0,
            thr=0.05,
            normalize=np.ones((N, 2), dtype=np.float32))
        accuracy['acc_pose'] = avg_acc
        return accuracy

    def inference_model(self, x, flip_pairs=None):
        """Inference function.

        Args:
            x (torch.Tensor[N, in_channels]): Input features.
            flip_pairs (None | list[tuple]):
                Pairs of keypoints which are mirrored. When given, the
                regression output is passed through ``fliplr_regression``.

        Returns:
            output_regression (np.ndarray): Output regression.
        """
        output = self.forward(x)

        if flip_pairs is not None:
            output_regression = fliplr_regression(
                output.detach().cpu().numpy(), flip_pairs)
        else:
            output_regression = output.detach().cpu().numpy()
        return output_regression

    @staticmethod
    def _collect_bbox_meta(img_metas):
        """Gather per-sample bbox metadata from ``img_metas``.

        Args:
            img_metas (list(dict)): One dict per sample with the keys
                "center", "scale" and "image_file", and optionally
                "bbox_score" and "bbox_id".

        Returns:
            tuple: ``(centers, scales, image_paths, scores, bbox_ids)``
            where ``centers`` and ``scales`` are float32 arrays of shape
            [N, 2], ``image_paths`` is a list, ``scores`` is an array of
            shape [N] that defaults to 1 for samples without "bbox_score",
            and ``bbox_ids`` is a list, or None when the first sample has
            no "bbox_id".
        """
        batch_size = len(img_metas)

        # Whether ids are collected is decided by the first sample only.
        bbox_ids = [] if 'bbox_id' in img_metas[0] else None

        centers = np.zeros((batch_size, 2), dtype=np.float32)
        scales = np.zeros((batch_size, 2), dtype=np.float32)
        image_paths = []
        scores = np.ones(batch_size)

        for i, meta in enumerate(img_metas):
            centers[i, :] = meta['center']
            scales[i, :] = meta['scale']
            image_paths.append(meta['image_file'])

            if 'bbox_score' in meta:
                # Extract the scalar explicitly: assigning a size-1 array
                # with ndim > 0 into a single element is deprecated since
                # NumPy 1.25. ``item()`` still raises ValueError when the
                # score does not hold exactly one element.
                scores[i] = np.asarray(meta['bbox_score']).item()
            if bbox_ids is not None:
                bbox_ids.append(meta['bbox_id'])

        return centers, scales, image_paths, scores, bbox_ids

    def decode(self, img_metas, output, **kwargs):
        """Decode the keypoints from output regression.

        Args:
            img_metas (list(dict)): Information about data augmentation
                By default this includes:

                - "image_file": path to the image file
                - "center": center of the bbox
                - "scale": scale of the bbox
                - "rotation": rotation of the bbox
                - "bbox_score": score of bbox
            output (np.ndarray[N, K, 2]): predicted regression vector.
            kwargs: dict contains 'img_size'.
                img_size (tuple(img_width, img_height)): input image size.

        Returns:
            dict: Decoded results with the keys

            - "preds" (np.ndarray[N, K, 3]): keypoint x, y and the value
              returned as ``maxvals`` by ``keypoints_from_regression``.
            - "boxes" (np.ndarray[N, 6]): center (2), scale (2), the
              product of ``scale * 200`` and the bbox score.
            - "image_paths" (list): image file of every sample.
            - "bbox_ids" (list | None): bbox ids, or None when the first
              sample carries no "bbox_id".
        """
        batch_size = len(img_metas)
        c, s, image_paths, score, bbox_ids = self._collect_bbox_meta(
            img_metas)

        preds, maxvals = keypoints_from_regression(output, c, s,
                                                   kwargs['img_size'])

        all_preds = np.zeros((batch_size, preds.shape[1], 3),
                             dtype=np.float32)
        all_boxes = np.zeros((batch_size, 6), dtype=np.float32)
        all_preds[:, :, 0:2] = preds[:, :, 0:2]
        all_preds[:, :, 2:3] = maxvals
        all_boxes[:, 0:2] = c[:, 0:2]
        all_boxes[:, 2:4] = s[:, 0:2]
        # NOTE(review): the factor 200 presumably converts the scale to
        # pixels so that the product is the bbox area -- confirm against
        # the dataset convention.
        all_boxes[:, 4] = np.prod(s * 200.0, axis=1)
        all_boxes[:, 5] = score

        result = {}
        result['preds'] = all_preds
        result['boxes'] = all_boxes
        result['image_paths'] = image_paths
        result['bbox_ids'] = bbox_ids

        return result

    def init_weights(self):
        """Initialize the fully connected layer.

        Calls ``normal_init`` with mean 0, std 0.01 and bias 0.
        """
        normal_init(self.fc, mean=0, std=0.01, bias=0)