Spaces:

geopavlakos
/

HaMeR

Build error

App Files Files Community

HaMeR / mmpose /models /heads /deeppose_regression_head.py

geopavlakos

Initial commit

d7a991a over 1 year ago

raw

history blame contribute delete

5.79 kB

	# Copyright (c) OpenMMLab. All rights reserved.
	import numpy as np
	import torch.nn as nn
	from mmcv.cnn import normal_init

	from mmpose.core.evaluation import (keypoint_pck_accuracy,
	keypoints_from_regression)
	from mmpose.core.post_processing import fliplr_regression
	from mmpose.models.builder import HEADS, build_loss


	@HEADS.register_module()
	class DeepposeRegressionHead(nn.Module):
	    """Deeppose regression head with a fully connected layer.

	    "DeepPose: Human Pose Estimation via Deep Neural Networks".

	    The head maps a flat feature vector to ``num_joints`` 2-D keypoint
	    coordinates through one ``nn.Linear`` layer.

	    Args:
	        in_channels (int): Number of input channels.
	        num_joints (int): Number of joints.
	        loss_keypoint (dict): Config for keypoint loss. It is forwarded
	            unchanged to ``build_loss``. Default: None.
	            NOTE(review): confirm that ``build_loss`` accepts ``None``.
	        train_cfg (dict | None): Training config; ``None`` is stored as an
	            empty dict. Default: None.
	        test_cfg (dict | None): Testing config; ``None`` is stored as an
	            empty dict. Default: None.
	    """

	    def __init__(self,
	                 in_channels,
	                 num_joints,
	                 loss_keypoint=None,
	                 train_cfg=None,
	                 test_cfg=None):
	        super().__init__()

	        self.in_channels = in_channels
	        self.num_joints = num_joints

	        self.loss = build_loss(loss_keypoint)

	        self.train_cfg = {} if train_cfg is None else train_cfg
	        self.test_cfg = {} if test_cfg is None else test_cfg

	        # Two outputs (one coordinate pair) per joint.
	        self.fc = nn.Linear(self.in_channels, self.num_joints * 2)

	    def forward(self, x):
	        """Forward function.

	        Args:
	            x (torch.Tensor[N, in_channels]): Input features.

	        Returns:
	            torch.Tensor[N, K, 2]: Regressed keypoints, where K is half the
	            width of the linear layer's output (i.e. ``num_joints``).
	        """
	        output = self.fc(x)
	        # Unpacking into exactly two names requires a 2-D output.
	        batch_size, num_outputs = output.shape
	        return output.reshape([batch_size, num_outputs // 2, 2])

	    def get_loss(self, output, target, target_weight):
	        """Calculate top-down keypoint loss.

	        Note:
	            - batch_size: N
	            - num_keypoints: K

	        Args:
	            output (torch.Tensor[N, K, 2]): Output keypoints.
	            target (torch.Tensor[N, K, 2]): Target keypoints.
	            target_weight (torch.Tensor[N, K, 2]):
	                Weights across different joint types.

	        Returns:
	            dict: Loss value stored under the key ``'reg_loss'``.
	        """
	        assert not isinstance(self.loss, nn.Sequential)
	        assert target.dim() == 3 and target_weight.dim() == 3

	        return {'reg_loss': self.loss(output, target, target_weight)}

	    def get_accuracy(self, output, target, target_weight):
	        """Calculate accuracy for top-down keypoint loss.

	        Note:
	            - batch_size: N
	            - num_keypoints: K

	        Args:
	            output (torch.Tensor[N, K, 2]): Output keypoints.
	            target (torch.Tensor[N, K, 2]): Target keypoints.
	            target_weight (torch.Tensor[N, K, 2]):
	                Weights across different joint types.

	        Returns:
	            dict: Average accuracy reported by ``keypoint_pck_accuracy``,
	            stored under the key ``'acc_pose'``.
	        """
	        num_samples = output.shape[0]

	        # Only the first weight channel is consulted; joints whose weight is
	        # positive form the boolean mask handed to the metric.
	        mask = target_weight[:, :, 0].detach().cpu().numpy() > 0

	        # The per-joint accuracies and the count returned by the metric are
	        # not used here.
	        _, avg_acc, _ = keypoint_pck_accuracy(
	            output.detach().cpu().numpy(),
	            target.detach().cpu().numpy(),
	            mask,
	            thr=0.05,
	            normalize=np.ones((num_samples, 2), dtype=np.float32))

	        return {'acc_pose': avg_acc}

	    def inference_model(self, x, flip_pairs=None):
	        """Inference function.

	        Args:
	            x (torch.Tensor[N, in_channels]): Input features.
	            flip_pairs (None | list[tuple]):
	                Pairs of keypoints which are mirrored. When given, the
	                prediction is passed through ``fliplr_regression``.

	        Returns:
	            output_regression (np.ndarray): Output regression.
	        """
	        output = self.forward(x).detach().cpu().numpy()

	        if flip_pairs is not None:
	            return fliplr_regression(output, flip_pairs)
	        return output

	    def decode(self, img_metas, output, **kwargs):
	        """Decode the keypoints from output regression.

	        Args:
	            img_metas (list(dict)): Information about data augmentation.
	                By default this includes:

	                - "image_file": path to the image file
	                - "center": center of the bbox
	                - "scale": scale of the bbox
	                - "rotation": rotation of the bbox
	                - "bbox_score": score of bbox
	            output (np.ndarray[N, K, 2]): predicted regression vector.
	            kwargs: dict contains 'img_size'.
	                img_size (tuple(img_width, img_height)): input image size.

	        Returns:
	            dict: Decoded results with the keys

	            - "preds" (np.ndarray[N, K, 3]): decoded keypoints in the first
	              two channels and ``maxvals`` in the third
	            - "boxes" (np.ndarray[N, 6]): center (2), scale (2), area and
	              score of each bbox
	            - "image_paths" (list): the "image_file" entry of each sample
	            - "bbox_ids" (list | None): the "bbox_id" entry of each sample,
	              or None when the first sample carries no "bbox_id"
	        """
	        batch_size = len(img_metas)

	        # Whether ids are collected at all is decided by the first sample.
	        bbox_ids = [] if 'bbox_id' in img_metas[0] else None

	        c = np.zeros((batch_size, 2), dtype=np.float32)
	        s = np.zeros((batch_size, 2), dtype=np.float32)
	        image_paths = []
	        # Samples without a "bbox_score" keep the default score of 1.
	        score = np.ones(batch_size)
	        for i, meta in enumerate(img_metas):
	            c[i, :] = meta['center']
	            s[i, :] = meta['scale']
	            image_paths.append(meta['image_file'])

	            if 'bbox_score' in meta:
	                # Extract a real scalar. Assigning a size-1 array to a single
	                # element relies on implicit array-to-scalar conversion,
	                # which NumPy deprecated in 1.25. ``item()`` still raises
	                # ValueError when the score does not hold exactly one value.
	                score[i] = np.asarray(meta['bbox_score']).item()
	            if bbox_ids is not None:
	                bbox_ids.append(meta['bbox_id'])

	        preds, maxvals = keypoints_from_regression(output, c, s,
	                                                   kwargs['img_size'])

	        all_preds = np.zeros((batch_size, preds.shape[1], 3), dtype=np.float32)
	        all_boxes = np.zeros((batch_size, 6), dtype=np.float32)
	        all_preds[:, :, 0:2] = preds[:, :, 0:2]
	        all_preds[:, :, 2:3] = maxvals
	        all_boxes[:, 0:2] = c[:, 0:2]
	        all_boxes[:, 2:4] = s[:, 0:2]
	        # NOTE(review): presumably the scale is expressed in units of 200
	        # pixels, so this product is the bbox area in pixels — confirm.
	        all_boxes[:, 4] = np.prod(s * 200.0, axis=1)
	        all_boxes[:, 5] = score

	        return {
	            'preds': all_preds,
	            'boxes': all_boxes,
	            'image_paths': image_paths,
	            'bbox_ids': bbox_ids,
	        }

	    def init_weights(self):
	        """Initialize the fully connected layer with ``normal_init``.

	        Uses mean 0, standard deviation 0.01 and bias 0.
	        """
	        normal_init(self.fc, mean=0, std=0.01, bias=0)