IDM-VTON-demo

Paused

IDM-VTON-demo/densepose/modeling/losses/chart.py

IDM-VTON

update IDM-VTON Demo

938e515 7 months ago

14.2 kB

	# Copyright (c) Facebook, Inc. and its affiliates.

	from typing import Any, List
	import torch
	from torch.nn import functional as F

	from detectron2.config import CfgNode
	from detectron2.structures import Instances

	from .mask_or_segm import MaskOrSegmentationLoss
	from .registry import DENSEPOSE_LOSS_REGISTRY
	from .utils import (
	BilinearInterpolationHelper,
	ChartBasedAnnotationsAccumulator,
	LossDict,
	extract_packed_annotations_from_matches,
	)


	@DENSEPOSE_LOSS_REGISTRY.register()
	class DensePoseChartLoss:
	    """
	    DensePose loss for chart-based training. A mesh is split into charts,
	    each chart is given a label (I) and parametrized by 2 coordinates referred to
	    as U and V. Ground truth consists of a number of points annotated with
	    I, U and V values and coarse segmentation S defined for all pixels of the
	    object bounding box. In some cases (see `COARSE_SEGM_TRAINED_BY_MASKS`),
	    semantic segmentation annotations can be used as ground truth inputs as well.

	    Estimated values are tensors:
	     * U coordinates, tensor of shape [N, C, S, S]
	     * V coordinates, tensor of shape [N, C, S, S]
	     * fine segmentation estimates, tensor of shape [N, C, S, S] with raw unnormalized
	       scores for each fine segmentation label at each location
	     * coarse segmentation estimates, tensor of shape [N, D, S, S] with raw unnormalized
	       scores for each coarse segmentation label at each location
	    where N is the number of detections, C is the number of fine segmentation
	    labels, S is the estimate size ( = width = height) and D is the number of
	    coarse segmentation channels.

	    The losses are:
	     * regression (smooth L1) loss for U and V coordinates
	     * cross entropy loss for fine (I) and coarse (S) segmentations
	    Each loss has an associated weight
	    """

	    def __init__(self, cfg: CfgNode):
	        """
	        Initialize chart-based loss from configuration options

	        Args:
	            cfg (CfgNode): configuration options; the values are read from
	                `cfg.MODEL.ROI_DENSEPOSE_HEAD`
	        """
	        # fmt: off
	        self.heatmap_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE
	        # weight applied to the U and V regression losses
	        self.w_points = cfg.MODEL.ROI_DENSEPOSE_HEAD.POINT_REGRESSION_WEIGHTS
	        # weight applied to the fine segmentation (I) loss
	        self.w_part = cfg.MODEL.ROI_DENSEPOSE_HEAD.PART_WEIGHTS
	        # weight applied to the coarse segmentation (S) loss
	        self.w_segm = cfg.MODEL.ROI_DENSEPOSE_HEAD.INDEX_WEIGHTS
	        self.n_segm_chan = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS
	        # fmt: on
	        self.segm_trained_by_masks = cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS
	        self.segm_loss = MaskOrSegmentationLoss(cfg)

	    def __call__(
	        self, proposals_with_gt: List[Instances], densepose_predictor_outputs: Any, **kwargs
	    ) -> LossDict:
	        """
	        Produce chart-based DensePose losses

	        Args:
	            proposals_with_gt (list of Instances): detections with associated ground truth data
	            densepose_predictor_outputs: an object of a dataclass that contains predictor outputs
	                with estimated values; assumed to have the following attributes:
	                * coarse_segm - coarse segmentation estimates, tensor of shape [N, D, S, S]
	                * fine_segm - fine segmentation estimates, tensor of shape [N, C, S, S]
	                * u - U coordinate estimates per fine labels, tensor of shape [N, C, S, S]
	                * v - V coordinate estimates per fine labels, tensor of shape [N, C, S, S]
	            **kwargs: accepted for interface compatibility; not used by this method
	        where N is the number of detections, C is the number of fine segmentation
	        labels, S is the estimate size ( = width = height) and D is the number of
	        coarse segmentation channels.

	        Return:
	            dict: str -> tensor: dict of losses with the following entries:
	             * `loss_densepose_U`: smooth L1 loss for U coordinate estimates
	             * `loss_densepose_V`: smooth L1 loss for V coordinate estimates
	             * `loss_densepose_I`: cross entropy for raw unnormalized scores for fine
	                 segmentation estimates given ground truth labels;
	             * `loss_densepose_S`: cross entropy for raw unnormalized scores for coarse
	                 segmentation estimates given ground truth labels;
	        """
	        # densepose outputs are computed for all images and all bounding boxes;
	        # i.e. if a batch has 4 images with (3, 1, 2, 1) proposals respectively,
	        # the outputs will have size(0) == 3+1+2+1 == 7

	        if not len(proposals_with_gt):
	            return self.produce_fake_densepose_losses(densepose_predictor_outputs)

	        accumulator = ChartBasedAnnotationsAccumulator()
	        packed_annotations = extract_packed_annotations_from_matches(proposals_with_gt, accumulator)

	        # NOTE: we need to keep the same computation graph on all the GPUs to
	        # perform reduction properly. Hence even if we have no data on one
	        # of the GPUs, we still need to generate the computation graph.
	        # Add fake (zero) loss in the form Tensor.sum() * 0
	        if packed_annotations is None:
	            return self.produce_fake_densepose_losses(densepose_predictor_outputs)

	        h, w = densepose_predictor_outputs.u.shape[2:]
	        interpolator = BilinearInterpolationHelper.from_matches(
	            packed_annotations,
	            (h, w),
	        )

	        # Keep only points that the interpolator marks as valid AND whose
	        # ground truth fine segmentation label is positive (label 0 is
	        # presumably background - confirm against the annotation format).
	        j_valid_fg = interpolator.j_valid * (  # pyre-ignore[16]
	            packed_annotations.fine_segm_labels_gt > 0
	        )
	        # pyre-fixme[6]: For 1st param expected `Tensor` but got `int`.
	        if not torch.any(j_valid_fg):
	            return self.produce_fake_densepose_losses(densepose_predictor_outputs)

	        losses_uv = self.produce_densepose_losses_uv(
	            proposals_with_gt,
	            densepose_predictor_outputs,
	            packed_annotations,
	            interpolator,
	            j_valid_fg,  # pyre-ignore[6]
	        )

	        losses_segm = self.produce_densepose_losses_segm(
	            proposals_with_gt,
	            densepose_predictor_outputs,
	            packed_annotations,
	            interpolator,
	            j_valid_fg,  # pyre-ignore[6]
	        )

	        # Merge the two loss dicts into one. A plain `{a, b}` would be a set
	        # display and fail with "unhashable type: 'dict'".
	        return {**losses_uv, **losses_segm}

	    def produce_fake_densepose_losses(self, densepose_predictor_outputs: Any) -> LossDict:
	        """
	        Fake losses for fine segmentation and U/V coordinates. These are used when
	        no suitable ground truth data was found in a batch. The loss has a value 0
	        and is primarily used to construct the computation graph, so that
	        `DistributedDataParallel` has similar graphs on all GPUs and can perform
	        reduction properly.

	        Args:
	            densepose_predictor_outputs: DensePose predictor outputs, an object
	                of a dataclass that is assumed to have the following attributes:
	             * fine_segm - fine segmentation estimates, tensor of shape [N, C, S, S]
	             * u - U coordinate estimates per fine labels, tensor of shape [N, C, S, S]
	             * v - V coordinate estimates per fine labels, tensor of shape [N, C, S, S]
	        Return:
	            dict: str -> tensor: dict of losses with the following entries:
	             * `loss_densepose_U`: has value 0
	             * `loss_densepose_V`: has value 0
	             * `loss_densepose_I`: has value 0
	             * `loss_densepose_S`: has value 0
	        """
	        losses_uv = self.produce_fake_densepose_losses_uv(densepose_predictor_outputs)
	        losses_segm = self.produce_fake_densepose_losses_segm(densepose_predictor_outputs)
	        # Dict merge (not a set display) so that the result is a LossDict
	        return {**losses_uv, **losses_segm}

	    def produce_fake_densepose_losses_uv(self, densepose_predictor_outputs: Any) -> LossDict:
	        """
	        Fake losses for U/V coordinates. These are used when no suitable ground
	        truth data was found in a batch. The loss has a value 0
	        and is primarily used to construct the computation graph, so that
	        `DistributedDataParallel` has similar graphs on all GPUs and can perform
	        reduction properly.

	        Args:
	            densepose_predictor_outputs: DensePose predictor outputs, an object
	                of a dataclass that is assumed to have the following attributes:
	             * u - U coordinate estimates per fine labels, tensor of shape [N, C, S, S]
	             * v - V coordinate estimates per fine labels, tensor of shape [N, C, S, S]
	        Return:
	            dict: str -> tensor: dict of losses with the following entries:
	             * `loss_densepose_U`: has value 0
	             * `loss_densepose_V`: has value 0
	        """
	        # `sum() * 0` keeps the estimate tensors in the autograd graph while
	        # contributing a zero value
	        return {
	            "loss_densepose_U": densepose_predictor_outputs.u.sum() * 0,
	            "loss_densepose_V": densepose_predictor_outputs.v.sum() * 0,
	        }

	    def produce_fake_densepose_losses_segm(self, densepose_predictor_outputs: Any) -> LossDict:
	        """
	        Fake losses for fine / coarse segmentation. These are used when
	        no suitable ground truth data was found in a batch. The loss has a value 0
	        and is primarily used to construct the computation graph, so that
	        `DistributedDataParallel` has similar graphs on all GPUs and can perform
	        reduction properly.

	        Args:
	            densepose_predictor_outputs: DensePose predictor outputs, an object
	                of a dataclass that is assumed to have the following attributes:
	             * fine_segm - fine segmentation estimates, tensor of shape [N, C, S, S]
	             * coarse_segm - coarse segmentation estimates, tensor of shape [N, D, S, S]
	        Return:
	            dict: str -> tensor: dict of losses with the following entries:
	             * `loss_densepose_I`: has value 0
	             * `loss_densepose_S`: always present; the value is whatever
	                 `self.segm_loss.fake_value` returns for the predictor outputs
	                 (expected to be 0 - the helper is defined outside this class)
	        """
	        losses = {
	            "loss_densepose_I": densepose_predictor_outputs.fine_segm.sum() * 0,
	            "loss_densepose_S": self.segm_loss.fake_value(densepose_predictor_outputs),
	        }
	        return losses

	    def produce_densepose_losses_uv(
	        self,
	        proposals_with_gt: List[Instances],
	        densepose_predictor_outputs: Any,
	        packed_annotations: Any,
	        interpolator: BilinearInterpolationHelper,
	        j_valid_fg: torch.Tensor,
	    ) -> LossDict:
	        """
	        Compute losses for U/V coordinates: smooth L1 loss between
	        estimated coordinates and the ground truth.

	        Args:
	            proposals_with_gt (list of Instances): detections with associated ground truth data
	                (not used by this method)
	            densepose_predictor_outputs: DensePose predictor outputs, an object
	                of a dataclass that is assumed to have the following attributes:
	             * u - U coordinate estimates per fine labels, tensor of shape [N, C, S, S]
	             * v - V coordinate estimates per fine labels, tensor of shape [N, C, S, S]
	            packed_annotations: packed ground truth; its `u_gt` and `v_gt`
	                attributes are read here
	            interpolator (BilinearInterpolationHelper): helper used to extract
	                estimated values at the annotated points
	            j_valid_fg (tensor): mask used to select the points that contribute
	                to the loss
	        Return:
	            dict: str -> tensor: dict of losses with the following entries:
	             * `loss_densepose_U`: smooth L1 loss for U coordinate estimates
	             * `loss_densepose_V`: smooth L1 loss for V coordinate estimates
	            Both are summed over the selected points and scaled by `self.w_points`.
	        """
	        u_gt = packed_annotations.u_gt[j_valid_fg]
	        u_est = interpolator.extract_at_points(densepose_predictor_outputs.u)[j_valid_fg]
	        v_gt = packed_annotations.v_gt[j_valid_fg]
	        v_est = interpolator.extract_at_points(densepose_predictor_outputs.v)[j_valid_fg]
	        return {
	            "loss_densepose_U": F.smooth_l1_loss(u_est, u_gt, reduction="sum") * self.w_points,
	            "loss_densepose_V": F.smooth_l1_loss(v_est, v_gt, reduction="sum") * self.w_points,
	        }

	    def produce_densepose_losses_segm(
	        self,
	        proposals_with_gt: List[Instances],
	        densepose_predictor_outputs: Any,
	        packed_annotations: Any,
	        interpolator: BilinearInterpolationHelper,
	        j_valid_fg: torch.Tensor,
	    ) -> LossDict:
	        """
	        Losses for fine / coarse segmentation: cross-entropy
	        for segmentation unnormalized scores given ground truth labels at
	        annotated points for fine segmentation and dense mask annotations
	        for coarse segmentation.

	        Args:
	            proposals_with_gt (list of Instances): detections with associated ground truth data
	            densepose_predictor_outputs: DensePose predictor outputs, an object
	                of a dataclass that is assumed to have the following attributes:
	             * fine_segm - fine segmentation estimates, tensor of shape [N, C, S, S]
	             * coarse_segm - coarse segmentation estimates, tensor of shape [N, D, S, S]
	            packed_annotations: packed ground truth; its `fine_segm_labels_gt`
	                attribute is read here and the whole object is forwarded to
	                `self.segm_loss`
	            interpolator (BilinearInterpolationHelper): helper used to extract
	                estimated values at the annotated points
	            j_valid_fg (tensor): not used by this method; the fine segmentation
	                loss selects points with `interpolator.j_valid` instead
	        Return:
	            dict: str -> tensor: dict of losses with the following entries:
	             * `loss_densepose_I`: cross entropy for raw unnormalized scores for fine
	                 segmentation estimates given ground truth labels, scaled by
	                 `self.w_part`
	             * `loss_densepose_S`: value of `self.segm_loss` for the given
	                 proposals, predictor outputs and packed annotations, scaled
	                 by `self.w_segm`
	        """
	        # The fine segmentation loss uses every point the interpolator marks
	        # as valid, not only the `j_valid_fg` subset.
	        fine_segm_gt = packed_annotations.fine_segm_labels_gt[
	            interpolator.j_valid  # pyre-ignore[16]
	        ]
	        # Take all fine segmentation channels at once; the interpolation
	        # weights get an extra trailing axis so that they broadcast over it.
	        fine_segm_est = interpolator.extract_at_points(
	            densepose_predictor_outputs.fine_segm,
	            slice_fine_segm=slice(None),
	            w_ylo_xlo=interpolator.w_ylo_xlo[:, None],  # pyre-ignore[16]
	            w_ylo_xhi=interpolator.w_ylo_xhi[:, None],  # pyre-ignore[16]
	            w_yhi_xlo=interpolator.w_yhi_xlo[:, None],  # pyre-ignore[16]
	            w_yhi_xhi=interpolator.w_yhi_xhi[:, None],  # pyre-ignore[16]
	        )[interpolator.j_valid, :]
	        return {
	            "loss_densepose_I": F.cross_entropy(fine_segm_est, fine_segm_gt.long()) * self.w_part,
	            "loss_densepose_S": self.segm_loss(
	                proposals_with_gt, densepose_predictor_outputs, packed_annotations
	            )
	            * self.w_segm,
	        }