Spaces:

AndreasLH
/

Weakly-Supervised-3DOD

Sleeping

App Files Files Community

Weakly-Supervised-3DOD / cubercnn /modeling /roi_heads /fast_rcnn.py

AndreasLH

upload repo

56bd2b5 12 months ago

raw

history blame contribute delete

11.2 kB

	# Copyright (c) Meta Platforms, Inc. and affiliates
	from re import L
	import torch
	from torch.nn import functional as F
	from typing import List, Tuple

	from fvcore.nn import giou_loss, smooth_l1_loss
	from detectron2.utils.events import get_event_storage
	from detectron2.layers import cat, cross_entropy, nonzero_tuple, batched_nms
	from detectron2.structures import Instances, Boxes
	from detectron2.modeling.roi_heads.fast_rcnn import (
	FastRCNNOutputLayers, _log_classification_stats
	)
	from cubercnn.modeling.proposal_generator.rpn import matched_pairwise_iou

	def fast_rcnn_inference(
	    boxes: List[torch.Tensor],
	    scores: List[torch.Tensor],
	    image_shapes: List[Tuple[int, int]],
	    score_thresh: float,
	    nms_thresh: float,
	    topk_per_image: int,
	):
	    """
	    Run `fast_rcnn_inference_single_image` on every image of a batch.

	    Args:
	        boxes (list[Tensor]): one tensor of predicted boxes per image. Element i has
	            shape (Ri, K * 4) for class-specific regression or (Ri, 4) for
	            class-agnostic regression, Ri being the number of predictions for image i.
	            Compatible with the output of :meth:`FastRCNNOutputLayers.predict_boxes`.
	        scores (list[Tensor]): one tensor of predicted class scores per image.
	            Element i has shape (Ri, K + 1). Compatible with the output of
	            :meth:`FastRCNNOutputLayers.predict_probs`.
	        image_shapes (list[tuple]): one image-size tuple per image; each tuple is
	            forwarded unchanged to the single-image routine.
	            NOTE(review): this was previously documented as (width, height), while
	            detectron2 documents `Instances`/`Boxes.clip` sizes as (height, width)
	            -- confirm against the caller.
	        score_thresh (float): only detections scoring above this value are returned.
	        nms_thresh (float): IoU threshold for box non-maximum suppression, in [0, 1].
	        topk_per_image (int): maximum number of detections returned per image;
	            a negative value returns all of them.

	    Returns:
	        instances (list[Instances]): one `Instances` per image holding the retained
	            detections.
	        kept_indices (list[Tensor]): one 1D tensor per image; each element is the
	            row index of the prediction a detection originates from.
	    """
	    instances_per_image = []
	    kept_indices_per_image = []
	    # zip() stops at the shortest of the three lists, one entry per image.
	    for img_boxes, img_scores, img_shape in zip(boxes, scores, image_shapes):
	        detections, kept = fast_rcnn_inference_single_image(
	            img_boxes, img_scores, img_shape, score_thresh, nms_thresh, topk_per_image
	        )
	        instances_per_image.append(detections)
	        kept_indices_per_image.append(kept)
	    return instances_per_image, kept_indices_per_image

	def fast_rcnn_inference_single_image(
	    boxes,
	    scores,
	    image_shape: Tuple[int, int],
	    score_thresh: float,
	    nms_thresh: float,
	    topk_per_image: int,
	):
	    """
	    Single-image inference. Return bounding-box detection results by thresholding
	    on scores and applying non-maximum suppression (NMS).

	    Args:
	        Same as `fast_rcnn_inference`, but with boxes, scores, and image shape
	        for a single image.

	    Returns:
	        result (Instances): the retained detections, with fields `pred_boxes`,
	            `scores` (score of the detected class), `scores_full` (the full row of
	            per-class scores, last column removed, of the originating prediction)
	            and `pred_classes`.
	        kept_indices (Tensor): 1D tensor giving, for every detection, the row index
	            of the originating prediction in the *input* `boxes`/`scores`. Rows that
	            were discarded for containing NaN/Inf are accounted for, so the indices
	            can be used directly on the caller's tensors.
	    """
	    # Discard predictions containing NaN/Inf. Remember which input rows survive so
	    # the indices returned at the end still refer to the caller's row numbering
	    # rather than to the filtered tensors.
	    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1)
	    input_inds = None
	    if not valid_mask.all():
	        input_inds = valid_mask.nonzero().squeeze(1)
	        boxes = boxes[valid_mask]
	        scores = scores[valid_mask]

	    # Drop the last score column (presumably the background class, given the
	    # documented (R, K + 1) layout -- confirm against predict_probs).
	    scores = scores[:, :-1]
	    num_bbox_reg_classes = boxes.shape[1] // 4

	    # Convert to Boxes to use the `clip` function ...
	    boxes = Boxes(boxes.reshape(-1, 4))
	    boxes.clip(image_shape)
	    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

	    # 1. Filter results based on detection scores. It can make NMS more efficient
	    #    by filtering out low-confidence detections.
	    filter_mask = scores > score_thresh  # R x K

	    # R' x 2. First column contains indices of the R predictions;
	    # second column contains indices of classes.
	    filter_inds = filter_mask.nonzero()
	    if num_bbox_reg_classes == 1:
	        # Class-agnostic regression: a single box is shared by all classes.
	        boxes = boxes[filter_inds[:, 0], 0]
	    else:
	        boxes = boxes[filter_mask]

	    # Keep the whole score row of each surviving prediction next to its class score.
	    scores_full = scores[filter_inds[:, 0]]
	    scores = scores[filter_mask]

	    # 2. Apply NMS for each class independently.
	    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
	    if topk_per_image >= 0:
	        keep = keep[:topk_per_image]

	    boxes = boxes[keep]
	    scores = scores[keep]
	    filter_inds = filter_inds[keep]
	    scores_full = scores_full[keep]

	    result = Instances(image_shape)
	    result.pred_boxes = Boxes(boxes)
	    result.scores = scores
	    result.scores_full = scores_full
	    result.pred_classes = filter_inds[:, 1]

	    kept_inds = filter_inds[:, 0]
	    if input_inds is not None:
	        # Translate positions in the filtered tensors back to input row indices.
	        kept_inds = input_inds[kept_inds]
	    return result, kept_inds


	class FastRCNNOutputs(FastRCNNOutputLayers):
	    """
	    `FastRCNNOutputLayers` variant whose inference goes through this module's
	    `fast_rcnn_inference` and whose losses are reported under ``BoxHead/...`` keys.
	    """

	    def inference(self, predictions: Tuple[torch.Tensor, torch.Tensor], proposals: List[Instances]):
	        """
	        Args:
	            predictions: return values of :meth:`forward()`.
	            proposals (list[Instances]): proposals that match the features that were
	                used to compute predictions. The ``proposal_boxes`` field is expected.

	        Returns:
	            list[Instances]: same as `fast_rcnn_inference`.
	            list[Tensor]: same as `fast_rcnn_inference`.
	        """
	        boxes = self.predict_boxes(predictions, proposals)
	        scores = self.predict_probs(predictions, proposals)

	        image_shapes = [x.image_size for x in proposals]
	        return fast_rcnn_inference(
	            boxes,
	            scores,
	            image_shapes,
	            self.test_score_thresh,
	            self.test_nms_thresh,
	            self.test_topk_per_image,
	        )

	    def losses(self, predictions, proposals):
	        """
	        Args:
	            predictions: return values of :meth:`forward()`.
	            proposals (list[Instances]): proposals that match the features that were used
	                to compute predictions. The fields ``proposal_boxes``, ``gt_boxes``,
	                ``gt_classes`` are expected.

	        Returns:
	            Dict[str, Tensor]: dict of losses with keys ``"BoxHead/loss_cls"`` and
	            ``"BoxHead/loss_box_reg"``, each scaled by the entry of
	            ``self.loss_weight`` stored under the same key (1.0 when absent).
	        """
	        scores, proposal_deltas = predictions

	        # parse classification outputs
	        gt_classes = (
	            cat([p.gt_classes for p in proposals], dim=0) if len(proposals) else torch.empty(0)
	        )

	        # parse box regression outputs
	        if len(proposals):
	            proposal_boxes = cat([p.proposal_boxes.tensor for p in proposals], dim=0)  # Nx4
	            assert not proposal_boxes.requires_grad, "Proposals should not require gradients!"
	            # If "gt_boxes" does not exist, the proposals must be all negative and
	            # should not be included in regression loss computation.
	            # Here we just use proposal_boxes as an arbitrary placeholder because its
	            # value won't be used in self.box_reg_loss().
	            gt_boxes = cat(
	                [(p.gt_boxes if p.has("gt_boxes") else p.proposal_boxes).tensor for p in proposals],
	                dim=0,
	            )
	        else:
	            proposal_boxes = gt_boxes = torch.empty((0, 4), device=proposal_deltas.device)

	        # Normalize by the total number of proposals (at least 1 to avoid dividing
	        # by zero), matching the normalization of box_reg_loss(reduction='mean').
	        normalize_factor = max(gt_classes.numel(), 1.0)

	        # Standard Faster R-CNN losses
	        _log_classification_stats(scores, gt_classes)
	        loss_cls = cross_entropy(scores, gt_classes, reduction="mean")
	        # The regression loss is requested un-reduced and summed here.
	        loss_box_reg = self.box_reg_loss(
	            proposal_boxes, gt_boxes, proposal_deltas, gt_classes, reduction="none"
	        )
	        loss_box_reg = loss_box_reg.sum() / normalize_factor

	        losses = {
	            "BoxHead/loss_cls": loss_cls,
	            "BoxHead/loss_box_reg": loss_box_reg,
	        }

	        return {k: v * self.loss_weight.get(k, 1.0) for k, v in losses.items()}

	    def box_reg_loss(self, proposal_boxes, gt_boxes, pred_deltas, gt_classes, reduction='mean'):
	        """
	        Box regression loss over the foreground proposals.

	        Args:
	            All boxes are tensors with the same shape Rx(4 or 5).
	            gt_classes is a long tensor of shape R, the gt class label of each proposal.
	            R shall be the number of proposals.
	            reduction (str): ``'mean'`` sums the loss over foreground proposals and
	                divides by R (at least 1); ``'none'`` returns the un-reduced loss
	                produced by the underlying loss function for the foreground
	                proposals (element-wise over deltas for "smooth_l1", per box for
	                "giou").

	        Returns:
	            Tensor: the loss, reduced as requested.

	        Raises:
	            ValueError: if `reduction` or ``self.box_reg_loss_type`` is not supported.
	        """
	        if reduction not in ("mean", "none"):
	            raise ValueError(f"Invalid bbox reg reduction type '{reduction}'")

	        box_dim = proposal_boxes.shape[1]  # 4 or 5

	        # Regression loss is only computed for foreground proposals (those matched to a GT)
	        fg_inds = nonzero_tuple((gt_classes >= 0) & (gt_classes < self.num_classes))[0]
	        if pred_deltas.shape[1] == box_dim:  # cls-agnostic regression
	            fg_pred_deltas = pred_deltas[fg_inds]
	        else:
	            # cls-specific regression: pick the deltas predicted for the gt class
	            fg_pred_deltas = pred_deltas.view(-1, self.num_classes, box_dim)[
	                fg_inds, gt_classes[fg_inds]
	            ]

	        # 'mean' is implemented as a sum followed by the division below.
	        inner_reduction = "sum" if reduction == "mean" else "none"

	        if self.box_reg_loss_type == "smooth_l1":
	            gt_pred_deltas = self.box2box_transform.get_deltas(
	                proposal_boxes[fg_inds],
	                gt_boxes[fg_inds],
	            )
	            loss_box_reg = smooth_l1_loss(
	                fg_pred_deltas, gt_pred_deltas, self.smooth_l1_beta, reduction=inner_reduction
	            )
	        elif self.box_reg_loss_type == "giou":
	            # Handled for both reductions, so that losses() (which always asks for
	            # reduction='none') also works when the GIoU loss is configured.
	            fg_pred_boxes = self.box2box_transform.apply_deltas(
	                fg_pred_deltas, proposal_boxes[fg_inds]
	            )
	            loss_box_reg = giou_loss(
	                fg_pred_boxes, gt_boxes[fg_inds], reduction=inner_reduction
	            )
	        else:
	            raise ValueError(f"Invalid bbox reg loss type '{self.box_reg_loss_type}'")

	        if reduction == "none":
	            # return non-reduced type
	            return loss_box_reg

	        # The reg loss is normalized using the total number of regions (R), not the number
	        # of foreground regions even though the box regression loss is only defined on
	        # foreground regions. Why? Because doing so gives equal training influence to
	        # each foreground example. To see how, consider two different minibatches:
	        #  (1) Contains a single foreground region
	        #  (2) Contains 100 foreground regions
	        # If we normalize by the number of foreground regions, the single example in
	        # minibatch (1) will be given 100 times as much influence as each foreground
	        # example in minibatch (2). Normalizing by the total number of regions, R,
	        # means that the single example in minibatch (1) and each of the 100 examples
	        # in minibatch (2) are given equal influence.
	        return loss_box_reg / max(gt_classes.numel(), 1.0)  # return 0 if empty