Spaces:

SEA-AI
/

box-metrics

Sleeping

box-metrics / box-metrics.py

rename

f66e368 about 1 year ago

12.8 kB

	import evaluate
	import datasets
	import motmetrics as mm
	import numpy as np
	from seametrics.payload import Payload
	import torch
	from utils import bbox_iou, bbox_bep
	import datasets


	_CITATION = """\
	@InProceedings{huggingface:module,
	title = {A great new module},
	authors={huggingface, Inc.},
	year={2020}
	}\
	@article{milan2016mot16,
	title={MOT16: A benchmark for multi-object tracking},
	author={Milan, Anton and Leal-Taix{\'e}, Laura and Reid, Ian and Roth, Stefan and Schindler, Konrad},
	journal={arXiv preprint arXiv:1603.00831},
	year={2016}
	}
	"""

	_DESCRIPTION = """\
	The MOT Metrics module is designed to evaluate multi-object tracking (MOT)
	algorithms by computing various metrics based on predicted and ground truth bounding
	boxes. It serves as a crucial tool in assessing the performance of MOT systems,
	aiding in the iterative improvement of tracking algorithms."""


	_KWARGS_DESCRIPTION = """
	Calculates how good are predictions given some references, using certain scores
	Args:
	predictions: list of predictions to score. Each predictions
	should be a string with tokens separated by spaces.
	references: list of reference for each prediction. Each
	reference should be a string with tokens separated by spaces.
	max_iou (`float`, optional):
	If specified, this is the minimum Intersection over Union (IoU) threshold to consider a detection as a true positive.
	Default is 0.5.
	"""

	@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
	class box_metrics(evaluate.Metric):
	    """Collect per-frame boxes and report box-agreement statistics.

	    Boxes are kept in ``self.boxes`` as
	    ``{sequence: {gt_field: [frame, ...], model: [frame, ...], ...}}`` where
	    every ground truth frame is an ``[n, 5]`` tensor
	    ``(label, x1, y1, x2, y2)`` and every prediction frame is an ``[n, 6]``
	    tensor ``(x1, y1, x2, y2, conf, label)``.
	    """

	    # Matched pairs whose ground truth box is lower than this are left out of
	    # the statistics.
	    # NOTE(review): presumably pixels, because add() scales the payload boxes
	    # by the frame resolution -- confirm for boxes passed to add_batch().
	    _MIN_TARGET_HEIGHT = 10

	    # Error series collected per matched pair; compute() reports a
	    # ``<key>_mean`` and a ``<key>_std`` entry for each of them, in this order.
	    _ERROR_KEYS = (
	        "e_bottom_x",
	        "e_bottom_y",
	        "e_width",
	        "e_height",
	        "e_n_bottom_x",
	        "e_n_bottom_y",
	        "e_n_width",
	        "e_n_height",
	    )

	    def __init__(self, **kwargs):
	        """Create an empty metric; ``kwargs`` are forwarded to the base class."""
	        super().__init__(**kwargs)
	        # {sequence: {ground truth field or model name: list of frame tensors}}
	        self.boxes = {}
	        # Key under which the ground truth frames are stored in self.boxes;
	        # add() overwrites it with the payload's own field name.
	        self.gt_field = "ground_truth_det"

	    def _info(self):
	        """Return the ``evaluate.MetricInfo`` describing this module."""
	        # TODO: Specifies the evaluate.EvaluationModuleInfo object
	        return evaluate.MetricInfo(
	            # This is the description that will appear on the modules page.
	            module_type="metric",
	            description=_DESCRIPTION,
	            citation=_CITATION,
	            inputs_description=_KWARGS_DESCRIPTION,
	            # This defines the format of each prediction and reference
	            features=datasets.Features({
	                "predictions": datasets.Sequence(
	                    datasets.Sequence(datasets.Value("float"))
	                ),
	                "references": datasets.Sequence(
	                    datasets.Sequence(datasets.Value("float"))
	                ),
	            }),
	            # Additional links to the codebase or references
	            # TODO(review): both URLs are still placeholders.
	            codebase_urls=["http://github.com/path/to/codebase/of/new_module"],
	            reference_urls=["http://path.to.reference.url/new_module"],
	        )

	    def add_payload(self, payload: Payload):
	        """Alias of :meth:`add`, kept for callers that use the older name."""
	        self.add(payload)

	    def add(self, payload: Payload):
	        """Store the ground truth and all model predictions of ``payload``.

	        For every sequence in ``payload.sequences`` the ground truth (read
	        from the field named ``payload.gt_field_name``) and the predictions of
	        each model in ``payload.models`` are converted to per-frame tensors
	        and stored in ``self.boxes``. Data previously stored under the same
	        sequence name is replaced.
	        """
	        self.gt_field = payload.gt_field_name
	        for sequence in payload.sequences:
	            sequence_data = payload.sequences[sequence]
	            resolution = sequence_data["resolution"]

	            sequence_boxes = {
	                self.gt_field: self.payload_labels_to_tm(
	                    sequence_data[self.gt_field], resolution
	                )
	            }
	            for model in payload.models:
	                sequence_boxes[model] = self.payload_preds_to_rm(
	                    sequence_data[model], resolution
	                )
	            self.boxes[sequence] = sequence_boxes

	    def add_batch(self, predictions, references, sequence_name = "sequence"):
	        """Add a batch of predictions and references to the metric
	        Mainly for testing purposes
	        references: list of tm boxes as [n, 5] tensors
	        box format: label, x1, y1, x2, y2
	        predictions: dict of {model_name: list of tm boxes as [n, 6] tensors}
	        box format: x1, y1, x2, y2, conf, label
	        sequence_name: key under which the batch is stored; an existing
	        entry with the same name is replaced
	        """
	        sequence_boxes = {self.gt_field: references}
	        for model in predictions:
	            sequence_boxes[model] = predictions[model]
	        self.boxes[sequence_name] = sequence_boxes

	    def compute(self,
	                iou_threshold: float = 0.01,
	                only_tp = True):
	        """Compute the box statistics of every stored sequence and model.

	        Each prediction frame is matched against the ground truth frame with
	        the same index. For every matched pair whose ground truth box is at
	        least ``_MIN_TARGET_HEIGHT`` high, the IoU, the BEP and the
	        prediction-minus-ground-truth errors of width, height and
	        bottom-centre point are collected, both absolute and divided by the
	        ground truth width (x, width) or height (y, height).

	        Args:
	            iou_threshold: a pair is only considered if its IoU is strictly
	                greater than this value.
	            only_tp: if True, keep at most one pair per prediction and per
	                ground truth box (the ones with the highest IoU).

	        Returns:
	            ``{sequence: {model: stats}}`` where ``stats`` holds ``iou`` and
	            ``bep`` (means), ``<error>_mean`` and ``<error>_std`` for every
	            entry of ``_ERROR_KEYS``, and ``n_matches``. The ground truth
	            entry itself is not reported as a model. With zero matches the
	            means and standard deviations are NaN.

	        Raises:
	            ValueError: if a matched pair has a negative IoU or BEP.
	        """
	        output = {}

	        for sequence, sequence_boxes in self.boxes.items():
	            labels = sequence_boxes[self.gt_field]
	            output[sequence] = {}

	            for model, detections in sequence_boxes.items():
	                # The ground truth is stored next to the models; comparing it
	                # with itself would misread its (label, x1, y1, x2, y2) rows
	                # as prediction rows.
	                if model == self.gt_field:
	                    continue

	                # Fresh accumulators per model so that the statistics of one
	                # model never contain the matches of another one.
	                ious = []
	                beps = []
	                errors = {key: [] for key in self._ERROR_KEYS}

	                for i, frame_detections in enumerate(detections):
	                    frame_labels = labels[i]
	                    matches = self._match_frame(
	                        frame_labels, frame_detections, iou_threshold, only_tp
	                    )

	                    for label_idx, detection_idx, pair_iou in matches:
	                        t_box = frame_labels[int(label_idx)][1:]
	                        p_box = frame_detections[int(detection_idx)][:4]

	                        bep = bbox_bep(t_box.unsqueeze(0), p_box.unsqueeze(0))
	                        if pair_iou < 0:
	                            raise ValueError("IoU should be greater than 0, pls contact code maintainer")
	                        if bep < 0:
	                            raise ValueError("BEP should be greater than 0, pls contact code maintainer")

	                        t_x1, t_y1, t_x2, t_y2 = t_box[:4].tolist()
	                        p_x1, p_y1, p_x2, p_y2 = p_box.tolist()

	                        t_w, t_h = t_x2 - t_x1, t_y2 - t_y1
	                        p_w, p_h = p_x2 - p_x1, p_y2 - p_y1

	                        if t_h < self._MIN_TARGET_HEIGHT:
	                            continue

	                        # Bottom-centre point: horizontal centre, lower edge.
	                        # All errors are prediction minus ground truth.
	                        d_x = (p_x1 + p_x2) / 2 - (t_x1 + t_x2) / 2
	                        d_y = p_y2 - t_y2
	                        d_w = p_w - t_w
	                        d_h = p_h - t_h

	                        ious.append(float(pair_iou))
	                        # The `bep < 0` test above already requires a
	                        # single-element value, so float() is safe here.
	                        beps.append(float(bep))

	                        errors["e_bottom_x"].append(d_x)
	                        errors["e_bottom_y"].append(d_y)
	                        errors["e_width"].append(d_w)
	                        errors["e_height"].append(d_h)

	                        errors["e_n_bottom_x"].append(d_x / t_w)
	                        errors["e_n_bottom_y"].append(d_y / t_h)
	                        errors["e_n_width"].append(d_w / t_w)
	                        errors["e_n_height"].append(d_h / t_h)

	                stats = {"iou": np.mean(ious), "bep": np.mean(beps)}
	                for key in self._ERROR_KEYS:
	                    stats[key + "_mean"] = np.mean(errors[key])
	                for key in self._ERROR_KEYS:
	                    stats[key + "_std"] = np.std(errors[key])
	                stats["n_matches"] = len(ious)
	                output[sequence][model] = stats

	        return output

	    def _match_frame(self, frame_labels, frame_detections, iou_threshold, only_tp):
	        """Pair the ground truth and predicted boxes of one frame.

	        Returns a ``[k, 3]`` numpy array with one row
	        ``(label index, detection index, IoU)`` per pair whose IoU is greater
	        than ``iou_threshold``; ``k`` is 0 if there is no such pair.
	        """
	        iou_matrix = self.box_iou(frame_labels[:, 1:], frame_detections[:, :4])
	        hits = torch.where(iou_matrix > iou_threshold)

	        if not hits[0].shape[0]:
	            return np.zeros((0, 3))

	        matches = torch.cat(
	            (torch.stack(hits, 1), iou_matrix[hits[0], hits[1]][:, None]), 1
	        ).cpu().numpy()

	        if hits[0].shape[0] > 1 and only_tp:
	            # Sort by IoU (best first); np.unique then keeps the first, i.e.
	            # best, row of every detection and afterwards of every label.
	            matches = matches[matches[:, 2].argsort()[::-1]]
	            matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
	            matches = matches[matches[:, 2].argsort()[::-1]]
	            matches = matches[np.unique(matches[:, 0], return_index=True)[1]]

	        return matches

	    @staticmethod
	    def summarize(result):
	        """Summarize the results by model instead of by sequence: model.

	        ``result`` is the ``{sequence: {model: {metric: value}}}`` dict
	        returned by :meth:`compute`. Every metric (including ``n_matches``)
	        is replaced by its unweighted mean over all sequences that contain
	        the model.
	        """
	        collected = {}
	        for sequence_result in result.values():
	            for model, metrics in sequence_result.items():
	                model_values = collected.setdefault(model, {})
	                for metric, value in metrics.items():
	                    model_values.setdefault(metric, []).append(value)

	        # average the results
	        return {
	            model: {metric: np.mean(values) for metric, values in metrics.items()}
	            for model, metrics in collected.items()
	        }

	    @staticmethod
	    def _scaled_xyxy(det, resolution):
	        """Return ``det["bounding_box"]`` as ``(x1, y1, x2, y2)`` scaled by ``resolution``.

	        The box is read as ``(x, y, width, height)``; x values are multiplied
	        by ``resolution.width`` and y values by ``resolution.height``.
	        NOTE(review): this presumably turns frame-relative coordinates into
	        pixels -- confirm against the payload format.
	        """
	        box = det["bounding_box"]
	        x1, y1, x2, y2 = box[0], box[1], box[0] + box[2], box[1] + box[3]
	        return (
	            x1 * resolution.width,
	            y1 * resolution.height,
	            x2 * resolution.width,
	            y2 * resolution.height,
	        )

	    @staticmethod
	    def payload_labels_to_tm(labels, resolution):
	        """Convert the labels of a payload sequence to per-frame tensors.

	        Returns one tensor per frame with a row
	        ``(label, x1, y1, x2, y2)`` per box (``label`` is always 0); frames
	        without boxes become an empty ``[0, 5]`` tensor.
	        """
	        target_tm = []
	        for frame in labels:
	            rows = [[0, *box_metrics._scaled_xyxy(det, resolution)] for det in frame]
	            target_tm.append(torch.tensor(rows) if len(rows) > 0 else torch.empty((0, 5)))

	        return target_tm

	    @staticmethod
	    def payload_preds_to_rm(preds, resolution):
	        """Convert the predictions of a payload sequence to per-frame tensors.

	        Returns one tensor per frame with a row
	        ``(x1, y1, x2, y2, conf, label)`` per box (``conf`` is always 1 and
	        ``label`` always 0); frames without boxes become an empty ``[0, 6]``
	        tensor.
	        """
	        preds_tm = []
	        for frame in preds:
	            rows = [[*box_metrics._scaled_xyxy(det, resolution), 1, 0] for det in frame]
	            preds_tm.append(torch.tensor(rows) if len(rows) > 0 else torch.empty((0, 6)))

	        return preds_tm

	    @staticmethod
	    def box_iou(box1, box2, eps=1e-7):
	        # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
	        """
	        Return intersection-over-union (Jaccard index) of boxes.
	        Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
	        Arguments:
	            box1 (Tensor[N, 4])
	            box2 (Tensor[M, 4])
	            eps (float): added to the union so that the division is defined
	                for boxes without area
	        Returns:
	            iou (Tensor[N, M]): the NxM matrix containing the pairwise
	                IoU values for every element in boxes1 and boxes2
	        """

	        # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
	        (a1, a2), (b1, b2) = box1.unsqueeze(1).chunk(2, 2), box2.unsqueeze(0).chunk(2, 2)
	        inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp(0).prod(2)

	        # IoU = inter / (area1 + area2 - inter)
	        return inter / ((a2 - a1).prod(2) + (b2 - b1).prod(2) - inter + eps)