import datasets
import evaluate
import numpy as np
import torch
from seametrics.payload import Payload
from utils import bbox_bep

_CITATION = r"""
@article{milan2016mot16,
  title={MOT16: A benchmark for multi-object tracking},
  author={Milan, Anton and Leal-Taix{\'e}, Laura and Reid, Ian and Roth, Stefan and Schindler, Konrad},
  journal={arXiv preprint arXiv:1603.00831},
  year={2016}
}
"""

_DESCRIPTION = """\
The box metrics module evaluates multi-object tracking (MOT) and detection
algorithms at the bounding-box level. For every matched pair of predicted and
ground-truth boxes it computes the IoU, the BEP, and the absolute and
size-normalized errors of box width, height, and bottom-center position,
aggregated per sequence and per model."""

_KWARGS_DESCRIPTION = """
Calculates box-level localization metrics for predictions given references.
Args:
    predictions: dict of {model_name: list of per-frame [n, 6] tensors}
        in (x1, y1, x2, y2, conf, label) format.
    references: list of per-frame [n, 5] ground-truth tensors
        in (label, x1, y1, x2, y2) format.
    iou_threshold (`float`, *optional*):
        Minimum Intersection over Union (IoU) for a detection to be matched
        to a ground-truth box. Defaults to 0.01.
"""


@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class box_metrics(evaluate.Metric):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.boxes = {}
        self.gt_field = "ground_truth_det"

    def _info(self):
        return evaluate.MetricInfo(
            # This is the description that will appear on the modules page.
            module_type="metric",
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            # This defines the format of each prediction and reference
            features=datasets.Features({
                "predictions": datasets.Sequence(
                    datasets.Sequence(datasets.Value("float"))
                ),
                "references": datasets.Sequence(
                    datasets.Sequence(datasets.Value("float"))
                )
            }),
            # Additional links to the codebase or references
            codebase_urls=["http://github.com/path/to/codebase/of/new_module"],
            reference_urls=["http://path.to.reference.url/new_module"]
        )

    def add_payload(self, payload: Payload):
        """Convert a payload to the format of the tracking metrics library."""
        self.add(payload)

    def add(self, payload: Payload):
        """Convert a payload to the format of the tracking metrics library."""
        self.gt_field = payload.gt_field_name
        for sequence in payload.sequences:
            self.boxes[sequence] = {}
            target = payload.sequences[sequence][self.gt_field]
            resolution = payload.sequences[sequence]["resolution"]
            target_tm = self.payload_labels_to_tm(target, resolution)
            self.boxes[sequence][self.gt_field] = target_tm
            for model in payload.models:
                preds = payload.sequences[sequence][model]
                preds_tm = self.payload_preds_to_tm(preds, resolution)
                self.boxes[sequence][model] = preds_tm

    def add_batch(self, predictions, references, sequence_name="sequence"):
        """Add a batch of predictions and references to the metric.

        Mainly for testing purposes.
        references: list of per-frame [n, 5] tensors
            box format: label, x1, y1, x2, y2
        predictions: dict of {model_name: list of per-frame [n, 6] tensors}
            box format: x1, y1, x2, y2, conf, label
        """
        self.boxes[sequence_name] = {self.gt_field: references}
        for model in predictions:
            self.boxes[sequence_name][model] = predictions[model]

    def compute(self, iou_threshold: float = 0.01, only_tp=True):
        """Compute the metric values per sequence and per model."""
        output = {}
        for sequence in self.boxes:
            output[sequence] = {}
            labels = self.boxes[sequence][self.gt_field]
            for model in self.boxes[sequence]:
                if model == self.gt_field:
                    continue  # do not evaluate the ground truth against itself
                # reset the accumulators per model so results do not leak across models
                ious = np.array([])
                beps = np.array([])
                e_bottom_x = np.array([])
                e_bottom_y = np.array([])
                e_widths = np.array([])
                e_heights = np.array([])
                e_n_widths = np.array([])
                e_n_heights = np.array([])
                e_n_bottom_x = np.array([])
                e_n_bottom_y = np.array([])
                detections = self.boxes[sequence][model]
                for i in range(len(detections)):
                    frame_labels = labels[i]
                    frame_detections = detections[i]
                    iou = self.box_iou(frame_labels[:, 1:], frame_detections[:, :4])
                    x = torch.where(iou > iou_threshold)
                    if x[0].shape[0]:
                        matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()
                        if x[0].shape[0] > 1 and only_tp:
                            # greedy one-to-one matching: keep the highest-IoU match
                            # per detection, then per ground-truth box
                            matches = matches[matches[:, 2].argsort()[::-1]]
                            matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
                            matches = matches[matches[:, 2].argsort()[::-1]]
                            matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
                    else:
                        matches = np.zeros((0, 3))
                    labels_i, detections_i, ious_v = matches.transpose()
                    labels_i = labels_i.astype(int)
                    detections_i = detections_i.astype(int)
                    for label_i, detection_i, iou_v in zip(labels_i, detections_i, ious_v):
                        t_box = frame_labels[label_i][1:]
                        p_box = frame_detections[detection_i][:4]
                        bep = bbox_bep(t_box.unsqueeze(0), p_box.unsqueeze(0))
                        if iou_v < 0:
                            raise ValueError("IoU should be greater than 0, please contact the code maintainer")
                        if bep < 0:
                            raise ValueError("BEP should be greater than 0, please contact the code maintainer")
                        # bottom-center x/y, width, and height of target (t_*) and prediction (p_*)
                        t_xc = (t_box[0].item() + t_box[2].item()) / 2
                        p_xc = (p_box[0].item() + p_box[2].item()) / 2
                        t_yc = t_box[3].item()
                        p_yc = p_box[3].item()
                        t_w = t_box[2].item() - t_box[0].item()
                        p_w = p_box[2].item() - p_box[0].item()
                        t_h = t_box[3].item() - t_box[1].item()
                        p_h = p_box[3].item() - p_box[1].item()
                        if t_h < 10:
                            continue  # skip ground-truth boxes less than 10 px high
                        ious = np.append(ious, iou_v)
                        beps = np.append(beps, bep)
                        e_widths = np.append(e_widths, p_w - t_w)
                        e_heights = np.append(e_heights, p_h - t_h)
                        e_bottom_x = np.append(e_bottom_x, p_xc - t_xc)
                        e_bottom_y = np.append(e_bottom_y, p_yc - t_yc)
                        e_n_widths = np.append(e_n_widths, (p_w - t_w) / t_w)
                        e_n_heights = np.append(e_n_heights, (p_h - t_h) / t_h)
                        e_n_bottom_x = np.append(e_n_bottom_x, (p_xc - t_xc) / t_w)
                        e_n_bottom_y = np.append(e_n_bottom_y, (p_yc - t_yc) / t_h)
                output[sequence][model] = {
                    "iou": np.mean(ious),
                    "bep": np.mean(beps),
                    "e_bottom_x_mean": np.mean(e_bottom_x),
                    "e_bottom_y_mean": np.mean(e_bottom_y),
                    "e_width_mean": np.mean(e_widths),
                    "e_height_mean": np.mean(e_heights),
                    "e_n_bottom_x_mean": np.mean(e_n_bottom_x),
                    "e_n_bottom_y_mean": np.mean(e_n_bottom_y),
                    "e_n_width_mean": np.mean(e_n_widths),
                    "e_n_height_mean": np.mean(e_n_heights),
                    "e_bottom_x_std": np.std(e_bottom_x),
                    "e_bottom_y_std": np.std(e_bottom_y),
                    "e_width_std": np.std(e_widths),
                    "e_height_std": np.std(e_heights),
                    "e_n_bottom_x_std": np.std(e_n_bottom_x),
                    "e_n_bottom_y_std": np.std(e_n_bottom_y),
                    "e_n_width_std": np.std(e_n_widths),
                    "e_n_height_std": np.std(e_n_heights),
                    "n_matches": len(e_n_heights),
                }
        return output

    @staticmethod
    def summarize(result):
        """Summarize the results per model instead of per sequence."""
        summary = {}
        for sequence in result:
            for model in result[sequence]:
                if model not in summary:
                    summary[model] = {}
                for metric in result[sequence][model]:
                    if metric not in summary[model]:
                        summary[model][metric] = []
                    summary[model][metric].append(result[sequence][model][metric])
        # average the results over the sequences
        for model in summary:
            for metric in summary[model]:
                summary[model][metric] = np.mean(summary[model][metric])
        return summary

    @staticmethod
    def payload_labels_to_tm(labels, resolution):
        """Convert the labels of a payload sequence to the torch-metrics format."""
        target_tm = []
        for frame in labels:
            target_tm_frame = []
            for det in frame:
                label = 0
                box = det["bounding_box"]
                # normalized (x, y, w, h) -> absolute (x1, y1, x2, y2)
                x1, y1, x2, y2 = box[0], box[1], box[0] + box[2], box[1] + box[3]
                x1, y1, x2, y2 = x1 * resolution.width, y1 * resolution.height, x2 * resolution.width, y2 * resolution.height
                target_tm_frame.append([label, x1, y1, x2, y2])
            target_tm.append(torch.tensor(target_tm_frame) if len(target_tm_frame) > 0 else torch.empty((0, 5)))
        return target_tm

    @staticmethod
    def payload_preds_to_tm(preds, resolution):
        """Convert the predictions of a payload sequence to the torch-metrics format."""
        preds_tm = []
        for frame in preds:
            pred_tm_frame = []
            for det in frame:
                label = 0
                box = det["bounding_box"]
                # normalized (x, y, w, h) -> absolute (x1, y1, x2, y2)
                x1, y1, x2, y2 = box[0], box[1], box[0] + box[2], box[1] + box[3]
                x1, y1, x2, y2 = x1 * resolution.width, y1 * resolution.height, x2 * resolution.width, y2 * resolution.height
                conf = 1
                pred_tm_frame.append([x1, y1, x2, y2, conf, label])
            preds_tm.append(torch.tensor(pred_tm_frame) if len(pred_tm_frame) > 0 else torch.empty((0, 6)))
        return preds_tm

    @staticmethod
    def box_iou(box1, box2, eps=1e-7):
        # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
        """
        Return intersection-over-union (Jaccard index) of boxes.
        Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
        Arguments:
            box1 (Tensor[N, 4])
            box2 (Tensor[M, 4])
        Returns:
            iou (Tensor[N, M]): the NxM matrix containing the pairwise IoU values
            for every element in box1 and box2
        """
        # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
        (a1, a2), (b1, b2) = box1.unsqueeze(1).chunk(2, 2), box2.unsqueeze(0).chunk(2, 2)
        inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp(0).prod(2)
        # IoU = inter / (area1 + area2 - inter)
        return inter / ((a2 - a1).prod(2) + (b2 - b1).prod(2) - inter + eps)
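

# Minimal usage sketch for local testing, assuming `utils.bbox_bep` and
# `seametrics` are importable; the box values and the "model_a" name below
# are made up for illustration, not taken from a real payload.
if __name__ == "__main__":
    metric = box_metrics()
    # one frame with one ground-truth box: (label, x1, y1, x2, y2)
    references = [torch.tensor([[0.0, 100.0, 100.0, 150.0, 160.0]])]
    # per-model predictions for the same frame: (x1, y1, x2, y2, conf, label)
    predictions = {"model_a": [torch.tensor([[104.0, 98.0, 152.0, 158.0, 0.9, 0.0]])]}
    metric.add_batch(predictions, references, sequence_name="demo")
    result = metric.compute(iou_threshold=0.5)
    print(result["demo"]["model_a"]["iou"], result["demo"]["model_a"]["n_matches"])
    # collapse per-sequence results into one entry per model
    print(box_metrics.summarize(result))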