# Hugging Face Space: SEA-AI/box-metrics (status: Running)
# File size: 16,160 Bytes

import evaluate
import datasets
import numpy as np
from seametrics.payload import Payload
import torch
import datasets

_CITATION = """\
@InProceedings{huggingface:module,
title = {A great new module},
authors={huggingface, Inc.},
year={2020}
}\
@article{milan2016mot16,
  title={MOT16: A benchmark for multi-object tracking},
  author={Milan, Anton and Leal-Taix{\'e}, Laura and Reid, Ian and Roth, Stefan and Schindler, Konrad},
  journal={arXiv preprint arXiv:1603.00831},
  year={2016}
}
"""

# Human-readable summary of the module. It is passed as `description` to
# `evaluate.MetricInfo` and prepended to the class docstring by
# `add_start_docstrings`.
# NOTE(review): the wording talks about multi-object tracking (MOT) metrics,
# while this file reports box-level statistics (IoU, BEP, size/position
# errors) — confirm the text is still the intended one.
_DESCRIPTION = """\
The MOT Metrics module is designed to evaluate multi-object tracking (MOT) 
algorithms by computing various metrics based on predicted and ground truth bounding 
boxes. It serves as a crucial tool in assessing the performance of MOT systems, 
aiding in the iterative improvement of tracking algorithms."""


_KWARGS_DESCRIPTION = """
Calculates how good are predictions given some references, using certain scores
Args:
    predictions: list of predictions to score. Each predictions
        should be a string with tokens separated by spaces.
    references: list of reference for each prediction. Each
        reference should be a string with tokens separated by spaces.
    max_iou (`float`, *optional*):
        If specified, this is the minimum Intersection over Union (IoU) threshold to consider a detection as a true positive.
        Default is 0.5.
"""

@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class box_metrics(evaluate.Metric):

    def __init__(self, **kwargs):
        """Initialise the metric with an empty box store.

        All keyword arguments are forwarded unchanged to the parent class.
        """
        super().__init__(**kwargs)
        # Key under which the ground truth boxes of a sequence are stored;
        # `add` replaces it with the field name announced by the payload.
        self.gt_field = "ground_truth_det"
        # {sequence_name: {gt_field or model_name: list of per-frame boxes}}
        self.boxes = dict()


    def _info(self):
        """Return the `evaluate.MetricInfo` describing this module.

        Both `predictions` and `references` are declared as sequences of
        sequences of floats.
        """

        def nested_floats():
            # A fresh feature object per input, exactly one per key.
            return datasets.Sequence(datasets.Sequence(datasets.Value("float")))

        input_features = datasets.Features({
            "predictions": nested_floats(),
            "references": nested_floats(),
        })

        # TODO: replace the placeholder URLs below with the real ones.
        info = evaluate.MetricInfo(
            module_type="metric",
            # Text shown on the modules page.
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            # Format of each prediction and reference.
            features=input_features,
            # Additional links to the codebase or references.
            codebase_urls=["http://github.com/path/to/codebase/of/new_module"],
            reference_urls=["http://path.to.reference.url/new_module"],
        )
        return info


    def add_payload(self, payload: Payload):
        """Register a payload; thin alias that forwards `payload` to `add`."""
        self.add(payload)

    def add(self, payload: Payload):
        """Store ground truth and model predictions of every payload sequence.

        The payload's ground truth field name becomes `self.gt_field`. For each
        sequence, the ground truth is converted with `payload_labels_to_tm` and
        every model listed in `payload.models` with `payload_preds_to_rm`; the
        results are kept in `self.boxes[sequence]`, replacing any previous
        entry for that sequence.
        """
        self.gt_field = payload.gt_field_name

        for sequence in payload.sequences:
            converted = {}
            self.boxes[sequence] = converted

            sequence_data = payload.sequences[sequence]
            ground_truth = sequence_data[self.gt_field]
            resolution = sequence_data["resolution"]
            converted[self.gt_field] = self.payload_labels_to_tm(ground_truth, resolution)

            for model in payload.models:
                model_preds = payload.sequences[sequence][model]
                converted[model] = self.payload_preds_to_rm(model_preds, resolution)
            
    def add_batch(self, predictions, references, sequence_name = "sequence"):
        """Register already converted boxes for one sequence (mainly for tests).

        Args:
            predictions: dict of {model_name: list of boxes as [n, 6] tensors},
                box format: x1, y1, x2, y2, conf, label
            references: list of boxes as [n, 5] tensors,
                box format: label, x1, y1, x2, y2
            sequence_name: key under which the boxes are stored; an existing
                entry with the same name is replaced.
        """
        # Ground truth goes in first, then one entry per model.
        stored = {}
        self.boxes[sequence_name] = stored
        stored[self.gt_field] = references
        stored.update((model, predictions[model]) for model in predictions)


    def compute(self,
                iou_threshold: float = 0.01,
                only_tp = True):
        """Compute box-level statistics for every stored sequence and model.

        Frame by frame, ground truth and predicted boxes are paired when their
        IoU exceeds ``iou_threshold``. Every pair whose ground truth box is at
        least 10 units high contributes its IoU, its bottom edge proximity
        (BEP) and its signed errors, always taken as prediction minus ground
        truth: bottom-centre x, bottom y, width and height, both absolute and
        normalised (``e_n_*``) by the ground truth width (x, width) or height
        (y, height).

        Args:
            iou_threshold: IoU a pair has to exceed to count as a match.
            only_tp: if True, each ground truth box and each prediction is
                used in at most one match, preferring high-IoU pairs.

        Returns:
            dict: ``{sequence: {model: stats}}`` where ``stats`` holds ``iou``,
            ``bep``, the ``*_mean`` / ``*_std`` of every error and
            ``n_matches``. Statistics of a model without any match are NaN.

        Raises:
            ValueError: if a matched pair yields a negative IoU or BEP.
        """
        output = {}

        for sequence, sequence_boxes in self.boxes.items():
            output[sequence] = {}
            labels = sequence_boxes[self.gt_field]

            for model, detections in sequence_boxes.items():
                # The ground truth lives in the same dict as the predictions;
                # it is the reference, not a model to be scored.
                if model == self.gt_field:
                    continue

                # Fresh accumulators per model so that the statistics of
                # different models never leak into each other.
                ious, beps = [], []
                e_bottom_x, e_bottom_y, e_widths, e_heights = [], [], [], []
                e_n_bottom_x, e_n_bottom_y, e_n_widths, e_n_heights = [], [], [], []

                for i, frame_detections in enumerate(detections):
                    frame_labels = labels[i]

                    # Column 0 of a label row is the class, columns 1-4 the box;
                    # prediction rows start with the box.
                    iou = self.box_iou(frame_labels[:, 1:], frame_detections[:, :4])
                    matches = self._match_boxes(iou, iou_threshold, only_tp)

                    labels_i, detections_i, ious_v = matches.transpose()
                    labels_i = labels_i.astype(int)
                    detections_i = detections_i.astype(int)

                    for label_idx, detection_idx, pair_iou in zip(labels_i, detections_i, ious_v):
                        t_box = frame_labels[label_idx][1:]
                        p_box = frame_detections[detection_idx][:4]

                        bep = bbox_bep(t_box.unsqueeze(0), p_box.unsqueeze(0))
                        if pair_iou < 0:
                            raise ValueError("IoU should be greater than 0, pls contact code maintainer")
                        if bep < 0:
                            raise ValueError("BEP should be greater than 0, pls contact code maintainer")

                        t_x1, t_y1, t_x2, t_y2 = t_box[:4].tolist()
                        p_x1, p_y1, p_x2, p_y2 = p_box[:4].tolist()

                        # Bottom-centre point: horizontal centre, lower edge.
                        t_xc, t_yc = (t_x1 + t_x2) / 2, t_y2
                        p_xc, p_yc = (p_x1 + p_x2) / 2, p_y2
                        t_w, t_h = t_x2 - t_x1, t_y2 - t_y1
                        p_w, p_h = p_x2 - p_x1, p_y2 - p_y1

                        # Very small ground truth boxes are left out of the statistics.
                        if t_h < 10:
                            continue

                        # Plain floats keep the accumulation in double precision.
                        ious.append(float(pair_iou))
                        beps.append(bep.item())

                        e_widths.append(p_w - t_w)
                        e_heights.append(p_h - t_h)
                        e_bottom_x.append(p_xc - t_xc)
                        e_bottom_y.append(p_yc - t_yc)

                        e_n_widths.append((p_w - t_w) / t_w)
                        e_n_heights.append((p_h - t_h) / t_h)
                        e_n_bottom_x.append((p_xc - t_xc) / t_w)
                        e_n_bottom_y.append((p_yc - t_yc) / t_h)

                mean = lambda values: self._nan_if_empty(np.mean, values)
                std = lambda values: self._nan_if_empty(np.std, values)

                output[sequence][model] = {
                    "iou": mean(ious),
                    "bep": mean(beps),
                    "e_bottom_x_mean": mean(e_bottom_x),
                    "e_bottom_y_mean": mean(e_bottom_y),
                    "e_width_mean": mean(e_widths),
                    "e_height_mean": mean(e_heights),
                    "e_n_bottom_x_mean": mean(e_n_bottom_x),
                    "e_n_bottom_y_mean": mean(e_n_bottom_y),
                    "e_n_width_mean": mean(e_n_widths),
                    "e_n_height_mean": mean(e_n_heights),
                    "e_bottom_x_std": std(e_bottom_x),
                    "e_bottom_y_std": std(e_bottom_y),
                    "e_width_std": std(e_widths),
                    "e_height_std": std(e_heights),
                    "e_n_bottom_x_std": std(e_n_bottom_x),
                    "e_n_bottom_y_std": std(e_n_bottom_y),
                    "e_n_width_std": std(e_n_widths),
                    "e_n_height_std": std(e_n_heights),
                    "n_matches": len(e_n_heights),
                }

        return output

    @staticmethod
    def _match_boxes(iou, iou_threshold, only_tp):
        """Return a (k, 3) array of [label index, detection index, IoU] pairs above the threshold."""
        candidates = torch.where(iou > iou_threshold)
        n_candidates = candidates[0].shape[0]
        if not n_candidates:
            return np.zeros((0, 3))

        matches = torch.cat(
            (torch.stack(candidates, 1), iou[candidates[0], candidates[1]][:, None]), 1
        ).cpu().numpy()

        if n_candidates > 1 and only_tp:
            # Sort by IoU (descending) and keep the first pair per detection,
            # then repeat per label, so every box is used at most once.
            matches = matches[matches[:, 2].argsort()[::-1]]
            matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
            matches = matches[matches[:, 2].argsort()[::-1]]
            matches = matches[np.unique(matches[:, 0], return_index=True)[1]]

        return matches

    @staticmethod
    def _nan_if_empty(reducer, values):
        """Apply ``reducer`` to ``values``; return NaN (without a NumPy warning) for an empty sample."""
        return reducer(values) if len(values) else float("nan")

    @staticmethod
    def summarize(result):
        """Average every metric over all sequences, grouped by model.

        Args:
            result: nested dict ``{sequence: {model: {metric: value}}}``.

        Returns:
            dict: ``{model: {metric: mean over the sequences}}``.
        """
        collected = {}
        for per_model in result.values():
            for model, metrics in per_model.items():
                per_metric = collected.setdefault(model, {})
                for metric, value in metrics.items():
                    per_metric.setdefault(metric, []).append(value)

        # Collapse each list of per-sequence values into its mean.
        return {
            model: {metric: np.mean(values) for metric, values in per_metric.items()}
            for model, per_metric in collected.items()
        }
    
    @staticmethod
    def payload_labels_to_tm(labels, resolution):
        """Turn the ground truth of a payload sequence into per-frame tensors.

        Each detection's ``"bounding_box"`` is read as (x, y, width, height),
        converted to corner form and scaled by ``resolution.width`` /
        ``resolution.height`` (presumably normalised coordinates — confirm
        against the payload producer). The class label is always 0.

        Returns:
            list: one tensor per frame with rows (label, x1, y1, x2, y2);
            frames without detections yield an empty (0, 5) tensor.
        """
        frames = []
        for frame in labels:
            rows = [
                [
                    0,
                    box[0] * resolution.width,
                    box[1] * resolution.height,
                    (box[0] + box[2]) * resolution.width,
                    (box[1] + box[3]) * resolution.height,
                ]
                for box in (det["bounding_box"] for det in frame)
            ]
            frames.append(torch.tensor(rows) if rows else torch.empty((0, 5)))

        return frames
    
    @staticmethod
    def payload_preds_to_rm(preds, resolution):
        """Turn the predictions of a payload sequence into per-frame tensors.

        Each detection's ``"bounding_box"`` is read as (x, y, width, height),
        converted to corner form and scaled by ``resolution.width`` /
        ``resolution.height`` (presumably normalised coordinates — confirm
        against the payload producer). Confidence is fixed to 1 and the class
        label to 0.

        Returns:
            list: one tensor per frame with rows (x1, y1, x2, y2, conf, label);
            frames without detections yield an empty (0, 6) tensor.
        """
        frames = []
        for frame in preds:
            rows = [
                [
                    box[0] * resolution.width,
                    box[1] * resolution.height,
                    (box[0] + box[2]) * resolution.width,
                    (box[1] + box[3]) * resolution.height,
                    1,
                    0,
                ]
                for box in (det["bounding_box"] for det in frame)
            ]
            frames.append(torch.tensor(rows) if rows else torch.empty((0, 6)))

        return frames
    
    @staticmethod
    def box_iou(box1, box2, eps=1e-7):
        # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
        """
        Pairwise intersection-over-union (Jaccard index) of two box sets.
        Boxes are expected in (x1, y1, x2, y2) format.
        Arguments:
            box1 (Tensor[N, 4])
            box2 (Tensor[M, 4])
            eps (float): added to the denominator to avoid division by zero
        Returns:
            iou (Tensor[N, M]): IoU of every box in box1 with every box in box2
        """
        # Split each box into its top-left and bottom-right corner and add a
        # broadcast axis so that the two sets combine into an N x M grid.
        top_left_1, bottom_right_1 = box1.unsqueeze(1).chunk(2, 2)
        top_left_2, bottom_right_2 = box2.unsqueeze(0).chunk(2, 2)

        # Overlap extent per axis, floored at zero, multiplied into an area.
        overlap = (
            torch.min(bottom_right_1, bottom_right_2) - torch.max(top_left_1, top_left_2)
        ).clamp(0).prod(2)

        area_1 = (bottom_right_1 - top_left_1).prod(2)
        area_2 = (bottom_right_2 - top_left_2).prod(2)

        # IoU = inter / (area1 + area2 - inter)
        return overlap / (area_1 + area_2 - overlap + eps)
    

def bbox_bep(box1, box2, xywh=False, eps=1e-7, bep1 = True):
    """
    Calculates the bottom edge proximity (BEP) between two boxes.

    Input shapes are box1(1,4) to box2(n,4).

    BEP is the product of a horizontal term and a bottom-edge term. With
    ``bep1=True`` (the default) the horizontal term is normalised by the
    combined horizontal extent of both boxes and the vertical term by the
    larger of the two heights; with ``bep1=False`` both terms are normalised
    by box2 only.

    Based on
        Are object detection assessment criteria ready for maritime computer vision?

    Arguments:
        box1, box2: boxes as (x1, y1, x2, y2), or as (x_center, y_center, w, h)
            when ``xywh`` is True
        xywh (bool): input format switch
        eps (float): stabiliser used in the ``bep1`` denominators and as lower
            bound of the heights in the xyxy branch
        bep1 (bool): select the first (True) or second (False) BEP variant
    """
    if xywh:
        # Centre/size format: derive the edges that are needed below.
        (cx1, cy1, w1, h1), (cx2, cy2, w2, h2) = box1.chunk(4, -1), box2.chunk(4, -1)
        half_w1, half_h1, half_w2, half_h2 = w1 / 2, h1 / 2, w2 / 2, h2 / 2
        left1, right1, bottom1 = cx1 - half_w1, cx1 + half_w1, cy1 + half_h1
        left2, right2, bottom2 = cx2 - half_w2, cx2 + half_w2, cy2 + half_h2
    else:
        # Corner format: sizes are derived, heights are kept strictly positive.
        left1, top1, right1, bottom1 = box1.chunk(4, -1)
        left2, top2, right2, bottom2 = box2.chunk(4, -1)
        w1, h1 = right1 - left1, (bottom1 - top1).clamp(eps)
        w2, h2 = right2 - left2, (bottom2 - top2).clamp(eps)

    # Shared horizontal extent and vertical distance of the bottom edges.
    shared = torch.min(right2 - left1, right1 - left2)
    rest2 = w2 - shared
    rest1 = w1 - shared
    bottom_gap = torch.abs(bottom2 - bottom1)

    if bep1:
        horizontal = shared / (shared + rest2 + rest1 + eps)
        vertical = 1 - bottom_gap / (torch.max(h2, h1) + eps)
    else:
        horizontal = shared / (shared + rest2)
        vertical = 1 - bottom_gap / h2

    return horizontal * vertical

def bbox_iou(box1, box2, xywh=False, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
    """
    Calculates IoU, GIoU, DIoU, or CIoU between two boxes, supporting xywh/xyxy formats.

    Input shapes are box1(1,4) to box2(n,4).

    Arguments:
        box1, box2: boxes as (x1, y1, x2, y2), or as (x_center, y_center, w, h)
            when ``xywh`` is True
        xywh (bool): input format switch
        GIoU, DIoU, CIoU (bool): select the IoU variant; CIoU takes precedence
            over DIoU, which takes precedence over GIoU. Plain IoU if all are False.
        eps (float): stabiliser added to denominators and used as lower bound
            of the heights in the xyxy branch

    Returns:
        Tensor with the requested overlap measure for every box pair.
    """
    # Function-scope import: `math` is only needed for the CIoU term below.
    import math

    # Get the coordinates of bounding boxes
    if xywh:  # transform from xywh to xyxy
        (x1, y1, w1, h1), (x2, y2, w2, h2) = box1.chunk(4, -1), box2.chunk(4, -1)
        w1_, h1_, w2_, h2_ = w1 / 2, h1 / 2, w2 / 2, h2 / 2
        b1_x1, b1_x2, b1_y1, b1_y2 = x1 - w1_, x1 + w1_, y1 - h1_, y1 + h1_
        b2_x1, b2_x2, b2_y1, b2_y2 = x2 - w2_, x2 + w2_, y2 - h2_, y2 + h2_
    else:  # x1, y1, x2, y2 = box1
        b1_x1, b1_y1, b1_x2, b1_y2 = box1.chunk(4, -1)
        b2_x1, b2_y1, b2_x2, b2_y2 = box2.chunk(4, -1)
        w1, h1 = b1_x2 - b1_x1, (b1_y2 - b1_y1).clamp(eps)
        w2, h2 = b2_x2 - b2_x1, (b2_y2 - b2_y1).clamp(eps)

    # Intersection area
    inter = (b1_x2.minimum(b2_x2) - b1_x1.maximum(b2_x1)).clamp(0) * (
        b1_y2.minimum(b2_y2) - b1_y1.maximum(b2_y1)
    ).clamp(0)

    # Union Area
    union = w1 * h1 + w2 * h2 - inter + eps

    # IoU
    iou = inter / union
    if CIoU or DIoU or GIoU:
        cw = b1_x2.maximum(b2_x2) - b1_x1.minimum(b2_x1)  # convex (smallest enclosing box) width
        ch = b1_y2.maximum(b2_y2) - b1_y1.minimum(b2_y1)  # convex height
        if CIoU or DIoU:  # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
            c2 = cw**2 + ch**2 + eps  # convex diagonal squared
            rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4  # center dist ** 2
            if CIoU:  # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
                # Aspect-ratio consistency term
                v = (4 / math.pi**2) * (torch.atan(w2 / h2) - torch.atan(w1 / h1)).pow(2)
                # The trade-off weight is treated as a constant (no gradient).
                with torch.no_grad():
                    alpha = v / (v - iou + (1 + eps))
                return iou - (rho2 / c2 + v * alpha)  # CIoU
            return iou - rho2 / c2  # DIoU
        c_area = cw * ch + eps  # convex area
        return iou - (c_area - union) / c_area  # GIoU https://arxiv.org/pdf/1902.09630.pdf
    return iou  # IoU