# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Object detection metrics (precision, recall, F1, support) at different object size levels,
based on a modified version of the COCO evaluation metrics."""

from typing import List, Tuple, Optional, Literal

import evaluate
import datasets
import numpy as np

from modified_coco.pr_rec_f1 import PrecisionRecallF1Support


_CITATION = """\
@InProceedings{coco:2020,
    title     = {Microsoft {COCO:} Common Objects in Context},
    author    = {Tsung{-}Yi Lin and Michael Maire and Serge J. Belongie and James Hays and
                 Pietro Perona and Deva Ramanan and Piotr Dollar and C. Lawrence Zitnick},
    booktitle = {Computer Vision - {ECCV} 2014 - 13th European Conference, Zurich, Switzerland,
                 September 6-12, 2014, Proceedings, Part {V}},
    series    = {Lecture Notes in Computer Science},
    volume    = {8693},
    pages     = {740--755},
    publisher = {Springer},
    year      = {2014}
}
"""

_DESCRIPTION = """\
This evaluation metric provides object detection metrics at different object size levels.
It is based on a modified version of the commonly used COCO evaluation metrics.
"""

_KWARGS_DESCRIPTION = """
Calculates object detection metrics given predicted and ground truth bounding boxes for one
or more images.
Args:
    predictions: list of predictions to score. Each predicted bounding box should be a list
        containing the four coordinates that specify the box. The coordinate format is as
        defined when instantiating the metric (parameter: bbox_format, defaults to xywh).
    references: list of references for each prediction. Each reference bounding box should be
        a list containing the four coordinates that specify the box. The bounding box format
        should be the same as for the predictions.
Returns:
    dict containing a dict for each specified area range with the following items:
    'range': specified area range as [min_px_area, max_px_area]
    'iouThr': minimum IoU threshold between a prediction and a ground truth box for the
        prediction to be considered correct
    'maxDets': maximum number of detections
    'tp': number of true positive (correct) predictions
    'fp': number of false positive (incorrect) predictions
    'fn': number of false negative (missed) predictions
    'duplicates': number of duplicate predictions
    'precision': best possible score = 1, worst possible score = 0;
        large if few false positive predictions; formula: tp / (tp + fp)
    'recall': best possible score = 1, worst possible score = 0;
        large if few missed predictions; formula: tp / (tp + fn)
    'f1': best possible score = 1, worst possible score = 0;
        trades off precision and recall; formula: 2 * (precision * recall) / (precision + recall)
    'support': number of ground truth bounding boxes considered in the evaluation
    'fpi': number of images with no ground truth but false positive predictions
    'nImgs': number of images considered in the evaluation
Examples:
    >>> module = evaluate.load("./detection_metric.py")
    >>> predictions = [[[10, 15, 20, 25], [45, 30, 10, 10]], [[14, 25, 6, 6], [10, 16, 6, 10]]]
    >>> references = [[[10, 15, 20, 20]], [[30, 30, 5, 6]]]
    >>> module.add_batch(predictions=predictions, references=references,
    ...                  predictions_scores=[[0.5, 0.3], [0.8, 0.1]])
    >>> result = module.compute()
    >>> print(result)
    {'all': {'range': [0, 10000000000.0], 'iouThr': '0.00', 'maxDets': 100, 'tp': 1, 'fp': 3, 'fn': 1, 'duplicates': 0, 'precision': 0.25, 'recall': 0.5, 'f1': 0.3333333333333333, 'support': 2, 'fpi': 0, 'nImgs': 2}}
"""
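
# Worked example for the formulas above (illustrative comment only; the numbers are taken
# from the docstring example): with tp = 1, fp = 3 and fn = 1,
#   precision = tp / (tp + fp) = 1 / (1 + 3) = 0.25
#   recall    = tp / (tp + fn) = 1 / (1 + 1) = 0.5
#   f1        = 2 * (precision * recall) / (precision + recall) = 0.25 / 0.75 = 0.3333...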


@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class DetectionMetric(evaluate.Metric):
    def __init__(
        self,
        area_ranges_tuples: List[Tuple[str, List[int]]] = [("all", [0, 1e5 ** 2])],
        iou_threshold: float = 1e-10,
        class_agnostic: bool = True,
        bbox_format: str = "xywh",
        iou_type: Literal["bbox", "segm"] = "bbox",
        **kwargs
    ):
        super().__init__(**kwargs)
        area_ranges = [v for _, v in area_ranges_tuples]
        area_ranges_labels = [k for k, _ in area_ranges_tuples]
        metric_params = dict(
            iou_thresholds=[iou_threshold],
            area_ranges=area_ranges,
            area_ranges_labels=area_ranges_labels,
            class_agnostic=class_agnostic,
            iou_type=iou_type,
            box_format=bbox_format
        )
        self.coco_metric = PrecisionRecallF1Support(**metric_params)

    def _info(self):
        return evaluate.MetricInfo(
            # This is the description that will appear on the modules page.
            module_type="metric",
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            # This defines the format of each prediction and reference
            features=datasets.Features(
                {
                    'predictions': datasets.Sequence(feature=datasets.Sequence(datasets.Value("float"))),
                    'references': datasets.Sequence(feature=datasets.Sequence(datasets.Value("float"))),
                }
            ),
            # Additional links to the codebase or references
            codebase_urls=[
                "https://github.com/SEA-AI/metrics/tree/main",
                "https://github.com/cocodataset/cocoapi/tree/master"
            ]
        )

    def add_batch(
        self,
        predictions,
        references,
        predictions_labels: Optional[np.ndarray] = None,
        predictions_scores: Optional[np.ndarray] = None,
        references_labels: Optional[np.ndarray] = None
    ):
        """Add predictions and ground truths of a batch of images to update the metric.

        Args:
            predictions (List[List[List[int]]]): predicted bounding boxes,
                shape: (n_images, m_pred_boxes, 4)
            references (List[List[List[int]]]): ground truth bounding boxes,
                shape: (n_images, l_gt_boxes, 4)
            predictions_labels (Optional[np.ndarray], optional): labels of predicted bounding
                boxes, shape: (n_images, m_pred_boxes). Defaults to None.
            predictions_scores (Optional[np.ndarray], optional): scores of predicted bounding
                boxes, shape: (n_images, m_pred_boxes). Defaults to None.
            references_labels (Optional[np.ndarray], optional): labels of ground truth bounding
                boxes, shape: (n_images, l_gt_boxes). Defaults to None.
        """
        if predictions_labels is None:
            predictions_labels = [None] * len(predictions)
        if predictions_scores is None:
            predictions_scores = [None] * len(predictions)
        if references_labels is None:
            references_labels = [None] * len(references)

        for pred, ref, pred_score, pred_l, ref_l in zip(
            predictions, references, predictions_scores, predictions_labels, references_labels
        ):
            preds, targets = self.process_preds_references(pred, ref, pred_l, pred_score, ref_l)
            self.coco_metric.update(preds, targets)

        super(evaluate.Metric, self).add_batch(predictions=predictions, references=references)

    def _compute(
        self,
        predictions,
        references
    ):
        """Returns the scores"""
        result = self.coco_metric.compute()["metrics"]
        return result

    @staticmethod
    def process_preds_references(
        predictions,
        references,
        predictions_labels: Optional[np.ndarray] = None,
        predictions_scores: Optional[np.ndarray] = None,
        references_labels: Optional[np.ndarray] = None
    ):
        if predictions_scores is None:
            predictions_scores = np.ones(shape=len(predictions), dtype=np.float32)
        else:
            predictions_scores = np.array(predictions_scores, dtype=np.float32)

        if predictions_labels is None:
            if references_labels is not None:
                print("Warning: No prediction labels provided, but ground truth labels were given!")
            predictions_labels = np.zeros(shape=len(predictions), dtype=np.int16)
        else:
            predictions_labels = np.array(predictions_labels)

        if references_labels is None:
            references_labels = np.zeros(shape=len(references), dtype=np.int16)
        else:
            references_labels = np.array(references_labels)

        preds = [
            dict(
                boxes=np.array(predictions),
                scores=predictions_scores,
                labels=predictions_labels
            )
        ]
        target = [
            dict(
                boxes=np.array(references),
                labels=references_labels
            )
        ]
        return preds, target
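

if __name__ == "__main__":
    # Minimal usage sketch (illustrative only, not part of the metric itself). It assumes this
    # file is saved as "detection_metric.py" in the current working directory and that the
    # `modified_coco` package is importable. The extra size bands below follow the usual
    # COCO small/medium/large convention and are just an example configuration.
    module = evaluate.load(
        "./detection_metric.py",
        area_ranges_tuples=[
            ("all", [0, 1e5 ** 2]),
            ("small", [0, 32 ** 2]),
            ("medium", [32 ** 2, 96 ** 2]),
            ("large", [96 ** 2, 1e5 ** 2]),
        ],
        iou_threshold=0.5,
    )
    predictions = [[[10, 15, 20, 25], [45, 30, 10, 10]], [[14, 25, 6, 6], [10, 16, 6, 10]]]
    references = [[[10, 15, 20, 20]], [[30, 30, 5, 6]]]
    module.add_batch(
        predictions=predictions,
        references=references,
        predictions_scores=[[0.5, 0.3], [0.8, 0.1]],
    )
    # Prints one dict of tp/fp/fn/precision/recall/f1/support per configured area range.
    print(module.compute())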