# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Object detection metric module for the `evaluate` library.

Wraps `seametrics.detection.PrecisionRecallF1Support` in an `evaluate.Metric`
so that predictions and references (or a seametrics `Payload`) can be
accumulated and evaluated per area range.
"""

from typing import Dict, List, Literal, Optional, Tuple, Union

import datasets
import evaluate
import numpy as np
from deprecated import deprecated

from seametrics.detection import PrecisionRecallF1Support
from seametrics.payload import Payload

_CITATION = """\
@InProceedings{coco:2020,
title = {Microsoft {COCO:} Common Objects in Context},
authors={Tsung{-}Yi Lin and
            Michael Maire and
            Serge J. Belongie and
            James Hays and
            Pietro Perona and
            Deva Ramanan and
            Piotr Dollar and
            C. Lawrence Zitnick},
booktitle = {Computer Vision - {ECCV} 2014 - 13th European Conference, Zurich,
            Switzerland, September 6-12, 2014, Proceedings, Part {V}},
series = {Lecture Notes in Computer Science},
volume = {8693},
pages = {740--755},
publisher = {Springer},
year={2014}
}
"""

_DESCRIPTION = """\
This evaluation metric is designed to provide object detection metrics at different object size levels.
It is based on a modified version of the commonly used COCO-evaluation metrics.
"""

_KWARGS_DESCRIPTION = """
Calculates object detection metrics given predicted and ground truth bounding boxes for a single image.
Args:
    predictions: list of predictions for each image. Each prediction should be a dict containing the following
        - 'boxes': list of bounding boxes, xywh in absolute pixel values
        - 'labels': list of labels for each bounding box
        - 'scores': list of scores for each bounding box
    references: list of ground truth annotations for each image. Each reference should be a dict containing the following
        - 'boxes': list of bounding boxes, xywh in absolute pixel values
        - 'labels': list of labels for each bounding box
        - 'area': list of areas for each bounding box
Returns:
    dict containing dicts for each specified area range with following items:
        'range': specified area with [min_px_area, max_px_area]
        'iouThr': min. IOU-threshold of a prediction with a ground truth box to be considered a correct prediction
        'maxDets': maximum number of detections
        'tp': number of true positive (correct) predictions
        'fp': number of false positive (incorrect) predictions
        'fn': number of false negative (missed) predictions
        'duplicates': number of duplicate predictions
        'precision': best possible score = 1, worst possible score = 0
            large if few false positive predictions
            formula: tp/(fp+tp)
        'recall': best possible score = 1, worst possible score = 0
            large if few missed predictions
            formula: tp/(tp+fn)
        'f1': best possible score = 1, worst possible score = 0
            trades off precision and recall
            formula: 2*(precision*recall)/(precision+recall)
        'support': number of ground truth bounding boxes considered in the evaluation,
        'fpi': number of images with no ground truth but false positive predictions,
        'nImgs': number of images considered in evaluation
Examples:
    >>> import evaluate
    >>> from seametrics.payload.processor import PayloadProcessor
    >>> payload = PayloadProcessor(...).payload
    >>> module = evaluate.load("SEA-AI/det-metrics", ...)
    >>> module.add_payload(payload)
    >>> result = module.compute()
    >>> print(result)
    {'all': {
        'range': [0, 10000000000.0],
        'iouThr': '0.00',
        'maxDets': 100,
        'tp': 1,
        'fp': 3,
        'fn': 1,
        'duplicates': 0,
        'precision': 0.25,
        'recall': 0.5,
        'f1': 0.3333333333333333,
        'support': 2,
        'fpi': 0,
        'nImgs': 2
        }
    }
"""


# The decorator prepends _DESCRIPTION and _KWARGS_DESCRIPTION to the class
# docstring, so the class itself carries no docstring of its own.
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class DetectionMetric(evaluate.Metric):
    def __init__(
        self,
        area_ranges_tuples: Optional[List[Tuple[str, List[int]]]] = None,
        iou_threshold: Optional[Union[float, List[float]]] = None,
        class_agnostic: bool = True,
        bbox_format: str = "xywh",
        iou_type: Literal["bbox", "segm"] = "bbox",
        **kwargs
    ):
        """Build the metric and its underlying `PrecisionRecallF1Support`.

        Args:
            area_ranges_tuples: list of `(label, area_range)` pairs. The labels
                are forwarded as `area_ranges_labels` and the ranges as
                `area_ranges`. Defaults to `[("all", [0, 1e5**2])]`.
            iou_threshold: a list of IoU thresholds, or a single value that is
                wrapped into a one-element list. Defaults to `[1e-10]`.
            class_agnostic: forwarded to `PrecisionRecallF1Support`.
            bbox_format: forwarded to `PrecisionRecallF1Support` as
                `box_format`.
            iou_type: forwarded to `PrecisionRecallF1Support`.
            **kwargs: forwarded to `evaluate.Metric.__init__`.
        """
        super().__init__(**kwargs)

        # Resolve defaults here rather than in the signature so that no
        # mutable object is shared between instances.
        if area_ranges_tuples is None:
            area_ranges_tuples = [("all", [0, 1e5**2])]
        if iou_threshold is None:
            iou_threshold = [1e-10]

        self.coco_metric = PrecisionRecallF1Support(
            iou_thresholds=(
                iou_threshold if isinstance(iou_threshold, list) else [iou_threshold]
            ),
            area_ranges=[v for _, v in area_ranges_tuples],
            area_ranges_labels=[k for k, _ in area_ranges_tuples],
            class_agnostic=class_agnostic,
            iou_type=iou_type,
            box_format=bbox_format,
        )

    def _info(self):
        """Return the `evaluate.MetricInfo` describing this module's inputs."""
        return evaluate.MetricInfo(
            # This is the description that will appear on the modules page.
            module_type="metric",
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            # This defines the format of each prediction and reference
            features=datasets.Features(
                {
                    "predictions": [
                        datasets.Features(
                            {
                                "boxes": datasets.Sequence(
                                    datasets.Sequence(datasets.Value("float"))
                                ),
                                "labels": datasets.Sequence(datasets.Value("int64")),
                                "scores": datasets.Sequence(datasets.Value("float")),
                            }
                        )
                    ],
                    "references": [
                        datasets.Features(
                            {
                                "boxes": datasets.Sequence(
                                    datasets.Sequence(datasets.Value("float"))
                                ),
                                "labels": datasets.Sequence(datasets.Value("int64")),
                                "area": datasets.Sequence(datasets.Value("float")),
                            }
                        )
                    ],
                }
            ),
            # Additional links to the codebase or references
            codebase_urls=[
                "https://github.com/SEA-AI/seametrics/tree/main",
                "https://lightning.ai/docs/torchmetrics/stable/detection/mean_average_precision.html",
            ],
        )

    def add(self, *, prediction, reference, **kwargs):
        """Adds a batch of predictions and references to the metric

        Args:
            prediction: list of per-image prediction dicts.
            reference: list of per-image reference dicts.
            **kwargs: forwarded to the parent `add`.

        NOTE(review): the dicts are converted in place (lists -> numpy arrays
        and back to lists), so the caller's objects are mutated.
        """
        # in case the inputs are lists, convert them to numpy arrays
        prediction = self._preprocess(prediction)
        reference = self._preprocess(reference)
        self.coco_metric.update(prediction, reference)

        # does not impact the metric, but is required for the interface x_x
        # super(evaluate.Metric, self) starts the method lookup *after*
        # evaluate.Metric in the MRO.
        # NOTE(review): the keyword is `references` (plural) while this method
        # takes `reference` — presumably picked up through the parent's
        # **kwargs; confirm against the evaluate version in use.
        super(evaluate.Metric, self).add(
            prediction=self._postprocess(prediction),
            references=self._postprocess(reference),
            **kwargs
        )

    @deprecated(reason="Use `module.add_payload` instead")
    def add_batch(self, payload: Payload, model_name: Optional[str] = None):
        """Takes as input a payload and adds the batch to the metric"""
        self.add_payload(payload, model_name)

    def _compute(self, *, predictions=None, references=None, **kwargs):
        """Called within the evaluate.Metric.compute() method

        `predictions` and `references` are accepted for interface
        compatibility but are not read: the result comes from the state
        accumulated in `self.coco_metric`. They default to `None` so that
        `compute_for_multiple_models` can call this method without arguments.
        """
        return self.coco_metric.compute()

    def add_payload(self, payload: Payload, model_name: Optional[str] = None):
        """Converts the payload to the format expected by the metric

        Args:
            payload: payload to convert with `payload_to_det_metric`.
            model_name: forwarded to `payload_to_det_metric`.

        Returns:
            `self`, so calls can be chained.
        """
        # import only if needed since fiftyone is not a direct dependency
        from seametrics.detection.utils import payload_to_det_metric

        predictions, references = payload_to_det_metric(payload, model_name)
        self.add(prediction=predictions, reference=references)
        return self

    def _preprocess(self, list_of_dicts):
        """Converts the lists to numpy arrays for type checking"""
        return [self._lists_to_np(d) for d in list_of_dicts]

    def _postprocess(self, list_of_dicts):
        """Converts the numpy arrays to lists for type checking"""
        return [self._np_to_lists(d) for d in list_of_dicts]

    def _np_to_lists(self, d):
        """datasets does not support numpy arrays for type checking

        Recursively replaces numpy arrays in `d` with lists, in place, and
        returns `d`.
        """
        for k, v in d.items():
            if isinstance(v, dict):
                self._np_to_lists(v)
            elif isinstance(v, np.ndarray):
                d[k] = v.tolist()
        return d

    def _lists_to_np(self, d):
        """Recursively replaces lists in `d` with numpy arrays, in place.

        Returns `d` itself.
        """
        for k, v in d.items():
            if isinstance(v, dict):
                self._lists_to_np(v)
            elif isinstance(v, list):
                d[k] = np.array(v)
        return d

    def compute_for_multiple_models(self, payload):
        """Evaluate every model listed in `payload.models`.

        Returns:
            dict mapping each model name to the output of `_compute()`.

        NOTE(review): nothing here resets `self.coco_metric` between models —
        confirm that `compute()` clears the accumulated state, otherwise
        later models include earlier models' detections.
        """
        results = {}
        for model_name in payload.models:
            self.add_payload(payload, model_name)
            results[model_name] = self._compute()

        return results

    def generate_confidence_curves(
        self,
        results,
        models,
        confidence_config: Optional[Dict[str, int]] = None,
    ):
        """Plot precision, recall and f1 against confidence for each model.

        Args:
            results: dict indexed by model name; each entry must provide an
                'eval' item, which is passed to `get_confidence_metric_vals`.
            models: iterable of dicts, each with a 'name' key.
            confidence_config: dict with keys 'T', 'R', 'K', 'A', 'M' that are
                forwarded to `get_confidence_metric_vals` (presumably indices
                into the COCO evaluation arrays — TODO confirm). Defaults to
                all zeros.

        Returns:
            The plotly figure, after it has been shown with `fig.show()`.
        """
        import plotly.graph_objects as go
        from seametrics.detection.utils import get_confidence_metric_vals

        # Resolve the default here to avoid a shared mutable default argument.
        if confidence_config is None:
            confidence_config = {"T": 0, "R": 0, "K": 0, "A": 0, "M": 0}

        # Create traces
        fig = go.Figure()
        metrics = ['precision', 'recall', 'f1']
        for model in models:
            plot_data = get_confidence_metric_vals(
                cocoeval=results[model['name']]['eval'],
                T=confidence_config['T'],
                R=confidence_config['R'],
                K=confidence_config['K'],
                A=confidence_config['A'],
                M=confidence_config['M'],
            )

            for metric in metrics:
                fig.add_trace(
                    go.Scatter(
                        x=plot_data['conf'],
                        y=plot_data[metric],
                        mode='lines',
                        # legend entry uses the model name up to the first "_"
                        name=f"{model['name'].split('_')[0]} {metric}",
                        # f1 is drawn solid, precision and recall dashed
                        line=dict(dash=None if metric == 'f1' else 'dash'),
                    )
                )

        fig.update_layout(
            title="Metric vs Confidence",
            hovermode='x unified',
            xaxis_title="Confidence",
            yaxis_title="Metric value")
        fig.show()
        return fig