det-metrics / det-metrics.py
Victoria Oberascher
add function to generate confidence curves
003e48f
raw
history blame
11.5 kB
# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""TODO: Add a description here."""
from typing import List, Literal, Optional, Tuple

import datasets
import evaluate
import numpy as np
from deprecated import deprecated
from seametrics.detection import PrecisionRecallF1Support
from seametrics.payload import Payload
_CITATION = """\
@InProceedings{coco:2020,
title = {Microsoft {COCO:} Common Objects in Context},
authors={Tsung{-}Yi Lin and
Michael Maire and
Serge J. Belongie and
James Hays and
Pietro Perona and
Deva Ramanan and
Piotr Dollar and
C. Lawrence Zitnick},
booktitle = {Computer Vision - {ECCV} 2014 - 13th European Conference, Zurich,
Switzerland, September 6-12, 2014, Proceedings, Part {V}},
series = {Lecture Notes in Computer Science},
volume = {8693},
pages = {740--755},
publisher = {Springer},
year={2014}
}
"""
_DESCRIPTION = """\
This evaluation metric is designed to provide object detection metrics at
different object size levels. It is based on a modified version of the commonly used
COCO-evaluation metrics.
"""
_KWARGS_DESCRIPTION = """
Calculates object detection metrics given predicted and ground truth bounding boxes for
a single image.
Args:
predictions: list of predictions for each image. Each prediction should
be a dict containing the following
- 'boxes': list of bounding boxes, xywh in absolute pixel values
- 'labels': list of labels for each bounding box
- 'scores': list of scores for each bounding box
references: list of ground truth annotations for each image. Each reference should
be a dict containing the following
- 'boxes': list of bounding boxes, xywh in absolute pixel values
- 'labels': list of labels for each bounding box
- 'area': list of areas for each bounding box
Returns:
dict containing dicts for each specified area range with following items:
'range': specified area with [min_px_area, max_px_area]
'iouThr': min. IOU-threshold of a prediction with a ground truth box
to be considered a correct prediction
'maxDets': maximum number of detections
'tp': number of true positive (correct) predictions
'fp': number of false positive (incorrect) predictions
'fn': number of false negative (missed) predictions
'duplicates': number of duplicate predictions
'precision': best possible score = 1, worst possible score = 0
large if few false positive predictions
formula: tp/(fp+tp)
'recall': best possible score = 1, worst possible score = 0
large if few missed predictions
formula: tp/(tp+fn)
'f1': best possible score = 1, worst possible score = 0
trades off precision and recall
formula: 2*(precision*recall)/(precision+recall)
'support': number of ground truth bounding boxes considered in the evaluation,
'fpi': number of images with no ground truth but false positive predictions,
'nImgs': number of images considered in evaluation
Examples:
>>> import evaluate
>>> from seametrics.payload.processor import PayloadProcessor
>>> payload = PayloadProcessor(...).payload
>>> module = evaluate.load("SEA-AI/det-metrics", ...)
>>> module.add_payload(payload)
>>> result = module.compute()
>>> print(result)
{'all': {
'range': [0, 10000000000.0],
'iouThr': '0.00',
'maxDets': 100,
'tp': 1,
'fp': 3,
'fn': 1,
'duplicates': 0,
'precision': 0.25,
'recall': 0.5,
'f1': 0.3333333333333333,
'support': 2,
'fpi': 0,
'nImgs': 2
}
}
"""
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class DetectionMetric(evaluate.Metric):
    """Detection metric that delegates the computation to ``PrecisionRecallF1Support``.

    Every input is forwarded to the wrapped ``coco_metric`` object, which is also
    what produces the results. The same data is additionally handed to the
    ``evaluate.Metric`` machinery, but only to satisfy its interface.
    """

    def __init__(
        self,
        area_ranges_tuples: Optional[List[Tuple[str, List[float]]]] = None,
        iou_threshold: Optional[List[float]] = None,
        class_agnostic: bool = True,
        bbox_format: str = "xywh",
        iou_type: Literal["bbox", "segm"] = "bbox",
        **kwargs
    ):
        """Create the metric and its underlying ``PrecisionRecallF1Support``.

        Args:
            area_ranges_tuples: ``(label, [min_area, max_area])`` pairs; labels
                and ranges are passed on as two parallel lists. Defaults to
                ``[("all", [0, 1e5**2])]``.
            iou_threshold: IoU threshold(s). A single value is wrapped in a
                list and a tuple is converted to a list. Defaults to
                ``[1e-10]``.
            class_agnostic: forwarded unchanged to ``PrecisionRecallF1Support``.
            bbox_format: forwarded as ``box_format``.
            iou_type: forwarded unchanged to ``PrecisionRecallF1Support``.
            **kwargs: forwarded to ``evaluate.Metric.__init__``.
        """
        super().__init__(**kwargs)

        # Defaults are created per call instead of living in the signature, so
        # no list object is shared between instances.
        if area_ranges_tuples is None:
            area_ranges_tuples = [("all", [0, 1e5**2])]
        if iou_threshold is None:
            iou_threshold = [1e-10]
        elif isinstance(iou_threshold, tuple):
            iou_threshold = list(iou_threshold)
        elif not isinstance(iou_threshold, list):
            iou_threshold = [iou_threshold]

        self.coco_metric = PrecisionRecallF1Support(
            iou_thresholds=iou_threshold,
            area_ranges=[area_range for _, area_range in area_ranges_tuples],
            area_ranges_labels=[label for label, _ in area_ranges_tuples],
            class_agnostic=class_agnostic,
            iou_type=iou_type,
            box_format=bbox_format,
        )

    def _info(self):
        """Return the ``evaluate.MetricInfo`` describing this module and its inputs."""
        return evaluate.MetricInfo(
            # This is the description that will appear on the modules page.
            module_type="metric",
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            # This defines the format of each prediction and reference
            features=datasets.Features(
                {
                    "predictions": [
                        datasets.Features(
                            {
                                "boxes": datasets.Sequence(
                                    datasets.Sequence(datasets.Value("float"))
                                ),
                                "labels": datasets.Sequence(datasets.Value("int64")),
                                "scores": datasets.Sequence(datasets.Value("float")),
                            }
                        )
                    ],
                    "references": [
                        datasets.Features(
                            {
                                "boxes": datasets.Sequence(
                                    datasets.Sequence(datasets.Value("float"))
                                ),
                                "labels": datasets.Sequence(datasets.Value("int64")),
                                "area": datasets.Sequence(datasets.Value("float")),
                            }
                        )
                    ],
                }
            ),
            # Additional links to the codebase or references
            codebase_urls=[
                "https://github.com/SEA-AI/seametrics/tree/main",
                "https://lightning.ai/docs/torchmetrics/stable/detection/mean_average_precision.html",
            ],
        )

    def add(self, *, prediction, reference, **kwargs):
        """Add a batch of predictions and references to the metric.

        Args:
            prediction: list of per-image prediction dicts.
            reference: list of per-image ground-truth dicts.
            **kwargs: forwarded to the parent ``add``.

        The caller's dicts are not modified; conversions work on copies.
        """
        # in case the inputs are lists, convert them to numpy arrays
        prediction = self._preprocess(prediction)
        reference = self._preprocess(reference)
        self.coco_metric.update(prediction, reference)

        # Does not impact the metric, but is required for the interface.
        # super(evaluate.Metric, self) starts the method lookup *after*
        # evaluate.Metric in the MRO.
        # NOTE(review): the call passes `references=` (plural) while this
        # method's own parameter is `reference`; kept exactly as in the
        # original -- confirm the parent routes it as intended.
        super(evaluate.Metric, self).add(
            prediction=self._postprocess(prediction),
            references=self._postprocess(reference),
            **kwargs
        )

    @deprecated(reason="Use `module.add_payload` instead")
    def add_batch(self, payload: Payload, model_name: str = None):
        """Deprecated alias of :meth:`add_payload`; returns what it returns."""
        return self.add_payload(payload, model_name)

    def _compute(self, *, predictions=None, references=None, **kwargs):
        """Called within the evaluate.Metric.compute() method.

        ``predictions`` and ``references`` are accepted for interface
        compatibility only: the result comes from ``self.coco_metric``, which
        was already fed in :meth:`add`. Both default to ``None`` so that the
        method can also be called without arguments, as
        :meth:`compute_for_multiple_models` does.
        """
        return self.coco_metric.compute()

    def add_payload(self, payload: Payload, model_name: str = None):
        """Convert a payload to the format expected by the metric and add it.

        Args:
            payload: payload to convert via ``payload_to_det_metric``.
            model_name: forwarded to ``payload_to_det_metric``.

        Returns:
            ``self``, so calls can be chained.
        """
        # import only if needed since fiftyone is not a direct dependency
        from seametrics.detection.utils import payload_to_det_metric

        predictions, references = payload_to_det_metric(payload, model_name)
        self.add(prediction=predictions, reference=references)
        return self

    def _preprocess(self, list_of_dicts):
        """Return copies of the dicts with lists converted to numpy arrays."""
        return [self._lists_to_np(d) for d in list_of_dicts]

    def _postprocess(self, list_of_dicts):
        """Return copies of the dicts with numpy arrays converted to lists."""
        return [self._np_to_lists(d) for d in list_of_dicts]

    def _np_to_lists(self, d):
        """Return a copy of ``d`` whose numpy arrays are plain lists.

        Needed because datasets does not support numpy arrays for type
        checking. Nested dicts are converted recursively; any other value is
        carried over as-is. ``d`` itself is left unmodified.
        """
        converted = {}
        for key, value in d.items():
            if isinstance(value, dict):
                converted[key] = self._np_to_lists(value)
            elif isinstance(value, np.ndarray):
                converted[key] = value.tolist()
            else:
                converted[key] = value
        return converted

    def _lists_to_np(self, d):
        """Return a copy of ``d`` whose lists are numpy arrays.

        Inverse of :meth:`_np_to_lists`. Nested dicts are converted
        recursively; any other value is carried over as-is. ``d`` itself is
        left unmodified.
        """
        converted = {}
        for key, value in d.items():
            if isinstance(value, dict):
                converted[key] = self._lists_to_np(value)
            elif isinstance(value, list):
                converted[key] = np.array(value)
            else:
                converted[key] = value
        return converted

    def compute_for_multiple_models(self, payload):
        """Add the payload once per model and collect one result per model.

        Args:
            payload: object exposing ``models``; each entry is passed as
                ``model_name`` to :meth:`add_payload` and used as result key.

        Returns:
            dict mapping each entry of ``payload.models`` to the output of
            :meth:`_compute`.
        """
        # NOTE(review): the wrapped metric is never reset explicitly between
        # models -- confirm that coco_metric.compute() clears its state,
        # otherwise later results also contain earlier models' detections.
        results = {}
        for model_name in payload.models:
            self.add_payload(payload, model_name)
            results[model_name] = self._compute()
        return results

    def generate_confidence_curves(
        self, results, models, confidence_config: Optional[dict] = None
    ):
        """Plot precision, recall and F1 against confidence for each model.

        Args:
            results: mapping indexed as ``results[model["name"]]["eval"]``.
            models: iterable of dicts that each provide a ``"name"`` entry.
            confidence_config: values for the keys ``"T"``, ``"R"``, ``"K"``,
                ``"A"`` and ``"M"`` that are handed to
                ``get_confidence_metric_vals``; missing keys default to ``0``.
                (Presumably indices into the COCO evaluation arrays -- confirm
                against seametrics.)

        Returns:
            The plotly figure, after it has been displayed with ``fig.show()``.
        """
        import plotly.graph_objects as go
        from seametrics.detection.utils import get_confidence_metric_vals

        # Start from the defaults so that a partial config does not raise
        # KeyError, and so that no dict is shared between calls.
        config = {"T": 0, "R": 0, "K": 0, "A": 0, "M": 0}
        if confidence_config:
            config.update(confidence_config)

        # Create traces
        fig = go.Figure()
        metrics = ['precision', 'recall', 'f1']
        for model in models:
            plot_data = get_confidence_metric_vals(
                cocoeval=results[model['name']]['eval'],
                T=config['T'],
                R=config['R'],
                K=config['K'],
                A=config['A'],
                M=config['M'],
            )
            for metric in metrics:
                fig.add_trace(
                    go.Scatter(
                        x=plot_data['conf'],
                        y=plot_data[metric],
                        mode='lines',
                        name=f"{model['name'].split('_')[0]} {metric}",
                        # f1 is drawn solid, precision and recall dashed
                        line=dict(dash=None if metric == 'f1' else 'dash'),
                    )
                )

        fig.update_layout(
            title="Metric vs Confidence",
            hovermode='x unified',
            xaxis_title="Confidence",
            yaxis_title="Metric value",
        )
        fig.show()
        return fig