# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""TODO: Add a description here.""" | |

from typing import List, Literal, Tuple

import datasets
import evaluate
import numpy as np
from deprecated import deprecated

from seametrics.detection import PrecisionRecallF1Support
from seametrics.payload import Payload

_CITATION = """\
@InProceedings{coco:2020,
    title = {Microsoft {COCO:} Common Objects in Context},
    author = {Tsung{-}Yi Lin and
              Michael Maire and
              Serge J. Belongie and
              James Hays and
              Pietro Perona and
              Deva Ramanan and
              Piotr Dollar and
              C. Lawrence Zitnick},
    booktitle = {Computer Vision - {ECCV} 2014 - 13th European Conference, Zurich,
                 Switzerland, September 6-12, 2014, Proceedings, Part {V}},
    series = {Lecture Notes in Computer Science},
    volume = {8693},
    pages = {740--755},
    publisher = {Springer},
    year = {2014}
}
"""

_DESCRIPTION = """\
This evaluation metric is designed to provide object detection metrics at
different object size levels. It is based on a modified version of the commonly used
COCO-evaluation metrics.
"""

_KWARGS_DESCRIPTION = """
Calculates object detection metrics given predicted and ground truth bounding boxes.
Args:
    predictions: list of predictions for each image. Each prediction should
        be a dict containing the following
        - 'boxes': list of bounding boxes, xywh in absolute pixel values
        - 'labels': list of labels for each bounding box
        - 'scores': list of scores for each bounding box
    references: list of ground truth annotations for each image. Each reference should
        be a dict containing the following
        - 'boxes': list of bounding boxes, xywh in absolute pixel values
        - 'labels': list of labels for each bounding box
        - 'area': list of areas for each bounding box
Returns:
    dict containing dicts for each specified area range with following items:
        'range': specified area range as [min_px_area, max_px_area]
        'iouThr': min. IOU-threshold of a prediction with a ground truth box
            to be considered a correct prediction
        'maxDets': maximum number of detections
        'tp': number of true positive (correct) predictions
        'fp': number of false positive (incorrect) predictions
        'fn': number of false negative (missed) predictions
        'duplicates': number of duplicate predictions
        'precision': best possible score = 1, worst possible score = 0
            large if few false positive predictions
            formula: tp/(fp+tp)
        'recall': best possible score = 1, worst possible score = 0
            large if few missed predictions
            formula: tp/(tp+fn)
        'f1': best possible score = 1, worst possible score = 0
            trades off precision and recall
            formula: 2*(precision*recall)/(precision+recall)
        'support': number of ground truth bounding boxes considered in the evaluation
        'fpi': number of images with no ground truth but false positive predictions
        'nImgs': number of images considered in evaluation
Examples:
    >>> import evaluate
    >>> from seametrics.payload.processor import PayloadProcessor
    >>> payload = PayloadProcessor(...).payload
    >>> module = evaluate.load("SEA-AI/det-metrics", ...)
    >>> module.add_payload(payload)
    >>> result = module.compute()
    >>> print(result)
    {'all': {
        'range': [0, 10000000000.0],
        'iouThr': '0.00',
        'maxDets': 100,
        'tp': 1,
        'fp': 3,
        'fn': 1,
        'duplicates': 0,
        'precision': 0.25,
        'recall': 0.5,
        'f1': 0.3333333333333333,
        'support': 2,
        'fpi': 0,
        'nImgs': 2
        }
    }
"""


class DetectionMetric(evaluate.Metric):
    def __init__(
        self,
        area_ranges_tuples: List[Tuple[str, List[int]]] = [("all", [0, 1e5**2])],
        iou_threshold: List[float] = [1e-10],
        class_agnostic: bool = True,
        bbox_format: str = "xywh",
        iou_type: Literal["bbox", "segm"] = "bbox",
        **kwargs
    ):
        """Wraps seametrics' PrecisionRecallF1Support with the given area ranges,
        IoU threshold(s), box format and IoU type."""
        super().__init__(**kwargs)
        self.coco_metric = PrecisionRecallF1Support(
            iou_thresholds=(
                iou_threshold if isinstance(iou_threshold, list) else [iou_threshold]
            ),
            area_ranges=[v for _, v in area_ranges_tuples],
            area_ranges_labels=[k for k, _ in area_ranges_tuples],
            class_agnostic=class_agnostic,
            iou_type=iou_type,
            box_format=bbox_format,
        )
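
    # Example configuration (illustrative only; the area bounds and IoU value below
    # are assumptions, not defaults of this module). `evaluate.load` forwards these
    # keyword arguments to `__init__`:
    #
    #   module = evaluate.load(
    #       "SEA-AI/det-metrics",
    #       area_ranges_tuples=[("small", [0, 32**2]), ("all", [0, 1e5**2])],
    #       iou_threshold=[0.5],
    #       class_agnostic=True,
    #   )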

    def _info(self):
        return evaluate.MetricInfo(
            # This is the description that will appear on the modules page.
            module_type="metric",
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            # This defines the format of each prediction and reference
            features=datasets.Features(
                {
                    "predictions": [
                        datasets.Features(
                            {
                                "boxes": datasets.Sequence(
                                    datasets.Sequence(datasets.Value("float"))
                                ),
                                "labels": datasets.Sequence(datasets.Value("int64")),
                                "scores": datasets.Sequence(datasets.Value("float")),
                            }
                        )
                    ],
                    "references": [
                        datasets.Features(
                            {
                                "boxes": datasets.Sequence(
                                    datasets.Sequence(datasets.Value("float"))
                                ),
                                "labels": datasets.Sequence(datasets.Value("int64")),
                                "area": datasets.Sequence(datasets.Value("float")),
                            }
                        )
                    ],
                }
            ),
            # Additional links to the codebase or references
            codebase_urls=[
                "https://github.com/SEA-AI/seametrics/tree/main",
                "https://lightning.ai/docs/torchmetrics/stable/detection/mean_average_precision.html",
            ],
        )

    def add(self, *, prediction, reference, **kwargs):
        """Adds a batch of predictions and references to the metric"""
        # in case the inputs are lists, convert them to numpy arrays
        prediction = self._preprocess(prediction)
        reference = self._preprocess(reference)

        self.coco_metric.update(prediction, reference)

        # does not impact the metric, but is required for the interface x_x
        super(evaluate.Metric, self).add(
            prediction=self._postprocess(prediction),
            references=self._postprocess(reference),
            **kwargs
        )

    @deprecated(reason="Use `add_payload` instead.")
    def add_batch(self, payload: Payload, model_name: str = None):
        """Takes as input a payload and adds the batch to the metric"""
        self.add_payload(payload, model_name)

    def _compute(self, *, predictions, references, **kwargs):
        """Called within the evaluate.Metric.compute() method"""
        return self.coco_metric.compute()

    def add_payload(self, payload: Payload, model_name: str = None):
        """Converts the payload to the format expected by the metric"""
        # import only if needed since fiftyone is not a direct dependency
        from seametrics.detection.utils import payload_to_det_metric

        predictions, references = payload_to_det_metric(payload, model_name)
        self.add(prediction=predictions, reference=references)
        return self

    def _preprocess(self, list_of_dicts):
        """Converts the lists to numpy arrays for type checking"""
        return [self._lists_to_np(d) for d in list_of_dicts]

    def _postprocess(self, list_of_dicts):
        """Converts the numpy arrays to lists for type checking"""
        return [self._np_to_lists(d) for d in list_of_dicts]

    def _np_to_lists(self, d):
        """datasets does not support numpy arrays for type checking"""
        for k, v in d.items():
            if isinstance(v, dict):
                self._np_to_lists(v)
            elif isinstance(v, np.ndarray):
                d[k] = v.tolist()
        return d

    def _lists_to_np(self, d):
        """datasets does not support numpy arrays for type checking"""
        for k, v in d.items():
            if isinstance(v, dict):
                self._lists_to_np(v)
            elif isinstance(v, list):
                d[k] = np.array(v)
        return d

    def compute_for_multiple_models(self, payload):
        """Computes the metrics for every model in the payload, keyed by model name."""
        results = {}
        for model_name in payload.models:
            self.add_payload(payload, model_name)
            # _compute() ignores its predictions/references arguments and reads the
            # accumulated internal state, so pass empty placeholders.
            results[model_name] = self._compute(predictions=None, references=None)
        return results
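
    # Illustrative usage sketch (assumes a seametrics Payload holding predictions
    # from several models; `payload` is produced elsewhere, e.g. by PayloadProcessor):
    #
    #   module = evaluate.load("SEA-AI/det-metrics")
    #   per_model = module.compute_for_multiple_models(payload)
    #   for name, result in per_model.items():
    #       print(name, result)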

    def generate_confidence_curves(self, results, models, confidence_config=None):
        """Plots precision, recall and F1 against confidence for each model and
        returns the plotly figure."""
        import plotly.graph_objects as go
        from seametrics.detection.utils import get_confidence_metric_vals

        # avoid a mutable default argument; these are the index defaults used before
        if confidence_config is None:
            confidence_config = {"T": 0, "R": 0, "K": 0, "A": 0, "M": 0}

        # Create traces
        fig = go.Figure()
        metrics = ['precision', 'recall', 'f1']
        for model in models:
            plot_data = get_confidence_metric_vals(
                cocoeval=results[model['name']]['eval'],
                T=confidence_config['T'],
                R=confidence_config['R'],
                K=confidence_config['K'],
                A=confidence_config['A'],
                M=confidence_config['M'],
            )
            for metric in metrics:
                fig.add_trace(
                    go.Scatter(
                        x=plot_data['conf'],
                        y=plot_data[metric],
                        mode='lines',
                        name=f"{model['name'].split('_')[0]} {metric}",
                        line=dict(dash=None if metric == 'f1' else 'dash'),
                    )
                )
        fig.update_layout(
            title="Metric vs Confidence",
            hovermode='x unified',
            xaxis_title="Confidence",
            yaxis_title="Metric value",
        )
        fig.show()
        return fig
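

# Illustrative end-to-end sketch (not executed by the evaluate loader). It assumes the
# per-model results from compute_for_multiple_models() carry the 'eval' object expected
# by generate_confidence_curves(); "model_a"/"model_b" are placeholder model names.
#
#   import evaluate
#   from seametrics.payload.processor import PayloadProcessor
#
#   payload = PayloadProcessor(...).payload
#   module = evaluate.load("SEA-AI/det-metrics")
#   results = module.compute_for_multiple_models(payload)
#   fig = module.generate_confidence_curves(
#       results,
#       models=[{"name": "model_a"}, {"name": "model_b"}],
#   )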