Spaces:

mfumanelli
/

geometric_mean

Build error

App Files Files Community

geometric_mean / geometric_mean.py

mfumanelli

Updating module

4560eb1 almost 3 years ago

raw

history blame contribute delete

6.11 kB

	# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	"""Geometric mean metric."""

	import datasets
	from imblearn.metrics import geometric_mean_score
	import evaluate

	# Short summary of the metric. It is used as the `description` field of the
	# MetricInfo built in `GeometricMean._info` and is also passed to
	# `add_start_docstrings` on the `GeometricMean` class.
	_DESCRIPTION = """
	The geometric mean (G-mean) is the root of the product of class-wise sensitivity. This measure
	tries to maximize the accuracy on each of the classes while keeping these accuracies balanced. For binary
	classification G-mean is the squared root of the product of the sensitivity and specificity. For multi-class problems
	it is a higher root of the product of sensitivity for each class.
	"""

	_KWARGS_DESCRIPTION = """
	Calculates how good are predictions given some references, using certain scores
	Args:
	predictions (`list` of `int`): Predicted labels.
	references (`list` of `int`): Ground truth labels.
	labels (`list` of `int`): The set of labels to include when average != 'binary', and their order if average is None. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a majority negative class, while labels not present in the data will result in 0 components in a macro average. Defaults to None.
	pos_label ('string' or `int`): The class to report if average='binary' and the data is binary. If the data are multiclass, this will be ignored; setting labels=[pos_label] and average != 'binary' will report scores for that label only. Defaults to 1.
	average (`string`): If None, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data. Defaults to `'multiclass'`.

	- 'binary': Only report results for the class specified by pos_label. This is applicable only if targets (y_{true,pred}) are binary.
	- 'micro': Calculate metrics globally by counting the total true positives, false negatives and false positives.
	- 'macro': Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account.
	- 'weighted': Calculate metrics for each label, and find their average, weighted by support (the number of true instances for each label).
	- 'samples': Calculate metrics for each instance, and find their average (only meaningful for multilabel classification where this differs from accuracy_score).

	sample_weight (`list` of `float`): Sample weights. Defaults to None.
	correction (`float`): Substitutes sensitivity of unrecognized classes from zero to a given value. Defaults to 0.0.

	Returns:
	geometric_mean (`float` or `array` of `float`): geometric mean score or list of geometric mean scores, depending on the value passed to `average`. Minimum possible value is 0. Maximum possible value is 1. Higher geometric mean scores are better.

	Examples:
	Example 1-A simple binary example
	>>> geometric_mean = evaluate.load("geometric_mean")
	>>> results = geometric_mean.compute(references=[0, 1, 0, 1, 0], predictions=[0, 0, 1, 1, 0])
	>>> print(round(res['geometric-mean'], 2))
	0.58
	Example 2-The same simple binary example as in Example 1, but with `sample_weight` included.
	>>> geometric_mean = evaluate.load("geometric_mean")
	>>> results = geometric_mean.compute(references=[0, 1, 0, 1, 0], predictions=[0, 0, 1, 1, 0], sample_weight=[0.9, 0.5, 3.9, 1.2, 0.3])
	>>> print(round(results['geometric-mean'], 2))
	0.35
	Example 3-A multiclass example, with `average` equal to `macro`.
	>>> predictions = [0, 2, 1, 0, 0, 1]
	>>> references = [0, 1, 2, 0, 1, 2]
	>>> results = geometric_mean.compute(predictions=predictions, references=references, average="macro")
	>>> print(round(results['geometric-mean'], 2))
	0.47
	"""

	_CITATION = """
	@article{imbalanced-learn,
	title={Imbalanced-learn: A Python Toolbox to Tackle the Curse of
	Imbalanced Datasets in Machine Learning},
	author={Lemaˆıtre, G. and Nogueira, F. and Aridas, C.},
	journal={Journal of Machine Learning Research},
	volume={18},
	pages={1-5},
	year={2017}
	}
	"""


	# Geometric mean (G-mean) metric that delegates the computation to
	# `imblearn.metrics.geometric_mean_score`.
	# No class docstring is written here on purpose: `add_start_docstrings` is
	# applied with _DESCRIPTION and _KWARGS_DESCRIPTION, and a literal class
	# docstring could end up in the generated `__doc__` as well.
	@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
	class GeometricMean(evaluate.Metric):
	    def _info(self):
	        """Build the MetricInfo describing this metric.

	        The feature schema depends on `self.config_name`: for "multilabel"
	        each prediction/reference is a sequence of int32 values; for any
	        other configuration each one is a single int32 value.
	        """
	        return evaluate.MetricInfo(
	            module_type="metric",
	            description=_DESCRIPTION,
	            citation=_CITATION,
	            inputs_description=_KWARGS_DESCRIPTION,
	            # This defines the format of each prediction and reference
	            features=datasets.Features(
	                {
	                    "predictions": datasets.Sequence(datasets.Value("int32")),
	                    "references": datasets.Sequence(datasets.Value("int32")),
	                }
	                if self.config_name == "multilabel"
	                else {
	                    "predictions": datasets.Value("int32"),
	                    "references": datasets.Value("int32"),
	                }
	            ),
	            reference_urls=["http://glemaitre.github.io/imbalanced-learn/generated/imblearn.metrics.geometric_mean_score.html#:~:text=The%20geometric%20mean%20(G%2Dmean,of%20the%20sensitivity%20and%20specificity."],
	        )

	    def _compute(self, predictions, references, labels=None, pos_label=1, average="multiclass", sample_weight=None, correction=0.0):
	        """Compute the geometric mean score of `predictions` against `references`.

	        `references` and `predictions` are passed positionally (in that
	        order) to `geometric_mean_score`; `labels`, `pos_label`, `average`,
	        `sample_weight` and `correction` are forwarded unchanged as keywords.

	        Returns:
	            A dict with the single key "geometric-mean". The value is a
	            Python float when the score holds exactly one element, otherwise
	            the score object is returned as-is.
	        """
	        score = geometric_mean_score(
	            references,
	            predictions,
	            labels=labels,
	            pos_label=pos_label,
	            average=average,
	            sample_weight=sample_weight,
	            correction=correction,
	        )
	        # A plain Python number has no `.size` attribute; treat it as a
	        # single value instead of failing with AttributeError.
	        if getattr(score, "size", 1) == 1:
	            return {"geometric-mean": float(score)}
	        return {"geometric-mean": score}