Spaces:
Runtime error
Runtime error
File size: 5,373 Bytes
a74f0e4 345d42d 2d26f82 345d42d 2d26f82 345d42d 2d26f82 345d42d 2d26f82 345d42d 2d26f82 345d42d 2d26f82 345d42d 2d26f82 a74f0e4 345d42d 2d26f82 345d42d 2d26f82 345d42d 2d26f82 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
"""ROC Curve metric."""
import datasets
import evaluate
from sklearn.metrics import roc_curve
_DESCRIPTION = """
Compute Receiver operating characteristic (ROC).
Note: this implementation is restricted to the binary classification task.
"""
_KWARGS_DESCRIPTION = """
Args:
y_true : ndarray of shape (n_samples,)
True binary labels. If labels are not either {-1, 1} or {0, 1}, then
pos_label should be explicitly given.
y_score : ndarray of shape (n_samples,)
Target scores, can either be probability estimates of the positive
class, confidence values, or non-thresholded measure of decisions
(as returned by "decision_function" on some classifiers).
pos_label : int or str, default=None
The label of the positive class.
When ``pos_label=None``, if `y_true` is in {-1, 1} or {0, 1},
``pos_label`` is set to 1, otherwise an error will be raised.
sample_weight : array-like of shape (n_samples,), default=None
Sample weights.
drop_intermediate : bool, default=True
Whether to drop some suboptimal thresholds which would not appear
on a plotted ROC curve. This is useful in order to create lighter
ROC curves.
.. versionadded:: 0.17
parameter *drop_intermediate*.
Returns:
fpr : ndarray of shape (>2,)
Increasing false positive rates such that element i is the false
positive rate of predictions with score >= `thresholds[i]`.
tpr : ndarray of shape (>2,)
Increasing true positive rates such that element `i` is the true
positive rate of predictions with score >= `thresholds[i]`.
thresholds : ndarray of shape = (n_thresholds,)
Decreasing thresholds on the decision function used to compute
fpr and tpr. `thresholds[0]` represents no instances being predicted
and is arbitrarily set to `max(y_score) + 1`.
See Also:
RocCurveDisplay.from_estimator : Plot Receiver Operating Characteristic
(ROC) curve given an estimator and some data.
RocCurveDisplay.from_predictions : Plot Receiver Operating Characteristic
(ROC) curve given the true and predicted values.
det_curve: Compute error rates for different probability thresholds.
roc_auc_score : Compute the area under the ROC curve.
Notes:
Since the thresholds are sorted from low to high values, they
are reversed upon returning them to ensure they correspond to both ``fpr``
and ``tpr``, which are sorted in reversed order during their calculation.
References:
.. [1] `Wikipedia entry for the Receiver operating characteristic
<https://en.wikipedia.org/wiki/Receiver_operating_characteristic>`_
.. [2] Fawcett T. An introduction to ROC analysis[J]. Pattern Recognition
Letters, 2006, 27(8):861-874.
Examples:
>>> import numpy as np
>>> from sklearn import metrics
>>> y = np.array([1, 1, 2, 2])
>>> scores = np.array([0.1, 0.4, 0.35, 0.8])
>>> fpr, tpr, thresholds = metrics.roc_curve(y, scores, pos_label=2)
>>> fpr
array([0. , 0. , 0.5, 0.5, 1. ])
>>> tpr
array([0. , 0.5, 0.5, 1. , 1. ])
>>> thresholds
array([1.8 , 0.8 , 0.4 , 0.35, 0.1 ])
"""
_CITATION = """
@article{scikit-learn,
title={Scikit-learn: Machine Learning in {P}ython},
author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P.
and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
journal={Journal of Machine Learning Research},
volume={12},
pages={2825--2830},
year={2011}
}
"""
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class RocCurve(evaluate.Metric):
def _info(self):
return evaluate.MetricInfo(
description=_DESCRIPTION,
citation=_CITATION,
inputs_description=_KWARGS_DESCRIPTION,
features=datasets.Features(
{
"prediction_scores": datasets.Sequence(datasets.Value("float")),
"references": datasets.Value("int32"),
}
if self.config_name == "multiclass"
else {
"references": datasets.Sequence(datasets.Value("int32")),
"prediction_scores": datasets.Sequence(datasets.Value("float")),
}
if self.config_name == "multilabel"
else {
"references": datasets.Value("int32"),
"prediction_scores": datasets.Value("float"),
}
),
reference_urls=[
"https://scikit-learn.org/stable/modules/generated/sklearn.metrics.roc_curve.html"
],
)
def _compute(
self,
references,
prediction_scores,
*,
pos_label=None,
sample_weight=None,
drop_intermediate=True
):
return {
"roc_curve": roc_curve(
y_true=references,
y_score=prediction_scores,
pos_label=pos_label,
sample_weight=sample_weight,
drop_intermediate=drop_intermediate,
)
}
|