|
"""Methods for calibrating predicted probabilities.""" |
|
|
|
|
|
|
|
|
|
import warnings |
|
from inspect import signature |
|
from math import log |
|
from numbers import Integral, Real |
|
|
|
import numpy as np |
|
from scipy.optimize import minimize |
|
from scipy.special import expit |
|
|
|
from sklearn.utils import Bunch |
|
|
|
from ._loss import HalfBinomialLoss |
|
from .base import ( |
|
BaseEstimator, |
|
ClassifierMixin, |
|
MetaEstimatorMixin, |
|
RegressorMixin, |
|
_fit_context, |
|
clone, |
|
) |
|
from .frozen import FrozenEstimator |
|
from .isotonic import IsotonicRegression |
|
from .model_selection import LeaveOneOut, check_cv, cross_val_predict |
|
from .preprocessing import LabelEncoder, label_binarize |
|
from .svm import LinearSVC |
|
from .utils import _safe_indexing, column_or_1d, get_tags, indexable |
|
from .utils._param_validation import ( |
|
HasMethods, |
|
Hidden, |
|
Interval, |
|
StrOptions, |
|
validate_params, |
|
) |
|
from .utils._plotting import _BinaryClassifierCurveDisplayMixin, _validate_style_kwargs |
|
from .utils._response import _get_response_values, _process_predict_proba |
|
from .utils.metadata_routing import ( |
|
MetadataRouter, |
|
MethodMapping, |
|
_routing_enabled, |
|
process_routing, |
|
) |
|
from .utils.multiclass import check_classification_targets |
|
from .utils.parallel import Parallel, delayed |
|
from .utils.validation import ( |
|
_check_method_params, |
|
_check_pos_label_consistency, |
|
_check_response_method, |
|
_check_sample_weight, |
|
_num_samples, |
|
check_consistent_length, |
|
check_is_fitted, |
|
) |
|
|
|
|
|
class CalibratedClassifierCV(ClassifierMixin, MetaEstimatorMixin, BaseEstimator):
    """Probability calibration with isotonic regression or logistic regression.

    This class uses cross-validation to both estimate the parameters of a
    classifier and subsequently calibrate a classifier. With default
    `ensemble=True`, for each cv split it
    fits a copy of the base estimator to the training subset, and calibrates it
    using the testing subset. For prediction, predicted probabilities are
    averaged across these individual calibrated classifiers. When
    `ensemble=False`, cross-validation is used to obtain unbiased predictions,
    via :func:`~sklearn.model_selection.cross_val_predict`, which are then
    used for calibration. For prediction, the base estimator, trained using all
    the data, is used. This is the prediction method implemented when
    `probabilities=True` for :class:`~sklearn.svm.SVC` and :class:`~sklearn.svm.NuSVC`
    estimators (see :ref:`User Guide <scores_probabilities>` for details).

    Already fitted classifiers can be calibrated by wrapping the model in a
    :class:`~sklearn.frozen.FrozenEstimator`. In this case all provided
    data is used for calibration. The user has to take care manually that data
    for model fitting and calibration are disjoint.

    The calibration is based on the :term:`decision_function` method of the
    `estimator` if it exists, else on :term:`predict_proba`.

    Read more in the :ref:`User Guide <calibration>`.
    In order to learn more on the CalibratedClassifierCV class, see the
    following calibration examples:
    :ref:`sphx_glr_auto_examples_calibration_plot_calibration.py`,
    :ref:`sphx_glr_auto_examples_calibration_plot_calibration_curve.py`, and
    :ref:`sphx_glr_auto_examples_calibration_plot_calibration_multiclass.py`.

    Parameters
    ----------
    estimator : estimator instance, default=None
        The classifier whose output need to be calibrated to provide more
        accurate `predict_proba` outputs. The default classifier is
        a :class:`~sklearn.svm.LinearSVC`.

        .. versionadded:: 1.2

    method : {'sigmoid', 'isotonic'}, default='sigmoid'
        The method to use for calibration. Can be 'sigmoid' which
        corresponds to Platt's method (i.e. a logistic regression model) or
        'isotonic' which is a non-parametric approach. It is not advised to
        use isotonic calibration with too few calibration samples
        ``(<<1000)`` since it tends to overfit.

    cv : int, cross-validation generator, or iterable, default=None
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:

        - None, to use the default 5-fold cross-validation,
        - integer, to specify the number of folds.
        - :term:`CV splitter`,
        - An iterable yielding (train, test) splits as arrays of indices.

        For integer/None inputs, if ``y`` is binary or multiclass,
        :class:`~sklearn.model_selection.StratifiedKFold` is used. If ``y`` is
        neither binary nor multiclass, :class:`~sklearn.model_selection.KFold`
        is used.

        Refer to the :ref:`User Guide <cross_validation>` for the various
        cross-validation strategies that can be used here.

        .. versionchanged:: 0.22
            ``cv`` default value if None changed from 3-fold to 5-fold.

        .. versionchanged:: 1.6
            `"prefit"` is deprecated. Use :class:`~sklearn.frozen.FrozenEstimator`
            instead.

    n_jobs : int, default=None
        Number of jobs to run in parallel.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors.

        Base estimator clones are fitted in parallel across cross-validation
        iterations. Therefore parallelism happens only when `cv != "prefit"`.

        See :term:`Glossary <n_jobs>` for more details.

        .. versionadded:: 0.24

    ensemble : bool, or "auto", default="auto"
        Determines how the calibrator is fitted.

        "auto" will use `False` if the `estimator` is a
        :class:`~sklearn.frozen.FrozenEstimator`, and `True` otherwise.

        If `True`, the `estimator` is fitted using training data, and
        calibrated using testing data, for each `cv` fold. The final estimator
        is an ensemble of `n_cv` fitted classifier and calibrator pairs, where
        `n_cv` is the number of cross-validation folds. The output is the
        average predicted probabilities of all pairs.

        If `False`, `cv` is used to compute unbiased predictions, via
        :func:`~sklearn.model_selection.cross_val_predict`, which are then
        used for calibration. At prediction time, the classifier used is the
        `estimator` trained on all the data.
        Note that this method is also internally implemented in
        :mod:`sklearn.svm` estimators with the `probabilities=True` parameter.

        .. versionadded:: 0.24

        .. versionchanged:: 1.6
            `"auto"` option is added and is the default.

    Attributes
    ----------
    classes_ : ndarray of shape (n_classes,)
        The class labels.

    n_features_in_ : int
        Number of features seen during :term:`fit`. Only defined if the
        underlying estimator exposes such an attribute when fit.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Only defined if the
        underlying estimator exposes such an attribute when fit.

        .. versionadded:: 1.0

    calibrated_classifiers_ : list (len() equal to cv or 1 if `ensemble=False`)
        The list of classifier and calibrator pairs.

        - When `ensemble=True`, `n_cv` fitted `estimator` and calibrator pairs.
          `n_cv` is the number of cross-validation folds.
        - When `ensemble=False`, the `estimator`, fitted on all the data, and fitted
          calibrator.

        .. versionchanged:: 0.24
            Single calibrated classifier case when `ensemble=False`.

    See Also
    --------
    calibration_curve : Compute true and predicted probabilities
        for a calibration curve.

    References
    ----------
    .. [1] Obtaining calibrated probability estimates from decision trees
           and naive Bayesian classifiers, B. Zadrozny & C. Elkan, ICML 2001

    .. [2] Transforming Classifier Scores into Accurate Multiclass
           Probability Estimates, B. Zadrozny & C. Elkan, (KDD 2002)

    .. [3] Probabilistic Outputs for Support Vector Machines and Comparisons to
           Regularized Likelihood Methods, J. Platt, (1999)

    .. [4] Predicting Good Probabilities with Supervised Learning,
           A. Niculescu-Mizil & R. Caruana, ICML 2005

    Examples
    --------
    >>> from sklearn.datasets import make_classification
    >>> from sklearn.naive_bayes import GaussianNB
    >>> from sklearn.calibration import CalibratedClassifierCV
    >>> X, y = make_classification(n_samples=100, n_features=2,
    ...                            n_redundant=0, random_state=42)
    >>> base_clf = GaussianNB()
    >>> calibrated_clf = CalibratedClassifierCV(base_clf, cv=3)
    >>> calibrated_clf.fit(X, y)
    CalibratedClassifierCV(...)
    >>> len(calibrated_clf.calibrated_classifiers_)
    3
    >>> calibrated_clf.predict_proba(X)[:5, :]
    array([[0.110..., 0.889...],
           [0.072..., 0.927...],
           [0.928..., 0.071...],
           [0.928..., 0.071...],
           [0.071..., 0.928...]])
    >>> from sklearn.model_selection import train_test_split
    >>> X, y = make_classification(n_samples=100, n_features=2,
    ...                            n_redundant=0, random_state=42)
    >>> X_train, X_calib, y_train, y_calib = train_test_split(
    ...     X, y, random_state=42
    ... )
    >>> base_clf = GaussianNB()
    >>> base_clf.fit(X_train, y_train)
    GaussianNB()
    >>> from sklearn.frozen import FrozenEstimator
    >>> calibrated_clf = CalibratedClassifierCV(FrozenEstimator(base_clf))
    >>> calibrated_clf.fit(X_calib, y_calib)
    CalibratedClassifierCV(...)
    >>> len(calibrated_clf.calibrated_classifiers_)
    1
    >>> calibrated_clf.predict_proba([[-0.5, 0.5]])
    array([[0.936..., 0.063...]])
    """

    _parameter_constraints: dict = {
        "estimator": [
            HasMethods(["fit", "predict_proba"]),
            HasMethods(["fit", "decision_function"]),
            None,
        ],
        "method": [StrOptions({"isotonic", "sigmoid"})],
        "cv": ["cv_object", Hidden(StrOptions({"prefit"}))],
        "n_jobs": [Integral, None],
        "ensemble": ["boolean", StrOptions({"auto"})],
    }

    def __init__(
        self,
        estimator=None,
        *,
        method="sigmoid",
        cv=None,
        n_jobs=None,
        ensemble="auto",
    ):
        self.estimator = estimator
        self.method = method
        self.cv = cv
        self.n_jobs = n_jobs
        self.ensemble = ensemble

    def _get_estimator(self):
        """Resolve which estimator to return (default is LinearSVC)"""
        if self.estimator is None:
            # We want all classifiers that don't expose a random_state
            # to be deterministic (and we don't want to expose this one).
            estimator = LinearSVC(random_state=0)
            if _routing_enabled():
                estimator.set_fit_request(sample_weight=True)
        else:
            estimator = self.estimator

        return estimator

    @_fit_context(
        # CalibratedClassifierCV.estimator is not validated yet
        prefer_skip_nested_validation=False
    )
    def fit(self, X, y, sample_weight=None, **fit_params):
        """Fit the calibrated model.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples,)
            Target values.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights. If None, then samples are equally weighted.

        **fit_params : dict
            Parameters to pass to the `fit` method of the underlying
            classifier.

        Returns
        -------
        self : object
            Returns an instance of self.
        """
        check_classification_targets(y)
        X, y = indexable(X, y)
        if sample_weight is not None:
            sample_weight = _check_sample_weight(sample_weight, X)

        estimator = self._get_estimator()

        _ensemble = self.ensemble
        if _ensemble == "auto":
            _ensemble = not isinstance(estimator, FrozenEstimator)

        self.calibrated_classifiers_ = []
        if self.cv == "prefit":
            # TODO(1.8): Remove this code branch and the cv='prefit' option.
            warnings.warn(
                "The `cv='prefit'` option is deprecated in 1.6 and will be removed in"
                " 1.8. You can use CalibratedClassifierCV(FrozenEstimator(estimator))"
                " instead."
            )
            # `classes_` should be consistent with that of the prefit estimator.
            check_is_fitted(self.estimator, attributes=["classes_"])
            self.classes_ = self.estimator.classes_

            predictions, _ = _get_response_values(
                estimator,
                X,
                response_method=["decision_function", "predict_proba"],
            )
            if predictions.ndim == 1:
                # Reshape binary output from `(n_samples,)` to `(n_samples, 1)`
                predictions = predictions.reshape(-1, 1)

            calibrated_classifier = _fit_calibrator(
                estimator,
                predictions,
                y,
                self.classes_,
                self.method,
                sample_weight,
            )
            self.calibrated_classifiers_.append(calibrated_classifier)
        else:
            # Set `classes_` using all of `y` (not only a training subset).
            label_encoder_ = LabelEncoder().fit(y)
            self.classes_ = label_encoder_.classes_

            if _routing_enabled():
                routed_params = process_routing(
                    self,
                    "fit",
                    sample_weight=sample_weight,
                    **fit_params,
                )
            else:
                # sample_weight checks: without metadata routing, warn when the
                # base estimator cannot consume sample weights during `fit`.
                fit_parameters = signature(estimator.fit).parameters
                supports_sw = "sample_weight" in fit_parameters
                if sample_weight is not None and not supports_sw:
                    estimator_name = type(estimator).__name__
                    warnings.warn(
                        f"Since {estimator_name} does not appear to accept"
                        " sample_weight, sample weights will only be used for the"
                        " calibration itself. This can be caused by a limitation of"
                        " the current scikit-learn API. See the following issue for"
                        " more details:"
                        " https://github.com/scikit-learn/scikit-learn/issues/21134."
                        " Be warned that the result of the calibration is likely to be"
                        " incorrect."
                    )
                routed_params = Bunch()
                routed_params.splitter = Bunch(split={})  # no routing for splitter
                routed_params.estimator = Bunch(fit=fit_params)
                if sample_weight is not None and supports_sw:
                    routed_params.estimator.fit["sample_weight"] = sample_weight

            # Check that each cross-validation fold can have at least one
            # example per class.
            if isinstance(self.cv, int):
                n_folds = self.cv
            elif hasattr(self.cv, "n_splits"):
                n_folds = self.cv.n_splits
            else:
                n_folds = None
            if n_folds and np.any(np.unique(y, return_counts=True)[1] < n_folds):
                raise ValueError(
                    f"Requesting {n_folds}-fold "
                    "cross-validation but provided less than "
                    f"{n_folds} examples for at least one class."
                )
            if isinstance(self.cv, LeaveOneOut):
                # Fix: add the missing space between "allow" and "all" in the
                # concatenated message parts.
                raise ValueError(
                    "LeaveOneOut cross-validation does not allow "
                    "all classes to be present in test splits. "
                    "Please use a cross-validation generator that allows "
                    "all classes to appear in every test and train split."
                )
            cv = check_cv(self.cv, y, classifier=True)

            if _ensemble:
                # One (classifier, calibrator) pair per CV split, fitted in
                # parallel; predictions are later averaged over the pairs.
                parallel = Parallel(n_jobs=self.n_jobs)
                self.calibrated_classifiers_ = parallel(
                    delayed(_fit_classifier_calibrator_pair)(
                        clone(estimator),
                        X,
                        y,
                        train=train,
                        test=test,
                        method=self.method,
                        classes=self.classes_,
                        sample_weight=sample_weight,
                        fit_params=routed_params.estimator.fit,
                    )
                    for train, test in cv.split(X, y, **routed_params.splitter.split)
                )
            else:
                # Calibrate on out-of-fold predictions, then refit the base
                # estimator on all the data for prediction time.
                this_estimator = clone(estimator)
                method_name = _check_response_method(
                    this_estimator,
                    ["decision_function", "predict_proba"],
                ).__name__
                predictions = cross_val_predict(
                    estimator=this_estimator,
                    X=X,
                    y=y,
                    cv=cv,
                    method=method_name,
                    n_jobs=self.n_jobs,
                    params=routed_params.estimator.fit,
                )
                if len(self.classes_) == 2:
                    # Ensure shape (n_samples, 1) in the binary case:
                    if method_name == "predict_proba":
                        # Select the probability column of the positive class.
                        predictions = _process_predict_proba(
                            y_pred=predictions,
                            target_type="binary",
                            classes=self.classes_,
                            pos_label=self.classes_[1],
                        )
                    predictions = predictions.reshape(-1, 1)

                this_estimator.fit(X, y, **routed_params.estimator.fit)

                # Note: Here we don't pass on fit_params because the supported
                # calibrators don't support fit_params anyway.
                calibrated_classifier = _fit_calibrator(
                    this_estimator,
                    predictions,
                    y,
                    self.classes_,
                    self.method,
                    sample_weight,
                )
                self.calibrated_classifiers_.append(calibrated_classifier)

        first_clf = self.calibrated_classifiers_[0].estimator
        if hasattr(first_clf, "n_features_in_"):
            self.n_features_in_ = first_clf.n_features_in_
        if hasattr(first_clf, "feature_names_in_"):
            self.feature_names_in_ = first_clf.feature_names_in_
        return self

    def predict_proba(self, X):
        """Calibrated probabilities of classification.

        This function returns calibrated probabilities of classification
        according to each class on an array of test vectors X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The samples, as accepted by `estimator.predict_proba`.

        Returns
        -------
        C : ndarray of shape (n_samples, n_classes)
            The predicted probas.
        """
        check_is_fitted(self)

        # Compute the arithmetic mean of the predictions of the calibrated
        # classifiers (a single one when `ensemble=False`).
        mean_proba = np.zeros((_num_samples(X), len(self.classes_)))
        for calibrated_classifier in self.calibrated_classifiers_:
            proba = calibrated_classifier.predict_proba(X)
            mean_proba += proba

        mean_proba /= len(self.calibrated_classifiers_)

        return mean_proba

    def predict(self, X):
        """Predict the target of new samples.

        The predicted class is the class that has the highest probability,
        and can thus be different from the prediction of the uncalibrated classifier.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The samples, as accepted by `estimator.predict`.

        Returns
        -------
        C : ndarray of shape (n_samples,)
            The predicted class.
        """
        check_is_fitted(self)
        return self.classes_[np.argmax(self.predict_proba(X), axis=1)]

    def get_metadata_routing(self):
        """Get metadata routing of this object.

        Please check :ref:`User Guide <metadata_routing>` on how the routing
        mechanism works.

        Returns
        -------
        routing : MetadataRouter
            A :class:`~sklearn.utils.metadata_routing.MetadataRouter` encapsulating
            routing information.
        """
        router = (
            MetadataRouter(owner=self.__class__.__name__)
            .add_self_request(self)
            .add(
                estimator=self._get_estimator(),
                method_mapping=MethodMapping().add(caller="fit", callee="fit"),
            )
            .add(
                splitter=self.cv,
                method_mapping=MethodMapping().add(caller="fit", callee="split"),
            )
        )
        return router

    def __sklearn_tags__(self):
        tags = super().__sklearn_tags__()
        tags.input_tags.sparse = get_tags(self._get_estimator()).input_tags.sparse
        return tags
|
|
|
|
|
def _fit_classifier_calibrator_pair(
    estimator,
    X,
    y,
    train,
    test,
    method,
    classes,
    sample_weight=None,
    fit_params=None,
):
    """Fit a classifier/calibration pair on a given train/test split.

    The classifier is trained on the train indices; its raw outputs on the
    test indices, together with the test labels, are then used to fit the
    calibrator.

    Parameters
    ----------
    estimator : estimator instance
        Cloned base estimator.

    X : array-like, shape (n_samples, n_features)
        Sample data.

    y : array-like, shape (n_samples,)
        Targets.

    train : ndarray, shape (n_train_indices,)
        Indices of the training subset.

    test : ndarray, shape (n_test_indices,)
        Indices of the testing subset.

    method : {'sigmoid', 'isotonic'}
        Method to use for calibration.

    classes : ndarray, shape (n_classes,)
        The target classes.

    sample_weight : array-like, default=None
        Sample weights for `X`.

    fit_params : dict, default=None
        Parameters to pass to the `fit` method of the underlying
        classifier.

    Returns
    -------
    calibrated_classifier : _CalibratedClassifier instance
    """
    # Route any fit params to the training subset before fitting.
    train_fit_params = _check_method_params(X, params=fit_params, indices=train)
    X_train = _safe_indexing(X, train)
    y_train = _safe_indexing(y, train)
    estimator.fit(X_train, y_train, **train_fit_params)

    # Raw scores on the held-out split feed the calibrator.
    X_test = _safe_indexing(X, test)
    y_test = _safe_indexing(y, test)
    predictions, _ = _get_response_values(
        estimator,
        X_test,
        response_method=["decision_function", "predict_proba"],
    )
    if predictions.ndim == 1:
        # Reshape binary output from `(n_samples,)` to `(n_samples, 1)`.
        predictions = predictions.reshape(-1, 1)

    if sample_weight is None:
        test_weights = None
    else:
        test_weights = _safe_indexing(sample_weight, test)
    return _fit_calibrator(
        estimator, predictions, y_test, classes, method, sample_weight=test_weights
    )
|
|
|
|
|
def _fit_calibrator(clf, predictions, y, classes, method, sample_weight=None):
    """Fit calibrator(s) and return a `_CalibratedClassifier` instance.

    One calibrator is fitted per class in a one-vs-all fashion, so
    `len(clf.classes_)` calibrators are produced — except in the binary
    case, where a single calibrator suffices.

    Parameters
    ----------
    clf : estimator instance
        Fitted classifier.

    predictions : array-like, shape (n_samples, n_classes) or (n_samples, 1) \
            when binary.
        Raw predictions returned by the un-calibrated base classifier.

    y : array-like, shape (n_samples,)
        The targets.

    classes : ndarray, shape (n_classes,)
        All the prediction classes.

    method : {'sigmoid', 'isotonic'}
        The method to use for calibration.

    sample_weight : ndarray, shape (n_samples,), default=None
        Sample weights. If None, then samples are equally weighted.

    Returns
    -------
    pipeline : _CalibratedClassifier instance
    """
    # One-hot encode the targets; column k corresponds to classes[k].
    one_hot_targets = label_binarize(y, classes=classes)
    encoder = LabelEncoder().fit(classes)
    class_columns = encoder.transform(clf.classes_)

    fitted_calibrators = []
    for column, raw_scores in zip(class_columns, predictions.T):
        if method == "isotonic":
            regressor = IsotonicRegression(out_of_bounds="clip")
        else:
            regressor = _SigmoidCalibration()
        regressor.fit(raw_scores, one_hot_targets[:, column], sample_weight)
        fitted_calibrators.append(regressor)

    return _CalibratedClassifier(
        clf, fitted_calibrators, method=method, classes=classes
    )
|
|
|
|
|
class _CalibratedClassifier:
    """Pipeline-like chaining a fitted classifier and its fitted calibrators.

    Parameters
    ----------
    estimator : estimator instance
        Fitted classifier.

    calibrators : list of fitted estimator instances
        List of fitted calibrators (either 'IsotonicRegression' or
        '_SigmoidCalibration'). The number of calibrators equals the number of
        classes. However, if there are 2 classes, the list contains only one
        fitted calibrator.

    classes : array-like of shape (n_classes,)
        All the prediction classes.

    method : {'sigmoid', 'isotonic'}, default='sigmoid'
        The method to use for calibration. Can be 'sigmoid' which
        corresponds to Platt's method or 'isotonic' which is a
        non-parametric approach based on isotonic regression.
    """

    def __init__(self, estimator, calibrators, *, classes, method="sigmoid"):
        self.estimator = estimator
        self.calibrators = calibrators
        self.classes = classes
        self.method = method

    def predict_proba(self, X):
        """Calculate calibrated probabilities.

        Calculates classification calibrated probabilities
        for each class, in a one-vs-all manner, for `X`.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            The sample data.

        Returns
        -------
        proba : array, shape (n_samples, n_classes)
            The predicted probabilities. Can be exact zeros.
        """
        # Raw scores from the wrapped classifier: decision_function if
        # available, otherwise predict_proba.
        predictions, _ = _get_response_values(
            self.estimator,
            X,
            response_method=["decision_function", "predict_proba"],
        )
        if predictions.ndim == 1:
            # Reshape binary output from `(n_samples,)` to `(n_samples, 1)`.
            predictions = predictions.reshape(-1, 1)

        n_classes = len(self.classes)

        label_encoder = LabelEncoder().fit(self.classes)
        pos_class_indices = label_encoder.transform(self.estimator.classes_)

        proba = np.zeros((_num_samples(X), n_classes))
        for class_idx, this_pred, calibrator in zip(
            pos_class_indices, predictions.T, self.calibrators
        ):
            if n_classes == 2:
                # When binary, `predictions` consists only of predictions for
                # clf.classes_[1] but `pos_class_indices` = 0: shift so the
                # calibrated output fills the positive-class column.
                class_idx += 1
            proba[:, class_idx] = calibrator.predict(this_pred)

        # Normalize the probabilities so each row sums to 1.
        if n_classes == 2:
            # Binary case: the negative-class column is the complement.
            proba[:, 0] = 1.0 - proba[:, 1]
        else:
            denominator = np.sum(proba, axis=1)[:, np.newaxis]
            # In the edge case where every per-class calibrator returns a null
            # probability for a given sample, dividing would produce NaN; fall
            # back to the uniform distribution for such rows instead
            # (`out=uniform_proba` is only overwritten where `denominator != 0`).
            uniform_proba = np.full_like(proba, 1 / n_classes)
            proba = np.divide(
                proba, denominator, out=uniform_proba, where=denominator != 0
            )

        # Clip tiny floating-point overshoots just above 1.0 back to exactly 1.0.
        proba[(1.0 < proba) & (proba <= 1.0 + 1e-5)] = 1.0

        return proba
|
|
|
|
|
|
|
|
|
def _sigmoid_calibration(
    predictions, y, sample_weight=None, max_abs_prediction_threshold=30
):
    """Probability Calibration with sigmoid method (Platt 2000)

    Fits the two parameters (a, b) of the sigmoid
    ``p(y=1|f) = 1 / (1 + exp(a * f + b))`` by minimizing a binomial
    (log) loss over the raw predictions.

    Parameters
    ----------
    predictions : ndarray of shape (n_samples,)
        The decision function or predict proba for the samples.

    y : ndarray of shape (n_samples,)
        The targets.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights. If None, then samples are equally weighted.

    Returns
    -------
    a : float
        The slope.

    b : float
        The intercept.

    References
    ----------
    Platt, "Probabilistic Outputs for Support Vector Machines"
    """
    predictions = column_or_1d(predictions)
    y = column_or_1d(y)

    F = predictions  # F follows Platt's notation in the reference paper

    scale_constant = 1.0
    max_prediction = np.max(np.abs(F))

    # If the predictions have large absolute values, rescale them to a
    # numerically safer range before optimizing; the fitted slope is scaled
    # back at the end, so the returned (a, b) refer to the original scale.
    # NOTE(review): threshold of 30 appears chosen to keep exp() in the loss
    # well-conditioned — confirm against the original PR discussion.
    if max_prediction >= max_abs_prediction_threshold:
        scale_constant = max_prediction
        # Rescale into a copy rather than in place so the caller's
        # `predictions` array is left untouched.
        F = F / scale_constant

    # Bayesian priors (see Platt, end of section 2.2): instead of fitting
    # against hard 0/1 targets, fit against smoothed targets derived from the
    # (possibly weighted) class counts, which regularizes the fit.
    mask_negative_samples = y <= 0
    if sample_weight is not None:
        prior0 = (sample_weight[mask_negative_samples]).sum()
        prior1 = (sample_weight[~mask_negative_samples]).sum()
    else:
        prior0 = float(np.sum(mask_negative_samples))
        prior1 = y.shape[0] - prior0
    T = np.zeros_like(y, dtype=predictions.dtype)
    T[y > 0] = (prior1 + 1.0) / (prior1 + 2.0)
    T[y <= 0] = 1.0 / (prior0 + 2.0)

    bin_loss = HalfBinomialLoss()

    def loss_grad(AB):
        # Objective for the optimizer: AB[0] is the slope, AB[1] the
        # intercept. The sign convention (negated raw prediction) matches the
        # sigmoid form 1 / (1 + exp(a * f + b)) used in `_SigmoidCalibration.predict`.
        # `.astype` keeps raw_prediction in the same dtype as the inputs.
        raw_prediction = -(AB[0] * F + AB[1]).astype(dtype=predictions.dtype)
        l, g = bin_loss.loss_gradient(
            y_true=T,
            raw_prediction=raw_prediction,
            sample_weight=sample_weight,
        )
        loss = l.sum()
        # Chain rule through the negated affine map gives the gradient w.r.t.
        # (slope, intercept); cast to float64 for the SciPy optimizer.
        grad = np.asarray([-g @ F, -g.sum()], dtype=np.float64)
        return loss, grad

    # Platt's suggested starting point: zero slope, intercept from the priors.
    AB0 = np.array([0.0, log((prior0 + 1.0) / (prior1 + 1.0))])

    opt_result = minimize(
        loss_grad,
        AB0,
        method="L-BFGS-B",
        jac=True,
        options={
            "gtol": 1e-6,
            "ftol": 64 * np.finfo(float).eps,
        },
    )
    AB_ = opt_result.x

    # The tuned slope is converted back to the original input scale. The
    # intercept needs no rescaling since the targets were not rescaled.
    return AB_[0] / scale_constant, AB_[1]
|
|
|
|
|
class _SigmoidCalibration(RegressorMixin, BaseEstimator):
    """Sigmoid regression model.

    Thin estimator wrapper around :func:`_sigmoid_calibration`.

    Attributes
    ----------
    a_ : float
        The slope.

    b_ : float
        The intercept.
    """

    def fit(self, X, y, sample_weight=None):
        """Fit the model using X, y as training data.

        Parameters
        ----------
        X : array-like of shape (n_samples,)
            Training data.

        y : array-like of shape (n_samples,)
            Training target.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights. If None, then samples are equally weighted.

        Returns
        -------
        self : object
            Returns an instance of self.
        """
        scores = column_or_1d(X)
        targets = column_or_1d(y)
        scores, targets = indexable(scores, targets)

        self.a_, self.b_ = _sigmoid_calibration(scores, targets, sample_weight)
        return self

    def predict(self, T):
        """Predict new data by linear interpolation.

        Parameters
        ----------
        T : array-like of shape (n_samples,)
            Data to predict from.

        Returns
        -------
        T_ : ndarray of shape (n_samples,)
            The predicted data.
        """
        scores = column_or_1d(T)
        # p = 1 / (1 + exp(a * T + b)), written via the stable expit.
        return expit(-(self.a_ * scores + self.b_))
|
|
|
|
|
@validate_params(
    {
        "y_true": ["array-like"],
        "y_prob": ["array-like"],
        "pos_label": [Real, str, "boolean", None],
        "n_bins": [Interval(Integral, 1, None, closed="left")],
        "strategy": [StrOptions({"uniform", "quantile"})],
    },
    prefer_skip_nested_validation=True,
)
def calibration_curve(
    y_true,
    y_prob,
    *,
    pos_label=None,
    n_bins=5,
    strategy="uniform",
):
    """Compute true and predicted probabilities for a calibration curve.

    The method assumes the inputs come from a binary classifier, and
    discretize the [0, 1] interval into bins.

    Calibration curves may also be referred to as reliability diagrams.

    Read more in the :ref:`User Guide <calibration>`.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        True targets.

    y_prob : array-like of shape (n_samples,)
        Probabilities of the positive class.

    pos_label : int, float, bool or str, default=None
        The label of the positive class.

        .. versionadded:: 1.1

    n_bins : int, default=5
        Number of bins to discretize the [0, 1] interval. A bigger number
        requires more data. Bins with no samples (i.e. without
        corresponding values in `y_prob`) will not be returned, thus the
        returned arrays may have less than `n_bins` values.

    strategy : {'uniform', 'quantile'}, default='uniform'
        Strategy used to define the widths of the bins.

        uniform
            The bins have identical widths.
        quantile
            The bins have the same number of samples and depend on `y_prob`.

    Returns
    -------
    prob_true : ndarray of shape (n_bins,) or smaller
        The proportion of samples whose class is the positive class, in each
        bin (fraction of positives).

    prob_pred : ndarray of shape (n_bins,) or smaller
        The mean predicted probability in each bin.

    References
    ----------
    Alexandru Niculescu-Mizil and Rich Caruana (2005) Predicting Good
    Probabilities With Supervised Learning, in Proceedings of the 22nd
    International Conference on Machine Learning (ICML).
    See section 4 (Qualitative Analysis of Predictions).

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.calibration import calibration_curve
    >>> y_true = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1])
    >>> y_pred = np.array([0.1, 0.2, 0.3, 0.4, 0.65, 0.7, 0.8, 0.9, 1.])
    >>> prob_true, prob_pred = calibration_curve(y_true, y_pred, n_bins=3)
    >>> prob_true
    array([0. , 0.5, 1. ])
    >>> prob_pred
    array([0.2 , 0.525, 0.85 ])
    """
    y_true = column_or_1d(y_true)
    y_prob = column_or_1d(y_prob)
    check_consistent_length(y_true, y_prob)
    pos_label = _check_pos_label_consistency(pos_label, y_true)

    if y_prob.min() < 0 or y_prob.max() > 1:
        raise ValueError("y_prob has values outside [0, 1].")

    labels = np.unique(y_true)
    if len(labels) > 2:
        raise ValueError(
            f"Only binary classification is supported. Provided labels {labels}."
        )
    # Convert targets to a boolean indicator of the positive class.
    y_true = y_true == pos_label

    # Build the bin edges according to the chosen strategy.
    if strategy == "quantile":
        # Edges follow the empirical quantiles of the predictions, so each
        # bin holds roughly the same number of samples.
        edges = np.percentile(y_prob, np.linspace(0, 1, n_bins + 1) * 100)
    elif strategy == "uniform":
        edges = np.linspace(0.0, 1.0, n_bins + 1)
    else:
        # Unreachable in practice: `validate_params` already restricts
        # `strategy`; kept as a defensive guard.
        raise ValueError(
            "Invalid entry to 'strategy' input. Strategy "
            "must be either 'quantile' or 'uniform'."
        )

    # Assign each prediction to a bin (interior edges only, so values at the
    # extremes land in the first/last bin).
    bin_ids = np.searchsorted(edges[1:-1], y_prob)

    # Per-bin accumulations: sum of predictions, count of positives, and size.
    pred_sum_per_bin = np.bincount(bin_ids, weights=y_prob, minlength=len(edges))
    positives_per_bin = np.bincount(bin_ids, weights=y_true, minlength=len(edges))
    samples_per_bin = np.bincount(bin_ids, minlength=len(edges))

    # Empty bins are dropped from the output.
    populated = samples_per_bin != 0
    prob_true = positives_per_bin[populated] / samples_per_bin[populated]
    prob_pred = pred_sum_per_bin[populated] / samples_per_bin[populated]

    return prob_true, prob_pred
|
|
|
|
|
class CalibrationDisplay(_BinaryClassifierCurveDisplayMixin):
    """Calibration curve (reliability diagram) visualization.

    Rather than instantiating this class directly, it is recommended to
    create it via :func:`~sklearn.calibration.CalibrationDisplay.from_estimator`
    or :func:`~sklearn.calibration.CalibrationDisplay.from_predictions`.
    All parameters are stored as attributes.

    Read more about calibration in the :ref:`User Guide <calibration>` and
    more about the scikit-learn visualization API in :ref:`visualizations`.

    For an example on how to use the visualization, see
    :ref:`sphx_glr_auto_examples_calibration_plot_calibration_curve.py`.

    .. versionadded:: 1.0

    Parameters
    ----------
    prob_true : ndarray of shape (n_bins,)
        Fraction of samples belonging to the positive class in each bin
        (fraction of positives).

    prob_pred : ndarray of shape (n_bins,)
        Mean predicted probability in each bin.

    y_prob : ndarray of shape (n_samples,)
        Probability estimates for the positive class, one per sample.

    estimator_name : str, default=None
        Name of the estimator. If None, the estimator name is not shown.

    pos_label : int, float, bool or str, default=None
        The positive class used when computing the calibration curve.
        By default, `pos_label` is set to `estimators.classes_[1]` when using
        `from_estimator` and set to 1 when using `from_predictions`.

        .. versionadded:: 1.1

    Attributes
    ----------
    line_ : matplotlib Artist
        Calibration curve.

    ax_ : matplotlib Axes
        Axes with calibration curve.

    figure_ : matplotlib Figure
        Figure containing the curve.

    See Also
    --------
    calibration_curve : Compute true and predicted probabilities for a
        calibration curve.
    CalibrationDisplay.from_predictions : Plot calibration curve using true
        and predicted labels.
    CalibrationDisplay.from_estimator : Plot calibration curve using an
        estimator and data.

    Examples
    --------
    >>> from sklearn.datasets import make_classification
    >>> from sklearn.model_selection import train_test_split
    >>> from sklearn.linear_model import LogisticRegression
    >>> from sklearn.calibration import calibration_curve, CalibrationDisplay
    >>> X, y = make_classification(random_state=0)
    >>> X_train, X_test, y_train, y_test = train_test_split(
    ...     X, y, random_state=0)
    >>> clf = LogisticRegression(random_state=0)
    >>> clf.fit(X_train, y_train)
    LogisticRegression(random_state=0)
    >>> y_prob = clf.predict_proba(X_test)[:, 1]
    >>> prob_true, prob_pred = calibration_curve(y_test, y_prob, n_bins=10)
    >>> disp = CalibrationDisplay(prob_true, prob_pred, y_prob)
    >>> disp.plot()
    <...>
    """

    def __init__(
        self, prob_true, prob_pred, y_prob, *, estimator_name=None, pos_label=None
    ):
        # The display API stores every constructor argument as a public
        # attribute of the same name.
        self.prob_true = prob_true
        self.prob_pred = prob_pred
        self.y_prob = y_prob
        self.estimator_name = estimator_name
        self.pos_label = pos_label

    def plot(self, *, ax=None, name=None, ref_line=True, **kwargs):
        """Plot visualization.

        Extra keyword arguments are forwarded to
        :func:`matplotlib.pyplot.plot`.

        Parameters
        ----------
        ax : Matplotlib Axes, default=None
            Axes object to plot on. If `None`, a new figure and axes is
            created.

        name : str, default=None
            Name for labeling curve. If `None`, use `estimator_name` if
            not `None`, otherwise no labeling is shown.

        ref_line : bool, default=True
            If `True`, plots a reference line representing a perfectly
            calibrated classifier.

        **kwargs : dict
            Keyword arguments to be passed to :func:`matplotlib.pyplot.plot`.

        Returns
        -------
        display : :class:`~sklearn.calibration.CalibrationDisplay`
            Object that stores computed values.
        """
        self.ax_, self.figure_, name = self._validate_plot_params(ax=ax, name=name)

        # Axis labels mention the positive class only when it is known.
        if self.pos_label is not None:
            info_pos_label = f"(Positive class: {self.pos_label})"
        else:
            info_pos_label = ""

        base_style = {"marker": "s", "linestyle": "-"}
        if name is not None:
            base_style["label"] = name
        curve_style = _validate_style_kwargs(base_style, kwargs)

        # Draw the diagonal reference first so the curve sits on top of it,
        # and only once per axes: skip when a previous plot already added it.
        diagonal_label = "Perfectly calibrated"
        _, existing_labels = self.ax_.get_legend_handles_labels()
        if ref_line and diagonal_label not in existing_labels:
            self.ax_.plot([0, 1], [0, 1], "k:", label=diagonal_label)
        (self.line_,) = self.ax_.plot(self.prob_pred, self.prob_true, **curve_style)

        # The legend is always shown: at minimum it labels the reference line.
        self.ax_.legend(loc="lower right")

        self.ax_.set(
            xlabel=f"Mean predicted probability {info_pos_label}",
            ylabel=f"Fraction of positives {info_pos_label}",
        )

        return self

    @classmethod
    def from_estimator(
        cls,
        estimator,
        X,
        y,
        *,
        n_bins=5,
        strategy="uniform",
        pos_label=None,
        name=None,
        ref_line=True,
        ax=None,
        **kwargs,
    ):
        """Plot calibration curve using a binary classifier and data.

        A calibration curve, also known as a reliability diagram, uses inputs
        from a binary classifier and plots the average predicted probability
        for each bin against the fraction of positive classes, on the
        y-axis.

        Extra keyword arguments are forwarded to
        :func:`matplotlib.pyplot.plot`.

        Read more about calibration in the :ref:`User Guide <calibration>` and
        more about the scikit-learn visualization API in :ref:`visualizations`.

        .. versionadded:: 1.0

        Parameters
        ----------
        estimator : estimator instance
            Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`
            in which the last estimator is a classifier. The classifier must
            have a :term:`predict_proba` method.

        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Input values.

        y : array-like of shape (n_samples,)
            Binary target values.

        n_bins : int, default=5
            Number of bins to discretize the [0, 1] interval into when
            calculating the calibration curve. A bigger number requires more
            data.

        strategy : {'uniform', 'quantile'}, default='uniform'
            Strategy used to define the widths of the bins.

            - `'uniform'`: The bins have identical widths.
            - `'quantile'`: The bins have the same number of samples and depend
              on predicted probabilities.

        pos_label : int, float, bool or str, default=None
            The positive class when computing the calibration curve.
            By default, `estimators.classes_[1]` is considered as the
            positive class.

            .. versionadded:: 1.1

        name : str, default=None
            Name for labeling curve. If `None`, the name of the estimator is
            used.

        ref_line : bool, default=True
            If `True`, plots a reference line representing a perfectly
            calibrated classifier.

        ax : matplotlib axes, default=None
            Axes object to plot on. If `None`, a new figure and axes is
            created.

        **kwargs : dict
            Keyword arguments to be passed to :func:`matplotlib.pyplot.plot`.

        Returns
        -------
        display : :class:`~sklearn.calibration.CalibrationDisplay`.
            Object that stores computed values.

        See Also
        --------
        CalibrationDisplay.from_predictions : Plot calibration curve using true
            and predicted labels.

        Examples
        --------
        >>> import matplotlib.pyplot as plt
        >>> from sklearn.datasets import make_classification
        >>> from sklearn.model_selection import train_test_split
        >>> from sklearn.linear_model import LogisticRegression
        >>> from sklearn.calibration import CalibrationDisplay
        >>> X, y = make_classification(random_state=0)
        >>> X_train, X_test, y_train, y_test = train_test_split(
        ...     X, y, random_state=0)
        >>> clf = LogisticRegression(random_state=0)
        >>> clf.fit(X_train, y_train)
        LogisticRegression(random_state=0)
        >>> disp = CalibrationDisplay.from_estimator(clf, X_test, y_test)
        >>> plt.show()
        """
        # Extract positive-class probabilities (and resolve the positive
        # label / display name) from the fitted estimator.
        prob_pos, pos_label, name = cls._validate_and_get_response_values(
            estimator,
            X,
            y,
            response_method="predict_proba",
            pos_label=pos_label,
            name=name,
        )

        # Delegate the binning and plotting to `from_predictions`.
        return cls.from_predictions(
            y,
            prob_pos,
            n_bins=n_bins,
            strategy=strategy,
            pos_label=pos_label,
            name=name,
            ref_line=ref_line,
            ax=ax,
            **kwargs,
        )

    @classmethod
    def from_predictions(
        cls,
        y_true,
        y_prob,
        *,
        n_bins=5,
        strategy="uniform",
        pos_label=None,
        name=None,
        ref_line=True,
        ax=None,
        **kwargs,
    ):
        """Plot calibration curve using true labels and predicted probabilities.

        Calibration curve, also known as reliability diagram, uses inputs
        from a binary classifier and plots the average predicted probability
        for each bin against the fraction of positive classes, on the
        y-axis.

        Extra keyword arguments are forwarded to
        :func:`matplotlib.pyplot.plot`.

        Read more about calibration in the :ref:`User Guide <calibration>` and
        more about the scikit-learn visualization API in :ref:`visualizations`.

        .. versionadded:: 1.0

        Parameters
        ----------
        y_true : array-like of shape (n_samples,)
            True labels.

        y_prob : array-like of shape (n_samples,)
            The predicted probabilities of the positive class.

        n_bins : int, default=5
            Number of bins to discretize the [0, 1] interval into when
            calculating the calibration curve. A bigger number requires more
            data.

        strategy : {'uniform', 'quantile'}, default='uniform'
            Strategy used to define the widths of the bins.

            - `'uniform'`: The bins have identical widths.
            - `'quantile'`: The bins have the same number of samples and depend
              on predicted probabilities.

        pos_label : int, float, bool or str, default=None
            The positive class when computing the calibration curve.
            By default `pos_label` is set to 1.

            .. versionadded:: 1.1

        name : str, default=None
            Name for labeling curve.

        ref_line : bool, default=True
            If `True`, plots a reference line representing a perfectly
            calibrated classifier.

        ax : matplotlib axes, default=None
            Axes object to plot on. If `None`, a new figure and axes is
            created.

        **kwargs : dict
            Keyword arguments to be passed to :func:`matplotlib.pyplot.plot`.

        Returns
        -------
        display : :class:`~sklearn.calibration.CalibrationDisplay`.
            Object that stores computed values.

        See Also
        --------
        CalibrationDisplay.from_estimator : Plot calibration curve using an
            estimator and data.

        Examples
        --------
        >>> import matplotlib.pyplot as plt
        >>> from sklearn.datasets import make_classification
        >>> from sklearn.model_selection import train_test_split
        >>> from sklearn.linear_model import LogisticRegression
        >>> from sklearn.calibration import CalibrationDisplay
        >>> X, y = make_classification(random_state=0)
        >>> X_train, X_test, y_train, y_test = train_test_split(
        ...     X, y, random_state=0)
        >>> clf = LogisticRegression(random_state=0)
        >>> clf.fit(X_train, y_train)
        LogisticRegression(random_state=0)
        >>> y_prob = clf.predict_proba(X_test)[:, 1]
        >>> disp = CalibrationDisplay.from_predictions(y_test, y_prob)
        >>> plt.show()
        """
        pos_label_validated, name = cls._validate_from_predictions_params(
            y_true, y_prob, sample_weight=None, pos_label=pos_label, name=name
        )

        # `calibration_curve` performs its own `pos_label` validation, so the
        # raw value is forwarded here.
        prob_true, prob_pred = calibration_curve(
            y_true, y_prob, n_bins=n_bins, strategy=strategy, pos_label=pos_label
        )

        display = cls(
            prob_true=prob_true,
            prob_pred=prob_pred,
            y_prob=y_prob,
            estimator_name=name,
            pos_label=pos_label_validated,
        )
        return display.plot(ax=ax, ref_line=ref_line, **kwargs)
|
|