|
|
|
|
|
|
|
import numpy as np |
|
|
|
from ..base import BaseEstimator, ClassifierMixin |
|
from ..utils._metadata_requests import RequestMethod |
|
from .metaestimators import available_if |
|
from .validation import ( |
|
_check_sample_weight, |
|
_num_samples, |
|
check_array, |
|
check_is_fitted, |
|
check_random_state, |
|
) |
|
|
|
|
|
class ArraySlicingWrapper: |
|
""" |
|
Parameters |
|
---------- |
|
    array : array-like
        The array to slice; `__getitem__` wraps the result in a `MockDataFrame`.
|
""" |
|
|
|
def __init__(self, array): |
|
self.array = array |
|
|
|
def __getitem__(self, aslice): |
|
return MockDataFrame(self.array[aslice]) |
|
|
|
|
|
class MockDataFrame: |
|
""" |
|
Parameters |
|
---------- |
|
    array : array-like
        The array to wrap with a minimal pandas-like interface.
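
    Examples
    --------
    A minimal sketch of the pandas-like surface this mock exposes:

    >>> import numpy as np
    >>> from sklearn.utils._mocking import MockDataFrame
    >>> df = MockDataFrame(np.arange(6).reshape(3, 2))
    >>> df.shape
    (3, 2)
    >>> len(df)
    3
    >>> df.iloc[:2].shape
    (2, 2)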
|
""" |
|
|
|
|
|
|
|
def __init__(self, array): |
|
self.array = array |
|
self.values = array |
|
self.shape = array.shape |
|
self.ndim = array.ndim |
|
|
|
self.iloc = ArraySlicingWrapper(array) |
|
|
|
def __len__(self): |
|
return len(self.array) |
|
|
|
    def __array__(self, dtype=None):
        # Make the mock array-like so that `np.asarray` (and scikit-learn's
        # input validation) can recover the underlying ndarray. The `dtype`
        # argument is accepted for API compatibility but ignored.
        return self.array
|
|
|
def __eq__(self, other): |
|
return MockDataFrame(self.array == other.array) |
|
|
|
def __ne__(self, other): |
|
return not self == other |
|
|
|
def take(self, indices, axis=0): |
|
return MockDataFrame(self.array.take(indices, axis=axis)) |
|
|
|
|
|
class CheckingClassifier(ClassifierMixin, BaseEstimator): |
|
"""Dummy classifier to test pipelining and meta-estimators. |
|
|
|
    Checks some property of `X` and `y` in fit / predict.
    This allows testing whether pipelines / cross-validation or meta-estimators
    changed the input.
|
|
|
Can also be used to check if `fit_params` are passed correctly, and |
|
to force a certain score to be returned. |
|
|
|
Parameters |
|
---------- |
|
    check_y, check_X : callable, default=None
        The callables used to validate `X` and `y`. These callables should
        return a bool where `False` triggers an `AssertionError`. If `None`,
        the data is not validated.
|
|
|
check_y_params, check_X_params : dict, default=None |
|
The optional parameters to pass to `check_X` and `check_y`. If `None`, |
|
then no parameters are passed in. |
|
|
|
methods_to_check : "all" or list of str, default="all" |
|
The methods in which the checks should be applied. By default, |
|
all checks will be done on all methods (`fit`, `predict`, |
|
`predict_proba`, `decision_function` and `score`). |
|
|
|
    foo_param : int, default=0
        A `foo` param. When `foo_param > 1`, the output of :meth:`score` will
        be 1, otherwise it is 0.
|
|
|
    expected_sample_weight : bool, default=None
        Whether to check that a valid `sample_weight` was passed to `fit`.
|
|
|
    expected_fit_params : list of str, default=None
        A list of the expected parameters passed when calling `fit`.

    random_state : int, RandomState instance or None, default=None
        Controls the randomness of the predictions returned by
        :meth:`predict`, :meth:`predict_proba` and :meth:`decision_function`.
|
|
|
Attributes |
|
---------- |
|
    classes_ : ndarray of shape (n_classes,)
        The classes seen during `fit`.
|
|
|
n_features_in_ : int |
|
The number of features seen during `fit`. |
|
|
|
Examples |
|
-------- |
|
>>> from sklearn.utils._mocking import CheckingClassifier |
|
|
|
    This helper allows asserting specific properties of `X` or `y`. In this
    case we expect `check_X` or `check_y` to return a boolean.
|
|
|
>>> from sklearn.datasets import load_iris |
|
>>> X, y = load_iris(return_X_y=True) |
|
>>> clf = CheckingClassifier(check_X=lambda x: x.shape == (150, 4)) |
|
>>> clf.fit(X, y) |
|
CheckingClassifier(...) |
|
|
|
We can also provide a check which might raise an error. In this case, we |
|
expect `check_X` to return `X` and `check_y` to return `y`. |
|
|
|
>>> from sklearn.utils import check_array |
|
>>> clf = CheckingClassifier(check_X=check_array) |
|
>>> clf.fit(X, y) |
|
CheckingClassifier(...) |
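
    We can also check that expected fit parameters are forwarded to `fit`. The
    keyword `custom_weights` below is only an illustrative name, not a real
    scikit-learn parameter:

    >>> import numpy as np
    >>> clf = CheckingClassifier(expected_fit_params=["custom_weights"])
    >>> clf = clf.fit(X, y, custom_weights=np.ones(len(y)))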
|
""" |
|
|
|
def __init__( |
|
self, |
|
*, |
|
check_y=None, |
|
check_y_params=None, |
|
check_X=None, |
|
check_X_params=None, |
|
methods_to_check="all", |
|
foo_param=0, |
|
expected_sample_weight=None, |
|
expected_fit_params=None, |
|
random_state=None, |
|
): |
|
self.check_y = check_y |
|
self.check_y_params = check_y_params |
|
self.check_X = check_X |
|
self.check_X_params = check_X_params |
|
self.methods_to_check = methods_to_check |
|
self.foo_param = foo_param |
|
self.expected_sample_weight = expected_sample_weight |
|
self.expected_fit_params = expected_fit_params |
|
self.random_state = random_state |
|
|
|
def _check_X_y(self, X, y=None, should_be_fitted=True): |
|
"""Validate X and y and make extra check. |
|
|
|
Parameters |
|
---------- |
|
X : array-like of shape (n_samples, n_features) |
|
The data set. |
|
`X` is checked only if `check_X` is not `None` (default is None). |
|
        y : array-like of shape (n_samples,), default=None
|
The corresponding target, by default `None`. |
|
`y` is checked only if `check_y` is not `None` (default is None). |
|
should_be_fitted : bool, default=True |
|
            Whether or not the classifier should already be fitted.
|
By default True. |
|
|
|
Returns |
|
------- |
|
X, y |
|
""" |
|
if should_be_fitted: |
|
check_is_fitted(self) |
|
if self.check_X is not None: |
|
params = {} if self.check_X_params is None else self.check_X_params |
|
checked_X = self.check_X(X, **params) |
|
if isinstance(checked_X, (bool, np.bool_)): |
|
assert checked_X |
|
else: |
|
X = checked_X |
|
if y is not None and self.check_y is not None: |
|
params = {} if self.check_y_params is None else self.check_y_params |
|
checked_y = self.check_y(y, **params) |
|
if isinstance(checked_y, (bool, np.bool_)): |
|
assert checked_y |
|
else: |
|
y = checked_y |
|
return X, y |
|
|
|
def fit(self, X, y, sample_weight=None, **fit_params): |
|
"""Fit classifier. |
|
|
|
Parameters |
|
---------- |
|
X : array-like of shape (n_samples, n_features) |
|
Training vector, where `n_samples` is the number of samples and |
|
`n_features` is the number of features. |
|
|
|
        y : array-like of shape (n_samples, n_outputs) or (n_samples,)
            Target relative to X for classification or regression.
|
|
|
sample_weight : array-like of shape (n_samples,), default=None |
|
Sample weights. If None, then samples are equally weighted. |
|
|
|
        **fit_params : dict of string -> object
            Additional parameters passed to `fit`; validated against
            `expected_fit_params` if it is set.
|
|
|
Returns |
|
------- |
|
self |
|
""" |
|
assert _num_samples(X) == _num_samples(y) |
|
if self.methods_to_check == "all" or "fit" in self.methods_to_check: |
|
X, y = self._check_X_y(X, y, should_be_fitted=False) |
|
self.n_features_in_ = np.shape(X)[1] |
|
self.classes_ = np.unique(check_array(y, ensure_2d=False, allow_nd=True)) |
|
if self.expected_fit_params: |
|
missing = set(self.expected_fit_params) - set(fit_params) |
|
if missing: |
|
raise AssertionError( |
|
f"Expected fit parameter(s) {list(missing)} not seen." |
|
) |
|
for key, value in fit_params.items(): |
|
if _num_samples(value) != _num_samples(X): |
|
raise AssertionError( |
|
f"Fit parameter {key} has length {_num_samples(value)}" |
|
f"; expected {_num_samples(X)}." |
|
) |
|
if self.expected_sample_weight: |
|
if sample_weight is None: |
|
raise AssertionError("Expected sample_weight to be passed") |
|
_check_sample_weight(sample_weight, X) |
|
|
|
return self |
|
|
|
def predict(self, X): |
|
"""Predict the first class seen in `classes_`. |
|
|
|
Parameters |
|
---------- |
|
X : array-like of shape (n_samples, n_features) |
|
The input data. |
|
|
|
Returns |
|
------- |
|
        preds : ndarray of shape (n_samples,)
            Predictions drawn uniformly at random from the classes in
            `classes_`.
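
        Examples
        --------
        A small sketch: predictions are drawn from the fitted `classes_`.

        >>> import numpy as np
        >>> from sklearn.datasets import load_iris
        >>> from sklearn.utils._mocking import CheckingClassifier
        >>> X, y = load_iris(return_X_y=True)
        >>> preds = CheckingClassifier(random_state=0).fit(X, y).predict(X)
        >>> bool(np.isin(preds, [0, 1, 2]).all())
        True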
|
""" |
|
if self.methods_to_check == "all" or "predict" in self.methods_to_check: |
|
X, y = self._check_X_y(X) |
|
rng = check_random_state(self.random_state) |
|
return rng.choice(self.classes_, size=_num_samples(X)) |
|
|
|
def predict_proba(self, X): |
|
"""Predict probabilities for each class. |
|
|
|
        Here, the dummy classifier draws random values, takes their absolute
        value and normalizes each row, so the returned probabilities over
        `classes_` sum to one.
|
|
|
Parameters |
|
---------- |
|
X : array-like of shape (n_samples, n_features) |
|
The input data. |
|
|
|
Returns |
|
------- |
|
proba : ndarray of shape (n_samples, n_classes) |
|
The probabilities for each sample and class. |
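
        Examples
        --------
        A small sanity-check sketch: with the iris data, each row of the
        returned array sums to one.

        >>> import numpy as np
        >>> from sklearn.datasets import load_iris
        >>> from sklearn.utils._mocking import CheckingClassifier
        >>> X, y = load_iris(return_X_y=True)
        >>> proba = CheckingClassifier(random_state=0).fit(X, y).predict_proba(X)
        >>> proba.shape
        (150, 3)
        >>> bool(np.allclose(proba.sum(axis=1), 1.0))
        True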
|
""" |
|
if self.methods_to_check == "all" or "predict_proba" in self.methods_to_check: |
|
X, y = self._check_X_y(X) |
|
rng = check_random_state(self.random_state) |
|
proba = rng.randn(_num_samples(X), len(self.classes_)) |
|
proba = np.abs(proba, out=proba) |
|
proba /= np.sum(proba, axis=1)[:, np.newaxis] |
|
return proba |
|
|
|
def decision_function(self, X): |
|
"""Confidence score. |
|
|
|
Parameters |
|
---------- |
|
X : array-like of shape (n_samples, n_features) |
|
The input data. |
|
|
|
Returns |
|
------- |
|
decision : ndarray of shape (n_samples,) if n_classes == 2\ |
|
else (n_samples, n_classes) |
|
Confidence score. |
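
        Examples
        --------
        A small sketch of the returned shape for a binary problem:

        >>> import numpy as np
        >>> from sklearn.utils._mocking import CheckingClassifier
        >>> X, y = np.ones((4, 2)), np.array([0, 1, 0, 1])
        >>> CheckingClassifier(random_state=0).fit(X, y).decision_function(X).shape
        (4,)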
|
""" |
|
if ( |
|
self.methods_to_check == "all" |
|
or "decision_function" in self.methods_to_check |
|
): |
|
X, y = self._check_X_y(X) |
|
rng = check_random_state(self.random_state) |
|
        if len(self.classes_) == 2:
            # For a binary problem, follow the scikit-learn convention and
            # return a 1d array of confidence scores.
            return rng.randn(_num_samples(X))
|
else: |
|
return rng.randn(_num_samples(X), len(self.classes_)) |
|
|
|
def score(self, X=None, Y=None): |
|
"""Fake score. |
|
|
|
Parameters |
|
---------- |
|
X : array-like of shape (n_samples, n_features) |
|
Input data, where `n_samples` is the number of samples and |
|
`n_features` is the number of features. |
|
|
|
        Y : array-like of shape (n_samples, n_outputs) or (n_samples,), default=None
|
Target relative to X for classification or regression; |
|
None for unsupervised learning. |
|
|
|
Returns |
|
------- |
|
        score : float
            Either 0 or 1 depending on `foo_param` (i.e. `score=1` if
            `foo_param > 1`, otherwise `score=0`).
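
        Examples
        --------
        A minimal sketch of the `foo_param` behaviour:

        >>> from sklearn.utils._mocking import CheckingClassifier
        >>> CheckingClassifier(foo_param=2).fit([[1], [2]], [0, 1]).score()
        1.0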
|
""" |
|
if self.methods_to_check == "all" or "score" in self.methods_to_check: |
|
self._check_X_y(X, Y) |
|
if self.foo_param > 1: |
|
score = 1.0 |
|
else: |
|
score = 0.0 |
|
return score |
|
|
|
def __sklearn_tags__(self): |
|
tags = super().__sklearn_tags__() |
|
tags._skip_test = True |
|
tags.input_tags.two_d_array = False |
|
tags.target_tags.one_d_labels = True |
|
        return tags


# Deactivate key validation for `CheckingClassifier.set_fit_request` so that
# arbitrary `fit_params` can be passed to `fit` and recorded, instead of being
# rejected as unexpected metadata keys.
CheckingClassifier.set_fit_request = RequestMethod( |
|
name="fit", keys=[], validate_keys=False |
|
) |
|
|
|
|
|
class NoSampleWeightWrapper(BaseEstimator): |
|
"""Wrap estimator which will not expose `sample_weight`. |
|
|
|
Parameters |
|
---------- |
|
est : estimator, default=None |
|
The estimator to wrap. |
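
    Examples
    --------
    A minimal sketch: wrapping hides `sample_weight` support from
    introspection helpers such as `has_fit_parameter`.

    >>> from sklearn.linear_model import LogisticRegression
    >>> from sklearn.utils._mocking import NoSampleWeightWrapper
    >>> from sklearn.utils.validation import has_fit_parameter
    >>> has_fit_parameter(LogisticRegression(), "sample_weight")
    True
    >>> wrapped = NoSampleWeightWrapper(LogisticRegression())
    >>> has_fit_parameter(wrapped, "sample_weight")
    False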
|
""" |
|
|
|
def __init__(self, est=None): |
|
self.est = est |
|
|
|
def fit(self, X, y): |
|
return self.est.fit(X, y) |
|
|
|
def predict(self, X): |
|
return self.est.predict(X) |
|
|
|
def predict_proba(self, X): |
|
return self.est.predict_proba(X) |
|
|
|
def __sklearn_tags__(self): |
|
tags = super().__sklearn_tags__() |
|
tags._skip_test = True |
|
return tags |
|
|
|
|
|
def _check_response(method): |
|
def check(self): |
|
return self.response_methods is not None and method in self.response_methods |
|
|
|
return check |
|
|
|
|
|
class _MockEstimatorOnOffPrediction(BaseEstimator): |
|
"""Estimator for which we can turn on/off the prediction methods. |
|
|
|
Parameters |
|
---------- |
|
    response_methods : list of \
            {"predict", "predict_proba", "decision_function"}, default=None
        List containing the response methods implemented by the estimator.
        When a response method is in the list, calling it returns the name of
        that method. Otherwise, an `AttributeError` is raised, so `getattr`
        behaves as it would on any conventional estimator. By default, no
        response methods are mocked.
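
    Examples
    --------
    A small sketch of toggling the available response methods:

    >>> import numpy as np
    >>> from sklearn.utils._mocking import _MockEstimatorOnOffPrediction
    >>> est = _MockEstimatorOnOffPrediction(response_methods=["predict"])
    >>> est = est.fit(np.ones((4, 2)), np.array([0, 1, 0, 1]))
    >>> est.predict(np.ones((4, 2)))
    'predict'
    >>> hasattr(est, "predict_proba")
    False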
|
""" |
|
|
|
def __init__(self, response_methods=None): |
|
self.response_methods = response_methods |
|
|
|
def fit(self, X, y): |
|
self.classes_ = np.unique(y) |
|
return self |
|
|
|
@available_if(_check_response("predict")) |
|
def predict(self, X): |
|
return "predict" |
|
|
|
@available_if(_check_response("predict_proba")) |
|
def predict_proba(self, X): |
|
return "predict_proba" |
|
|
|
@available_if(_check_response("decision_function")) |
|
def decision_function(self, X): |
|
return "decision_function" |
|
|