|
""" |
|
Logistic Regression |
|
""" |
|
|
|
|
|
|
|
|
|
import numbers |
|
import warnings |
|
from numbers import Integral, Real |
|
|
|
import numpy as np |
|
from joblib import effective_n_jobs |
|
from scipy import optimize |
|
|
|
from sklearn.metrics import get_scorer_names |
|
|
|
from .._loss.loss import HalfBinomialLoss, HalfMultinomialLoss |
|
from ..base import _fit_context |
|
from ..metrics import get_scorer |
|
from ..model_selection import check_cv |
|
from ..preprocessing import LabelBinarizer, LabelEncoder |
|
from ..svm._base import _fit_liblinear |
|
from ..utils import ( |
|
Bunch, |
|
check_array, |
|
check_consistent_length, |
|
check_random_state, |
|
compute_class_weight, |
|
) |
|
from ..utils._param_validation import Hidden, Interval, StrOptions |
|
from ..utils.extmath import row_norms, softmax |
|
from ..utils.metadata_routing import ( |
|
MetadataRouter, |
|
MethodMapping, |
|
_raise_for_params, |
|
_routing_enabled, |
|
process_routing, |
|
) |
|
from ..utils.multiclass import check_classification_targets |
|
from ..utils.optimize import _check_optimize_result, _newton_cg |
|
from ..utils.parallel import Parallel, delayed |
|
from ..utils.validation import ( |
|
_check_method_params, |
|
_check_sample_weight, |
|
check_is_fitted, |
|
validate_data, |
|
) |
|
from ._base import BaseEstimator, LinearClassifierMixin, SparseCoefMixin |
|
from ._glm.glm import NewtonCholeskySolver |
|
from ._linear_loss import LinearModelLoss |
|
from ._sag import sag_solver |
|
|
|
_LOGISTIC_SOLVER_CONVERGENCE_MSG = ( |
|
"Please also refer to the documentation for alternative solver options:\n" |
|
" https://scikit-learn.org/stable/modules/linear_model.html" |
|
"#logistic-regression" |
|
) |
|
|
|
|
|
def _check_solver(solver, penalty, dual): |
|
if solver not in ["liblinear", "saga"] and penalty not in ("l2", None): |
|
raise ValueError( |
|
f"Solver {solver} supports only 'l2' or None penalties, got {penalty} " |
|
"penalty." |
|
) |
|
if solver != "liblinear" and dual: |
|
raise ValueError(f"Solver {solver} supports only dual=False, got dual={dual}") |
|
|
|
if penalty == "elasticnet" and solver != "saga": |
|
raise ValueError( |
|
f"Only 'saga' solver supports elasticnet penalty, got solver={solver}." |
|
) |
|
|
|
if solver == "liblinear" and penalty is None: |
|
raise ValueError("penalty=None is not supported for the liblinear solver") |
|
|
|
return solver |
|
|
|
|
|
def _check_multi_class(multi_class, solver, n_classes): |
|
"""Computes the multi class type, either "multinomial" or "ovr". |
|
|
|
For `n_classes` > 2 and a solver that supports it, returns "multinomial". |
|
For all other cases, in particular binary classification, return "ovr". |
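
For instance (illustrative of the rules above):

>>> _check_multi_class("auto", "lbfgs", 3)
'multinomial'
>>> _check_multi_class("auto", "liblinear", 3)
'ovr'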
|
""" |
|
if multi_class == "auto": |
|
if solver in ("liblinear",): |
|
multi_class = "ovr" |
|
elif n_classes > 2: |
|
multi_class = "multinomial" |
|
else: |
|
multi_class = "ovr" |
|
if multi_class == "multinomial" and solver in ("liblinear",): |
|
raise ValueError("Solver %s does not support a multinomial backend." % solver) |
|
return multi_class |
|
|
|
|
|
def _logistic_regression_path( |
|
X, |
|
y, |
|
pos_class=None, |
|
Cs=10, |
|
fit_intercept=True, |
|
max_iter=100, |
|
tol=1e-4, |
|
verbose=0, |
|
solver="lbfgs", |
|
coef=None, |
|
class_weight=None, |
|
dual=False, |
|
penalty="l2", |
|
intercept_scaling=1.0, |
|
multi_class="auto", |
|
random_state=None, |
|
check_input=True, |
|
max_squared_sum=None, |
|
sample_weight=None, |
|
l1_ratio=None, |
|
n_threads=1, |
|
): |
|
"""Compute a Logistic Regression model for a list of regularization |
|
parameters. |
|
|
|
This is an implementation that uses the result of the previous model |
|
to speed up computations along the set of solutions, making it faster |
|
than sequentially calling LogisticRegression for the different parameters. |
|
Note that there will be no speedup with liblinear solver, since it does |
|
not handle warm-starting. |
|
|
|
Read more in the :ref:`User Guide <logistic_regression>`. |
|
|
|
Parameters |
|
---------- |
|
X : {array-like, sparse matrix} of shape (n_samples, n_features) |
|
Input data. |
|
|
|
y : array-like of shape (n_samples,) or (n_samples, n_targets) |
|
Target values.
|
|
|
pos_class : int, default=None |
|
The class with respect to which we perform a one-vs-all fit. |
|
If None, then it is assumed that the given problem is binary. |
|
|
|
Cs : int or array-like of shape (n_cs,), default=10 |
|
List of values for the regularization parameter or integer specifying
the number of regularization parameters that should be used. In the
latter case, the parameters are chosen on a logarithmic scale between
1e-4 and 1e4.
|
|
|
fit_intercept : bool, default=True |
|
Whether to fit an intercept for the model. If True, the shape of
the returned array is (n_cs, n_features + 1).
|
|
|
max_iter : int, default=100 |
|
Maximum number of iterations for the solver. |
|
|
|
tol : float, default=1e-4 |
|
Stopping criterion. For the newton-cg and lbfgs solvers, the iteration |
|
will stop when ``max{|g_i | i = 1, ..., n} <= tol`` |
|
where ``g_i`` is the i-th component of the gradient. |
|
|
|
verbose : int, default=0 |
|
For the liblinear and lbfgs solvers set verbose to any positive |
|
number for verbosity. |
|
|
|
solver : {'lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'}, \ |
|
default='lbfgs' |
|
Numerical solver to use. |
|
|
|
coef : array-like of shape (n_features,), default=None |
|
Initialization value for coefficients of logistic regression. |
|
Useless for liblinear solver. |
|
|
|
class_weight : dict or 'balanced', default=None |
|
Weights associated with classes in the form ``{class_label: weight}``. |
|
If not given, all classes are supposed to have weight one. |
|
|
|
The "balanced" mode uses the values of y to automatically adjust |
|
weights inversely proportional to class frequencies in the input data |
|
as ``n_samples / (n_classes * np.bincount(y))``. |
|
|
|
Note that these weights will be multiplied with sample_weight (passed |
|
through the fit method) if sample_weight is specified. |
|
|
|
dual : bool, default=False |
|
Dual or primal formulation. Dual formulation is only implemented for |
|
l2 penalty with liblinear solver. Prefer dual=False when |
|
n_samples > n_features. |
|
|
|
penalty : {'l1', 'l2', 'elasticnet'}, default='l2' |
|
Used to specify the norm used in the penalization. The 'newton-cg', |
|
'sag' and 'lbfgs' solvers support only l2 penalties. 'elasticnet' is |
|
only supported by the 'saga' solver. |
|
|
|
intercept_scaling : float, default=1. |
|
Useful only when the solver 'liblinear' is used |
|
and self.fit_intercept is set to True. In this case, x becomes |
|
[x, self.intercept_scaling], |
|
i.e. a "synthetic" feature with constant value equal to |
|
intercept_scaling is appended to the instance vector. |
|
The intercept becomes ``intercept_scaling * synthetic_feature_weight``. |
|
|
|
Note! the synthetic feature weight is subject to l1/l2 regularization |
|
like all other features.
|
To lessen the effect of regularization on synthetic feature weight |
|
(and therefore on the intercept) intercept_scaling has to be increased. |
|
|
|
multi_class : {'ovr', 'multinomial', 'auto'}, default='auto' |
|
If the option chosen is 'ovr', then a binary problem is fit for each |
|
label. For 'multinomial' the loss minimised is the multinomial loss fit |
|
across the entire probability distribution, *even when the data is |
|
binary*. 'multinomial' is unavailable when solver='liblinear'. |
|
'auto' selects 'ovr' if the data is binary, or if solver='liblinear', |
|
and otherwise selects 'multinomial'. |
|
|
|
.. versionadded:: 0.18 |
|
Stochastic Average Gradient descent solver for 'multinomial' case. |
|
.. versionchanged:: 0.22 |
|
Default changed from 'ovr' to 'auto' in 0.22. |
|
|
|
random_state : int, RandomState instance, default=None |
|
Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the |
|
data. See :term:`Glossary <random_state>` for details. |
|
|
|
check_input : bool, default=True |
|
If False, the input arrays X and y will not be checked. |
|
|
|
max_squared_sum : float, default=None |
|
Maximum squared sum of X over samples. Used only in SAG solver. |
|
If None, it will be computed, going through all the samples. |
|
The value should be precomputed to speed up cross validation. |
|
|
|
sample_weight : array-like of shape(n_samples,), default=None |
|
Array of weights that are assigned to individual samples. |
|
If not provided, then each sample is given unit weight. |
|
|
|
l1_ratio : float, default=None |
|
The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only |
|
used if ``penalty='elasticnet'``. Setting ``l1_ratio=0`` is equivalent |
|
to using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent |
|
to using ``penalty='l1'``. For ``0 < l1_ratio < 1``, the penalty is a
|
combination of L1 and L2. |
|
|
|
n_threads : int, default=1 |
|
Number of OpenMP threads to use. |
|
|
|
Returns |
|
------- |
|
coefs : ndarray of shape (n_cs, n_features) or (n_cs, n_features + 1) |
|
List of coefficients for the Logistic Regression model. If |
|
fit_intercept is set to True then the second dimension will be |
|
n_features + 1, where the last item represents the intercept. For |
|
``multi_class='multinomial'``, the shape is (n_classes, n_cs,
|
n_features) or (n_classes, n_cs, n_features + 1). |
|
|
|
Cs : ndarray |
|
Grid of Cs used for cross-validation. |
|
|
|
n_iter : array of shape (n_cs,) |
|
Actual number of iterations for each value in Cs.
|
|
|
Notes |
|
----- |
|
You might get slightly different results with the liblinear solver than
with the others, since it uses LIBLINEAR, which penalizes the intercept.
|
|
|
.. versionchanged:: 0.19 |
|
The "copy" parameter was removed. |
|
""" |
|
if isinstance(Cs, numbers.Integral): |
|
Cs = np.logspace(-4, 4, Cs) |
|
|
|
solver = _check_solver(solver, penalty, dual) |
|
|
|
|
|
if check_input: |
|
X = check_array( |
|
X, |
|
accept_sparse="csr", |
|
dtype=np.float64, |
|
accept_large_sparse=solver not in ["liblinear", "sag", "saga"], |
|
) |
|
y = check_array(y, ensure_2d=False, dtype=None) |
|
check_consistent_length(X, y) |
|
n_samples, n_features = X.shape |
|
|
|
classes = np.unique(y) |
|
random_state = check_random_state(random_state) |
|
|
|
multi_class = _check_multi_class(multi_class, solver, len(classes)) |
|
if pos_class is None and multi_class != "multinomial": |
|
if classes.size > 2: |
|
raise ValueError("To fit OvR, use the pos_class argument") |
|
|
|
pos_class = classes[1] |
|
|
|
if sample_weight is not None or class_weight is not None: |
|
sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype, copy=True) |
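# If class_weight is a dict (provided manually), the weights are
# assigned to the original labels. If it is "balanced", the class
# weights are assigned after masking the labels with a one-vs-rest split.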
|
|
|
|
|
|
|
|
|
le = LabelEncoder() |
|
if isinstance(class_weight, dict) or ( |
|
multi_class == "multinomial" and class_weight is not None |
|
): |
|
class_weight_ = compute_class_weight(class_weight, classes=classes, y=y) |
|
sample_weight *= class_weight_[le.fit_transform(y)] |
|
|
|
|
|
|
|
if multi_class == "ovr": |
|
w0 = np.zeros(n_features + int(fit_intercept), dtype=X.dtype) |
|
mask = y == pos_class |
|
y_bin = np.ones(y.shape, dtype=X.dtype) |
|
if solver == "liblinear": |
|
mask_classes = np.array([-1, 1]) |
|
y_bin[~mask] = -1.0 |
|
else: |
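# HalfBinomialLoss, used by these solvers, represents y in [0, 1]
# instead of [-1, 1].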
|
|
|
|
|
mask_classes = np.array([0, 1]) |
|
y_bin[~mask] = 0.0 |
|
|
|
|
|
if class_weight == "balanced": |
|
class_weight_ = compute_class_weight( |
|
class_weight, classes=mask_classes, y=y_bin |
|
) |
|
sample_weight *= class_weight_[le.fit_transform(y_bin)] |
|
|
|
else: |
|
if solver in ["sag", "saga", "lbfgs", "newton-cg", "newton-cholesky"]: |
|
|
|
|
|
|
|
|
|
le = LabelEncoder() |
|
Y_multi = le.fit_transform(y).astype(X.dtype, copy=False) |
|
else: |
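# Otherwise apply LabelBinarizer, i.e. y is one-hot encoded.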
|
|
|
lbin = LabelBinarizer() |
|
Y_multi = lbin.fit_transform(y) |
|
if Y_multi.shape[1] == 1: |
|
Y_multi = np.hstack([1 - Y_multi, Y_multi]) |
|
|
|
w0 = np.zeros( |
|
(classes.size, n_features + int(fit_intercept)), order="F", dtype=X.dtype |
|
) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if solver in ["lbfgs", "newton-cg", "newton-cholesky"]: |
|
|
|
|
|
|
|
sw_sum = n_samples if sample_weight is None else np.sum(sample_weight) |
|
|
|
if coef is not None: |
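# Initialization of coef must work both with and without the
# intercept term.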
|
|
|
if multi_class == "ovr": |
|
if coef.size not in (n_features, w0.size): |
|
raise ValueError( |
|
"Initialization coef is of shape %d, expected shape %d or %d" |
|
% (coef.size, n_features, w0.size) |
|
) |
|
w0[: coef.size] = coef |
|
else: |
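# For binary problems coef.shape[0] should be 1, otherwise it
# should be classes.size.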
|
|
|
|
|
n_classes = classes.size |
|
if n_classes == 2: |
|
n_classes = 1 |
|
|
|
if coef.shape[0] != n_classes or coef.shape[1] not in ( |
|
n_features, |
|
n_features + 1, |
|
): |
|
raise ValueError( |
|
"Initialization coef is of shape (%d, %d), expected " |
|
"shape (%d, %d) or (%d, %d)" |
|
% ( |
|
coef.shape[0], |
|
coef.shape[1], |
|
classes.size, |
|
n_features, |
|
classes.size, |
|
n_features + 1, |
|
) |
|
) |
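# For the binary case, store the symmetric pair of coefficient rows:
# -coef for the first class and coef for the second.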
|
|
|
if n_classes == 1: |
|
w0[0, : coef.shape[1]] = -coef |
|
w0[1, : coef.shape[1]] = coef |
|
else: |
|
w0[:, : coef.shape[1]] = coef |
|
|
|
if multi_class == "multinomial": |
|
if solver in ["lbfgs", "newton-cg", "newton-cholesky"]: |
|
|
|
|
|
|
|
|
|
w0 = w0.ravel(order="F") |
|
loss = LinearModelLoss( |
|
base_loss=HalfMultinomialLoss(n_classes=classes.size), |
|
fit_intercept=fit_intercept, |
|
) |
|
target = Y_multi |
|
if solver == "lbfgs": |
|
func = loss.loss_gradient |
|
elif solver == "newton-cg": |
|
func = loss.loss |
|
grad = loss.gradient |
|
hess = loss.gradient_hessian_product |
|
warm_start_sag = {"coef": w0.T} |
|
else: |
|
target = y_bin |
|
if solver == "lbfgs": |
|
loss = LinearModelLoss( |
|
base_loss=HalfBinomialLoss(), fit_intercept=fit_intercept |
|
) |
|
func = loss.loss_gradient |
|
elif solver == "newton-cg": |
|
loss = LinearModelLoss( |
|
base_loss=HalfBinomialLoss(), fit_intercept=fit_intercept |
|
) |
|
func = loss.loss |
|
grad = loss.gradient |
|
hess = loss.gradient_hessian_product |
|
elif solver == "newton-cholesky": |
|
loss = LinearModelLoss( |
|
base_loss=HalfBinomialLoss(), fit_intercept=fit_intercept |
|
) |
|
warm_start_sag = {"coef": np.expand_dims(w0, axis=1)} |
|
|
|
coefs = list() |
|
n_iter = np.zeros(len(Cs), dtype=np.int32) |
|
for i, C in enumerate(Cs): |
|
if solver == "lbfgs": |
|
l2_reg_strength = 1.0 / (C * sw_sum) |
|
iprint = [-1, 50, 1, 100, 101][ |
|
np.searchsorted(np.array([0, 1, 2, 3]), verbose) |
|
] |
|
opt_res = optimize.minimize( |
|
func, |
|
w0, |
|
method="L-BFGS-B", |
|
jac=True, |
|
args=(X, target, sample_weight, l2_reg_strength, n_threads), |
|
options={ |
|
"maxiter": max_iter, |
|
"maxls": 50, |
|
"iprint": iprint, |
|
"gtol": tol, |
|
"ftol": 64 * np.finfo(float).eps, |
|
}, |
|
) |
|
n_iter_i = _check_optimize_result( |
|
solver, |
|
opt_res, |
|
max_iter, |
|
extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG, |
|
) |
|
w0, loss = opt_res.x, opt_res.fun |
|
elif solver == "newton-cg": |
|
l2_reg_strength = 1.0 / (C * sw_sum) |
|
args = (X, target, sample_weight, l2_reg_strength, n_threads) |
|
w0, n_iter_i = _newton_cg( |
|
grad_hess=hess, |
|
func=func, |
|
grad=grad, |
|
x0=w0, |
|
args=args, |
|
maxiter=max_iter, |
|
tol=tol, |
|
verbose=verbose, |
|
) |
|
elif solver == "newton-cholesky": |
|
l2_reg_strength = 1.0 / (C * sw_sum) |
|
sol = NewtonCholeskySolver( |
|
coef=w0, |
|
linear_loss=loss, |
|
l2_reg_strength=l2_reg_strength, |
|
tol=tol, |
|
max_iter=max_iter, |
|
n_threads=n_threads, |
|
verbose=verbose, |
|
) |
|
w0 = sol.solve(X=X, y=target, sample_weight=sample_weight) |
|
n_iter_i = sol.iteration |
|
elif solver == "liblinear": |
|
( |
|
coef_, |
|
intercept_, |
|
n_iter_i, |
|
) = _fit_liblinear( |
|
X, |
|
target, |
|
C, |
|
fit_intercept, |
|
intercept_scaling, |
|
None, |
|
penalty, |
|
dual, |
|
verbose, |
|
max_iter, |
|
tol, |
|
random_state, |
|
sample_weight=sample_weight, |
|
) |
|
if fit_intercept: |
|
w0 = np.concatenate([coef_.ravel(), intercept_]) |
|
else: |
|
w0 = coef_.ravel() |
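# _fit_liblinear solves a single binary problem here, so n_iter_i
# holds one entry; extract it as a Python scalar.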
|
|
|
|
|
n_iter_i = n_iter_i.item() |
|
|
|
elif solver in ["sag", "saga"]: |
|
if multi_class == "multinomial": |
|
target = target.astype(X.dtype, copy=False) |
|
loss = "multinomial" |
|
else: |
|
loss = "log" |
|
|
|
if penalty == "l1": |
|
alpha = 0.0 |
|
beta = 1.0 / C |
|
elif penalty == "l2": |
|
alpha = 1.0 / C |
|
beta = 0.0 |
|
else: |
|
alpha = (1.0 / C) * (1 - l1_ratio) |
|
beta = (1.0 / C) * l1_ratio |
|
|
|
w0, n_iter_i, warm_start_sag = sag_solver( |
|
X, |
|
target, |
|
sample_weight, |
|
loss, |
|
alpha, |
|
beta, |
|
max_iter, |
|
tol, |
|
verbose, |
|
random_state, |
|
False, |
|
max_squared_sum, |
|
warm_start_sag, |
|
is_saga=(solver == "saga"), |
|
) |
|
|
|
else: |
|
raise ValueError( |
|
"solver must be one of {'liblinear', 'lbfgs', " |
|
"'newton-cg', 'sag'}, got '%s' instead" % solver |
|
) |
|
|
|
if multi_class == "multinomial": |
|
n_classes = max(2, classes.size) |
|
if solver in ["lbfgs", "newton-cg", "newton-cholesky"]: |
|
multi_w0 = np.reshape(w0, (n_classes, -1), order="F") |
|
else: |
|
multi_w0 = w0 |
|
if n_classes == 2: |
|
multi_w0 = multi_w0[1][np.newaxis, :] |
|
coefs.append(multi_w0.copy()) |
|
else: |
|
coefs.append(w0.copy()) |
|
|
|
n_iter[i] = n_iter_i |
|
|
|
return np.array(coefs), np.array(Cs), n_iter |
|
|
|
|
|
|
|
def _log_reg_scoring_path( |
|
X, |
|
y, |
|
train, |
|
test, |
|
*, |
|
pos_class, |
|
Cs, |
|
scoring, |
|
fit_intercept, |
|
max_iter, |
|
tol, |
|
class_weight, |
|
verbose, |
|
solver, |
|
penalty, |
|
dual, |
|
intercept_scaling, |
|
multi_class, |
|
random_state, |
|
max_squared_sum, |
|
sample_weight, |
|
l1_ratio, |
|
score_params, |
|
): |
|
"""Computes scores across logistic_regression_path |
|
|
|
Parameters |
|
---------- |
|
X : {array-like, sparse matrix} of shape (n_samples, n_features) |
|
Training data. |
|
|
|
y : array-like of shape (n_samples,) or (n_samples, n_targets) |
|
Target labels. |
|
|
|
train : list of indices |
|
The indices of the train set. |
|
|
|
test : list of indices |
|
The indices of the test set. |
|
|
|
pos_class : int |
|
The class with respect to which we perform a one-vs-all fit. |
|
If None, then it is assumed that the given problem is binary. |
|
|
|
Cs : int or list of floats |
|
Each of the values in Cs describes the inverse of |
|
regularization strength. If Cs is an int, then a grid of Cs
values is chosen on a logarithmic scale between 1e-4 and 1e4.
|
|
|
scoring : callable |
|
A string (see :ref:`scoring_parameter`) or |
|
a scorer callable object / function with signature |
|
``scorer(estimator, X, y)``. For a list of scoring functions |
|
that can be used, look at :mod:`sklearn.metrics`. |
|
|
|
fit_intercept : bool |
|
If False, then the bias term is set to zero. Else the last |
|
term of each coef_ gives us the intercept. |
|
|
|
max_iter : int |
|
Maximum number of iterations for the solver. |
|
|
|
tol : float |
|
Tolerance for stopping criteria. |
|
|
|
class_weight : dict or 'balanced' |
|
Weights associated with classes in the form ``{class_label: weight}``. |
|
If not given, all classes are supposed to have weight one. |
|
|
|
The "balanced" mode uses the values of y to automatically adjust |
|
weights inversely proportional to class frequencies in the input data |
|
as ``n_samples / (n_classes * np.bincount(y))`` |
|
|
|
Note that these weights will be multiplied with sample_weight (passed |
|
through the fit method) if sample_weight is specified. |
|
|
|
verbose : int |
|
For the liblinear and lbfgs solvers set verbose to any positive |
|
number for verbosity. |
|
|
|
solver : {'lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'} |
|
Decides which solver to use. |
|
|
|
penalty : {'l1', 'l2', 'elasticnet'} |
|
Used to specify the norm used in the penalization. The 'newton-cg', |
|
'sag' and 'lbfgs' solvers support only l2 penalties. 'elasticnet' is |
|
only supported by the 'saga' solver. |
|
|
|
dual : bool |
|
Dual or primal formulation. Dual formulation is only implemented for |
|
l2 penalty with liblinear solver. Prefer dual=False when |
|
n_samples > n_features. |
|
|
|
intercept_scaling : float |
|
Useful only when the solver 'liblinear' is used |
|
and self.fit_intercept is set to True. In this case, x becomes |
|
[x, self.intercept_scaling], |
|
i.e. a "synthetic" feature with constant value equals to |
|
intercept_scaling is appended to the instance vector. |
|
The intercept becomes intercept_scaling * synthetic feature weight |
|
Note! the synthetic feature weight is subject to l1/l2 regularization |
|
as all other features. |
|
To lessen the effect of regularization on synthetic feature weight |
|
(and therefore on the intercept) intercept_scaling has to be increased. |
|
|
|
multi_class : {'auto', 'ovr', 'multinomial'} |
|
If the option chosen is 'ovr', then a binary problem is fit for each |
|
label. For 'multinomial' the loss minimised is the multinomial loss fit |
|
across the entire probability distribution, *even when the data is |
|
binary*. 'multinomial' is unavailable when solver='liblinear'. |
|
|
|
random_state : int, RandomState instance |
|
Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the |
|
data. See :term:`Glossary <random_state>` for details. |
|
|
|
max_squared_sum : float |
|
Maximum squared sum of X over samples. Used only in SAG solver. |
|
If None, it will be computed, going through all the samples. |
|
The value should be precomputed to speed up cross validation. |
|
|
|
sample_weight : array-like of shape(n_samples,) |
|
Array of weights that are assigned to individual samples. |
|
If not provided, then each sample is given unit weight. |
|
|
|
l1_ratio : float |
|
The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only |
|
used if ``penalty='elasticnet'``. Setting ``l1_ratio=0`` is equivalent |
|
to using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent |
|
to using ``penalty='l1'``. For ``0 < l1_ratio < 1``, the penalty is a
|
combination of L1 and L2. |
|
|
|
score_params : dict |
|
Parameters to pass to the `score` method of the underlying scorer. |
|
|
|
Returns |
|
------- |
|
coefs : ndarray of shape (n_cs, n_features) or (n_cs, n_features + 1) |
|
List of coefficients for the Logistic Regression model. If |
|
fit_intercept is set to True then the second dimension will be |
|
n_features + 1, where the last item represents the intercept. |
|
|
|
Cs : ndarray |
|
Grid of Cs used for cross-validation. |
|
|
|
scores : ndarray of shape (n_cs,) |
|
Scores obtained for each Cs. |
|
|
|
n_iter : ndarray of shape (n_cs,)
Actual number of iterations for each value in Cs.
|
""" |
|
X_train = X[train] |
|
X_test = X[test] |
|
y_train = y[train] |
|
y_test = y[test] |
|
|
|
sw_train, sw_test = None, None |
|
if sample_weight is not None: |
|
sample_weight = _check_sample_weight(sample_weight, X) |
|
sw_train = sample_weight[train] |
|
sw_test = sample_weight[test] |
|
|
|
coefs, Cs, n_iter = _logistic_regression_path( |
|
X_train, |
|
y_train, |
|
Cs=Cs, |
|
l1_ratio=l1_ratio, |
|
fit_intercept=fit_intercept, |
|
solver=solver, |
|
max_iter=max_iter, |
|
class_weight=class_weight, |
|
pos_class=pos_class, |
|
multi_class=multi_class, |
|
tol=tol, |
|
verbose=verbose, |
|
dual=dual, |
|
penalty=penalty, |
|
intercept_scaling=intercept_scaling, |
|
random_state=random_state, |
|
check_input=False, |
|
max_squared_sum=max_squared_sum, |
|
sample_weight=sw_train, |
|
) |
|
|
|
log_reg = LogisticRegression(solver=solver, multi_class=multi_class) |
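# The score method of LogisticRegression relies on a classes_
# attribute, so set it on this helper estimator before scoring.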
|
|
|
|
|
if multi_class == "ovr": |
|
log_reg.classes_ = np.array([-1, 1]) |
|
elif multi_class == "multinomial": |
|
log_reg.classes_ = np.unique(y_train) |
|
else: |
|
raise ValueError( |
|
"multi_class should be either multinomial or ovr, got %d" % multi_class |
|
) |
|
|
|
if pos_class is not None: |
|
mask = y_test == pos_class |
|
y_test = np.ones(y_test.shape, dtype=np.float64) |
|
y_test[~mask] = -1.0 |
|
|
|
scores = list() |
|
|
|
scoring = get_scorer(scoring) |
|
for w in coefs: |
|
if multi_class == "ovr": |
|
w = w[np.newaxis, :] |
|
if fit_intercept: |
|
log_reg.coef_ = w[:, :-1] |
|
log_reg.intercept_ = w[:, -1] |
|
else: |
|
log_reg.coef_ = w |
|
log_reg.intercept_ = 0.0 |
|
|
|
if scoring is None: |
|
scores.append(log_reg.score(X_test, y_test, sample_weight=sw_test)) |
|
else: |
|
score_params = score_params or {} |
|
score_params = _check_method_params(X=X, params=score_params, indices=test) |
|
scores.append(scoring(log_reg, X_test, y_test, **score_params)) |
|
return coefs, Cs, np.array(scores), n_iter |
|
|
|
|
|
class LogisticRegression(LinearClassifierMixin, SparseCoefMixin, BaseEstimator): |
|
""" |
|
Logistic Regression (aka logit, MaxEnt) classifier. |
|
|
|
This class implements regularized logistic regression using the |
|
'liblinear' library and the 'newton-cg', 'newton-cholesky', 'sag',
'saga' and 'lbfgs' solvers. **Note
|
that regularization is applied by default**. It can handle both dense |
|
and sparse input. Use C-ordered arrays or CSR matrices containing 64-bit |
|
floats for optimal performance; any other input format will be converted |
|
(and copied). |
|
|
|
The 'newton-cg', 'sag', and 'lbfgs' solvers support only L2 regularization |
|
with primal formulation, or no regularization. The 'liblinear' solver |
|
supports both L1 and L2 regularization, with a dual formulation only for |
|
the L2 penalty. The Elastic-Net regularization is only supported by the |
|
'saga' solver. |
|
|
|
For :term:`multiclass` problems, only 'newton-cg', 'sag', 'saga' and 'lbfgs' |
|
handle multinomial loss. 'liblinear' and 'newton-cholesky' only handle binary |
|
classification but can be extended to handle multiclass by using |
|
:class:`~sklearn.multiclass.OneVsRestClassifier`. |
|
|
|
Read more in the :ref:`User Guide <logistic_regression>`. |
|
|
|
Parameters |
|
---------- |
|
penalty : {'l1', 'l2', 'elasticnet', None}, default='l2' |
|
Specify the norm of the penalty: |
|
|
|
- `None`: no penalty is added; |
|
- `'l2'`: add a L2 penalty term and it is the default choice; |
|
- `'l1'`: add a L1 penalty term; |
|
- `'elasticnet'`: both L1 and L2 penalty terms are added. |
|
|
|
.. warning:: |
|
Some penalties may not work with some solvers. See the parameter |
|
`solver` below, to know the compatibility between the penalty and |
|
solver. |
|
|
|
.. versionadded:: 0.19 |
|
l1 penalty with SAGA solver (allowing 'multinomial' + L1) |
|
|
|
dual : bool, default=False |
|
Dual (constrained) or primal (regularized, see also |
|
:ref:`this equation <regularized-logistic-loss>`) formulation. Dual formulation |
|
is only implemented for l2 penalty with liblinear solver. Prefer dual=False when |
|
n_samples > n_features. |
|
|
|
tol : float, default=1e-4 |
|
Tolerance for stopping criteria. |
|
|
|
C : float, default=1.0 |
|
Inverse of regularization strength; must be a positive float. |
|
Like in support vector machines, smaller values specify stronger |
|
regularization. |
|
|
|
fit_intercept : bool, default=True |
|
Specifies if a constant (a.k.a. bias or intercept) should be |
|
added to the decision function. |
|
|
|
intercept_scaling : float, default=1 |
|
Useful only when the solver 'liblinear' is used |
|
and self.fit_intercept is set to True. In this case, x becomes |
|
[x, self.intercept_scaling], |
|
i.e. a "synthetic" feature with constant value equal to |
|
intercept_scaling is appended to the instance vector. |
|
The intercept becomes ``intercept_scaling * synthetic_feature_weight``. |
|
|
|
Note! the synthetic feature weight is subject to l1/l2 regularization |
|
like all other features.
|
To lessen the effect of regularization on synthetic feature weight |
|
(and therefore on the intercept) intercept_scaling has to be increased. |
|
|
|
class_weight : dict or 'balanced', default=None |
|
Weights associated with classes in the form ``{class_label: weight}``. |
|
If not given, all classes are supposed to have weight one. |
|
|
|
The "balanced" mode uses the values of y to automatically adjust |
|
weights inversely proportional to class frequencies in the input data |
|
as ``n_samples / (n_classes * np.bincount(y))``. |
|
|
|
Note that these weights will be multiplied with sample_weight (passed |
|
through the fit method) if sample_weight is specified. |
|
|
|
.. versionadded:: 0.17 |
|
*class_weight='balanced'* |
|
|
|
random_state : int, RandomState instance, default=None |
|
Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the |
|
data. See :term:`Glossary <random_state>` for details. |
|
|
|
solver : {'lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'}, \ |
|
default='lbfgs' |
|
|
|
Algorithm to use in the optimization problem. Default is 'lbfgs'. |
|
To choose a solver, you might want to consider the following aspects: |
|
|
|
- For small datasets, 'liblinear' is a good choice, whereas 'sag' |
|
and 'saga' are faster for large ones; |
|
- For :term:`multiclass` problems, all solvers except 'liblinear' minimize the |
|
full multinomial loss; |
|
- 'liblinear' can only handle binary classification by default. To apply a |
|
one-versus-rest scheme for the multiclass setting one can wrap it with the |
|
:class:`~sklearn.multiclass.OneVsRestClassifier`. |
|
- 'newton-cholesky' is a good choice for |
|
`n_samples` >> `n_features * n_classes`, especially with one-hot encoded |
|
categorical features with rare categories. Be aware that the memory usage |
|
of this solver has a quadratic dependency on `n_features * n_classes` |
|
because it explicitly computes the full Hessian matrix. |
|
|
|
.. warning:: |
|
The choice of the algorithm depends on the penalty chosen and on |
|
(multinomial) multiclass support: |
|
|
|
================= ============================== ====================== |
|
solver penalty multinomial multiclass |
|
================= ============================== ====================== |
|
'lbfgs' 'l2', None yes |
|
'liblinear' 'l1', 'l2' no |
|
'newton-cg' 'l2', None yes |
|
'newton-cholesky' 'l2', None no |
|
'sag' 'l2', None yes |
|
'saga' 'elasticnet', 'l1', 'l2', None yes |
|
================= ============================== ====================== |
|
|
|
.. note:: |
|
'sag' and 'saga' fast convergence is only guaranteed on features |
|
with approximately the same scale. You can preprocess the data with |
|
a scaler from :mod:`sklearn.preprocessing`. |
|
|
|
.. seealso:: |
|
Refer to the :ref:`User Guide <logistic_regression>` for more
|
information regarding :class:`LogisticRegression` and more specifically the |
|
:ref:`Table <logistic_regression_solvers>` |
|
summarizing solver/penalty supports. |
|
|
|
.. versionadded:: 0.17 |
|
Stochastic Average Gradient descent solver. |
|
.. versionadded:: 0.19 |
|
SAGA solver. |
|
.. versionchanged:: 0.22 |
|
The default solver changed from 'liblinear' to 'lbfgs' in 0.22. |
|
.. versionadded:: 1.2 |
|
newton-cholesky solver. |
|
|
|
max_iter : int, default=100 |
|
Maximum number of iterations taken for the solvers to converge. |
|
|
|
multi_class : {'auto', 'ovr', 'multinomial'}, default='auto' |
|
If the option chosen is 'ovr', then a binary problem is fit for each |
|
label. For 'multinomial' the loss minimised is the multinomial loss fit |
|
across the entire probability distribution, *even when the data is |
|
binary*. 'multinomial' is unavailable when solver='liblinear'. |
|
'auto' selects 'ovr' if the data is binary, or if solver='liblinear', |
|
and otherwise selects 'multinomial'. |
|
|
|
.. versionadded:: 0.18 |
|
Stochastic Average Gradient descent solver for 'multinomial' case. |
|
.. versionchanged:: 0.22 |
|
Default changed from 'ovr' to 'auto' in 0.22. |
|
.. deprecated:: 1.5 |
|
``multi_class`` was deprecated in version 1.5 and will be removed in 1.7. |
|
From then on, the recommended 'multinomial' will always be used for |
|
`n_classes >= 3`. |
|
Solvers that do not support 'multinomial' will raise an error. |
|
Use `sklearn.multiclass.OneVsRestClassifier(LogisticRegression())` if you |
|
still want to use OvR. |
|
|
|
verbose : int, default=0 |
|
For the liblinear and lbfgs solvers set verbose to any positive |
|
number for verbosity. |
|
|
|
warm_start : bool, default=False |
|
When set to True, reuse the solution of the previous call to fit as |
|
initialization, otherwise, just erase the previous solution. |
|
Useless for liblinear solver. See :term:`the Glossary <warm_start>`. |
|
|
|
.. versionadded:: 0.17 |
|
*warm_start* to support *lbfgs*, *newton-cg*, *sag*, *saga* solvers. |
|
|
|
n_jobs : int, default=None |
|
Number of CPU cores used when parallelizing over classes if |
|
multi_class='ovr'". This parameter is ignored when the ``solver`` is |
|
set to 'liblinear' regardless of whether 'multi_class' is specified or |
|
not. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` |
|
context. ``-1`` means using all processors. |
|
See :term:`Glossary <n_jobs>` for more details. |
|
|
|
l1_ratio : float, default=None |
|
The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only |
|
used if ``penalty='elasticnet'``. Setting ``l1_ratio=0`` is equivalent |
|
to using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent |
|
to using ``penalty='l1'``. For ``0 < l1_ratio < 1``, the penalty is a
|
combination of L1 and L2. |
|
|
|
Attributes |
|
---------- |
|
|
|
classes_ : ndarray of shape (n_classes, ) |
|
A list of class labels known to the classifier. |
|
|
|
coef_ : ndarray of shape (1, n_features) or (n_classes, n_features) |
|
Coefficient of the features in the decision function. |
|
|
|
`coef_` is of shape (1, n_features) when the given problem is binary. |
|
In particular, when `multi_class='multinomial'`, `coef_` corresponds |
|
to outcome 1 (True) and `-coef_` corresponds to outcome 0 (False). |
|
|
|
intercept_ : ndarray of shape (1,) or (n_classes,) |
|
Intercept (a.k.a. bias) added to the decision function. |
|
|
|
If `fit_intercept` is set to False, the intercept is set to zero. |
|
`intercept_` is of shape (1,) when the given problem is binary. |
|
In particular, when `multi_class='multinomial'`, `intercept_` |
|
corresponds to outcome 1 (True) and `-intercept_` corresponds to |
|
outcome 0 (False). |
|
|
|
n_features_in_ : int |
|
Number of features seen during :term:`fit`. |
|
|
|
.. versionadded:: 0.24 |
|
|
|
feature_names_in_ : ndarray of shape (`n_features_in_`,) |
|
Names of features seen during :term:`fit`. Defined only when `X` |
|
has feature names that are all strings. |
|
|
|
.. versionadded:: 1.0 |
|
|
|
n_iter_ : ndarray of shape (n_classes,) or (1, ) |
|
Actual number of iterations for all classes. If binary or multinomial, |
|
it returns only 1 element. For the liblinear solver, only the maximum
number of iterations across all classes is given.
|
|
|
.. versionchanged:: 0.20 |
|
|
|
In SciPy <= 1.0.0 the number of lbfgs iterations may exceed |
|
``max_iter``. ``n_iter_`` will now report at most ``max_iter``. |
|
|
|
See Also |
|
-------- |
|
SGDClassifier : Incrementally trained logistic regression (when given |
|
the parameter ``loss="log_loss"``). |
|
LogisticRegressionCV : Logistic regression with built-in cross validation. |
|
|
|
Notes |
|
----- |
|
The underlying C implementation uses a random number generator to |
|
select features when fitting the model. It is thus not uncommon
|
to have slightly different results for the same input data. If |
|
that happens, try with a smaller tol parameter. |
|
|
|
Predict output may not match that of standalone liblinear in certain |
|
cases. See :ref:`differences from liblinear <liblinear_differences>` |
|
in the narrative documentation. |
|
|
|
References |
|
---------- |
|
|
|
L-BFGS-B -- Software for Large-scale Bound-constrained Optimization |
|
Ciyou Zhu, Richard Byrd, Jorge Nocedal and Jose Luis Morales. |
|
http://users.iems.northwestern.edu/~nocedal/lbfgsb.html |
|
|
|
LIBLINEAR -- A Library for Large Linear Classification |
|
https://www.csie.ntu.edu.tw/~cjlin/liblinear/ |
|
|
|
SAG -- Mark Schmidt, Nicolas Le Roux, and Francis Bach |
|
Minimizing Finite Sums with the Stochastic Average Gradient |
|
https://hal.inria.fr/hal-00860051/document |
|
|
|
SAGA -- Defazio, A., Bach F. & Lacoste-Julien S. (2014). |
|
:arxiv:`"SAGA: A Fast Incremental Gradient Method With Support |
|
for Non-Strongly Convex Composite Objectives" <1407.0202>` |
|
|
|
Hsiang-Fu Yu, Fang-Lan Huang, Chih-Jen Lin (2011). Dual coordinate descent |
|
methods for logistic regression and maximum entropy models. |
|
Machine Learning 85(1-2):41-75. |
|
https://www.csie.ntu.edu.tw/~cjlin/papers/maxent_dual.pdf |
|
|
|
Examples |
|
-------- |
|
>>> from sklearn.datasets import load_iris |
|
>>> from sklearn.linear_model import LogisticRegression |
|
>>> X, y = load_iris(return_X_y=True) |
|
>>> clf = LogisticRegression(random_state=0).fit(X, y) |
|
>>> clf.predict(X[:2, :]) |
|
array([0, 0]) |
|
>>> clf.predict_proba(X[:2, :]) |
|
array([[9.8...e-01, 1.8...e-02, 1.4...e-08], |
|
[9.7...e-01, 2.8...e-02, ...e-08]]) |
|
>>> clf.score(X, y) |
|
0.97... |
|
|
|
For a comparison of LogisticRegression with other classifiers see:
|
:ref:`sphx_glr_auto_examples_classification_plot_classification_probability.py`. |
|
""" |
|
|
|
_parameter_constraints: dict = { |
|
"penalty": [StrOptions({"l1", "l2", "elasticnet"}), None], |
|
"dual": ["boolean"], |
|
"tol": [Interval(Real, 0, None, closed="left")], |
|
"C": [Interval(Real, 0, None, closed="right")], |
|
"fit_intercept": ["boolean"], |
|
"intercept_scaling": [Interval(Real, 0, None, closed="neither")], |
|
"class_weight": [dict, StrOptions({"balanced"}), None], |
|
"random_state": ["random_state"], |
|
"solver": [ |
|
StrOptions( |
|
{"lbfgs", "liblinear", "newton-cg", "newton-cholesky", "sag", "saga"} |
|
) |
|
], |
|
"max_iter": [Interval(Integral, 0, None, closed="left")], |
|
"verbose": ["verbose"], |
|
"warm_start": ["boolean"], |
|
"n_jobs": [None, Integral], |
|
"l1_ratio": [Interval(Real, 0, 1, closed="both"), None], |
|
"multi_class": [ |
|
StrOptions({"auto", "ovr", "multinomial"}), |
|
Hidden(StrOptions({"deprecated"})), |
|
], |
|
} |
|
|
|
def __init__( |
|
self, |
|
penalty="l2", |
|
*, |
|
dual=False, |
|
tol=1e-4, |
|
C=1.0, |
|
fit_intercept=True, |
|
intercept_scaling=1, |
|
class_weight=None, |
|
random_state=None, |
|
solver="lbfgs", |
|
max_iter=100, |
|
multi_class="deprecated", |
|
verbose=0, |
|
warm_start=False, |
|
n_jobs=None, |
|
l1_ratio=None, |
|
): |
|
self.penalty = penalty |
|
self.dual = dual |
|
self.tol = tol |
|
self.C = C |
|
self.fit_intercept = fit_intercept |
|
self.intercept_scaling = intercept_scaling |
|
self.class_weight = class_weight |
|
self.random_state = random_state |
|
self.solver = solver |
|
self.max_iter = max_iter |
|
self.multi_class = multi_class |
|
self.verbose = verbose |
|
self.warm_start = warm_start |
|
self.n_jobs = n_jobs |
|
self.l1_ratio = l1_ratio |
|
|
|
@_fit_context(prefer_skip_nested_validation=True) |
|
def fit(self, X, y, sample_weight=None): |
|
""" |
|
Fit the model according to the given training data. |
|
|
|
Parameters |
|
---------- |
|
X : {array-like, sparse matrix} of shape (n_samples, n_features) |
|
Training vector, where `n_samples` is the number of samples and |
|
`n_features` is the number of features. |
|
|
|
y : array-like of shape (n_samples,) |
|
Target vector relative to X. |
|
|
|
sample_weight : array-like of shape (n_samples,) default=None |
|
Array of weights that are assigned to individual samples. |
|
If not provided, then each sample is given unit weight. |
|
|
|
.. versionadded:: 0.17 |
|
*sample_weight* support to LogisticRegression. |
|
|
|
Returns |
|
------- |
|
self |
|
Fitted estimator. |
|
|
|
Notes |
|
----- |
|
The SAGA solver supports both float64 and float32 arrays.
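
Examples
--------
A minimal fit on a bundled binary dataset (the shape check is
illustrative; `max_iter` is raised because the data is not scaled):

>>> from sklearn.datasets import load_breast_cancer
>>> from sklearn.linear_model import LogisticRegression
>>> X, y = load_breast_cancer(return_X_y=True)
>>> clf = LogisticRegression(max_iter=10000).fit(X, y)
>>> clf.coef_.shape
(1, 30)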
|
""" |
|
solver = _check_solver(self.solver, self.penalty, self.dual) |
|
|
|
if self.penalty != "elasticnet" and self.l1_ratio is not None: |
|
warnings.warn( |
|
"l1_ratio parameter is only used when penalty is " |
|
"'elasticnet'. Got " |
|
"(penalty={})".format(self.penalty) |
|
) |
|
|
|
if self.penalty == "elasticnet" and self.l1_ratio is None: |
|
raise ValueError("l1_ratio must be specified when penalty is elasticnet.") |
|
|
|
if self.penalty is None: |
|
if self.C != 1.0: |
|
warnings.warn( |
|
"Setting penalty=None will ignore the C and l1_ratio parameters" |
|
) |
|
|
|
C_ = np.inf |
|
penalty = "l2" |
|
else: |
|
C_ = self.C |
|
penalty = self.penalty |
|
|
|
if solver == "lbfgs": |
|
_dtype = np.float64 |
|
else: |
|
_dtype = [np.float64, np.float32] |
|
|
|
X, y = validate_data( |
|
self, |
|
X, |
|
y, |
|
accept_sparse="csr", |
|
dtype=_dtype, |
|
order="C", |
|
accept_large_sparse=solver not in ["liblinear", "sag", "saga"], |
|
) |
|
check_classification_targets(y) |
|
self.classes_ = np.unique(y) |
|
|
|
|
|
multi_class = self.multi_class |
|
if self.multi_class == "multinomial" and len(self.classes_) == 2: |
|
warnings.warn( |
|
( |
|
"'multi_class' was deprecated in version 1.5 and will be removed in" |
|
" 1.7. From then on, binary problems will be fit as proper binary " |
|
" logistic regression models (as if multi_class='ovr' were set)." |
|
" Leave it to its default value to avoid this warning." |
|
), |
|
FutureWarning, |
|
) |
|
elif self.multi_class in ("multinomial", "auto"): |
|
warnings.warn( |
|
( |
|
"'multi_class' was deprecated in version 1.5 and will be removed in" |
|
" 1.7. From then on, it will always use 'multinomial'." |
|
" Leave it to its default value to avoid this warning." |
|
), |
|
FutureWarning, |
|
) |
|
elif self.multi_class == "ovr": |
|
warnings.warn( |
|
( |
|
"'multi_class' was deprecated in version 1.5 and will be removed in" |
|
" 1.7. Use OneVsRestClassifier(LogisticRegression(..)) instead." |
|
" Leave it to its default value to avoid this warning." |
|
), |
|
FutureWarning, |
|
) |
|
else: |
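# multi_class was left to its default "deprecated" sentinel, so
# resolve it to "auto" without emitting a deprecation warning.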
|
|
|
multi_class = "auto" |
|
multi_class = _check_multi_class(multi_class, solver, len(self.classes_)) |
|
|
|
if solver == "liblinear": |
|
if effective_n_jobs(self.n_jobs) != 1: |
|
warnings.warn( |
|
"'n_jobs' > 1 does not have any effect when" |
|
" 'solver' is set to 'liblinear'. Got 'n_jobs'" |
|
" = {}.".format(effective_n_jobs(self.n_jobs)) |
|
) |
|
self.coef_, self.intercept_, self.n_iter_ = _fit_liblinear( |
|
X, |
|
y, |
|
self.C, |
|
self.fit_intercept, |
|
self.intercept_scaling, |
|
self.class_weight, |
|
self.penalty, |
|
self.dual, |
|
self.verbose, |
|
self.max_iter, |
|
self.tol, |
|
self.random_state, |
|
sample_weight=sample_weight, |
|
) |
|
return self |
|
|
|
if solver in ["sag", "saga"]: |
|
max_squared_sum = row_norms(X, squared=True).max() |
|
else: |
|
max_squared_sum = None |
|
|
|
n_classes = len(self.classes_) |
|
classes_ = self.classes_ |
|
if n_classes < 2: |
|
raise ValueError( |
|
"This solver needs samples of at least 2 classes" |
|
" in the data, but the data contains only one" |
|
" class: %r" % classes_[0] |
|
) |
|
|
|
if len(self.classes_) == 2: |
|
n_classes = 1 |
|
classes_ = classes_[1:] |
|
|
|
if self.warm_start: |
|
warm_start_coef = getattr(self, "coef_", None) |
|
else: |
|
warm_start_coef = None |
|
if warm_start_coef is not None and self.fit_intercept: |
|
warm_start_coef = np.append( |
|
warm_start_coef, self.intercept_[:, np.newaxis], axis=1 |
|
) |
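# Hack so that we iterate only once for the multinomial case.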
|
|
|
|
|
if multi_class == "multinomial": |
|
classes_ = [None] |
|
warm_start_coef = [warm_start_coef] |
|
if warm_start_coef is None: |
|
warm_start_coef = [None] * n_classes |
|
|
|
path_func = delayed(_logistic_regression_path) |
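# The SAG solver releases the GIL, so it is more efficient to use
# threads for this solver.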
|
|
|
|
|
|
|
if solver in ["sag", "saga"]: |
|
prefer = "threads" |
|
else: |
|
prefer = "processes" |
|
|
|
|
|
|
|
|
|
if ( |
|
solver in ["lbfgs", "newton-cg", "newton-cholesky"] |
|
and len(classes_) == 1 |
|
and effective_n_jobs(self.n_jobs) == 1 |
|
): |
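# The inner solver could use multiple OpenMP threads here; for the
# time being a single thread is used.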
|
|
|
|
|
n_threads = 1 |
|
else: |
|
n_threads = 1 |
|
|
|
fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose, prefer=prefer)( |
|
path_func( |
|
X, |
|
y, |
|
pos_class=class_, |
|
Cs=[C_], |
|
l1_ratio=self.l1_ratio, |
|
fit_intercept=self.fit_intercept, |
|
tol=self.tol, |
|
verbose=self.verbose, |
|
solver=solver, |
|
multi_class=multi_class, |
|
max_iter=self.max_iter, |
|
class_weight=self.class_weight, |
|
check_input=False, |
|
random_state=self.random_state, |
|
coef=warm_start_coef_, |
|
penalty=penalty, |
|
max_squared_sum=max_squared_sum, |
|
sample_weight=sample_weight, |
|
n_threads=n_threads, |
|
) |
|
for class_, warm_start_coef_ in zip(classes_, warm_start_coef) |
|
) |
|
|
|
fold_coefs_, _, n_iter_ = zip(*fold_coefs_) |
|
self.n_iter_ = np.asarray(n_iter_, dtype=np.int32)[:, 0] |
|
|
|
n_features = X.shape[1] |
|
if multi_class == "multinomial": |
|
self.coef_ = fold_coefs_[0][0] |
|
else: |
|
self.coef_ = np.asarray(fold_coefs_) |
|
self.coef_ = self.coef_.reshape( |
|
n_classes, n_features + int(self.fit_intercept) |
|
) |
|
|
|
if self.fit_intercept: |
|
self.intercept_ = self.coef_[:, -1] |
|
self.coef_ = self.coef_[:, :-1] |
|
else: |
|
self.intercept_ = np.zeros(n_classes) |
|
|
|
return self |
|
|
|
def predict_proba(self, X): |
|
""" |
|
Probability estimates. |
|
|
|
The returned estimates for all classes are ordered by the |
|
label of classes. |
|
|
|
For a multiclass problem, if multi_class is set to "multinomial",
the softmax function is used to find the predicted probability of
each class.
Otherwise a one-vs-rest approach is used, i.e. the probability of
each class assuming it to be positive is calculated with the logistic
function, and these values are normalized across all the classes.
|
|
|
Parameters |
|
---------- |
|
X : array-like of shape (n_samples, n_features) |
|
Vector to be scored, where `n_samples` is the number of samples and |
|
`n_features` is the number of features. |
|
|
|
Returns |
|
------- |
|
T : array-like of shape (n_samples, n_classes) |
|
Returns the probability of the sample for each class in the model, |
|
where classes are ordered as they are in ``self.classes_``. |
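
Examples
--------
Each returned row is a probability distribution over the classes, so
it sums to one (illustrative check on a bundled dataset):

>>> import numpy as np
>>> from sklearn.datasets import load_iris
>>> from sklearn.linear_model import LogisticRegression
>>> X, y = load_iris(return_X_y=True)
>>> proba = LogisticRegression(max_iter=1000).fit(X, y).predict_proba(X[:3])
>>> bool(np.allclose(proba.sum(axis=1), 1.0))
True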
|
""" |
|
check_is_fitted(self) |
|
|
|
ovr = self.multi_class in ["ovr", "warn"] or ( |
|
self.multi_class in ["auto", "deprecated"] |
|
and (self.classes_.size <= 2 or self.solver == "liblinear") |
|
) |
|
if ovr: |
|
return super()._predict_proba_lr(X) |
|
else: |
|
decision = self.decision_function(X) |
|
if decision.ndim == 1: |
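# Workaround for multi_class="multinomial" with binary outcomes,
# which requires softmax prediction with only a 1d decision function.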
|
|
|
|
|
decision_2d = np.c_[-decision, decision] |
|
else: |
|
decision_2d = decision |
|
return softmax(decision_2d, copy=False) |
|
|
|
def predict_log_proba(self, X): |
|
""" |
|
Predict logarithm of probability estimates. |
|
|
|
The returned estimates for all classes are ordered by the |
|
label of classes. |
|
|
|
Parameters |
|
---------- |
|
X : array-like of shape (n_samples, n_features) |
|
Vector to be scored, where `n_samples` is the number of samples and |
|
`n_features` is the number of features. |
|
|
|
Returns |
|
------- |
|
T : array-like of shape (n_samples, n_classes) |
|
Returns the log-probability of the sample for each class in the |
|
model, where classes are ordered as they are in ``self.classes_``. |
|
""" |
|
return np.log(self.predict_proba(X)) |
|
|
|
def __sklearn_tags__(self): |
|
tags = super().__sklearn_tags__() |
|
tags.input_tags.sparse = True |
|
return tags |
|
|
|
|
|
class LogisticRegressionCV(LogisticRegression, LinearClassifierMixin, BaseEstimator): |
|
"""Logistic Regression CV (aka logit, MaxEnt) classifier. |
|
|
|
See glossary entry for :term:`cross-validation estimator`. |
|
|
|
This class implements logistic regression using the liblinear, newton-cg,
newton-cholesky, sag, saga or lbfgs optimizers. The newton-cg, sag and
lbfgs solvers support only L2
|
regularization with primal formulation. The liblinear solver supports both |
|
L1 and L2 regularization, with a dual formulation only for the L2 penalty. |
|
Elastic-Net penalty is only supported by the saga solver. |
|
|
|
For the grid of `Cs` values and `l1_ratios` values, the best hyperparameter |
|
is selected by the cross-validator |
|
:class:`~sklearn.model_selection.StratifiedKFold`, but it can be changed |
|
using the :term:`cv` parameter. The 'newton-cg', 'sag', 'saga' and 'lbfgs' |
|
solvers can warm-start the coefficients (see :term:`Glossary<warm_start>`). |
|
|
|
Read more in the :ref:`User Guide <logistic_regression>`. |
|
|
|
Parameters |
|
---------- |
|
Cs : int or list of floats, default=10 |
|
Each of the values in Cs describes the inverse of regularization |
|
strength. If Cs is an int, then a grid of Cs values is chosen
on a logarithmic scale between 1e-4 and 1e4.
|
Like in support vector machines, smaller values specify stronger |
|
regularization. |
|
|
|
fit_intercept : bool, default=True |
|
Specifies if a constant (a.k.a. bias or intercept) should be |
|
added to the decision function. |
|
|
|
cv : int or cross-validation generator, default=None |
|
The default cross-validation generator used is Stratified K-Folds. |
|
If an integer is provided, then it is the number of folds used. |
|
See the :mod:`sklearn.model_selection` module for the
|
list of possible cross-validation objects. |
|
|
|
.. versionchanged:: 0.22 |
|
``cv`` default value if None changed from 3-fold to 5-fold. |
|
|
|
dual : bool, default=False |
|
Dual (constrained) or primal (regularized, see also |
|
:ref:`this equation <regularized-logistic-loss>`) formulation. Dual formulation |
|
is only implemented for l2 penalty with liblinear solver. Prefer dual=False when |
|
n_samples > n_features. |
|
|
|
penalty : {'l1', 'l2', 'elasticnet'}, default='l2' |
|
Specify the norm of the penalty: |
|
|
|
- `'l2'`: add a L2 penalty term (used by default); |
|
- `'l1'`: add a L1 penalty term; |
|
- `'elasticnet'`: both L1 and L2 penalty terms are added. |
|
|
|
.. warning:: |
|
Some penalties may not work with some solvers. See the parameter |
|
`solver` below, to know the compatibility between the penalty and |
|
solver. |
|
|
|
scoring : str or callable, default=None |
|
A string (see :ref:`scoring_parameter`) or |
|
a scorer callable object / function with signature |
|
``scorer(estimator, X, y)``. For a list of scoring functions |
|
that can be used, look at :mod:`sklearn.metrics`. The |
|
default scoring option used is 'accuracy'. |
|
|
|
solver : {'lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'}, \ |
|
default='lbfgs' |
|
|
|
Algorithm to use in the optimization problem. Default is 'lbfgs'. |
|
To choose a solver, you might want to consider the following aspects: |
|
|
|
- For small datasets, 'liblinear' is a good choice, whereas 'sag' |
|
and 'saga' are faster for large ones; |
|
- For multiclass problems, all solvers except 'liblinear' minimize the full |
|
multinomial loss; |
|
- 'liblinear' might be slower in :class:`LogisticRegressionCV` |
|
because it does not handle warm-starting. |
|
- 'liblinear' can only handle binary classification by default. To apply a |
|
one-versus-rest scheme for the multiclass setting one can wrap it with the |
|
:class:`~sklearn.multiclass.OneVsRestClassifier`. |
|
- 'newton-cholesky' is a good choice for |
|
`n_samples` >> `n_features * n_classes`, especially with one-hot encoded |
|
categorical features with rare categories. Be aware that the memory usage |
|
of this solver has a quadratic dependency on `n_features * n_classes` |
|
because it explicitly computes the full Hessian matrix. |
|
|
|
.. warning:: |
|
The choice of the algorithm depends on the penalty chosen and on |
|
(multinomial) multiclass support: |
|
|
|
================= ============================== ====================== |
|
solver penalty multinomial multiclass |
|
================= ============================== ====================== |
|
'lbfgs' 'l2' yes |
|
'liblinear' 'l1', 'l2' no |
|
'newton-cg' 'l2' yes |
|
'newton-cholesky' 'l2'                           no

'sag'             'l2'                           yes
|
'saga' 'elasticnet', 'l1', 'l2' yes |
|
================= ============================== ====================== |
|
|
|
.. note:: |
|
'sag' and 'saga' fast convergence is only guaranteed on features |
|
with approximately the same scale. You can preprocess the data with |
|
a scaler from :mod:`sklearn.preprocessing`. |
|
|
|
.. versionadded:: 0.17 |
|
Stochastic Average Gradient descent solver. |
|
.. versionadded:: 0.19 |
|
SAGA solver. |
|
.. versionadded:: 1.2 |
|
newton-cholesky solver. |
|
|
|
tol : float, default=1e-4 |
|
Tolerance for stopping criteria. |
|
|
|
max_iter : int, default=100 |
|
Maximum number of iterations of the optimization algorithm. |
|
|
|
class_weight : dict or 'balanced', default=None |
|
Weights associated with classes in the form ``{class_label: weight}``. |
|
If not given, all classes are supposed to have weight one. |
|
|
|
The "balanced" mode uses the values of y to automatically adjust |
|
weights inversely proportional to class frequencies in the input data |
|
as ``n_samples / (n_classes * np.bincount(y))``. |
|
|
|
Note that these weights will be multiplied with sample_weight (passed |
|
through the fit method) if sample_weight is specified. |
|
|
|
.. versionadded:: 0.17 |
|
class_weight == 'balanced' |
|
|
|
n_jobs : int, default=None |
|
Number of CPU cores used during the cross-validation loop. |
|
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. |
|
``-1`` means using all processors. See :term:`Glossary <n_jobs>` |
|
for more details. |
|
|
|
verbose : int, default=0 |
|
For the 'liblinear', 'sag' and 'lbfgs' solvers set verbose to any |
|
positive number for verbosity. |
|
|
|
refit : bool, default=True |
|
If set to True, the scores are averaged across all folds, and the
coefs and the C that correspond to the best score are taken, and a
final refit is done using these parameters.
|
Otherwise the coefs, intercepts and C that correspond to the |
|
best scores across folds are averaged. |
|
|
|
intercept_scaling : float, default=1 |
|
Useful only when the solver 'liblinear' is used |
|
and self.fit_intercept is set to True. In this case, x becomes |
|
[x, self.intercept_scaling], |
|
i.e. a "synthetic" feature with constant value equal to |
|
intercept_scaling is appended to the instance vector. |
|
The intercept becomes ``intercept_scaling * synthetic_feature_weight``. |
|
|
|
Note! the synthetic feature weight is subject to l1/l2 regularization |
|
like all other features.
|
To lessen the effect of regularization on synthetic feature weight |
|
(and therefore on the intercept) intercept_scaling has to be increased. |
|
|
|
multi_class : {'auto', 'ovr', 'multinomial'}, default='auto'
|
If the option chosen is 'ovr', then a binary problem is fit for each |
|
label. For 'multinomial' the loss minimised is the multinomial loss fit |
|
across the entire probability distribution, *even when the data is |
|
binary*. 'multinomial' is unavailable when solver='liblinear'. |
|
'auto' selects 'ovr' if the data is binary, or if solver='liblinear', |
|
and otherwise selects 'multinomial'. |
|
|
|
.. versionadded:: 0.18 |
|
Stochastic Average Gradient descent solver for 'multinomial' case. |
|
.. versionchanged:: 0.22 |
|
Default changed from 'ovr' to 'auto' in 0.22. |
|
.. deprecated:: 1.5 |
|
``multi_class`` was deprecated in version 1.5 and will be removed in 1.7. |
|
From then on, the recommended 'multinomial' will always be used for |
|
`n_classes >= 3`. |
|
Solvers that do not support 'multinomial' will raise an error. |
|
Use `sklearn.multiclass.OneVsRestClassifier(LogisticRegressionCV())` if you |
|
still want to use OvR. |
|
|
|
random_state : int, RandomState instance, default=None |
|
Used when `solver='sag'`, 'saga' or 'liblinear' to shuffle the data. |
|
Note that this only applies to the solver and not the cross-validation |
|
generator. See :term:`Glossary <random_state>` for details. |
|
|
|
l1_ratios : list of float, default=None |
|
The list of Elastic-Net mixing parameters, with ``0 <= l1_ratio <= 1``.
|
Only used if ``penalty='elasticnet'``. A value of 0 is equivalent to |
|
using ``penalty='l2'``, while 1 is equivalent to using |
|
``penalty='l1'``. For ``0 < l1_ratio < 1``, the penalty is a combination
|
of L1 and L2. |
|
|
|
Attributes |
|
---------- |
|
classes_ : ndarray of shape (n_classes, ) |
|
A list of class labels known to the classifier. |
|
|
|
coef_ : ndarray of shape (1, n_features) or (n_classes, n_features) |
|
Coefficient of the features in the decision function. |
|
|
|
`coef_` is of shape (1, n_features) when the given problem |
|
is binary. |
|
|
|
intercept_ : ndarray of shape (1,) or (n_classes,) |
|
Intercept (a.k.a. bias) added to the decision function. |
|
|
|
If `fit_intercept` is set to False, the intercept is set to zero. |
|
`intercept_` is of shape(1,) when the problem is binary. |
|
|
|
Cs_ : ndarray of shape (n_cs) |
|
Array of C i.e. inverse of regularization parameter values used |
|
for cross-validation. |
|
|
|
l1_ratios_ : ndarray of shape (n_l1_ratios) |
|
Array of l1_ratios used for cross-validation. If no l1_ratio is used |
|
(i.e. penalty is not 'elasticnet'), this is set to ``[None]``.
|
|
|
coefs_paths_ : dict of ndarray of shape (n_folds, n_cs, n_features) or \

(n_folds, n_cs, n_features + 1)

Dict with classes as the keys, and as values the path of coefficients

obtained during cross-validation across each fold and then across

each C after doing an OvR for the corresponding class.
|
If the 'multi_class' option is set to 'multinomial', then |
|
the coefs_paths are the coefficients corresponding to each class. |
|
Each dict value has shape ``(n_folds, n_cs, n_features)`` or |
|
``(n_folds, n_cs, n_features + 1)`` depending on whether the |
|
intercept is fit or not. If ``penalty='elasticnet'``, the shape is |
|
``(n_folds, n_cs, n_l1_ratios_, n_features)`` or |
|
``(n_folds, n_cs, n_l1_ratios_, n_features + 1)``. |
|
|
|
scores_ : dict |
|
Dict with classes as the keys, and as values the grid of scores

obtained during cross-validation of each fold, after doing an OvR

for the corresponding class. If the 'multi_class' option given is

'multinomial', then the same scores are repeated across all classes,

since the multinomial model is shared by all classes. Each dict value
|
has shape ``(n_folds, n_cs)`` or ``(n_folds, n_cs, n_l1_ratios)`` if |
|
``penalty='elasticnet'``. |
|
|
|
C_ : ndarray of shape (n_classes,) or (n_classes - 1,) |
|
Array of C that maps to the best scores across every class. If refit is |
|
set to False, then for each class, the best C is the average of the |
|
C's that correspond to the best scores for each fold. |
|
`C_` is of shape (n_classes,) when the problem is binary.
|
|
|
l1_ratio_ : ndarray of shape (n_classes,) or (n_classes - 1,) |
|
Array of l1_ratio that maps to the best scores across every class. If |
|
refit is set to False, then for each class, the best l1_ratio is the |
|
average of the l1_ratio's that correspond to the best scores for each |
|
fold. `l1_ratio_` is of shape (n_classes,) when the problem is binary.
|
|
|
n_iter_ : ndarray of shape (n_classes, n_folds, n_cs) or (1, n_folds, n_cs) |
|
Actual number of iterations for all classes, folds and Cs. |
|
In the binary or multinomial cases, the first dimension is equal to 1. |
|
If ``penalty='elasticnet'``, the shape is ``(n_classes, n_folds, |
|
n_cs, n_l1_ratios)`` or ``(1, n_folds, n_cs, n_l1_ratios)``. |
|
|
|
n_features_in_ : int |
|
Number of features seen during :term:`fit`. |
|
|
|
.. versionadded:: 0.24 |
|
|
|
feature_names_in_ : ndarray of shape (`n_features_in_`,) |
|
Names of features seen during :term:`fit`. Defined only when `X` |
|
has feature names that are all strings. |
|
|
|
.. versionadded:: 1.0 |
|
|
|
See Also |
|
-------- |
|
LogisticRegression : Logistic regression without tuning the |
|
hyperparameter `C`. |
|
|
|
Examples |
|
-------- |
|
>>> from sklearn.datasets import load_iris |
|
>>> from sklearn.linear_model import LogisticRegressionCV |
|
>>> X, y = load_iris(return_X_y=True) |
|
>>> clf = LogisticRegressionCV(cv=5, random_state=0).fit(X, y) |
|
>>> clf.predict(X[:2, :]) |
|
array([0, 0]) |
|
>>> clf.predict_proba(X[:2, :]).shape |
|
(2, 3) |
|
>>> clf.score(X, y) |
|
0.98... |
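
Elastic-net regularization requires the 'saga' solver; the grid of

ratios and the iteration budget below are illustrative choices, not

requirements:



>>> clf_en = LogisticRegressionCV(

...     Cs=3, penalty="elasticnet", solver="saga",

...     l1_ratios=[0.1, 0.9], max_iter=10000, random_state=0,

... ).fit(X, y)

>>> clf_en.l1_ratios_

array([0.1, 0.9])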
|
""" |
|
|
|
_parameter_constraints: dict = {**LogisticRegression._parameter_constraints} |
|
|
|
for param in ["C", "warm_start", "l1_ratio"]: |
|
_parameter_constraints.pop(param) |
|
|
|
_parameter_constraints.update( |
|
{ |
|
"Cs": [Interval(Integral, 1, None, closed="left"), "array-like"], |
|
"cv": ["cv_object"], |
|
"scoring": [StrOptions(set(get_scorer_names())), callable, None], |
|
"l1_ratios": ["array-like", None], |
|
"refit": ["boolean"], |
|
"penalty": [StrOptions({"l1", "l2", "elasticnet"})], |
|
} |
|
) |
|
|
|
def __init__( |
|
self, |
|
*, |
|
Cs=10, |
|
fit_intercept=True, |
|
cv=None, |
|
dual=False, |
|
penalty="l2", |
|
scoring=None, |
|
solver="lbfgs", |
|
tol=1e-4, |
|
max_iter=100, |
|
class_weight=None, |
|
n_jobs=None, |
|
verbose=0, |
|
refit=True, |
|
intercept_scaling=1.0, |
|
multi_class="deprecated", |
|
random_state=None, |
|
l1_ratios=None, |
|
): |
|
self.Cs = Cs |
|
self.fit_intercept = fit_intercept |
|
self.cv = cv |
|
self.dual = dual |
|
self.penalty = penalty |
|
self.scoring = scoring |
|
self.tol = tol |
|
self.max_iter = max_iter |
|
self.class_weight = class_weight |
|
self.n_jobs = n_jobs |
|
self.verbose = verbose |
|
self.solver = solver |
|
self.refit = refit |
|
self.intercept_scaling = intercept_scaling |
|
self.multi_class = multi_class |
|
self.random_state = random_state |
|
self.l1_ratios = l1_ratios |
|
|
|
@_fit_context(prefer_skip_nested_validation=True) |
|
def fit(self, X, y, sample_weight=None, **params): |
|
"""Fit the model according to the given training data. |
|
|
|
Parameters |
|
---------- |
|
X : {array-like, sparse matrix} of shape (n_samples, n_features) |
|
Training vector, where `n_samples` is the number of samples and |
|
`n_features` is the number of features. |
|
|
|
y : array-like of shape (n_samples,) |
|
Target vector relative to X. |
|
|
|
sample_weight : array-like of shape (n_samples,), default=None
|
Array of weights that are assigned to individual samples. |
|
If not provided, then each sample is given unit weight. |
|
|
|
**params : dict |
|
Parameters to pass to the underlying splitter and scorer. |
|
|
|
.. versionadded:: 1.4 |
|
|
|
Returns |
|
------- |
|
self : object |
|
Fitted LogisticRegressionCV estimator. |
|
""" |
|
_raise_for_params(params, self, "fit") |
|
|
|
solver = _check_solver(self.solver, self.penalty, self.dual) |
|
|
|
if self.penalty == "elasticnet": |
|
if ( |
|
self.l1_ratios is None |
|
or len(self.l1_ratios) == 0 |
|
or any( |
|
( |
|
not isinstance(l1_ratio, numbers.Number) |
|
or l1_ratio < 0 |
|
or l1_ratio > 1 |
|
) |
|
for l1_ratio in self.l1_ratios |
|
) |
|
): |
|
raise ValueError( |
|
"l1_ratios must be a list of numbers between " |
|
"0 and 1; got (l1_ratios=%r)" % self.l1_ratios |
|
) |
|
l1_ratios_ = self.l1_ratios |
|
else: |
|
if self.l1_ratios is not None: |
|
warnings.warn( |
|
"l1_ratios parameter is only used when penalty " |
|
"is 'elasticnet'. Got (penalty={})".format(self.penalty) |
|
) |
|
|
|
l1_ratios_ = [None] |
|
|
|
X, y = validate_data( |
|
self, |
|
X, |
|
y, |
|
accept_sparse="csr", |
|
dtype=np.float64, |
|
order="C", |
|
accept_large_sparse=solver not in ["liblinear", "sag", "saga"], |
|
) |
|
check_classification_targets(y) |
|
|
|
class_weight = self.class_weight |
|
|
|
|
|
label_encoder = LabelEncoder().fit(y) |
|
y = label_encoder.transform(y) |
|
if isinstance(class_weight, dict): |
|
class_weight = { |
|
label_encoder.transform([cls])[0]: v for cls, v in class_weight.items() |
|
} |
|
|
|
|
|
classes = self.classes_ = label_encoder.classes_ |
|
encoded_labels = label_encoder.transform(label_encoder.classes_) |
|
|
|
|
|
multi_class = self.multi_class |
|
if self.multi_class == "multinomial" and len(self.classes_) == 2: |
|
warnings.warn( |
|
( |
|
"'multi_class' was deprecated in version 1.5 and will be removed in" |
|
" 1.7. From then on, binary problems will be fit as proper binary " |
|
" logistic regression models (as if multi_class='ovr' were set)." |
|
" Leave it to its default value to avoid this warning." |
|
), |
|
FutureWarning, |
|
) |
|
elif self.multi_class in ("multinomial", "auto"): |
|
warnings.warn( |
|
( |
|
"'multi_class' was deprecated in version 1.5 and will be removed in" |
|
" 1.7. From then on, it will always use 'multinomial'." |
|
" Leave it to its default value to avoid this warning." |
|
), |
|
FutureWarning, |
|
) |
|
elif self.multi_class == "ovr": |
|
warnings.warn( |
|
( |
|
"'multi_class' was deprecated in version 1.5 and will be removed in" |
|
" 1.7. Use OneVsRestClassifier(LogisticRegressionCV(..)) instead." |
|
" Leave it to its default value to avoid this warning." |
|
), |
|
FutureWarning, |
|
) |
|
else: |
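# self.multi_class is the "deprecated" default sentinel: fall
# through to 'auto' without emitting a FutureWarning.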
|
|
|
multi_class = "auto" |
|
multi_class = _check_multi_class(multi_class, solver, len(classes)) |
|
|
|
if solver in ["sag", "saga"]: |
|
max_squared_sum = row_norms(X, squared=True).max() |
|
else: |
|
max_squared_sum = None |
|
|
|
if _routing_enabled(): |
|
routed_params = process_routing( |
|
self, |
|
"fit", |
|
sample_weight=sample_weight, |
|
**params, |
|
) |
|
else: |
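# Metadata routing is disabled: pass sample_weight to the scorer
# directly and use the splitter with no extra split parameters.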
|
routed_params = Bunch() |
|
routed_params.splitter = Bunch(split={}) |
|
routed_params.scorer = Bunch(score=params) |
|
if sample_weight is not None: |
|
routed_params.scorer.score["sample_weight"] = sample_weight |
|
|
|
|
|
cv = check_cv(self.cv, y, classifier=True) |
|
folds = list(cv.split(X, y, **routed_params.splitter.split)) |
|
|
|
|
|
n_classes = len(encoded_labels) |
|
|
|
if n_classes < 2: |
|
raise ValueError( |
|
"This solver needs samples of at least 2 classes" |
|
" in the data, but the data contains only one" |
|
" class: %r" % classes[0] |
|
) |
|
|
|
if n_classes == 2: |
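# OvR in case of binary problems is as good as fitting
# the higher label, so only that class is kept.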
|
|
|
|
|
n_classes = 1 |
|
encoded_labels = encoded_labels[1:] |
|
classes = classes[1:] |
|
|
|
|
|
|
|
if multi_class == "multinomial": |
|
iter_encoded_labels = iter_classes = [None] |
|
else: |
|
iter_encoded_labels = encoded_labels |
|
iter_classes = classes |
|
|
|
|
|
if class_weight == "balanced": |
|
class_weight = compute_class_weight( |
|
class_weight, classes=np.arange(len(self.classes_)), y=y |
|
) |
|
class_weight = dict(enumerate(class_weight)) |
|
|
|
path_func = delayed(_log_reg_scoring_path) |
|
|
|
|
|
|
|
if self.solver in ["sag", "saga"]: |
|
prefer = "threads" |
|
else: |
|
prefer = "processes" |
|
|
|
fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose, prefer=prefer)( |
|
path_func( |
|
X, |
|
y, |
|
train, |
|
test, |
|
pos_class=label, |
|
Cs=self.Cs, |
|
fit_intercept=self.fit_intercept, |
|
penalty=self.penalty, |
|
dual=self.dual, |
|
solver=solver, |
|
tol=self.tol, |
|
max_iter=self.max_iter, |
|
verbose=self.verbose, |
|
class_weight=class_weight, |
|
scoring=self.scoring, |
|
multi_class=multi_class, |
|
intercept_scaling=self.intercept_scaling, |
|
random_state=self.random_state, |
|
max_squared_sum=max_squared_sum, |
|
sample_weight=sample_weight, |
|
l1_ratio=l1_ratio, |
|
score_params=routed_params.scorer.score, |
|
) |
|
for label in iter_encoded_labels |
|
for train, test in folds |
|
for l1_ratio in l1_ratios_ |
|
) |
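# Every path call returns the same grid of Cs, so only the first copy
# is kept below. coefs_paths, scores and n_iter_ are then reshaped to
# expose the (n_classes, n_folds, n_Cs * n_l1_ratios, ...) layout.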
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
coefs_paths, Cs, scores, n_iter_ = zip(*fold_coefs_) |
|
self.Cs_ = Cs[0] |
|
if multi_class == "multinomial": |
|
coefs_paths = np.reshape( |
|
coefs_paths, |
|
(len(folds), len(l1_ratios_) * len(self.Cs_), n_classes, -1), |
|
) |
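# Bring the axes to (n_classes, n_folds, n_Cs * n_l1_ratios, n_features).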
|
|
|
|
|
coefs_paths = np.swapaxes(coefs_paths, 0, 1) |
|
coefs_paths = np.swapaxes(coefs_paths, 0, 2) |
|
self.n_iter_ = np.reshape( |
|
n_iter_, (1, len(folds), len(self.Cs_) * len(l1_ratios_)) |
|
) |
|
|
|
scores = np.tile(scores, (n_classes, 1, 1)) |
|
else: |
|
coefs_paths = np.reshape( |
|
coefs_paths, |
|
(n_classes, len(folds), len(self.Cs_) * len(l1_ratios_), -1), |
|
) |
|
self.n_iter_ = np.reshape( |
|
n_iter_, (n_classes, len(folds), len(self.Cs_) * len(l1_ratios_)) |
|
) |
|
scores = np.reshape(scores, (n_classes, len(folds), -1)) |
|
self.scores_ = dict(zip(classes, scores)) |
|
self.coefs_paths_ = dict(zip(classes, coefs_paths)) |
|
|
|
self.C_ = list() |
|
self.l1_ratio_ = list() |
|
self.coef_ = np.empty((n_classes, X.shape[1])) |
|
self.intercept_ = np.zeros(n_classes) |
|
for index, (cls, encoded_label) in enumerate( |
|
zip(iter_classes, iter_encoded_labels) |
|
): |
|
if multi_class == "ovr": |
|
scores = self.scores_[cls] |
|
coefs_paths = self.coefs_paths_[cls] |
|
else: |
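# For multinomial, the scores are identical across classes, so only
# the first entry is used; coefs_paths keeps its full per-class shape.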
|
|
|
scores = scores[0] |
|
|
|
|
|
|
|
if self.refit: |
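# scores has one column per (l1_ratio, C) pair, flattened as
# index = l1_idx * n_Cs + C_idx, so the best C and l1_ratio are
# recovered below with a modulo and an integer division.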
|
|
|
|
|
|
|
|
|
|
|
best_index = scores.sum(axis=0).argmax() |
|
|
|
best_index_C = best_index % len(self.Cs_) |
|
C_ = self.Cs_[best_index_C] |
|
self.C_.append(C_) |
|
|
|
best_index_l1 = best_index // len(self.Cs_) |
|
l1_ratio_ = l1_ratios_[best_index_l1] |
|
self.l1_ratio_.append(l1_ratio_) |
|
|
|
if multi_class == "multinomial": |
|
coef_init = np.mean(coefs_paths[:, :, best_index, :], axis=1) |
|
else: |
|
coef_init = np.mean(coefs_paths[:, best_index, :], axis=0) |
|
|
|
|
|
|
|
w, _, _ = _logistic_regression_path( |
|
X, |
|
y, |
|
pos_class=encoded_label, |
|
Cs=[C_], |
|
solver=solver, |
|
fit_intercept=self.fit_intercept, |
|
coef=coef_init, |
|
max_iter=self.max_iter, |
|
tol=self.tol, |
|
penalty=self.penalty, |
|
class_weight=class_weight, |
|
multi_class=multi_class, |
|
verbose=max(0, self.verbose - 1), |
|
random_state=self.random_state, |
|
check_input=False, |
|
max_squared_sum=max_squared_sum, |
|
sample_weight=sample_weight, |
|
l1_ratio=l1_ratio_, |
|
) |
|
w = w[0] |
|
|
|
else: |
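# Without refitting, take the best index per fold and average the
# corresponding coefficients, Cs and l1_ratios across folds.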
|
|
|
|
|
best_indices = np.argmax(scores, axis=1) |
|
if multi_class == "ovr": |
|
w = np.mean( |
|
[coefs_paths[i, best_indices[i], :] for i in range(len(folds))], |
|
axis=0, |
|
) |
|
else: |
|
w = np.mean( |
|
[ |
|
coefs_paths[:, i, best_indices[i], :] |
|
for i in range(len(folds)) |
|
], |
|
axis=0, |
|
) |
|
|
|
best_indices_C = best_indices % len(self.Cs_) |
|
self.C_.append(np.mean(self.Cs_[best_indices_C])) |
|
|
|
if self.penalty == "elasticnet": |
|
best_indices_l1 = best_indices // len(self.Cs_) |
|
self.l1_ratio_.append(np.mean(l1_ratios_[best_indices_l1])) |
|
else: |
|
self.l1_ratio_.append(None) |
|
|
|
if multi_class == "multinomial": |
|
self.C_ = np.tile(self.C_, n_classes) |
|
self.l1_ratio_ = np.tile(self.l1_ratio_, n_classes) |
|
self.coef_ = w[:, : X.shape[1]] |
|
if self.fit_intercept: |
|
self.intercept_ = w[:, -1] |
|
else: |
|
self.coef_[index] = w[: X.shape[1]] |
|
if self.fit_intercept: |
|
self.intercept_[index] = w[-1] |
|
|
|
self.C_ = np.asarray(self.C_) |
|
self.l1_ratio_ = np.asarray(self.l1_ratio_) |
|
self.l1_ratios_ = np.asarray(l1_ratios_) |
|
|
|
|
|
if self.l1_ratios is not None: |
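# With elasticnet, the last axis currently interleaves Cs and
# l1_ratios; reshape to (n_folds, n_l1_ratios, n_Cs, ...) and then
# transpose so the documented (n_folds, n_Cs, n_l1_ratios, ...)
# layout is exposed.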
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for cls, coefs_path in self.coefs_paths_.items(): |
|
self.coefs_paths_[cls] = coefs_path.reshape( |
|
(len(folds), self.l1_ratios_.size, self.Cs_.size, -1) |
|
) |
|
self.coefs_paths_[cls] = np.transpose( |
|
self.coefs_paths_[cls], (0, 2, 1, 3) |
|
) |
|
for cls, score in self.scores_.items(): |
|
self.scores_[cls] = score.reshape( |
|
(len(folds), self.l1_ratios_.size, self.Cs_.size) |
|
) |
|
self.scores_[cls] = np.transpose(self.scores_[cls], (0, 2, 1)) |
|
|
|
self.n_iter_ = self.n_iter_.reshape( |
|
(-1, len(folds), self.l1_ratios_.size, self.Cs_.size) |
|
) |
|
self.n_iter_ = np.transpose(self.n_iter_, (0, 1, 3, 2)) |
|
|
|
return self |
|
|
|
def score(self, X, y, sample_weight=None, **score_params): |
|
"""Score using the `scoring` option on the given test data and labels. |
|
|
|
Parameters |
|
---------- |
|
X : array-like of shape (n_samples, n_features) |
|
Test samples. |
|
|
|
y : array-like of shape (n_samples,) |
|
True labels for X. |
|
|
|
sample_weight : array-like of shape (n_samples,), default=None |
|
Sample weights. |
|
|
|
**score_params : dict |
|
Parameters to pass to the `score` method of the underlying scorer. |
|
|
|
.. versionadded:: 1.4 |
|
|
|
Returns |
|
------- |
|
score : float |
|
Score of self.predict(X) w.r.t. y. |
|
""" |
|
_raise_for_params(score_params, self, "score") |
|
|
|
scoring = self._get_scorer() |
|
if _routing_enabled(): |
|
routed_params = process_routing( |
|
self, |
|
"score", |
|
sample_weight=sample_weight, |
|
**score_params, |
|
) |
|
else: |
|
routed_params = Bunch() |
|
routed_params.scorer = Bunch(score={}) |
|
if sample_weight is not None: |
|
routed_params.scorer.score["sample_weight"] = sample_weight |
|
|
|
return scoring( |
|
self, |
|
X, |
|
y, |
|
**routed_params.scorer.score, |
|
) |
|
|
|
def get_metadata_routing(self): |
|
"""Get metadata routing of this object. |
|
|
|
Please check :ref:`User Guide <metadata_routing>` on how the routing |
|
mechanism works. |
|
|
|
.. versionadded:: 1.4 |
|
|
|
Returns |
|
------- |
|
routing : MetadataRouter |
|
A :class:`~sklearn.utils.metadata_routing.MetadataRouter` encapsulating |
|
routing information. |
|
""" |
|
|
|
router = ( |
|
MetadataRouter(owner=self.__class__.__name__) |
|
.add_self_request(self) |
|
.add( |
|
splitter=self.cv, |
|
method_mapping=MethodMapping().add(caller="fit", callee="split"), |
|
) |
|
.add( |
|
scorer=self._get_scorer(), |
|
method_mapping=MethodMapping() |
|
.add(caller="score", callee="score") |
|
.add(caller="fit", callee="score"), |
|
) |
|
) |
|
return router |
|
|
|
def _get_scorer(self): |
|
"""Get the scorer based on the scoring method specified. |
|
The default scoring method is `accuracy`. |
|
""" |
|
scoring = self.scoring or "accuracy" |
|
return get_scorer(scoring) |
|
|
|
def __sklearn_tags__(self): |
|
tags = super().__sklearn_tags__() |
|
tags.input_tags.sparse = True |
|
return tags |
|
|