"""
The code for ExponentiatedGradientReduction wraps the source class
fairlearn.reductions.ExponentiatedGradient
available in the https://github.com/fairlearn/fairlearn library
licensed under the MIT Licencse, Copyright Microsoft Corporation
"""
try:
import fairlearn.reductions as red
except ImportError as error:
from logging import warning
warning("{}: ExponentiatedGradientReduction will be unavailable. To install, run:\n"
"pip install 'aif360[Reductions]'".format(error))
from sklearn.base import BaseEstimator, ClassifierMixin, clone
from sklearn.preprocessing import LabelEncoder
class ExponentiatedGradientReduction(BaseEstimator, ClassifierMixin):
"""Exponentiated gradient reduction for fair classification.
Exponentiated gradient reduction is an in-processing technique that reduces
fair classification to a sequence of cost-sensitive classification problems,
returning a randomized classifier with the lowest empirical error subject to
fair classification constraints [#agarwal18]_.
References:
.. [#agarwal18] `A. Agarwal, A. Beygelzimer, M. Dudik, J. Langford, and
H. Wallach, "A Reductions Approach to Fair Classification,"
International Conference on Machine Learning, 2018.
<https://arxiv.org/abs/1803.02453>`_
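    Example:
        A minimal usage sketch; the ``'sex'`` column, the train/test
        splits, and the scikit-learn ``LogisticRegression`` base estimator
        are illustrative assumptions, not fixed by this class::

            >>> from sklearn.linear_model import LogisticRegression
            >>> egr = ExponentiatedGradientReduction(
            ...     prot_attr='sex',
            ...     estimator=LogisticRegression(solver='liblinear'),
            ...     constraints='EqualizedOdds')
            >>> egr.fit(X_train, y_train)
            >>> y_pred = egr.predict(X_test)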
"""
def __init__(self,
prot_attr,
estimator,
constraints,
eps=0.01,
max_iter=50,
nu=None,
eta0=2.0,
run_linprog_step=True,
drop_prot_attr=True):
"""
Args:
prot_attr: String or array-like column indices or column names of
protected attributes.
estimator: An estimator implementing methods
``fit(X, y, sample_weight)`` and ``predict(X)``, where ``X`` is
the matrix of features, ``y`` is the vector of labels, and
``sample_weight`` is a vector of weights; labels ``y`` and
predictions returned by ``predict(X)`` are either 0 or 1 -- e.g.
scikit-learn classifiers.
            constraints (str or fairlearn.reductions.Moment): If string, keyword
                denoting the :class:`fairlearn.reductions.Moment` object
                defining the disparity constraints -- e.g., "DemographicParity"
                or "EqualizedOdds". Supported keywords are "DemographicParity",
                "EqualizedOdds", "TruePositiveRateParity",
                "FalsePositiveRateParity", and "ErrorRateParity". Otherwise,
                provide the desired :class:`~fairlearn.reductions.Moment`
                object defining the disparity constraints.
eps: Allowed fairness constraint violation; the solution is
guaranteed to have the error within ``2*best_gap`` of the best
error under constraint eps; the constraint violation is at most
``2*(eps+best_gap)``.
max_iter: Maximum number of iterations.
            nu: Convergence threshold for the duality gap. If None, a
                conservative automatic setting based on the statistical
                uncertainty in measuring classification error is used.
eta0: Initial setting of the learning rate.
            run_linprog_step: If True, each step of the exponentiated
                gradient is followed by the saddle point optimization over
                the convex hull of classifiers returned so far.
            drop_prot_attr: Boolean flag indicating whether to drop the
                protected attributes from the training data.
"""
self.prot_attr = prot_attr
self.estimator = estimator
self.constraints = constraints
self.eps = eps
self.max_iter = max_iter
self.nu = nu
self.eta0 = eta0
self.run_linprog_step = run_linprog_step
self.drop_prot_attr = drop_prot_attr
def fit(self, X, y):
"""Learns randomized model with less bias
Args:
X (pandas.DataFrame): Training samples.
y (array-like): Training labels.
Returns:
self
"""
self.estimator_ = clone(self.estimator)
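        # Map each supported string keyword to its fairlearn Moment class.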
moments = {
"DemographicParity": red.DemographicParity,
"EqualizedOdds": red.EqualizedOdds,
"TruePositiveRateParity": red.TruePositiveRateParity,
"FalsePositiveRateParity": red.FalsePositiveRateParity,
"ErrorRateParity": red.ErrorRateParity,
}
if isinstance(self.constraints, str):
if self.constraints not in moments:
raise ValueError(f"Constraint not recognized: {self.constraints}")
self.moment_ = moments[self.constraints]()
elif isinstance(self.constraints, red.Moment):
self.moment_ = self.constraints
else:
raise ValueError("constraints must be a string or Moment object.")
        self.model_ = red.ExponentiatedGradient(self.estimator_, self.moment_,
                eps=self.eps, max_iter=self.max_iter, nu=self.nu,
                eta0=self.eta0, run_linprog_step=self.run_linprog_step)
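        # Keep the protected attribute(s) aside for fairlearn's
        # sensitive_features argument.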
A = X[self.prot_attr]
if self.drop_prot_attr:
X = X.drop(self.prot_attr, axis=1)
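        # Encode labels as consecutive integers (0/1 in the binary case),
        # as required by the underlying fairlearn estimator.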
le = LabelEncoder()
y = le.fit_transform(y)
self.classes_ = le.classes_
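        # fairlearn receives the protected attribute(s) separately from X.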
self.model_.fit(X, y, sensitive_features=A)
return self
def predict(self, X):
"""Predict class labels for the given samples.
Args:
X (pandas.DataFrame): Test samples.
Returns:
numpy.ndarray: Predicted class label per sample.
"""
if self.drop_prot_attr:
X = X.drop(self.prot_attr, axis=1)
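        # model_.predict returns encoded labels; map them back to the
        # original classes.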
return self.classes_[self.model_.predict(X)]
def predict_proba(self, X):
"""Probability estimates.
The returned estimates for all classes are ordered by the label of
classes.
Args:
X (pandas.DataFrame): Test samples.
Returns:
numpy.ndarray: Returns the probability of the sample for each class
in the model, where classes are ordered as they are in
``self.classes_``.
"""
if self.drop_prot_attr:
X = X.drop(self.prot_attr, axis=1)
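        # Note: _pmf_predict is a private fairlearn method; it returns the
        # label distribution induced by the randomized classifier.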
return self.model_._pmf_predict(X)