"""
The code for ExponentiatedGradientReduction wraps the source class
fairlearn.reductions.ExponentiatedGradient
available in the https://github.com/fairlearn/fairlearn library
licensed under the MIT License, Copyright Microsoft Corporation
"""
try:
import fairlearn.reductions as red
except ImportError as error:
from logging import warning
warning("{}: ExponentiatedGradientReduction will be unavailable. To install, run:\n"
"pip install 'aif360[Reductions]'".format(error))
from sklearn.base import BaseEstimator, ClassifierMixin, clone
from sklearn.preprocessing import LabelEncoder
class ExponentiatedGradientReduction(BaseEstimator, ClassifierMixin):
    """Exponentiated gradient reduction for fair classification.

    Exponentiated gradient reduction is an in-processing technique that reduces
    fair classification to a sequence of cost-sensitive classification problems,
    returning a randomized classifier with the lowest empirical error subject to
    fair classification constraints [#agarwal18]_.

    References:
        .. [#agarwal18] `A. Agarwal, A. Beygelzimer, M. Dudik, J. Langford, and
           H. Wallach, "A Reductions Approach to Fair Classification,"
           International Conference on Machine Learning, 2018.
           <https://arxiv.org/abs/1803.02453>`_
    """

    def __init__(self,
                 prot_attr,
                 estimator,
                 constraints,
                 eps=0.01,
                 max_iter=50,
                 nu=None,
                 eta0=2.0,
                 run_linprog_step=True,
                 drop_prot_attr=True):
        """
        Args:
            prot_attr: String or array-like column indices or column names of
                protected attributes.
            estimator: An estimator implementing methods
                ``fit(X, y, sample_weight)`` and ``predict(X)``, where ``X`` is
                the matrix of features, ``y`` is the vector of labels, and
                ``sample_weight`` is a vector of weights; labels ``y`` and
                predictions returned by ``predict(X)`` are either 0 or 1 -- e.g.
                scikit-learn classifiers.
            constraints (str or fairlearn.reductions.Moment): If string, keyword
                denoting the :class:`fairlearn.reductions.Moment` object
                defining the disparity constraints -- e.g., "DemographicParity"
                or "EqualizedOdds". For a full list of possible options see
                `self.model.moments`. Otherwise, provide the desired
                :class:`~fairlearn.reductions.Moment` object defining the
                disparity constraints.
            eps: Allowed fairness constraint violation; the solution is
                guaranteed to have the error within ``2*best_gap`` of the best
                error under constraint eps; the constraint violation is at most
                ``2*(eps+best_gap)``.
            max_iter: Maximum number of iterations.
            nu: Convergence threshold for the duality gap, corresponding to a
                conservative automatic setting based on the statistical
                uncertainty in measuring classification error.
            eta0: Initial setting of the learning rate.
            run_linprog_step: If True each step of exponentiated gradient is
                followed by the saddle point optimization over the convex hull
                of classifiers returned so far.
            drop_prot_attr: Boolean flag indicating whether to drop protected
                attributes from training data.
        """
        # Per sklearn convention, __init__ only stores hyperparameters;
        # all validation and fitting state lives in fit().
        self.prot_attr = prot_attr
        self.estimator = estimator
        self.constraints = constraints
        self.eps = eps
        self.max_iter = max_iter
        self.nu = nu
        self.eta0 = eta0
        self.run_linprog_step = run_linprog_step
        self.drop_prot_attr = drop_prot_attr

    def fit(self, X, y):
        """Learns randomized model with less bias

        Args:
            X (pandas.DataFrame): Training samples.
            y (array-like): Training labels.

        Returns:
            self
        """
        # Clone so the user-supplied estimator is never mutated (sklearn
        # convention: fitted state goes on trailing-underscore attributes).
        self.estimator_ = clone(self.estimator)

        # Map constraint keywords to fairlearn Moment classes.
        moments = {
            "DemographicParity": red.DemographicParity,
            "EqualizedOdds": red.EqualizedOdds,
            "TruePositiveRateParity": red.TruePositiveRateParity,
            "FalsePositiveRateParity": red.FalsePositiveRateParity,
            "ErrorRateParity": red.ErrorRateParity,
        }
        if isinstance(self.constraints, str):
            if self.constraints not in moments:
                raise ValueError(f"Constraint not recognized: {self.constraints}")
            self.moment_ = moments[self.constraints]()
        elif isinstance(self.constraints, red.Moment):
            self.moment_ = self.constraints
        else:
            raise ValueError("constraints must be a string or Moment object.")

        # BUG FIX: run_linprog_step was previously accepted and documented but
        # never forwarded to fairlearn, so setting it to False had no effect.
        self.model_ = red.ExponentiatedGradient(self.estimator_, self.moment_,
                eps=self.eps, max_iter=self.max_iter, nu=self.nu,
                eta0=self.eta0, run_linprog_step=self.run_linprog_step)

        # Extract sensitive features BEFORE optionally dropping them from X.
        A = X[self.prot_attr]

        if self.drop_prot_attr:
            X = X.drop(self.prot_attr, axis=1)

        # fairlearn expects integer-encoded labels; remember the original
        # classes so predict() can map back.
        le = LabelEncoder()
        y = le.fit_transform(y)
        self.classes_ = le.classes_

        self.model_.fit(X, y, sensitive_features=A)

        return self

    def predict(self, X):
        """Predict class labels for the given samples.

        Args:
            X (pandas.DataFrame): Test samples.

        Returns:
            numpy.ndarray: Predicted class label per sample.
        """
        if self.drop_prot_attr:
            X = X.drop(self.prot_attr, axis=1)

        # Map encoded 0/1 predictions back to the original class labels.
        return self.classes_[self.model_.predict(X)]

    def predict_proba(self, X):
        """Probability estimates.

        The returned estimates for all classes are ordered by the label of
        classes.

        Args:
            X (pandas.DataFrame): Test samples.

        Returns:
            numpy.ndarray: Returns the probability of the sample for each class
            in the model, where classes are ordered as they are in
            ``self.classes_``.
        """
        if self.drop_prot_attr:
            X = X.drop(self.prot_attr, axis=1)

        # NOTE(review): _pmf_predict is a private fairlearn API (weighted
        # average of the ensemble's predictions); it may change between
        # fairlearn versions -- pin or verify on upgrade.
        return self.model_._pmf_predict(X)