"""
The code for ExponentiatedGradientReduction wraps the source class
fairlearn.reductions.ExponentiatedGradient
available in the https://github.com/fairlearn/fairlearn library
licensed under the MIT License, Copyright Microsoft Corporation
"""
try:
    import fairlearn.reductions as red
except ImportError as error:
    from logging import warning
    warning("{}: ExponentiatedGradientReduction will be unavailable. To install, run:\n"
            "pip install 'aif360[Reductions]'".format(error))
from sklearn.base import BaseEstimator, ClassifierMixin, clone
from sklearn.preprocessing import LabelEncoder


class ExponentiatedGradientReduction(BaseEstimator, ClassifierMixin):
    """Exponentiated gradient reduction for fair classification.

    Exponentiated gradient reduction is an in-processing technique that reduces
    fair classification to a sequence of cost-sensitive classification problems,
    returning a randomized classifier with the lowest empirical error subject to
    fair classification constraints [#agarwal18]_.
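
    Example:
        A minimal usage sketch (the ``'sex'`` column name and the data
        splits are assumptions for illustration)::

            >>> from sklearn.linear_model import LogisticRegression
            >>> egr = ExponentiatedGradientReduction(
            ...     prot_attr='sex',
            ...     estimator=LogisticRegression(solver='liblinear'),
            ...     constraints='EqualizedOdds')
            >>> egr.fit(X_train, y_train)
            >>> y_pred = egr.predict(X_test)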

    References:
        .. [#agarwal18] `A. Agarwal, A. Beygelzimer, M. Dudik, J. Langford, and
           H. Wallach, "A Reductions Approach to Fair Classification,"
           International Conference on Machine Learning, 2018.
           <https://arxiv.org/abs/1803.02453>`_
    """
    def __init__(self,
                 prot_attr,
                 estimator,
                 constraints,
                 eps=0.01,
                 max_iter=50,
                 nu=None,
                 eta0=2.0,
                 run_linprog_step=True,
                 drop_prot_attr=True):
        """
        Args:
            prot_attr: String or array-like column indices or column names of
                protected attributes.
            estimator: An estimator implementing methods
                ``fit(X, y, sample_weight)`` and ``predict(X)``, where ``X`` is
                the matrix of features, ``y`` is the vector of labels, and
                ``sample_weight`` is a vector of weights; labels ``y`` and
                predictions returned by ``predict(X)`` are either 0 or 1 -- e.g.
                scikit-learn classifiers.
            constraints (str or fairlearn.reductions.Moment): If string, keyword
                denoting the :class:`fairlearn.reductions.Moment` object
                defining the disparity constraints -- e.g., "DemographicParity"
                or "EqualizedOdds"; the valid keywords are the keys of the
                ``moments`` dictionary in :meth:`fit`. Otherwise, provide the
                desired :class:`~fairlearn.reductions.Moment` object defining
                the disparity constraints.
            eps: Allowed fairness constraint violation; the solution is
                guaranteed to have the error within ``2*best_gap`` of the best
                error under constraint eps; the constraint violation is at most
                ``2*(eps+best_gap)``.
            max_iter: Maximum number of iterations.
            nu: Convergence threshold for the duality gap. If None, a
                conservative setting is chosen automatically based on the
                statistical uncertainty in measuring classification error.
            eta0: Initial setting of the learning rate.
            run_linprog_step: If True, each step of exponentiated gradient is
                followed by the saddle point optimization over the convex hull
                of classifiers returned so far.
            drop_prot_attr: Boolean flag indicating whether to drop protected
                attributes from training data.
        """
        self.prot_attr = prot_attr
        self.estimator = estimator
        self.constraints = constraints
        self.eps = eps
        self.max_iter = max_iter
        self.nu = nu
        self.eta0 = eta0
        self.run_linprog_step = run_linprog_step
        self.drop_prot_attr = drop_prot_attr

    def fit(self, X, y):
        """Learns a randomized model subject to the fairness constraints.

        Args:
            X (pandas.DataFrame): Training samples.
            y (array-like): Training labels.

        Returns:
            self
        """
        self.estimator_ = clone(self.estimator)

        moments = {
            "DemographicParity": red.DemographicParity,
            "EqualizedOdds": red.EqualizedOdds,
            "TruePositiveRateParity": red.TruePositiveRateParity,
            "FalsePositiveRateParity": red.FalsePositiveRateParity,
            "ErrorRateParity": red.ErrorRateParity,
        }
        if isinstance(self.constraints, str):
            if self.constraints not in moments:
                raise ValueError(f"Constraint not recognized: {self.constraints}")
            self.moment_ = moments[self.constraints]()
        elif isinstance(self.constraints, red.Moment):
            self.moment_ = self.constraints
        else:
            raise ValueError("constraints must be a string or Moment object.")

        self.model_ = red.ExponentiatedGradient(self.estimator_, self.moment_,
                eps=self.eps, max_iter=self.max_iter, nu=self.nu,
                eta0=self.eta0, run_linprog_step=self.run_linprog_step)

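        # pull out the sensitive feature column(s) for fairlearn before
        # (optionally) dropping them from the training matrix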
        A = X[self.prot_attr]

        if self.drop_prot_attr:
            X = X.drop(self.prot_attr, axis=1)

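        # fairlearn expects integer-encoded (0/1) labels; keep the original
        # classes so predict() can map encoded outputs back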
        le = LabelEncoder()
        y = le.fit_transform(y)
        self.classes_ = le.classes_

        self.model_.fit(X, y, sensitive_features=A)

        return self

    def predict(self, X):
        """Predict class labels for the given samples.

        Args:
            X (pandas.DataFrame): Test samples.

        Returns:
            numpy.ndarray: Predicted class label per sample.
        """
        if self.drop_prot_attr:
            X = X.drop(self.prot_attr, axis=1)

        return self.classes_[self.model_.predict(X)]

    def predict_proba(self, X):
        """Probability estimates.

        The returned estimates for all classes are ordered by the label of
        classes.

        Args:
            X (pandas.DataFrame): Test samples.

        Returns:
            numpy.ndarray: Returns the probability of the sample for each class
            in the model, where classes are ordered as they are in
            ``self.classes_``.
        """
        if self.drop_prot_attr:
            X = X.drop(self.prot_attr, axis=1)

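        # fairlearn exposes the randomized classifier's class probabilities
        # only through the private ``_pmf_predict`` method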
        return self.model_._pmf_predict(X)