""" The code for ExponentiatedGradientReduction wraps the source class fairlearn.reductions.ExponentiatedGradient available in the https://github.com/fairlearn/fairlearn library licensed under the MIT Licencse, Copyright Microsoft Corporation """ try: import fairlearn.reductions as red except ImportError as error: from logging import warning warning("{}: ExponentiatedGradientReduction will be unavailable. To install, run:\n" "pip install 'aif360[Reductions]'".format(error)) from sklearn.base import BaseEstimator, ClassifierMixin, clone from sklearn.preprocessing import LabelEncoder class ExponentiatedGradientReduction(BaseEstimator, ClassifierMixin): """Exponentiated gradient reduction for fair classification. Exponentiated gradient reduction is an in-processing technique that reduces fair classification to a sequence of cost-sensitive classification problems, returning a randomized classifier with the lowest empirical error subject to fair classification constraints [#agarwal18]_. References: .. [#agarwal18] `A. Agarwal, A. Beygelzimer, M. Dudik, J. Langford, and H. Wallach, "A Reductions Approach to Fair Classification," International Conference on Machine Learning, 2018. `_ """ def __init__(self, prot_attr, estimator, constraints, eps=0.01, max_iter=50, nu=None, eta0=2.0, run_linprog_step=True, drop_prot_attr=True): """ Args: prot_attr: String or array-like column indices or column names of protected attributes. estimator: An estimator implementing methods ``fit(X, y, sample_weight)`` and ``predict(X)``, where ``X`` is the matrix of features, ``y`` is the vector of labels, and ``sample_weight`` is a vector of weights; labels ``y`` and predictions returned by ``predict(X)`` are either 0 or 1 -- e.g. scikit-learn classifiers. constraints (str or fairlearn.reductions.Moment): If string, keyword denoting the :class:`fairlearn.reductions.Moment` object defining the disparity constraints -- e.g., "DemographicParity" or "EqualizedOdds". For a full list of possible options see `self.model.moments`. Otherwise, provide the desired :class:`~fairlearn.reductions.Moment` object defining the disparity constraints. eps: Allowed fairness constraint violation; the solution is guaranteed to have the error within ``2*best_gap`` of the best error under constraint eps; the constraint violation is at most ``2*(eps+best_gap)``. max_iter: Maximum number of iterations. nu: Convergence threshold for the duality gap, corresponding to a conservative automatic setting based on the statistical uncertainty in measuring classification error. eta0: Initial setting of the learning rate. run_linprog_step: If True each step of exponentiated gradient is followed by the saddle point optimization over the convex hull of classifiers returned so far. drop_prot_attr: Boolean flag indicating whether to drop protected attributes from training data. """ self.prot_attr = prot_attr self.estimator = estimator self.constraints = constraints self.eps = eps self.max_iter = max_iter self.nu = nu self.eta0 = eta0 self.run_linprog_step = run_linprog_step self.drop_prot_attr = drop_prot_attr def fit(self, X, y): """Learns randomized model with less bias Args: X (pandas.DataFrame): Training samples. y (array-like): Training labels. 

        Returns:
            self
        """
        self.estimator_ = clone(self.estimator)

        # Map constraint keywords to their fairlearn Moment classes.
        moments = {
            "DemographicParity": red.DemographicParity,
            "EqualizedOdds": red.EqualizedOdds,
            "TruePositiveRateParity": red.TruePositiveRateParity,
            "FalsePositiveRateParity": red.FalsePositiveRateParity,
            "ErrorRateParity": red.ErrorRateParity,
        }
        if isinstance(self.constraints, str):
            if self.constraints not in moments:
                raise ValueError(
                        f"Constraint not recognized: {self.constraints}")
            self.moment_ = moments[self.constraints]()
        elif isinstance(self.constraints, red.Moment):
            self.moment_ = self.constraints
        else:
            raise ValueError("constraints must be a string or Moment object.")

        self.model_ = red.ExponentiatedGradient(
                self.estimator_, self.moment_, eps=self.eps,
                max_iter=self.max_iter, nu=self.nu, eta0=self.eta0,
                run_linprog_step=self.run_linprog_step)

        A = X[self.prot_attr]

        if self.drop_prot_attr:
            X = X.drop(self.prot_attr, axis=1)

        le = LabelEncoder()
        y = le.fit_transform(y)
        self.classes_ = le.classes_

        self.model_.fit(X, y, sensitive_features=A)

        return self

    def predict(self, X):
        """Predict class labels for the given samples.

        Args:
            X (pandas.DataFrame): Test samples.

        Returns:
            numpy.ndarray: Predicted class label per sample.
        """
        if self.drop_prot_attr:
            X = X.drop(self.prot_attr, axis=1)

        return self.classes_[self.model_.predict(X)]

    def predict_proba(self, X):
        """Probability estimates.

        The returned estimates for all classes are ordered by the label of
        classes.

        Args:
            X (pandas.DataFrame): Test samples.

        Returns:
            numpy.ndarray: Returns the probability of the sample for each
            class in the model, where classes are ordered as they are in
            ``self.classes_``.
        """
        if self.drop_prot_attr:
            X = X.drop(self.prot_attr, axis=1)

        return self.model_._pmf_predict(X)
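

# ---------------------------------------------------------------------------
# Minimal usage sketch, not part of the wrapper itself. It assumes fairlearn
# and scikit-learn are installed; the synthetic data, the "sex" column name,
# and the LogisticRegression base estimator are illustrative choices only.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import numpy as np
    import pandas as pd
    from sklearn.linear_model import LogisticRegression

    # Toy dataset: two numeric features plus a hypothetical binary protected
    # attribute column named "sex".
    rng = np.random.default_rng(0)
    n = 200
    X = pd.DataFrame({
        "feat0": rng.normal(size=n),
        "feat1": rng.normal(size=n),
        "sex": rng.integers(0, 2, size=n),
    })
    y = (X["feat0"] + 0.5 * X["sex"] + rng.normal(scale=0.5, size=n) > 0)

    # Fit the reduction under a demographic parity constraint; a
    # fairlearn.reductions.Moment instance (e.g. red.EqualizedOdds()) could
    # be passed as `constraints` instead of the keyword string.
    clf = ExponentiatedGradientReduction(
            prot_attr="sex",
            estimator=LogisticRegression(solver="liblinear"),
            constraints="DemographicParity")
    clf.fit(X, y.astype(int))
    print(clf.predict(X)[:10])
    print(clf.predict_proba(X)[:5])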