"""
The code for ExponentiatedGradientReduction wraps the source class
fairlearn.reductions.ExponentiatedGradient
available in the https://github.com/fairlearn/fairlearn library
licensed under the MIT Licencse, Copyright Microsoft Corporation
"""
try:
import fairlearn.reductions as red
except ImportError as error:
from logging import warning
warning("{}: ExponentiatedGradientReduction will be unavailable. To install, run:\n"
"pip install 'aif360[Reductions]'".format(error))
from sklearn.base import BaseEstimator, ClassifierMixin, clone
from sklearn.preprocessing import LabelEncoder
class ExponentiatedGradientReduction(BaseEstimator, ClassifierMixin):
"""Exponentiated gradient reduction for fair classification.
Exponentiated gradient reduction is an in-processing technique that reduces
fair classification to a sequence of cost-sensitive classification problems,
returning a randomized classifier with the lowest empirical error subject to
fair classification constraints [#agarwal18]_.
References:
.. [#agarwal18] `A. Agarwal, A. Beygelzimer, M. Dudik, J. Langford, and
H. Wallach, "A Reductions Approach to Fair Classification,"
International Conference on Machine Learning, 2018.
<https://arxiv.org/abs/1803.02453>`_
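    Example:
        A minimal usage sketch; the ``'sex'`` column, the train/test
        splits, and the scikit-learn ``LogisticRegression`` base estimator
        are illustrative assumptions, not fixed by this class::

            >>> from sklearn.linear_model import LogisticRegression
            >>> egr = ExponentiatedGradientReduction(
            ...     prot_attr='sex',
            ...     estimator=LogisticRegression(solver='liblinear'),
            ...     constraints='EqualizedOdds')
            >>> egr.fit(X_train, y_train)
            >>> y_pred = egr.predict(X_test)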
"""
def __init__(self,
prot_attr,
estimator,
constraints,
eps=0.01,
max_iter=50,
nu=None,
eta0=2.0,
run_linprog_step=True,
drop_prot_attr=True):
"""
Args:
prot_attr: String or array-like column indices or column names of
protected attributes.
estimator: An estimator implementing methods
``fit(X, y, sample_weight)`` and ``predict(X)``, where ``X`` is
the matrix of features, ``y`` is the vector of labels, and
``sample_weight`` is a vector of weights; labels ``y`` and
predictions returned by ``predict(X)`` are either 0 or 1 -- e.g.
scikit-learn classifiers.
            constraints (str or fairlearn.reductions.Moment): If string, keyword
                denoting the :class:`fairlearn.reductions.Moment` object
                defining the disparity constraints -- e.g., "DemographicParity"
                or "EqualizedOdds". Supported keywords are "DemographicParity",
                "EqualizedOdds", "TruePositiveRateParity",
                "FalsePositiveRateParity", and "ErrorRateParity". Otherwise,
                provide the desired :class:`~fairlearn.reductions.Moment`
                object defining the disparity constraints.
eps: Allowed fairness constraint violation; the solution is
guaranteed to have the error within ``2*best_gap`` of the best
error under constraint eps; the constraint violation is at most
``2*(eps+best_gap)``.
max_iter: Maximum number of iterations.
            nu: Convergence threshold for the duality gap. If None, a
                conservative automatic setting based on the statistical
                uncertainty in measuring classification error is used.
eta0: Initial setting of the learning rate.
            run_linprog_step: If True, each step of the exponentiated
                gradient is followed by the saddle point optimization over
                the convex hull of classifiers returned so far.
            drop_prot_attr: Boolean flag indicating whether to drop the
                protected attributes from the training data.
"""
self.prot_attr = prot_attr
self.estimator = estimator
self.constraints = constraints
self.eps = eps
self.max_iter = max_iter
self.nu = nu
self.eta0 = eta0
self.run_linprog_step = run_linprog_step
self.drop_prot_attr = drop_prot_attr
def fit(self, X, y):
"""Learns randomized model with less bias
Args:
X (pandas.DataFrame): Training samples.
y (array-like): Training labels.
Returns:
self
"""
self.estimator_ = clone(self.estimator)
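        # Map each supported string keyword to its fairlearn Moment class.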
moments = {
"DemographicParity": red.DemographicParity,
"EqualizedOdds": red.EqualizedOdds,
"TruePositiveRateParity": red.TruePositiveRateParity,
"FalsePositiveRateParity": red.FalsePositiveRateParity,
"ErrorRateParity": red.ErrorRateParity,
}
if isinstance(self.constraints, str):
if self.constraints not in moments:
raise ValueError(f"Constraint not recognized: {self.constraints}")
self.moment_ = moments[self.constraints]()
elif isinstance(self.constraints, red.Moment):
self.moment_ = self.constraints
else:
raise ValueError("constraints must be a string or Moment object.")
        self.model_ = red.ExponentiatedGradient(self.estimator_, self.moment_,
                eps=self.eps, max_iter=self.max_iter, nu=self.nu,
                eta0=self.eta0, run_linprog_step=self.run_linprog_step)
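        # Keep the protected attribute(s) aside for fairlearn's
        # sensitive_features argument.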
A = X[self.prot_attr]
if self.drop_prot_attr:
X = X.drop(self.prot_attr, axis=1)
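        # Encode labels as consecutive integers (0/1 in the binary case),
        # as required by the underlying fairlearn estimator.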
le = LabelEncoder()
y = le.fit_transform(y)
self.classes_ = le.classes_
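        # fairlearn receives the protected attribute(s) separately from X.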
self.model_.fit(X, y, sensitive_features=A)
return self
def predict(self, X):
"""Predict class labels for the given samples.
Args:
X (pandas.DataFrame): Test samples.
Returns:
numpy.ndarray: Predicted class label per sample.
"""
if self.drop_prot_attr:
X = X.drop(self.prot_attr, axis=1)
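        # model_.predict returns encoded labels; map them back to the
        # original classes.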
return self.classes_[self.model_.predict(X)]
def predict_proba(self, X):
"""Probability estimates.
The returned estimates for all classes are ordered by the label of
classes.
Args:
X (pandas.DataFrame): Test samples.
Returns:
numpy.ndarray: Returns the probability of the sample for each class
in the model, where classes are ordered as they are in
``self.classes_``.
"""
if self.drop_prot_attr:
X = X.drop(self.prot_attr, axis=1)
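        # Note: _pmf_predict is a private fairlearn method; it returns the
        # label distribution induced by the randomized classifier.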
return self.model_._pmf_predict(X)