Spaces:

erasmopurif
/

FairUP

Runtime error

App Files Files Community

FairUP / src /aif360 /algorithms /postprocessing /eq_odds_postprocessing.py

erasmopurif

First commit

d2a8669 about 2 years ago

raw

history blame contribute delete

11.8 kB

	# Original work Copyright (c) 2017 Geoff Pleiss
	#
	# Permission is hereby granted, free of charge, to any person obtaining a copy
	# of this software and associated documentation files (the "Software"), to deal
	# in the Software without restriction, including without limitation the rights
	# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	# copies of the Software, and to permit persons to whom the Software is
	# furnished to do so, subject to the following conditions:
	#
	# The above copyright notice and this permission notice shall be included in all
	# copies or substantial portions of the Software.
	#
	# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
	# SOFTWARE.
	#
	# Modified work Copyright 2018 IBM Corporation
	#
	# Licensed under the Apache License, Version 2.0 (the "License"); you may not
	# use this file except in compliance with the License. You may obtain a copy of
	# the License at http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software distributed
	# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
	# CONDITIONS OF ANY KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations under the License.
	import numpy as np
	from scipy.optimize import linprog

	from aif360.algorithms import Transformer
	from aif360.metrics import ClassificationMetric, utils


	class EqOddsPostprocessing(Transformer):
	    """Equalized odds postprocessing is a post-processing technique that solves
	    a linear program to find probabilities with which to change output labels to
	    optimize equalized odds [8]_ [9]_.

	    References:
	        .. [8] M. Hardt, E. Price, and N. Srebro, "Equality of Opportunity in
	           Supervised Learning," Conference on Neural Information Processing
	           Systems, 2016.
	        .. [9] G. Pleiss, M. Raghavan, F. Wu, J. Kleinberg, and
	           K. Q. Weinberger, "On Fairness and Calibration," Conference on Neural
	           Information Processing Systems, 2017.
	    """

	    def __init__(self, unprivileged_groups, privileged_groups, seed=None):
	        """
	        Args:
	            unprivileged_groups (list(dict)): Representation for unprivileged
	                group.
	            privileged_groups (list(dict)): Representation for privileged
	                group.
	            seed (int, optional): Seed to make `predict` repeatable.
	        """
	        super(EqOddsPostprocessing, self).__init__(
	            unprivileged_groups=unprivileged_groups,
	            privileged_groups=privileged_groups,
	            seed=seed)

	        self.seed = seed
	        # Result object returned by `linprog`; stays None until `fit` runs.
	        self.model_params = None
	        self.unprivileged_groups = unprivileged_groups
	        self.privileged_groups = privileged_groups

	    def fit(self, dataset_true, dataset_pred):
	        """Compute parameters for equalizing odds using true and predicted
	        labels.

	        The result of the linear program is stored in `self.model_params`.

	        Args:
	            dataset_true (BinaryLabelDataset): Dataset containing true labels.
	            dataset_pred (BinaryLabelDataset): Dataset containing predicted
	                labels.

	        Returns:
	            EqOddsPostprocessing: Returns self.
	        """
	        metric = ClassificationMetric(
	            dataset_true, dataset_pred,
	            unprivileged_groups=self.unprivileged_groups,
	            privileged_groups=self.privileged_groups)

	        # compute basic statistics
	        # NOTE: the "s"/"0" names refer to the privileged group and the
	        # "o"/"1" names to the unprivileged group.
	        sbr = metric.base_rate(privileged=True)
	        obr = metric.base_rate(privileged=False)

	        fpr0 = metric.false_positive_rate(privileged=True)
	        fpr1 = metric.false_positive_rate(privileged=False)
	        fnr0 = metric.false_negative_rate(privileged=True)
	        fnr1 = metric.false_negative_rate(privileged=False)
	        tpr0 = metric.true_positive_rate(privileged=True)
	        tpr1 = metric.true_positive_rate(privileged=False)
	        tnr0 = metric.true_negative_rate(privileged=True)
	        tnr1 = metric.true_negative_rate(privileged=False)

	        # linear program has 4 decision variables (in the order in which
	        # `predict` unpacks them):
	        # [Pr[label_tilde = 1 | label_hat = 1, privileged group];
	        #  Pr[label_tilde = 1 | label_hat = 0, privileged group];
	        #  Pr[label_tilde = 1 | label_hat = 1, unprivileged group];
	        #  Pr[label_tilde = 1 | label_hat = 0, unprivileged group]]
	        # Coefficients of the linear objective function to be minimized.
	        c = np.array([fpr0 - tpr0, tnr0 - fnr0, fpr1 - tpr1, tnr1 - fnr1])

	        # A_ub - 2-D array which, when matrix-multiplied by x, gives the values
	        # of the upper-bound inequality constraints at x
	        # b_ub - 1-D array of values representing the upper-bound of each
	        # inequality constraint (row) in A_ub.
	        # Just to keep these between zero and one
	        A_ub = np.array([[ 1,  0,  0,  0],
	                         [-1,  0,  0,  0],
	                         [ 0,  1,  0,  0],
	                         [ 0, -1,  0,  0],
	                         [ 0,  0,  1,  0],
	                         [ 0,  0, -1,  0],
	                         [ 0,  0,  0,  1],
	                         [ 0,  0,  0, -1]], dtype=np.float64)
	        b_ub = np.array([1, 0, 1, 0, 1, 0, 1, 0], dtype=np.float64)

	        # Create boolean conditioning vectors for protected groups
	        cond_vec_priv = utils.compute_boolean_conditioning_vector(
	            dataset_pred.protected_attributes,
	            dataset_pred.protected_attribute_names,
	            self.privileged_groups)
	        cond_vec_unpriv = utils.compute_boolean_conditioning_vector(
	            dataset_pred.protected_attributes,
	            dataset_pred.protected_attribute_names,
	            self.unprivileged_groups)

	        # Per-group masks over the *predicted* labels:
	        # "const" = predicted favorable, "flip" = predicted unfavorable.
	        pred_priv = dataset_pred.labels[cond_vec_priv]
	        pred_unpriv = dataset_pred.labels[cond_vec_unpriv]
	        sconst = np.ravel(pred_priv == dataset_pred.favorable_label)
	        sflip = np.ravel(pred_priv == dataset_pred.unfavorable_label)
	        oconst = np.ravel(pred_unpriv == dataset_pred.favorable_label)
	        oflip = np.ravel(pred_unpriv == dataset_pred.unfavorable_label)

	        # Per-group masks over the *true* labels.
	        y_true = dataset_true.labels.ravel()
	        s_true_pos = y_true[cond_vec_priv] == dataset_true.favorable_label
	        s_true_neg = y_true[cond_vec_priv] == dataset_true.unfavorable_label
	        o_true_pos = y_true[cond_vec_unpriv] == dataset_true.favorable_label
	        o_true_neg = y_true[cond_vec_unpriv] == dataset_true.unfavorable_label

	        # Confusion-matrix membership masks as 0/1 floats. The cast is done
	        # with `astype` rather than by passing `dtype=` to the ufunc, so the
	        # result does not depend on how `np.logical_and` resolves a
	        # non-boolean `dtype=` argument.
	        sm_tn = np.logical_and(sflip, s_true_neg).astype(np.float64)
	        sm_fn = np.logical_and(sflip, s_true_pos).astype(np.float64)
	        sm_fp = np.logical_and(sconst, s_true_neg).astype(np.float64)
	        sm_tp = np.logical_and(sconst, s_true_pos).astype(np.float64)

	        om_tn = np.logical_and(oflip, o_true_neg).astype(np.float64)
	        om_fn = np.logical_and(oflip, o_true_pos).astype(np.float64)
	        om_fp = np.logical_and(oconst, o_true_neg).astype(np.float64)
	        om_tp = np.logical_and(oconst, o_true_pos).astype(np.float64)

	        # A_eq - 2-D array which, when matrix-multiplied by x,
	        # gives the values of the equality constraints at x
	        # b_eq - 1-D array of values representing the RHS of each equality
	        # constraint (row) in A_eq.
	        # Used to impose equality of odds constraint
	        A_eq = [[(np.mean(sconst * sm_tp) - np.mean(sflip * sm_tp)) / sbr,
	                 (np.mean(sflip * sm_fn) - np.mean(sconst * sm_fn)) / sbr,
	                 (np.mean(oflip * om_tp) - np.mean(oconst * om_tp)) / obr,
	                 (np.mean(oconst * om_fn) - np.mean(oflip * om_fn)) / obr],
	                [(np.mean(sconst * sm_fp) - np.mean(sflip * sm_fp)) / (1 - sbr),
	                 (np.mean(sflip * sm_tn) - np.mean(sconst * sm_tn)) / (1 - sbr),
	                 (np.mean(oflip * om_fp) - np.mean(oconst * om_fp)) / (1 - obr),
	                 (np.mean(oconst * om_tn) - np.mean(oflip * om_tn)) / (1 - obr)]]

	        b_eq = [(np.mean(oflip * om_tp) + np.mean(oconst * om_fn)) / obr
	                - (np.mean(sflip * sm_tp) + np.mean(sconst * sm_fn)) / sbr,
	                (np.mean(oflip * om_fp) + np.mean(oconst * om_tn)) / (1 - obr)
	                - (np.mean(sflip * sm_fp) + np.mean(sconst * sm_tn)) / (1 - sbr)]

	        # Linear program
	        self.model_params = linprog(c, A_ub=A_ub, b_ub=b_ub,
	                                    A_eq=A_eq, b_eq=b_eq)

	        return self

	    @staticmethod
	    def _flip_labels(labels, favorable_label, unfavorable_label, p2p, n2p):
	        """Return a copy of one group's labels with a random subset flipped.

	        Args:
	            labels (numpy.ndarray): 2-D label array of a single group.
	            favorable_label: Value marking a favorable prediction.
	            unfavorable_label: Value marking an unfavorable prediction.
	            p2p (float): Fraction of favorable predictions to keep favorable.
	            n2p (float): Fraction of unfavorable predictions to turn
	                favorable.

	        Returns:
	            numpy.ndarray: New label array of the same shape as `labels`.
	        """
	        fair_pred = labels.copy()
	        pp_indices, _ = np.nonzero(labels == favorable_label)
	        pn_indices, _ = np.nonzero(labels == unfavorable_label)
	        # Shuffle so that the leading slice taken below is a random subset.
	        np.random.shuffle(pp_indices)
	        np.random.shuffle(pn_indices)

	        n2p_indices = pn_indices[:int(len(pn_indices) * n2p)]
	        fair_pred[n2p_indices] = favorable_label
	        p2n_indices = pp_indices[:int(len(pp_indices) * (1 - p2p))]
	        fair_pred[p2n_indices] = unfavorable_label
	        return fair_pred

	    def predict(self, dataset):
	        """Perturb the predicted labels to obtain new labels that satisfy
	        equalized odds constraints.

	        Uses the parameters stored in `self.model_params` by `fit`. When
	        `self.seed` is not None, NumPy's global random state is re-seeded
	        with it first.

	        Args:
	            dataset (BinaryLabelDataset): Dataset containing labels that needs
	                to be transformed.

	        Returns:
	            BinaryLabelDataset: Transformed dataset (a copy of `dataset` with
	            new labels).
	        """
	        if self.seed is not None:
	            np.random.seed(self.seed)

	        # Get the model parameters output from fit
	        sp2p, sn2p, op2p, on2p = self.model_params.x

	        # Create boolean conditioning vectors for protected groups
	        cond_vec_priv = utils.compute_boolean_conditioning_vector(
	            dataset.protected_attributes, dataset.protected_attribute_names,
	            self.privileged_groups)
	        cond_vec_unpriv = utils.compute_boolean_conditioning_vector(
	            dataset.protected_attributes, dataset.protected_attribute_names,
	            self.unprivileged_groups)

	        # Randomly flip labels according to the probabilities in model_params.
	        # The privileged group is processed first so that the sequence of
	        # random draws is the same on every seeded call.
	        self_fair_pred = self._flip_labels(
	            dataset.labels[cond_vec_priv],
	            dataset.favorable_label, dataset.unfavorable_label,
	            sp2p, sn2p)
	        othr_fair_pred = self._flip_labels(
	            dataset.labels[cond_vec_unpriv],
	            dataset.favorable_label, dataset.unfavorable_label,
	            op2p, on2p)

	        # Mutated, fairer dataset with new labels
	        dataset_new = dataset.copy()

	        # Rows matched by neither group keep the zero fill value.
	        new_labels = np.zeros_like(dataset.labels, dtype=np.float64)
	        new_labels[cond_vec_priv] = self_fair_pred
	        new_labels[cond_vec_unpriv] = othr_fair_pred

	        dataset_new.labels = new_labels

	        return dataset_new

	    def fit_predict(self, dataset_true, dataset_pred):
	        """Run `fit` on both datasets, then `predict` on `dataset_pred`.

	        Args:
	            dataset_true (BinaryLabelDataset): Dataset containing true labels.
	            dataset_pred (BinaryLabelDataset): Dataset containing predicted
	                labels.

	        Returns:
	            BinaryLabelDataset: Transformed copy of `dataset_pred`.
	        """
	        return self.fit(dataset_true, dataset_pred).predict(dataset_pred)