Spaces:

erasmopurif
/

FairUP

Runtime error

App Files Files Community

FairUP / src /aif360 /algorithms /inprocessing /gerryfair /auditor.py

erasmopurif

First commit

d2a8669 about 2 years ago

raw

history blame contribute delete

11.2 kB

	# Copyright 2019 Seth V. Neel, Michael J. Kearns, Aaron L. Roth, Zhiwei Steven Wu
	#
	# Licensed under the Apache License, Version 2.0 (the "License"); you may not
	# use this file except in compliance with the License. You may obtain a copy of
	# the License at http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software distributed
	# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
	# CONDITIONS OF ANY KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations under the License.
	"""Class Auditor and Class Group implementing auditing for rich subgroup fairness in [KRNW18].

	This module contains functionality to Audit an arbitrary classifier with respect to rich subgroup fairness,
	where rich subgroup fairness is defined by hyperplanes over the sensitive attributes.

	Basic Usage:
	auditor = Auditor(data_set, 'FP')
	# returns mean(predictions | y = 0) if 'FP', or 1 - mean(predictions | y = 1) if 'FN'
	metric_baseline = auditor.get_baseline(y, predictions)
	group = auditor.get_group(dataset_yhat.labels, metric_baseline)
	"""

	import numpy as np
	import pandas as pd
	from sklearn import linear_model
	from aif360.algorithms.inprocessing.gerryfair.reg_oracle_class import RegOracle
	from aif360.algorithms.inprocessing.gerryfair import clean


	class Group(object):
	    """Record describing one subgroup found by the Auditor when it identifies a violation."""

	    def __init__(self, func, group_size, weighted_disparity, disparity,
	                 disparity_direction, group_rate):
	        """Store the description of an audited subgroup.

	        :param func: the linear function that defines the group
	        :param group_size: the proportion of the dataset in the group
	        :param weighted_disparity: group_size * (FP or FN disparity)
	        :param disparity: FP or FN disparity (absolute value)
	        :param disparity_direction: indicator of whether the rate in the group
	            is above the baseline; one of {1, -1}
	        :param group_rate: FP or FN rate in the group
	        """
	        super(Group, self).__init__()
	        # Identity of the group.
	        self.func = func
	        self.group_size = group_size
	        self.group_rate = group_rate
	        # How far, and in which direction, the group deviates from the baseline.
	        self.disparity = disparity
	        self.disparity_direction = disparity_direction
	        self.weighted_disparity = weighted_disparity

	    def return_f(self):
	        """Return the stored fields as a list, in constructor-argument order."""
	        field_order = (
	            'func',
	            'group_size',
	            'weighted_disparity',
	            'disparity',
	            'disparity_direction',
	            'group_rate',
	        )
	        return [getattr(self, field) for field in field_order]


	class Auditor:
	    """Auditor for rich subgroup fairness with respect to FP or FN rates.

	    It is used in the training algorithm to repeatedly find subgroups that
	    break the fairness disparity constraint. It can also be used independently
	    as a stand-alone auditor.
	    """

	    def __init__(self, dataset, fairness_def):
	        """Auditor constructor.

	        Args:
	            :param dataset: dataset object subclassing StandardDataset.
	            :param fairness_def: 'FP' or 'FN'

	        Raises:
	            Exception: if fairness_def is neither 'FP' nor 'FN'.
	        """
	        # Only the sensitive-attribute frame and the labels are used here.
	        _, X_prime, y = clean.extract_df_from_ds(dataset)
	        self.X_prime = X_prime
	        self.y_input = y
	        self.y_inverse = np.array(
	            [abs(1 - y_value) for y_value in self.y_input])
	        self.fairness_def = fairness_def
	        if self.fairness_def not in ['FP', 'FN']:
	            raise Exception(
	                "Invalid fairness metric specified: {}. "
	                "Please choose 'FP' or 'FN'.".format(self.fairness_def))
	        # Flip the labels for FN rate auditing so that the rest of the class
	        # can always work on the rows where self.y == 0.
	        self.y = self.y_inverse if self.fairness_def == 'FN' else self.y_input
	        self.X_prime_0 = pd.DataFrame(
	            [self.X_prime.iloc[u, :] for u, s in enumerate(self.y) if s == 0])

	    def initialize_costs(self, n):
	        """Initialize the costs for the CSC problem that corresponds to auditing.

	        See paper for details.

	        Args:
	            :param n: size of the dataset

	        Returns:
	            :return: (costs_0, costs_1, X_prime_0) where costs_0 / costs_1 are
	                tuples holding the cost of labeling each point 0 / 1, and
	                X_prime_0 is the frame of sensitive attributes restricted to
	                the rows with self.y == 0.
	        """
	        costs_0 = None
	        costs_1 = None
	        if self.fairness_def == 'FP':
	            costs_0 = [0.0] * n
	            costs_1 = [-1.0 / n * (2 * i - 1) for i in self.y_input]
	        elif self.fairness_def == 'FN':
	            costs_1 = [0.0] * n
	            costs_0 = [1.0 / n * (2 * i - 1) for i in self.y_input]
	        return tuple(costs_0), tuple(costs_1), self.X_prime_0

	    def get_baseline(self, y, predictions):
	        """Return the baseline FP or FN rate of the classifier predictions.

	        Args:
	            :param y: true labels (binary)
	            :param predictions: predictions of classifier (soft predictions)

	        Returns:
	            :return: mean(predictions | y = 0) for 'FP', or
	                mean(1 - predictions | y = 1) for 'FN'.
	        """
	        if self.fairness_def == 'FP':
	            return np.mean([predictions[i] for i, c in enumerate(y) if c == 0])
	        elif self.fairness_def == 'FN':
	            return np.mean([(1 - predictions[i]) for i, c in enumerate(y)
	                            if c == 1])

	    def update_costs(self, c_0, c_1, group, C, iteration, gamma):
	        """Update the learner's CSC costs using the last group found by the auditor.

	        Args:
	            :param c_0: current costs for predicting 0
	            :param c_1: current costs for predicting 1
	            :param group: last group found by the auditor, object of class Group.
	            :param C: see Model class for details.
	            :param iteration: current iteration
	            :param gamma: target disparity

	        Returns:
	            :return c_0, c_1: tuples of new costs for CSC problem of learner
	        """
	        # make costs mutable type
	        c_0 = list(c_0)
	        c_1 = list(c_1)

	        pos_neg = group.disparity_direction
	        n = len(self.y)

	        # Group membership is only evaluated on the rows with self.y == 0.
	        g_members = group.func.predict(self.X_prime_0)
	        m = self.X_prime_0.shape[0]
	        g_weight = np.sum(g_members) * (1.0 / float(m))

	        # No penalty is added while the group's weighted disparity is below gamma.
	        below_gamma = np.abs(group.weighted_disparity) < gamma

	        # Position of the current row inside X_prime_0 / g_members. It must
	        # persist across iterations: g_members has one entry per y == 0 row,
	        # so it advances only when such a row is visited.
	        X_prime_0_index = 0
	        for i in range(n):
	            if self.y[i] == 0:
	                if below_gamma:
	                    new_group_cost = 0
	                else:
	                    new_group_cost = (1.0 / n) * pos_neg * C * (
	                        1.0 / iteration) * (
	                            g_weight - g_members[X_prime_0_index])

	                if self.fairness_def == 'FP':
	                    c_1[i] = (c_1[i] - 1.0 / n) * (
	                        (iteration - 1.0) /
	                        iteration) + new_group_cost + 1.0 / n
	                elif self.fairness_def == 'FN':
	                    c_0[i] = (c_0[i] - 1.0 / n) * (
	                        (iteration - 1.0) /
	                        iteration) + new_group_cost + 1.0 / n

	                X_prime_0_index += 1
	            else:
	                if self.fairness_def == 'FP':
	                    c_1[i] = -1.0 / n
	                elif self.fairness_def == 'FN':
	                    c_0[i] = -1.0 / n
	        return tuple(c_0), tuple(c_1)

	    def get_subset(self, predictions):
	        """Return the rows with self.y == 0 together with their (possibly flipped) predictions.

	        self.y already holds flipped labels when fairness_def is 'FN', so the
	        same `self.y == 0` filter is used for both definitions.

	        Args:
	            :param predictions: soft predictions of the classifier

	        Returns:
	            :return: X_prime_0: subset of features with self.y == 0
	            :return: labels: the predictions on that subset if 'FP',
	                else 1 - predictions.
	        """
	        if self.fairness_def == 'FP':
	            return self.X_prime_0, [
	                a for u, a in enumerate(predictions) if self.y[u] == 0
	            ]
	        # handles FN rate by flipping labels
	        elif self.fairness_def == 'FN':
	            return self.X_prime_0, [(1 - a) for u, a in enumerate(predictions)
	                                    if self.y[u] == 0]

	    def _find_group(self, X_subset, predictions_subset, cost_1,
	                    metric_baseline, n):
	        """Fit one candidate group from the given costs and measure its disparity.

	        Args:
	            :param X_subset: features of the audited subset
	            :param predictions_subset: predictions on the audited subset
	            :param cost_1: regression target for the cost of labeling a point 1
	            :param metric_baseline: see function get_baseline
	            :param n: size of the full dataset

	        Returns:
	            :return: (func, group_size, weighted_disparity, disparity, group_rate)
	        """
	        cost_0 = [0.0] * len(predictions_subset)

	        reg0 = linear_model.LinearRegression()
	        reg0.fit(X_subset, cost_0)
	        reg1 = linear_model.LinearRegression()
	        reg1.fit(X_subset, cost_1)
	        func = RegOracle(reg0, reg1)
	        # NOTE(review): assumes RegOracle.predict returns one 0/1 indicator per
	        # row of X_subset -- confirm against reg_oracle_class.
	        members = func.predict(X_subset)

	        # Rate of the metric inside the group; an empty group is given rate 0.
	        if sum(members) == 0:
	            group_rate = 0
	        else:
	            group_rate = np.mean([
	                r for r, member in zip(predictions_subset, members)
	                if member == 1
	            ])
	        group_size = np.sum(members) * 1.0 / n
	        disparity = np.abs(group_rate - metric_baseline)
	        return func, group_size, disparity * group_size, disparity, group_rate

	    def get_group(self, predictions, metric_baseline):
	        """Audit the predictions and return the subgroup with the largest weighted disparity.

	        Two candidate groups are fitted, one for each sign of
	        (metric_baseline - predictions), and the one with the larger weighted
	        disparity is returned.

	        Args:
	            :param predictions: soft predictions of the classifier
	            :param metric_baseline: see function get_baseline

	        Returns:
	            :return: a Group object holding
	                func: object of type RegOracle defining the group,
	                group_size: the size of the group divided by n,
	                weighted_disparity: disparity * group_size,
	                disparity: |group_rate - baseline|,
	                disparity_direction: 1 for the first candidate, -1 for the second,
	                group_rate: the rate of the metric inside the returned group.
	        """
	        X_subset, predictions_subset = self.get_subset(predictions)
	        # get_subset returns a list; convert so the arithmetic below does not
	        # depend on metric_baseline being a NumPy scalar.
	        predictions_subset = np.asarray(predictions_subset)

	        n = float(len(self.y))

	        cost_1 = -1.0 / n * (metric_baseline - predictions_subset)
	        func, g_size_0, fp_disp_w, fp_disp, fp_group_rate = self._find_group(
	            X_subset, predictions_subset, cost_1, metric_baseline, n)

	        cost_1_neg = -1.0 / n * (predictions_subset - metric_baseline)
	        (func_neg, g_size_0_neg, fp_disp_w_neg, fp_disp_neg,
	         fp_group_rate_neg) = self._find_group(
	             X_subset, predictions_subset, cost_1_neg, metric_baseline, n)

	        # Return the candidate with the larger weighted disparity, reporting
	        # the rate measured inside that same candidate.
	        if fp_disp_w_neg > fp_disp_w:
	            return Group(func_neg, g_size_0_neg, fp_disp_w_neg, fp_disp_neg,
	                         -1, fp_group_rate_neg)
	        else:
	            return Group(func, g_size_0, fp_disp_w, fp_disp, 1,
	                         fp_group_rate)

	    def audit(self, predictions):
	        """Audit predictions on the dataset (X', y).

	        Args:
	            :param predictions: predictions of the classifier; a DataFrame is
	                converted to its underlying values first.

	        Returns:
	            :return: a membership vector over X_prime representing the group
	                that violates the fairness metric, along with the weighted
	                (gamma) disparity of that group.
	        """
	        if isinstance(predictions, pd.DataFrame):
	            predictions = predictions.values

	        metric_baseline = self.get_baseline(self.y_input, predictions)
	        group = self.get_group(predictions, metric_baseline)

	        return group.func.predict(self.X_prime), group.weighted_disparity