import numpy as np
import scipy.optimize as optim
from aif360.algorithms import Transformer
from aif360.algorithms.preprocessing.lfr_helpers import helpers as lfr_helpers
class LFR(Transformer):
    """Learning fair representations is a pre-processing technique that finds a
    latent representation which encodes the data well but obfuscates information
    about protected attributes [2]_.
    References:
        .. [2] R. Zemel, Y. Wu, K. Swersky, T. Pitassi, and C. Dwork, "Learning
           Fair Representations." International Conference on Machine Learning,
           2013.
    Based on code from https://github.com/zjelveh/learning-fair-representations
    """
    def __init__(self,
                 unprivileged_groups,
                 privileged_groups,
                 k=5,
                 Ax=0.01,
                 Ay=1.0,
                 Az=50.0,
                 print_interval=250,
                 verbose=0,
                 seed=None):
        """
        Args:
            unprivileged_groups (tuple): Representation for unprivileged group.
            privileged_groups (tuple): Representation for privileged group.
            k (int, optional): Number of prototypes.
            Ax (float, optional): Input reconstruction quality term weight.
            Ay (float, optional): Output prediction error term weight.
            Az (float, optional): Fairness constraint term weight.
            print_interval (int, optional): Print optimization objective value
                every print_interval iterations.
            verbose (int, optional): If zero, then no output.
            seed (int, optional): Seed to make `predict` repeatable.
        Raises:
            ValueError: If more than one unprivileged or privileged group is
                supplied.
        """
        super().__init__(
            unprivileged_groups=unprivileged_groups,
            privileged_groups=privileged_groups)
        self.seed = seed
        self.unprivileged_groups = unprivileged_groups
        self.privileged_groups = privileged_groups
        if len(self.unprivileged_groups) > 1 or len(self.privileged_groups) > 1:
            raise ValueError("Only one unprivileged_group or privileged_group supported.")
        # Single protected attribute; remember the value identifying each group.
        self.protected_attribute_name = list(self.unprivileged_groups[0].keys())[0]
        self.unprivileged_group_protected_attribute_value = self.unprivileged_groups[0][self.protected_attribute_name]
        self.privileged_group_protected_attribute_value = self.privileged_groups[0][self.protected_attribute_name]
        self.k = k
        self.Ax = Ax
        self.Ay = Ay
        self.Az = Az
        self.print_interval = print_interval
        self.verbose = verbose
        # Learned in fit(): prototype weights, prototype coordinates, and the
        # flat parameter vector returned by the optimizer.
        self.w = None
        self.prototypes = None
        self.learned_model = None

    def _split_by_group(self, dataset):
        """Return (unprivileged_ids, privileged_ids) row indices of *dataset*.

        Rows are partitioned by comparing the configured protected attribute
        column against the stored unprivileged/privileged attribute values.
        """
        protected_attributes = np.reshape(
            dataset.protected_attributes[:, dataset.protected_attribute_names.index(self.protected_attribute_name)],
            [-1, 1])
        # np.where on the (n, 1) boolean mask returns (row_idx, col_idx);
        # only the row indices are needed.
        unprivileged_sample_ids = np.where(
            protected_attributes == self.unprivileged_group_protected_attribute_value)[0]
        privileged_sample_ids = np.where(
            protected_attributes == self.privileged_group_protected_attribute_value)[0]
        return unprivileged_sample_ids, privileged_sample_ids

    def fit(self, dataset, maxiter=5000, maxfun=5000):
        """Compute the transformation parameters that lead to fair representations.
        Args:
            dataset (BinaryLabelDataset): Dataset containing true labels.
            maxiter (int): Maximum number of iterations.
            maxfun (int): Maximum number of function evaluations.
        Returns:
            LFR: Returns self.
        """
        if self.seed is not None:
            np.random.seed(self.seed)

        _, self.features_dim = np.shape(dataset.features)
        unprivileged_sample_ids, privileged_sample_ids = self._split_by_group(dataset)
        features_unprivileged = dataset.features[unprivileged_sample_ids]
        features_privileged = dataset.features[privileged_sample_ids]
        labels_unprivileged = dataset.labels[unprivileged_sample_ids]
        labels_privileged = dataset.labels[privileged_sample_ids]

        # Parameter vector layout: k prototype weights followed by the
        # flattened (k, features_dim) prototype coordinates.
        parameters_initialization = np.random.uniform(size=self.k + self.features_dim * self.k)
        # Weights are constrained to [0, 1]; prototype coordinates are free.
        bnd = [(0, 1)] * self.k + [(None, None)] * self.features_dim * self.k

        # Reset the iteration counter used by the objective for progress prints.
        lfr_helpers.LFR_optim_objective.steps = 0
        self.learned_model = optim.fmin_l_bfgs_b(
            lfr_helpers.LFR_optim_objective, x0=parameters_initialization, epsilon=1e-5,
            args=(features_unprivileged, features_privileged,
                  labels_unprivileged[:, 0], labels_privileged[:, 0], self.k, self.Ax,
                  self.Ay, self.Az, self.print_interval, self.verbose),
            bounds=bnd, approx_grad=True, maxfun=maxfun,
            maxiter=maxiter, disp=self.verbose)[0]
        self.w = self.learned_model[:self.k]
        self.prototypes = self.learned_model[self.k:].reshape((self.k, self.features_dim))
        return self

    def transform(self, dataset, threshold=0.5):
        """Transform the dataset using learned model parameters.
        Args:
            dataset (BinaryLabelDataset): Dataset containing labels that needs to be transformed.
            threshold (float, optional): Threshold parameter used for binary label prediction.
        Returns:
            dataset (BinaryLabelDataset): Transformed Dataset.
        """
        if self.seed is not None:
            np.random.seed(self.seed)

        unprivileged_sample_ids, privileged_sample_ids = self._split_by_group(dataset)
        _, features_hat_unprivileged, labels_hat_unprivileged = lfr_helpers.get_xhat_y_hat(
            self.prototypes, self.w, dataset.features[unprivileged_sample_ids])
        _, features_hat_privileged, labels_hat_privileged = lfr_helpers.get_xhat_y_hat(
            self.prototypes, self.w, dataset.features[privileged_sample_ids])

        # Reassemble per-group outputs back into the original row order.
        # Rows matching neither group value remain zero.
        transformed_features = np.zeros(shape=np.shape(dataset.features))
        transformed_labels = np.zeros(shape=np.shape(dataset.labels))
        transformed_features[unprivileged_sample_ids] = features_hat_unprivileged
        transformed_features[privileged_sample_ids] = features_hat_privileged
        transformed_labels[unprivileged_sample_ids] = np.reshape(labels_hat_unprivileged, [-1, 1])
        transformed_labels[privileged_sample_ids] = np.reshape(labels_hat_privileged, [-1, 1])

        # Mutated, fairer dataset with new features, binarized labels, and
        # the raw label scores preserved for downstream metrics.
        dataset_new = dataset.copy(deepcopy=True)
        dataset_new.features = transformed_features
        dataset_new.labels = (transformed_labels > threshold).astype(np.float64)
        dataset_new.scores = transformed_labels
        return dataset_new

    def fit_transform(self, dataset, maxiter=5000, maxfun=5000, threshold=0.5):
        """Fit and transform methods sequentially.
        Args:
            dataset (BinaryLabelDataset): Dataset containing labels that needs to be transformed.
            maxiter (int): Maximum number of iterations.
            maxfun (int): Maximum number of function evaluations.
            threshold (float, optional): Threshold parameter used for binary label prediction.
        Returns:
            dataset (BinaryLabelDataset): Transformed Dataset.
        """
        return self.fit(dataset, maxiter=maxiter, maxfun=maxfun).transform(dataset, threshold=threshold)