# Original work Copyright (c) 2017 Geoff Pleiss
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# Modified work Copyright 2018 IBM Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
import numpy as np

from aif360.algorithms import Transformer
from aif360.metrics import ClassificationMetric, utils

class CalibratedEqOddsPostprocessing(Transformer):
    """Calibrated equalized odds postprocessing is a post-processing technique
    that optimizes over calibrated classifier score outputs to find
    probabilities with which to change output labels with an equalized odds
    objective [7]_.

    References:
        .. [7] G. Pleiss, M. Raghavan, F. Wu, J. Kleinberg, and
           K. Q. Weinberger, "On Fairness and Calibration," Conference on
           Neural Information Processing Systems, 2017.

    Adapted from:
    https://github.com/gpleiss/equalized_odds_and_calibration/blob/master/calib_eq_odds.py
    """
    def __init__(self, unprivileged_groups, privileged_groups,
                 cost_constraint='weighted', seed=None):
        """
        Args:
            unprivileged_groups (dict or list(dict)): Representation for
                unprivileged group.
            privileged_groups (dict or list(dict)): Representation for
                privileged group.
            cost_constraint (str): Which generalized error rates to equalize:
                'fpr', 'fnr', or 'weighted'.
            seed (int, optional): Seed to make `predict` repeatable.
        """
        super(CalibratedEqOddsPostprocessing, self).__init__(
            unprivileged_groups=unprivileged_groups,
            privileged_groups=privileged_groups,
            seed=seed)

        self.seed = seed
        self.model_params = None
        self.unprivileged_groups = [unprivileged_groups] \
            if isinstance(unprivileged_groups, dict) else unprivileged_groups
        self.privileged_groups = [privileged_groups] \
            if isinstance(privileged_groups, dict) else privileged_groups
        self.cost_constraint = cost_constraint
        # Translate the cost constraint into weights on the generalized false
        # negative and false positive rates used by `fit`.
        if self.cost_constraint == 'fnr':
            self.fn_rate = 1
            self.fp_rate = 0
        elif self.cost_constraint == 'fpr':
            self.fn_rate = 0
            self.fp_rate = 1
        elif self.cost_constraint == 'weighted':
            self.fn_rate = 1
            self.fp_rate = 1

        self.base_rate_priv = 0.0
        self.base_rate_unpriv = 0.0

    def fit(self, dataset_true, dataset_pred):
        """Compute parameters for equalizing generalized odds using true and
        predicted scores, while preserving calibration.

        Args:
            dataset_true (BinaryLabelDataset): Dataset containing true `labels`.
            dataset_pred (BinaryLabelDataset): Dataset containing predicted
                `scores`.

        Returns:
            CalibratedEqOddsPostprocessing: Returns self.
        """
        # Create boolean conditioning vectors for protected groups
        cond_vec_priv = utils.compute_boolean_conditioning_vector(
            dataset_pred.protected_attributes,
            dataset_pred.protected_attribute_names,
            self.privileged_groups)
        cond_vec_unpriv = utils.compute_boolean_conditioning_vector(
            dataset_pred.protected_attributes,
            dataset_pred.protected_attribute_names,
            self.unprivileged_groups)

        cm = ClassificationMetric(dataset_true, dataset_pred,
                                  unprivileged_groups=self.unprivileged_groups,
                                  privileged_groups=self.privileged_groups)
        self.base_rate_priv = cm.base_rate(privileged=True)
        self.base_rate_unpriv = cm.base_rate(privileged=False)
        # Create a dataset with "trivial" predictions: every score in a group
        # is replaced by that group's base rate.
        dataset_trivial = dataset_pred.copy(deepcopy=True)
        dataset_trivial.scores[cond_vec_priv] = cm.base_rate(privileged=True)
        dataset_trivial.scores[cond_vec_unpriv] = cm.base_rate(privileged=False)
        cm_triv = ClassificationMetric(dataset_true, dataset_trivial,
                                       unprivileged_groups=self.unprivileged_groups,
                                       privileged_groups=self.privileged_groups)
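
        # Choose which group costs to equalize, per the cost constraint:
        # generalized FPR only, generalized FNR only, or a base-rate-weighted
        # combination of both (see `weighted_cost` below).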
        if self.fn_rate == 0:
            priv_cost = cm.generalized_false_positive_rate(privileged=True)
            unpriv_cost = cm.generalized_false_positive_rate(privileged=False)
            priv_trivial_cost = cm_triv.generalized_false_positive_rate(privileged=True)
            unpriv_trivial_cost = cm_triv.generalized_false_positive_rate(privileged=False)
        elif self.fp_rate == 0:
            priv_cost = cm.generalized_false_negative_rate(privileged=True)
            unpriv_cost = cm.generalized_false_negative_rate(privileged=False)
            priv_trivial_cost = cm_triv.generalized_false_negative_rate(privileged=True)
            unpriv_trivial_cost = cm_triv.generalized_false_negative_rate(privileged=False)
        else:
            priv_cost = weighted_cost(self.fp_rate, self.fn_rate, cm, privileged=True)
            unpriv_cost = weighted_cost(self.fp_rate, self.fn_rate, cm, privileged=False)
            priv_trivial_cost = weighted_cost(self.fp_rate, self.fn_rate, cm_triv, privileged=True)
            unpriv_trivial_cost = weighted_cost(self.fp_rate, self.fn_rate, cm_triv, privileged=False)
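
        # The group with the lower cost is mixed toward its trivial (base-rate)
        # predictor just far enough for its expected cost to match the other
        # group's; the other group's mix rate stays at 0.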
        unpriv_costs_more = unpriv_cost > priv_cost
        self.priv_mix_rate = (unpriv_cost - priv_cost) / (priv_trivial_cost - priv_cost) if unpriv_costs_more else 0
        self.unpriv_mix_rate = 0 if unpriv_costs_more else (priv_cost - unpriv_cost) / (unpriv_trivial_cost - unpriv_cost)

        return self

    def predict(self, dataset, threshold=0.5):
        """Perturb the predicted scores to obtain new labels that satisfy
        equalized odds constraints, while preserving calibration.

        Args:
            dataset (BinaryLabelDataset): Dataset containing `scores` that needs
                to be transformed.
            threshold (float): Threshold for converting `scores` to `labels`.
                Values greater than or equal to this threshold are predicted to
                be the `favorable_label`. Default is 0.5.

        Returns:
            dataset (BinaryLabelDataset): transformed dataset.
        """
        if self.seed is not None:
            np.random.seed(self.seed)

        cond_vec_priv = utils.compute_boolean_conditioning_vector(
            dataset.protected_attributes,
            dataset.protected_attribute_names,
            self.privileged_groups)
        cond_vec_unpriv = utils.compute_boolean_conditioning_vector(
            dataset.protected_attributes,
            dataset.protected_attribute_names,
            self.unprivileged_groups)
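
        # Independently for each group, replace a random fraction of scores
        # (given by the fitted mix rate) with that group's base rate; labels
        # are then re-derived from the perturbed scores below.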
        unpriv_indices = (np.random.random(sum(cond_vec_unpriv))
                          <= self.unpriv_mix_rate)
        unpriv_new_pred = dataset.scores[cond_vec_unpriv].copy()
        unpriv_new_pred[unpriv_indices] = self.base_rate_unpriv

        priv_indices = (np.random.random(sum(cond_vec_priv))
                        <= self.priv_mix_rate)
        priv_new_pred = dataset.scores[cond_vec_priv].copy()
        priv_new_pred[priv_indices] = self.base_rate_priv

        dataset_new = dataset.copy(deepcopy=True)
        dataset_new.scores = np.zeros_like(dataset.scores, dtype=np.float64)
        dataset_new.scores[cond_vec_priv] = priv_new_pred
        dataset_new.scores[cond_vec_unpriv] = unpriv_new_pred

        # Create labels from scores using the given threshold
        dataset_new.labels = np.where(dataset_new.scores >= threshold,
                                      dataset_new.favorable_label,
                                      dataset_new.unfavorable_label)
        return dataset_new

    def fit_predict(self, dataset_true, dataset_pred, threshold=0.5):
        """Run :meth:`fit` and :meth:`predict` sequentially."""
        return self.fit(dataset_true, dataset_pred).predict(
            dataset_pred, threshold=threshold)


######### SUPPORTING FUNCTIONS ##########
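
# `weighted_cost` collapses a group's generalized false positive and false
# negative rates into one scalar:
#     cost = (fp_rate / norm) * GFPR * (1 - base_rate)
#          + (fn_rate / norm) * GFNR * base_rate
# where norm = fp_rate + fn_rate when both weights are nonzero and 1 otherwise,
# so a pure-FPR or pure-FNR constraint is not scaled down.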
def weighted_cost(fp_rate, fn_rate, cm, privileged):
    norm_const = float(fp_rate + fn_rate) if \
        (fp_rate != 0 and fn_rate != 0) else 1
    return ((fp_rate / norm_const
             * cm.generalized_false_positive_rate(privileged=privileged)
             * (1 - cm.base_rate(privileged=privileged))) +
            (fn_rate / norm_const
             * cm.generalized_false_negative_rate(privileged=privileged)
             * cm.base_rate(privileged=privileged)))
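

if __name__ == '__main__':
    # ------------------------------------------------------------------
    # Minimal usage sketch (illustrative only, not part of the original
    # module). It builds a tiny synthetic BinaryLabelDataset, treats a noisy
    # copy of the labels as calibrated classifier scores, and applies the
    # post-processor. The column names, group encoding, and noise model are
    # arbitrary assumptions made for this example; the dataset construction
    # follows the standard aif360.datasets.BinaryLabelDataset API.
    # ------------------------------------------------------------------
    import pandas as pd

    from aif360.datasets import BinaryLabelDataset

    rng = np.random.RandomState(0)
    n = 1000
    sex = rng.randint(0, 2, n)  # assumed encoding: 1 = privileged, 0 = unprivileged
    label = (rng.random_sample(n) < 0.3 + 0.2 * sex).astype(float)
    df = pd.DataFrame({'sex': sex, 'label': label})

    dataset_true = BinaryLabelDataset(
        favorable_label=1.0, unfavorable_label=0.0, df=df,
        label_names=['label'], protected_attribute_names=['sex'])

    # Pretend these are calibrated scores produced by some classifier.
    dataset_pred = dataset_true.copy(deepcopy=True)
    dataset_pred.scores = np.clip(
        dataset_true.labels + rng.normal(0, 0.3, size=dataset_true.labels.shape),
        0.0, 1.0)

    cpp = CalibratedEqOddsPostprocessing(
        unprivileged_groups=[{'sex': 0}],
        privileged_groups=[{'sex': 1}],
        cost_constraint='fnr',
        seed=42)
    dataset_transf = cpp.fit_predict(dataset_true, dataset_pred, threshold=0.5)
    print('favorable rate after post-processing:', dataset_transf.labels.mean())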