Spaces:
Runtime error
Runtime error
File size: 6,597 Bytes
d2a8669 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
"""
The code for GridSearchReduction wraps the source class
fairlearn.reductions.GridSearch
available in the https://github.com/fairlearn/fairlearn library
licensed under the MIT Licencse, Copyright Microsoft Corporation
"""
from logging import warning
try:
import fairlearn.reductions as red
except ImportError as error:
warning("{}: GridSearchReduction will be unavailable. To install, run:\n"
"pip install 'aif360[Reductions]'".format(error))
import pandas as pd
from aif360.algorithms import Transformer
from aif360.sklearn.inprocessing import GridSearchReduction as skGridSearchRed
class GridSearchReduction(Transformer):
"""Grid search reduction for fair classification or regression.
Grid search is an in-processing technique that can be used for fair
classification or fair regression. For classification it reduces fair
classification to a sequence of cost-sensitive classification problems,
returning the deterministic classifier with the lowest empirical error
subject to fair classification constraints [#agarwal18]_ among the
candidates searched. For regression it uses the same priniciple to return a
deterministic regressor with the lowest empirical error subject to the
constraint of bounded group loss [#agarwal19]_.
References:
.. [#agarwal18] `A. Agarwal, A. Beygelzimer, M. Dudik, J. Langford, and
H. Wallach, "A Reductions Approach to Fair Classification,"
International Conference on Machine Learning, 2018.
<https://arxiv.org/abs/1803.02453>`_
.. [#agarwal19] `A. Agarwal, M. Dudik, and Z. Wu, "Fair Regression:
Quantitative Definitions and Reduction-based Algorithms,"
International Conference on Machine Learning, 2019.
<https://arxiv.org/abs/1905.12843>`_
"""
def __init__(self,
estimator,
constraints,
prot_attr=None,
constraint_weight=0.5,
grid_size=10,
grid_limit=2.0,
grid=None,
drop_prot_attr=True,
loss="ZeroOne",
min_val=None,
max_val=None):
"""
Args:
estimator: An estimator implementing methods ``fit(X, y,
sample_weight)`` and ``predict(X)``, where ``X`` is the matrix
of features, ``y`` is the vector of labels, and
``sample_weight`` is a vector of weights; labels ``y`` and
predictions returned by ``predict(X)`` are either 0 or 1 -- e.g.
scikit-learn classifiers/regressors.
constraints (str or fairlearn.reductions.Moment): If string, keyword
denoting the :class:`fairlearn.reductions.Moment` object
defining the disparity constraints -- e.g., "DemographicParity"
or "EqualizedOdds". For a full list of possible options see
`self.model.moments`. Otherwise, provide the desired
:class:`~fairlearn.reductions.Moment` object defining the
disparity constraints.
prot_attr: String or array-like column indices or column names
of protected attributes.
constraint_weight: When the ``selection_rule`` is
"tradeoff_optimization" (default, no other option currently)
this float specifies the relative weight put on the constraint
violation when selecting the best model. The weight placed on
the error rate will be ``1-constraint_weight``.
grid_size (int): The number of Lagrange multipliers to generate in
the grid.
grid_limit (float): The largest Lagrange multiplier to generate. The
grid will contain values distributed between ``-grid_limit`` and
``grid_limit`` by default.
grid (pandas.DataFrame): Instead of supplying a size and limit for
the grid, users may specify the exact set of Lagrange
multipliers they desire using this argument in a DataFrame.
drop_prot_attr (bool): Flag indicating whether to drop protected
attributes from training data.
loss (str): String identifying loss function for constraints.
Options include "ZeroOne", "Square", and "Absolute."
min_val: Loss function parameter for "Square" and "Absolute,"
typically the minimum of the range of y values.
max_val: Loss function parameter for "Square" and "Absolute,"
typically the maximum of the range of y values.
"""
super(GridSearchReduction, self).__init__()
#init model, set prot_attr during fit
if prot_attr is None:
prot_attr = []
self.model = skGridSearchRed(prot_attr, estimator, constraints,
constraint_weight, grid_size, grid_limit, grid, drop_prot_attr,
loss, min_val, max_val)
def fit(self, dataset):
"""Learns model with less bias
Args:
dataset : Dataset containing true output.
Returns:
GridSearchReduction: Returns self.
"""
#set prot_attr
if len(self.model.prot_attr) == 0:
self.model.prot_attr = dataset.protected_attribute_names
X_df = pd.DataFrame(dataset.features, columns=dataset.feature_names)
Y = dataset.labels
self.model.fit(X_df, Y)
return self
def predict(self, dataset):
"""Obtain the predictions for the provided dataset using the model
learned.
Args:
dataset: Dataset containing output values that need to be
transformed.
Returns:
dataset: Transformed dataset.
"""
X_df = pd.DataFrame(dataset.features, columns=dataset.feature_names)
dataset_new = dataset.copy()
dataset_new.labels = self.model.predict(X_df).reshape(-1, 1)
if isinstance(self.model.moment, red.ClassificationMoment):
fav = int(dataset.favorable_label)
try:
# Probability of favorable label
scores = self.model.predict_proba(X_df)[:, fav]
dataset_new.scores = scores.reshape(-1, 1)
except (AttributeError, NotImplementedError):
warning("dataset.scores not updated, underlying model does not "
"support predict_proba")
return dataset_new
|