# erasmopurif's picture
# First commit
# d2a8669
import os.path as osp
import pandas as pd
import numpy as np
try:
from rpy2 import robjects
from rpy2.robjects.vectors import StrVector
from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri
from rpy2.robjects.conversion import localconverter
except ImportError as error:
from logging import warning
warning("{}: FairAdapt will be unavailable. To install, run:\n"
"pip install 'aif360[FairAdapt]'".format(error))
from sklearn.base import BaseEstimator
from aif360.sklearn.utils import check_inputs, check_groups
class FairAdapt(BaseEstimator):
    """Fair Data Adaptation.

    Fairadapt is a pre-processing technique usable for both fair
    classification and fair regression [#plecko20]_. It is a causal inference
    approach to bias removal and relies on the causal graph of the dataset.
    The original implementation is in R [#plecko21]_; this class drives it
    through rpy2.

    References:
        .. [#plecko20] `D. Plečko and N. Meinshausen,
           "Fair Data Adaptation with Quantile Preservation,"
           Journal of Machine Learning Research, 2020.
           <https://www.jmlr.org/papers/volume21/19-966/19-966.pdf>`_
        .. [#plecko21] `D. Plečko and N. Bennett and N. Meinshausen,
           "FairAdapt: Causal Reasoning for Fair Data Pre-processing,"
           arXiv, 2021. <https://arxiv.org/abs/2110.10200>`_

    Attributes:
        prot_attr_ (str or list(str)): Protected attribute(s) used for fair
            data adaptation. Set by :meth:`fit_transform`.
        groups_ (array, shape (n_groups,)): A list of group labels known to
            the transformer. Set by :meth:`fit_transform`.
    """

    def __init__(self, prot_attr, adj_mat):
        """Store the parameters and make sure the R dependencies exist.

        Args:
            prot_attr (single label): Name of the protected attribute. Must
                be binary.
            adj_mat (array-like): A 2-dimensional array representing the
                adjacency matrix of the causal diagram of the data generating
                process. Row/column order must match `X_train`.
        """
        self.prot_attr = prot_attr
        self.adj_mat = adj_mat

        # NOTE: constructing the estimator has a side effect -- any of the
        # required R packages that are not yet installed get installed from
        # CRAN (mirror index 1) right here.
        r_packages = robjects.packages
        required = ('ranger', 'fairadapt')
        missing = [name for name in required
                   if not r_packages.isinstalled(name)]
        if missing:
            r_utils = r_packages.importr('utils')
            r_utils.chooseCRANmirror(ind=1)
            r_utils.install_packages(StrVector(missing))

    def fit_transform(self, X_train, y_train, X_test):
        """Remove bias from the given dataset by fair adaptation.

        Args:
            X_train (pandas.DataFrame): Training data frame (including the
                protected attribute).
            y_train (pandas.Series): Training labels. Its ``name`` is passed
                to R as the outcome column.
            X_test (pandas.DataFrame): Test data frame (including the
                protected attribute).

        Returns:
            tuple:
                Transformed inputs.

                * **X_fair_train** (pandas.DataFrame) -- Transformed training
                  data.
                * **y_fair_train** (array-like) -- Transformed training
                  labels.
                * **X_fair_test** (pandas.DataFrame) -- Transformed test data.
        """
        # Features and labels travel to R as one frame.
        train_frame = pd.concat([X_train, y_train], axis=1)

        group_labels, self.prot_attr_ = check_groups(
            X_train, self.prot_attr, ensure_binary=True)
        self.groups_ = np.unique(group_labels)

        # The R side lives in 'fairadapt.R' next to this module; sourcing it
        # makes the R function named 'wrapper' available.
        module_dir = osp.dirname(osp.abspath(__file__))
        robjects.r.source(osp.join(module_dir, 'fairadapt.R'))
        run_fairadapt = robjects.r['wrapper']

        pandas_converter = robjects.default_converter + pandas2ri.converter

        # pandas -> R, under a locally scoped converter.
        with localconverter(pandas_converter):
            r_train, r_test, r_adj_mat = [
                robjects.conversion.py2rpy(obj)
                for obj in (train_frame, X_test, self.adj_mat)
            ]

        # The R call itself deliberately happens outside the pandas
        # converter context; only the pieces of its result are converted.
        outcome = y_train.name
        res = run_fairadapt(
            train_data=r_train,
            test_data=r_test,
            adj_mat=r_adj_mat,
            prot_attr=self.prot_attr_,
            outcome=outcome,
        )

        # R -> pandas for the two returned tables.
        with localconverter(pandas_converter):
            X_fair_train, X_fair_test = [
                robjects.conversion.rpy2py(res.rx2(part))
                for part in ('train', 'test')
            ]

        # Relabel the returned training table as outcome column first, then
        # the original feature columns, and split the outcome back off.
        X_fair_train.columns = [outcome, *X_train.columns.tolist()]
        y_fair_train = X_fair_train.pop(outcome)
        X_fair_test.columns = X_test.columns

        return X_fair_train, y_fair_train, X_fair_test