import os.path as osp

import pandas as pd
import numpy as np
try:
    from rpy2 import robjects
    from rpy2.robjects.vectors import StrVector
    from rpy2.robjects.packages import importr
    from rpy2.robjects import pandas2ri
    from rpy2.robjects.conversion import localconverter
except ImportError as error:
    from logging import warning
    warning("{}: FairAdapt will be unavailable. To install, run:\n"
            "pip install 'aif360[FairAdapt]'".format(error))
    # Sentinel: lets FairAdapt.__init__ raise a clear ImportError instead of
    # failing later with "NameError: name 'robjects' is not defined".
    robjects = None
from sklearn.base import BaseEstimator

from aif360.sklearn.utils import check_inputs, check_groups


class FairAdapt(BaseEstimator):
    """Fair Data Adaptation.

    Fairadapt is a pre-processing technique that can be used for both fair
    classification and fair regression [#plecko20]_. The method is a causal
    inference approach to bias removal and it relies on the causal graph for
    the dataset. The original implementation is in R [#plecko21]_.

    References:
        .. [#plecko20] D. Plečko and N. Meinshausen,
           "Fair Data Adaptation with Quantile Preservation,"
           Journal of Machine Learning Research, 2020.
        .. [#plecko21] D. Plečko and N. Bennett and N. Meinshausen,
           "FairAdapt: Causal Reasoning for Fair Data Pre-processing,"
           arXiv, 2021.

    Attributes:
        prot_attr_ (str or list(str)): Protected attribute(s) used for fair
            data adaptation.
        groups_ (array, shape (n_groups,)): A list of group labels known to
            the transformer.
    """

    def __init__(self, prot_attr, adj_mat):
        """
        Note:
            Besides storing the parameters, construction checks whether the R
            packages ``ranger`` and ``fairadapt`` are installed and installs
            any that are missing from CRAN (mirror index 1).

        Args:
            prot_attr (single label): Name of the protected attribute. Must be
                binary.
            adj_mat (array-like): A 2-dimensional array representing the
                adjacency matrix of the causal diagram of the data generating
                process. Row/column order must match `X_train`.

        Raises:
            ImportError: If rpy2 could not be imported when this module was
                loaded.
        """
        if robjects is None:
            raise ImportError(
                "FairAdapt requires rpy2, which could not be imported. "
                "To install, run:\npip install 'aif360[FairAdapt]'")

        self.prot_attr = prot_attr
        self.adj_mat = adj_mat

        # R packages needed to run FairAdapt
        required = ('ranger', 'fairadapt')

        # selectively install only the packages that are missing
        missing = [pkg for pkg in required
                   if not robjects.packages.isinstalled(pkg)]
        if missing:
            r_utils = importr('utils')
            r_utils.chooseCRANmirror(ind=1)
            r_utils.install_packages(StrVector(missing))

    def fit_transform(self, X_train, y_train, X_test):
        """Remove bias from the given dataset by fair adaptation.

        Sets the fitted attributes ``prot_attr_`` and ``groups_`` as a side
        effect, then delegates the adaptation to the ``wrapper`` function
        defined in ``fairadapt.R`` (located next to this module).

        Args:
            X_train (pandas.DataFrame): Training data frame (including the
                protected attribute).
            y_train (pandas.Series): Training labels. Its ``name`` is passed
                to R as the outcome column name.
            X_test (pandas.DataFrame): Test data frame (including the
                protected attribute).

        Returns:
            tuple:
                Transformed inputs.

                * **X_fair_train** (pandas.DataFrame) -- Transformed training
                  data.
                * **y_fair_train** (array-like) -- Transformed training labels.
                * **X_fair_test** (pandas.DataFrame) -- Transformed test data.
        """
        # merge X_train and y_train into one frame for the R wrapper
        df_train = pd.concat([X_train, y_train], axis=1)

        # ensure_binary=True: the protected attribute must have two groups
        groups, self.prot_attr_ = check_groups(X_train, self.prot_attr,
                                               ensure_binary=True)
        self.groups_ = np.unique(groups)

        # load the R-side entry point shipped alongside this file
        wrapper_path = osp.join(osp.dirname(osp.abspath(__file__)),
                                'fairadapt.R')
        robjects.r.source(wrapper_path)
        FairAdapt_R = robjects.r['wrapper']

        pandas_converter = robjects.default_converter + pandas2ri.converter

        # convert the pandas/NumPy inputs to R objects with a local converter
        with localconverter(pandas_converter):
            train_data = robjects.conversion.py2rpy(df_train)
            test_data = robjects.conversion.py2rpy(X_test)
            adj_mat = robjects.conversion.py2rpy(self.adj_mat)

        # run FairAdapt in R
        res = FairAdapt_R(
            train_data=train_data,
            test_data=test_data,
            adj_mat=adj_mat,
            prot_attr=self.prot_attr_,
            outcome=y_train.name
        )

        # convert the adapted R data frames back to pandas
        with localconverter(pandas_converter):
            X_fair_train = robjects.conversion.rpy2py(res.rx2('train'))
            X_fair_test = robjects.conversion.rpy2py(res.rx2('test'))

        # NOTE(review): assumes the R wrapper returns the outcome as the
        # first column of 'train' -- confirm against fairadapt.R
        X_fair_train.columns = [y_train.name] + X_train.columns.tolist()
        y_fair_train = X_fair_train.pop(y_train.name)
        X_fair_test.columns = X_test.columns

        return X_fair_train, y_fair_train, X_fair_test