Spaces:

erasmopurif
/

FairUP

Runtime error

App Files Files Community

FairUP / src /aif360 /sklearn /preprocessing /fairadapt.py

erasmopurif

First commit

d2a8669 about 2 years ago

raw

history blame contribute delete

4.62 kB

	import os.path as osp

	import pandas as pd
	import numpy as np
	try:
	from rpy2 import robjects
	from rpy2.robjects.vectors import StrVector
	from rpy2.robjects.packages import importr
	from rpy2.robjects import pandas2ri
	from rpy2.robjects.conversion import localconverter
	except ImportError as error:
	from logging import warning
	warning("{}: FairAdapt will be unavailable. To install, run:\n"
	"pip install 'aif360[FairAdapt]'".format(error))
	from sklearn.base import BaseEstimator

	from aif360.sklearn.utils import check_inputs, check_groups


	class FairAdapt(BaseEstimator):
	    """Fair Data Adaptation.

	    Fairadapt is a pre-processing technique usable for both fair
	    classification and fair regression [#plecko20]_. It is a causal
	    inference approach to bias removal and relies on the causal graph of
	    the dataset. The original implementation is in R [#plecko21]_; this
	    class drives it through rpy2.

	    References:
	        .. [#plecko20] `D. Plečko and N. Meinshausen,
	           "Fair Data Adaptation with Quantile Preservation,"
	           Journal of Machine Learning Research, 2020.
	           <https://www.jmlr.org/papers/volume21/19-966/19-966.pdf>`_
	        .. [#plecko21] `D. Plečko and N. Bennett and N. Meinshausen,
	           "FairAdapt: Causal Reasoning for Fair Data Pre-processing,"
	           arXiv, 2021. <https://arxiv.org/abs/2110.10200>`_

	    Attributes:
	        prot_attr_ (str or list(str)): Protected attribute(s) used for fair
	            data adaptation, as resolved by ``check_groups``.
	        groups_ (array, shape (n_groups,)): The unique group labels known to
	            the transformer.
	    """

	    def __init__(self, prot_attr, adj_mat):
	        """Store the parameters and make sure the R dependencies exist.

	        Args:
	            prot_attr (single label): Name of the protected attribute. Must
	                be binary.
	            adj_mat (array-like): A 2-dimensional array representing the
	                adjacency matrix of the causal diagram of the data
	                generating process. Row/column order must match `X_train`.

	        Note:
	            Constructing the estimator queries the R installation and, if
	            'ranger' or 'fairadapt' is missing, installs it from CRAN.
	        """
	        self.prot_attr = prot_attr
	        self.adj_mat = adj_mat

	        # Only the R packages that are not yet installed need attention.
	        r_packages = robjects.packages
	        missing = [name for name in ('ranger', 'fairadapt')
	                   if not r_packages.isinstalled(name)]
	        if not missing:
	            return

	        # Install whatever is missing through R's own 'utils' package,
	        # using the CRAN mirror at index 1.
	        r_utils = r_packages.importr('utils')
	        r_utils.chooseCRANmirror(ind=1)
	        r_utils.install_packages(StrVector(missing))

	    def fit_transform(self, X_train, y_train, X_test):
	        """Remove bias from the given dataset by fair adaptation.

	        The training frame (features plus labels), the test frame and the
	        adjacency matrix are handed to the R function named ``wrapper``
	        that is defined in the ``fairadapt.R`` file located next to this
	        module. Its ``train`` and ``test`` results are converted back to
	        pandas objects.

	        Args:
	            X_train (pandas.DataFrame): Training data frame (including the
	                protected attribute).
	            y_train (pandas.Series): Training labels. Its ``name`` is passed
	                to R as the outcome column.
	            X_test (pandas.DataFrame): Test data frame (including the
	                protected attribute).

	        Returns:
	            tuple:
	                Transformed inputs.

	                * X_fair_train (pandas.DataFrame) -- Transformed training
	                  data.
	                * y_fair_train (array-like) -- Transformed training labels.
	                * X_fair_test (pandas.DataFrame) -- Transformed test data.
	        """
	        # Features and labels travel to R as one combined frame.
	        df_train = pd.concat([X_train, y_train], axis=1)

	        # Resolve the protected attribute and remember the group labels.
	        groups, self.prot_attr_ = check_groups(
	            X_train, self.prot_attr, ensure_binary=True)
	        self.groups_ = np.unique(groups)

	        # Load the R-side entry point shipped alongside this module.
	        module_dir = osp.dirname(osp.abspath(__file__))
	        robjects.r.source(osp.join(module_dir, 'fairadapt.R'))
	        FairAdapt_R = robjects.r['wrapper']

	        # pandas <-> R conversion rules, used for both directions below.
	        pandas_rules = robjects.default_converter + pandas2ri.converter

	        with localconverter(pandas_rules):
	            train_data = robjects.conversion.py2rpy(df_train)
	            test_data = robjects.conversion.py2rpy(X_test)
	            adj_mat = robjects.conversion.py2rpy(self.adj_mat)

	        # Run FairAdapt in R (outside the local converter, so the result
	        # stays an R object that supports ``rx2``).
	        outcome = y_train.name
	        res = FairAdapt_R(
	            train_data=train_data,
	            test_data=test_data,
	            adj_mat=adj_mat,
	            prot_attr=self.prot_attr_,
	            outcome=outcome,
	        )

	        with localconverter(pandas_rules):
	            X_fair_train = robjects.conversion.rpy2py(res.rx2('train'))
	            X_fair_test = robjects.conversion.rpy2py(res.rx2('test'))

	        # Relabel the adapted training frame with the outcome column first,
	        # followed by the original feature columns, then split the outcome
	        # off again.
	        X_fair_train.columns = [outcome, *X_train.columns.tolist()]
	        y_fair_train = X_fair_train.pop(outcome)
	        X_fair_test.columns = X_test.columns

	        return X_fair_train, y_fair_train, X_fair_test