Spaces:
Runtime error
Runtime error
File size: 6,872 Bytes
d2a8669 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
import sklearn.preprocessing
import numpy as np
from aif360.algorithms import Transformer
class LimeEncoder(Transformer):
    """Transformer for converting aif360 dataset to LIME dataset and vice versa.

    (LIME - Local Interpretable Model-Agnostic Explanations) [2]_

    See for details/usage:
    https://github.com/marcotcr/lime

    References:
        .. [2] M.T. Ribeiro, S. Singh, and C. Guestrin, '"Why should I trust
           you?" Explaining the predictions of any classifier.'
           https://arxiv.org/pdf/1602.04938v1.pdf
    """

    def __init__(self):
        super(LimeEncoder, self).__init__()

    def fit(self, dataset):
        """Take an aif360 dataset and save all relevant metadata as well as
        mappings needed to transform/inverse_transform the data between aif360
        and lime.

        Attributes set on ``self``:
            s_feature_names_with_one_hot_encoding: ``dataset.feature_names``.
            s_feature_names: column names of the de-dummy-coded frame, label
                column removed.
            s_data: values of that frame.
            s_protected_attribute_names: ``dataset.protected_attribute_names``.
            s_labels: the first label column of the frame.
            s_class_names: distinct label values as found by a LabelEncoder.
            s_categorical_features: indexes into ``s_feature_names`` of the
                categorical features, protected attributes included.
            s_categorical_names: dict mapping each of those indexes to the
                array of distinct values seen in that column.

        Args:
            dataset (BinaryLabelDataset): aif360 dataset

        Returns:
            LimeEncoder: Returns self.
        """
        self.s_feature_names_with_one_hot_encoding = dataset.feature_names
        df, _ = dataset.convert_to_dataframe(de_dummy_code=True)

        label_name = dataset.label_names[0]
        dfc = df.drop(label_name, axis=1)  # remove label (class) column
        self.s_feature_names = list(dfc.columns)
        self.s_data = dfc.values

        # One-hot-encoded features get new column names while every other
        # feature keeps its name, so a de-dummied column whose name is absent
        # from the one-hot names is categorical. Walking s_feature_names
        # (rather than taking a set difference) keeps the order stable
        # between runs.
        one_hot_names = set(self.s_feature_names_with_one_hot_encoding)
        categorical_names = [name for name in self.s_feature_names
                             if name not in one_hot_names]

        # Protected attributes are treated as categorical as well; they are
        # appended after the one-hot-encoded features.
        self.s_protected_attribute_names = dataset.protected_attribute_names
        categorical_names = categorical_names \
            + list(self.s_protected_attribute_names)

        self.s_labels = df[label_name]

        # The encoder is only used to collect the distinct label values; the
        # labels themselves are stored untransformed.
        s_le = sklearn.preprocessing.LabelEncoder()
        s_le.fit(self.s_labels)
        self.s_class_names = s_le.classes_

        # Store categorical features as indexes into s_feature_names
        # (this list includes the protected attributes).
        self.s_categorical_features = [self.s_feature_names.index(name)
                                       for name in categorical_names]

        # For every categorical feature remember its distinct values; the
        # position of a value in that array is its LIME code.
        self.s_categorical_names = {}
        for feature in self.s_categorical_features:
            # Kept as an attribute for backward compatibility: after fit it
            # holds the encoder of the last categorical feature.
            self.le = sklearn.preprocessing.LabelEncoder()
            self.le.fit(self.s_data[:, feature])
            self.s_categorical_names[feature] = self.le.classes_

        return self

    def transform(self, aif360data):
        """Take aif360 data array and return data array that is lime encoded
        (numeric array in which categorical features are NOT one-hot-encoded).

        Args:
            aif360data (np.ndarray): Dataset features

        Returns:
            np.ndarray: LIME dataset features

        Raises:
            ValueError: If a required column name is not present in the
                one-hot feature names stored by ``fit``.
        """
        one_hot_names = self.s_feature_names_with_one_hot_encoding
        categorical = set(self.s_categorical_features)
        protected = set(self.s_protected_attribute_names)

        limedata = np.zeros(shape=(aif360data.shape[0],
                                   len(self.s_feature_names)))

        # Work column by column so each name lookup happens once per feature
        # instead of once per cell.
        for ind, feature in enumerate(self.s_feature_names):
            if ind in categorical and feature not in protected:
                # Collapse the one-hot columns "<feature>=<value>" into the
                # position of the value. Codes are applied in ascending order,
                # so if several columns are set the highest code wins; rows
                # with none set keep 0.
                for code, value in enumerate(self.s_categorical_names[ind]):
                    column = one_hot_names.index(feature + "=" + value)
                    is_set = aif360data[:, column] == 1.0
                    limedata[is_set, ind] = code
            else:
                # Protected attributes and non-categorical features are
                # copied as is.
                limedata[:, ind] = aif360data[:, one_hot_names.index(feature)]

        return limedata

    def inverse_transform(self, limedata):
        """Take data array that is lime encoded (that is, lime-compatible data
        created by this class from a given aif360 dataset) and return data array
        consistent with the original aif360 dataset.

        Args:
            limedata (np.ndarray): Dataset features

        Returns:
            np.ndarray: aif360 dataset features

        Raises:
            ValueError: If a required column name is not present in the
                one-hot feature names stored by ``fit``.
            IndexError: If a categorical code is outside the range of values
                recorded for that feature.
        """
        one_hot_names = self.s_feature_names_with_one_hot_encoding
        categorical = set(self.s_categorical_features)
        protected = set(self.s_protected_attribute_names)

        num_rows = limedata.shape[0]
        aif360data = np.zeros(shape=(num_rows, len(one_hot_names)))
        row_ids = np.arange(num_rows)

        for ind, feature in enumerate(self.s_feature_names):
            if ind in categorical and feature not in protected:
                # s_categorical_names[ind][code] is the string value; the
                # matching one-hot column is named "<feature>=<value>".
                targets = np.array(
                    [one_hot_names.index(feature + '=' + value)
                     for value in self.s_categorical_names[ind]],
                    dtype=int)
                # int() truncates each code exactly as a per-cell lookup would.
                codes = np.array([int(code) for code in limedata[:, ind]],
                                 dtype=int)
                aif360data[row_ids, targets[codes]] = 1.0
            else:
                # Protected attributes and non-categorical features are
                # copied as is.
                aif360data[:, one_hot_names.index(feature)] = limedata[:, ind]

        return aif360data
|