from abc import ABC, abstractmethod
from functools import partial
import numpy as np
from scipy.stats import multivariate_normal
from sklearn.metrics import accuracy_score
class General(ABC):
"""This is the class with the general functions of the algorithm.
For different fairness metrics, the objective function of the optimization
problem is different and hence needs different implementations.
The fairness-metric specific methods need to extend this class and implement
the necessary functions.
"""
@abstractmethod
def getExpectedGrad(self, dist, a, b, params, samples, mu, z_prior):
"""Used in gradient descent algorithm. Returns the value of gradient at
any step.
"""
raise NotImplementedError
@abstractmethod
def getValueForX(self, dist, a, b, params, z_prior, x):
"""Returns the threshold value at any point."""
raise NotImplementedError
@abstractmethod
def getFuncValue(self, dist, a, b, params, samples, z_prior):
"""Returns the value of the objective function for given parameters."""
raise NotImplementedError
@property
@abstractmethod
    def num_params(self):
        """Number of parameters of the optimization problem."""
        raise NotImplementedError
    def range(self, eps, tau):
        """Builds the grid of candidate (a, b) pairs used in getModel."""
a = np.arange(np.ceil(tau/eps), step=10) * eps
b = (a + eps) / tau
b = np.minimum(b, 1)
return np.c_[a, b]
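    # Illustrative example (assumed values): with eps = 0.01 and tau = 0.5,
    # self.range(eps, tau) returns roughly
    #   [0.0, 0.02], [0.1, 0.22], [0.2, 0.42], [0.3, 0.62], [0.4, 0.82],
    # i.e. candidate (a, b) pairs where a advances in steps of 10 * eps and
    # b is capped at 1.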
@abstractmethod
    def gamma(self, y_true, y_pred, sens):
        """Computes the fairness metric value for the given predictions."""
        raise NotImplementedError
    def init_params(self, i):
        """Initializes every parameter to the value i."""
return [i] * self.num_params
def gradientDescent(self, dist, a, b, samples, z_prior):
"""Gradient Descent implementation for the optimizing the objective
function.
Note that one can alternately also use packages like CVXPY here.
Here we use decaying step size. For certain objectives, constant step
size might be better.
"""
        min_val = np.inf
min_param = None
for i in range(1, 10):
params = self.init_params(i)
for k in range(1, 50):
grad = self.getExpectedGrad(dist, a, b, params, samples, 0.01,
z_prior)
for j in range(self.num_params):
params[j] = params[j] - 1/k * grad[j]
f_val = self.getFuncValue(dist, a, b, params, samples, z_prior)
if f_val < min_val:
                    # Copy so later in-place updates don't overwrite the best.
                    min_val, min_param = f_val, list(params)
return min_param
    def prob(self, dist, x):
        """Probability density of dist evaluated at x."""
return dist.pdf(x)
def getModel(self, tau, X, y, sens, random_state=None):
"""Returns the model given the training data and input tau."""
train = np.c_[X, y, sens]
mean = np.mean(train, axis=0)
cov = np.cov(train, rowvar=False)
dist = multivariate_normal(mean, cov, allow_singular=True,
seed=random_state)
n = X.shape[1]
dist_x = multivariate_normal(mean[:n], cov[:n, :n], allow_singular=True,
seed=random_state)
eps = 0.01
        z_1 = np.mean(sens)  # empirical prior of the sensitive attribute
params_opt = [0] * self.num_params
max_acc = 0
p, q = 0, 0
        if tau != 0:
            # Grid search over (a, b): keep the most accurate classifier
            # whose fairness value gamma meets the relaxed threshold tau.
            for a, b in self.range(eps, tau):
samples = dist_x.rvs(size=20) # TODO: why 20?
params = self.gradientDescent(dist, a, b, samples, z_1)
t = self.getValueForX(dist, a, b, params, z_1, X)
y_pred = np.where(t > 0, 1, -1)
acc = accuracy_score(y, y_pred)
gamma = self.gamma(y, y_pred, sens)
if max_acc < acc and gamma >= tau - 0.2: # TODO: why - 0.2?
max_acc = acc
params_opt = params
p, q = a, b
return partial(self.getValueForX, dist, p, q, params_opt, z_1)
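

# ---------------------------------------------------------------------------
# Illustrative usage sketch (an assumption, not part of the original code).
# The toy subclass below only shows how the abstract interface fits together:
# it optimizes a single offset parameter with a stand-in objective, estimates
# the gradient by central finite differences, and uses a statistical-rate
# style ratio as the fairness metric gamma. Real metric-specific subclasses
# would implement the objective, gradient and decision value dictated by the
# corresponding optimization problem.
# ---------------------------------------------------------------------------
if __name__ == "__main__":

    class ToyMetric(General):
        """Toy subclass used only to demonstrate the interface."""

        @property
        def num_params(self):
            return 1

        def getValueForX(self, dist, a, b, params, z_prior, x):
            # Toy decision value: mean feature value shifted by the learned
            # offset; its sign is the predicted label in getModel.
            return np.mean(np.atleast_2d(x), axis=1) - params[0]

        def getFuncValue(self, dist, a, b, params, samples, z_prior):
            # Stand-in objective: squared hinge penalty on the decision
            # values of the Monte-Carlo samples.
            vals = self.getValueForX(dist, a, b, params, z_prior, samples)
            return float(np.mean(np.maximum(0.0, 1.0 - vals) ** 2))

        def getExpectedGrad(self, dist, a, b, params, samples, mu, z_prior):
            # Central finite-difference estimate of the gradient.
            grads = []
            for j in range(self.num_params):
                hi, lo = list(params), list(params)
                hi[j] += mu
                lo[j] -= mu
                grads.append(
                    (self.getFuncValue(dist, a, b, hi, samples, z_prior)
                     - self.getFuncValue(dist, a, b, lo, samples, z_prior))
                    / (2 * mu))
            return grads

        def gamma(self, y_true, y_pred, sens):
            # Statistical-rate style metric: ratio of positive prediction
            # rates between the two sensitive groups.
            pos = y_pred == 1
            rates = [np.mean(pos[sens == g]) if np.any(sens == g) else 0.0
                     for g in (0, 1)]
            return min(rates) / max(rates) if max(rates) > 0 else 0.0

    # Synthetic data, only for demonstration purposes.
    rng = np.random.default_rng(0)
    X = rng.normal(size=(200, 3))
    sens = rng.integers(0, 2, size=200)
    y = np.where(X[:, 0] + rng.normal(scale=0.5, size=200) > 0, 1, -1)

    model = ToyMetric().getModel(tau=0.8, X=X, y=y, sens=sens, random_state=0)
    scores = model(X)                     # decision values
    y_pred = np.where(scores > 0, 1, -1)  # predicted labels
    print("accuracy:", accuracy_score(y, y_pred))
    print("gamma:", ToyMetric().gamma(y, y_pred, sens))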