#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Two-class logistic regression module with Prejudice Remover

The number of sensitive features is restricted to one, and the feature must
be binary.

Attributes
----------
EPSILON : float
    small positive constant
N_S : int
    the number of sensitive features
N_CLASSES : int
    the number of classes
"""

from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals

#==============================================================================
# Module metadata variables
#==============================================================================

#==============================================================================
# Imports
#==============================================================================

import logging
import numpy as np
from scipy.optimize import fmin_cg
from sklearn.linear_model import LogisticRegression
from sklearn.base import BaseEstimator, ClassifierMixin

#==============================================================================
# Public symbols
#==============================================================================

__all__ = ['LRwPRType4']

#==============================================================================
# Constants
#==============================================================================

EPSILON = 1.0e-10
SIGMOID_RANGE = np.log((1.0 - EPSILON) / EPSILON)
N_S = 1
N_CLASSES = 2

#==============================================================================
# Module variables
#==============================================================================

#==============================================================================
# Functions
#==============================================================================

def sigmoid(x, w):
    """ sigmoid(w^T x)

    To suppress the warnings at np.exp, do "np.seterr(all='ignore')"

    Parameters
    ----------
    x : array, shape=(d)
        input vector
    w : array, shape=(d)
        weight vector

    Returns
    -------
    sigmoid : float
        sigmoid(w^T x)
    """
    s = np.clip(np.dot(w, x), -SIGMOID_RANGE, SIGMOID_RANGE)
    return 1.0 / (1.0 + np.exp(-s))
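# Illustrative usage of sigmoid() (a hedged sketch by the editor, not part of
# the original API): the weight vector is the second argument, and w^T x is
# clipped to +/- SIGMOID_RANGE so that np.exp never overflows.
#
#   sigmoid(np.array([1.0, 2.0]), np.array([0.0, 0.0]))  # w^T x = 0   -> 0.5
#   sigmoid(np.array([1.0]), np.array([1000.0]))         # clipped     -> 1 - EPSILON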
#==============================================================================
# Classes
#==============================================================================

class LRwPR(BaseEstimator, ClassifierMixin):
    """ Two class LogisticRegression with Prejudice Remover

    Parameters
    ----------
    C : float
        regularization parameter
    eta : float
        penalty parameter
    fit_intercept : bool
        use a constant term
    penalty : str
        fixed to 'l2'

    Attributes
    ----------
    minor_type : int
        type of likelihood fitting
    `coef_` : array, shape=(n_features)
        parameters for logistic regression model
    `mx_` : array-like, shape=(n_sfv, n_nsf)
        mx_[si, :] is the mean of the rows of X whose corresponding sensitive
        feature is exactly si.
    `n_s_` : int
        the number of sensitive features
    `n_sfv_` : int
        the number of sensitive feature values
    `c_s_` : array, shape=(`n_sfv_`)
        the counts of each sensitive value in the training samples
    `n_features_` : int
        the number of non-sensitive features, including a bias constant
    `n_samples_` : int
        the number of samples
    `f_loss_` : float
        the value of the loss function after training
    """

    def __init__(self, C=1.0, eta=1.0, fit_intercept=True, penalty='l2'):
        if C < 0.0:
            raise ValueError('C, the regularization parameter, must be '
                             'non-negative')

        self.fit_intercept = fit_intercept
        self.penalty = penalty
        self.C = C
        self.eta = eta
        self.minor_type = 0
        self.f_loss_ = np.inf

    def predict(self, X):
        """ predict classes

        Parameters
        ----------
        X : array, shape=(n_samples, n_features)
            feature vectors of samples

        Returns
        -------
        y : array, shape=(n_samples), dtype=int
            array of predicted classes
        """
        return np.argmax(self.predict_proba(X), 1)


class LRwPRPredictProbaType2Mixin(LRwPR):
    """ mixin for single type 2 likelihood """

    def predict_proba(self, X):
        """ predict probabilities

        A set of weight vectors, whose size is the same as the number of
        sensitive feature values, is available, and a weight vector is
        selected according to the value of the sensitive feature.

        Parameters
        ----------
        X : array, shape=(n_samples, n_features)
            feature vectors of samples

        Returns
        -------
        y_proba : array, shape=(n_samples, n_classes), dtype=float
            array of predicted class probabilities
        """
        # separate the sensitive feature and add a constant term
        s = np.atleast_1d(np.squeeze(np.array(X)[:, -self.n_s_]).astype(int))
        if self.fit_intercept:
            X = np.c_[np.atleast_2d(X)[:, :-self.n_s_], np.ones(X.shape[0])]
        else:
            X = np.atleast_2d(X)[:, :-self.n_s_]

        coef = self.coef_.reshape(self.n_sfv_, self.n_features_)

        proba = np.empty((X.shape[0], N_CLASSES))
        proba[:, 1] = [sigmoid(X[i, :], coef[s[i], :])
                       for i in range(X.shape[0])]
        proba[:, 0] = 1.0 - proba[:, 1]

        return proba


class LRwPRFittingType1Mixin(LRwPR):
    """ Fitting Method Mixin """

    def init_coef(self, itype, X, y, s):
        """ set initial weights

        Initialization methods are specified by `itype`:

        * 0: cleared by 0
        * 1: follows standard normal distribution
        * 2: learned by standard logistic regression
        * 3: learned by standard logistic regression separately according to
          the value of the sensitive feature

        Parameters
        ----------
        itype : int
            type of initialization method
        X : array, shape=(n_samples, n_features)
            feature vectors of samples
        y : array, shape=(n_samples)
            target class of samples
        s : array, shape=(n_samples)
            values of sensitive features
        """
        if itype == 0:
            # clear by zeros
            self.coef_ = np.zeros(self.n_sfv_ * self.n_features_,
                                  dtype=float)
        elif itype == 1:
            # at random
            self.coef_ = np.random.randn(self.n_sfv_ * self.n_features_)
        elif itype == 2:
            # learned by standard logistic regression
            self.coef_ = np.empty(self.n_sfv_ * self.n_features_,
                                  dtype=float)
            coef = self.coef_.reshape(self.n_sfv_, self.n_features_)

            clr = LogisticRegression(C=self.C, penalty='l2',
                                     fit_intercept=False)
            clr.fit(X, y)

            coef[:, :] = clr.coef_
        elif itype == 3:
            # learned by standard logistic regression, separately for each
            # value of the sensitive feature
            self.coef_ = np.empty(self.n_sfv_ * self.n_features_,
                                  dtype=float)
            coef = self.coef_.reshape(self.n_sfv_, self.n_features_)

            for i in range(self.n_sfv_):
                clr = LogisticRegression(C=self.C, penalty='l2',
                                         max_iter=1000, fit_intercept=False)
                clr.fit(X[s == i, :], y[s == i])
                coef[i, :] = clr.coef_
        else:
            raise ValueError('illegal initialization type: ' + str(itype))
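    # A hedged sketch of the warm-start performed by itype=2/3 above (the
    # names `X_nonsensitive`, `y`, and `n_sfv` are only illustrative): the
    # per-group coefficient matrix is filled from a plain LogisticRegression,
    # so the prejudice-remover optimization starts near an ordinary
    # maximum-likelihood solution.
    #
    #   clr = LogisticRegression(C=1.0, penalty='l2', fit_intercept=False)
    #   clr.fit(X_nonsensitive, y)               # X without the sensitive column
    #   coef = np.tile(clr.coef_, (n_sfv, 1))    # one copy per sensitive value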
    def fit(self, X, y, ns=N_S, itype=0, **kwargs):
        """ train this model

        Parameters
        ----------
        X : array, shape=(n_samples, n_features)
            feature vectors of samples
        y : array, shape=(n_samples)
            target class of samples
        ns : int
            number of sensitive features; currently fixed to N_S
        itype : int
            type of initialization method
        kwargs : any
            arguments to the optimizer
        """

        # rearrange input arguments
        s = np.atleast_1d(np.squeeze(np.array(X)[:, -ns]).astype(int))
        if self.fit_intercept:
            X = np.c_[np.atleast_2d(X)[:, :-ns], np.ones(X.shape[0])]
        else:
            X = np.atleast_2d(X)[:, :-ns]

        # check optimization parameters
        if 'disp' not in kwargs:
            kwargs['disp'] = False
        if 'maxiter' not in kwargs:
            kwargs['maxiter'] = 100

        # set instance variables
        self.n_s_ = ns
        self.n_sfv_ = np.max(s) + 1
        self.c_s_ = np.array([np.sum(s == si).astype(float)
                              for si in range(self.n_sfv_)])
        self.n_features_ = X.shape[1]
        self.n_samples_ = X.shape[0]

        # optimization
        self.init_coef(itype, X, y, s)
        self.coef_ = fmin_cg(self.loss,
                             self.coef_,
                             fprime=self.grad_loss,
                             args=(X, y, s),
                             **kwargs)

        # get final loss
        self.f_loss_ = self.loss(self.coef_, X, y, s)


class LRwPRObjetiveType4Mixin(LRwPR):
    """ objective function of logistic regression with prejudice remover

    Loss function type 4: weights for logistic regression are prepared for
    each value of S. The penalty for enhancing fairness is defined as the
    mutual information between Y and S.
    """

    def loss(self, coef_, X, y, s):
        """ loss function: negative log-likelihood with l2 regularizer

        To suppress the warnings at np.log, do "np.seterr(all='ignore')"

        Parameters
        ----------
        `coef_` : array, shape=(`n_sfv_` * n_features)
            coefficients of model
        X : array, shape=(n_samples, n_features)
            feature vectors of samples
        y : array, shape=(n_samples)
            target class of samples
        s : array, shape=(n_samples)
            values of sensitive features

        Returns
        -------
        loss : float
            loss function value
        """

        coef = coef_.reshape(self.n_sfv_, self.n_features_)
#        print >> sys.stderr, "loss:", coef[0, :], coef[1, :]

        ### constants

        # sigma = Pr[y=1|x,s] = sigmoid(w(s)^T x)
        p = np.array([sigmoid(X[i, :], coef[s[i], :])
                      for i in range(self.n_samples_)])

        # rho(s) = Pr[y=1|s] = \sum_{(xi,si)in D st si=s} sigma(xi,si) / #D[s]
        q = np.array([np.sum(p[s == si])
                      for si in range(self.n_sfv_)]) / self.c_s_

        # pi = Pr[y=1] = \sum_{(xi,si)in D} sigma(xi,si) / #D
        r = np.sum(p) / self.n_samples_

        ### loss function

        # likelihood
        # \sum_{x,s,y in D} y log(sigma) + (1 - y) log(1 - sigma)
        l = np.sum(y * np.log(p) + (1.0 - y) * np.log(1.0 - p))

        # fairness-aware regularizer
        # \sum_{x,s in D} \
        #     sigma(x,s)       [log(rho(s))     - log(pi)    ] + \
        #     (1 - sigma(x,s)) [log(1 - rho(s)) - log(1 - pi)]
        f = np.sum(p * (np.log(q[s]) - np.log(r))
                   + (1.0 - p) * (np.log(1.0 - q[s]) - np.log(1.0 - r)))

        # l2 regularizer
        reg = np.sum(coef * coef)

        l = -l + self.eta * f + 0.5 * self.C * reg
#        print >> sys.stderr, l

        return l
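    # Reading of the regularizer in loss() above (an editor's restatement of
    # the comments, not an additional term): with sigma_i = Pr[y=1|x_i, s_i],
    # rho(s) and pi estimated as in the code, the penalty f is the plug-in
    # estimate of the mutual information between Y and S,
    #
    #   f = sum_i [ sigma_i * log(rho(s_i) / pi)
    #               + (1 - sigma_i) * log((1 - rho(s_i)) / (1 - pi)) ]
    #
    # and grad_loss() below differentiates exactly this expression, together
    # with the log-likelihood and the l2 term, with respect to each w(s).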
    def grad_loss(self, coef_, X, y, s):
        """ first derivative of the loss function

        Parameters
        ----------
        `coef_` : array, shape=(`n_sfv_` * n_features)
            coefficients of model
        X : array, shape=(n_samples, n_features)
            feature vectors of samples
        y : array, shape=(n_samples)
            target class of samples
        s : array, shape=(n_samples)
            values of sensitive features

        Returns
        -------
        grad_loss : array, shape=(`n_sfv_` * n_features)
            first derivative of the loss function
        """

        coef = coef_.reshape(self.n_sfv_, self.n_features_)
        l_ = np.empty(self.n_sfv_ * self.n_features_)
        l = l_.reshape(self.n_sfv_, self.n_features_)
#        print >> sys.stderr, "grad_loss:", coef[0, :], coef[1, :]

        ### constants
        # prefix "d_": derivative with respect to w(s)

        # sigma = Pr[y=1|x,s] = sigmoid(w(s)^T x)
        # d_sigma(x,s) = d sigma / d w(s) = sigma (1 - sigma) x
        p = np.array([sigmoid(X[i, :], coef[s[i], :])
                      for i in range(self.n_samples_)])
        dp = (p * (1.0 - p))[:, np.newaxis] * X

        # rho(s) = Pr[y=1|s] = \sum_{(xi,si)in D st si=s} sigma(xi,si) / #D[s]
        # d_rho(s) = \sum_{(xi,si)in D st si=s} d_sigma(xi,si) / #D[s]
        q = np.array([np.sum(p[s == si])
                      for si in range(self.n_sfv_)]) / self.c_s_
        dq = np.array([np.sum(dp[s == si, :], axis=0)
                       for si in range(self.n_sfv_)]) \
            / self.c_s_[:, np.newaxis]

        # pi = Pr[y=1] = \sum_{(xi,si)in D} sigma(xi,si) / #D
        # d_pi = \sum_{(xi,si)in D} d_sigma(xi,si) / #D
        r = np.sum(p) / self.n_samples_
        dr = np.sum(dp, axis=0) / self.n_samples_

        # likelihood
        # l(si) = \sum_{x,y in D st s=si} (y - sigma(x, si)) x
        for si in range(self.n_sfv_):
            l[si, :] = np.sum((y - p)[s == si][:, np.newaxis] * X[s == si, :],
                              axis=0)

        # fairness-aware regularizer
        # differentiated with respect to w(s)
        # \sum_{x,s in D st s=si} \
        #     [(log(rho(si)) - log(pi)) - (log(1 - rho(si)) - log(1 - pi))] \
        #     * d_sigma
        # + \sum_{x,s in D st s=si} \
        #     [ {sigma(xi, si) - rho(si)} / {rho(si) (1 - rho(si))} ] * d_rho
        # - \sum_{x,s in D st s=si} \
        #     [ {sigma(xi, si) - pi} / {pi (1 - pi)} ] * d_pi
        f1 = (np.log(q[s]) - np.log(r)) \
            - (np.log(1.0 - q[s]) - np.log(1.0 - r))
        f2 = (p - q[s]) / (q[s] * (1.0 - q[s]))
        f3 = (p - r) / (r * (1.0 - r))
        f4 = f1[:, np.newaxis] * dp \
            + f2[:, np.newaxis] * dq[s, :] \
            - np.outer(f3, dr)
        f = np.array([np.sum(f4[s == si, :], axis=0)
                      for si in range(self.n_sfv_)])

        # l2 regularizer
        reg = coef

        # compose the gradient of the total loss
        l[:, :] = -l + self.eta * f + self.C * reg
#        print >> sys.stderr, "l =", l

        return l_


class LRwPRType4(LRwPRObjetiveType4Mixin,
                 LRwPRFittingType1Mixin,
                 LRwPRPredictProbaType2Mixin):
    """ Two class LogisticRegression with Prejudice Remover

    Parameters
    ----------
    C : float
        regularization parameter
    eta : float
        penalty parameter
    fit_intercept : bool
        use a constant term
    penalty : str
        fixed to 'l2'
    """

    def __init__(self, C=1.0, eta=1.0, fit_intercept=True, penalty='l2'):

        super(LRwPRType4, self).__init__(C=C, eta=eta,
                                         fit_intercept=fit_intercept,
                                         penalty=penalty)

        self.coef_ = None
        self.mx_ = None
        self.n_s_ = 0
        self.n_sfv_ = 0
        self.minor_type = 4

#==============================================================================
# Module initialization
#==============================================================================

# init logging system
logger = logging.getLogger('fadm')
if not logger.handlers:
    logger.addHandler(logging.NullHandler())

#==============================================================================
# Test routine
#==============================================================================

def _test():
    """ test function for this module
    """

    # perform doctest
    import sys
    import doctest

    doctest.testmod()

    sys.exit(0)

# Check if this is called as a command script
if __name__ == '__main__':
    _test()
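# End-to-end usage sketch (an illustrative, hedged example; `X`, `y`, and the
# 0/1 sensitive coding are placeholders supplied by the caller, not defined in
# this module). The binary sensitive feature must occupy the last column of X:
#
#   clf = LRwPRType4(eta=1.0, C=1.0)
#   clf.fit(X, y, itype=2)            # X[:, -1] holds the sensitive value
#   proba = clf.predict_proba(X)      # shape (n_samples, 2)
#   labels = clf.predict(X)           # argmax over the two classes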