File size: 4,131 Bytes
d2a8669
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# Copyright 2019 Seth V. Neel, Michael J. Kearns, Aaron L. Roth, Zhiwei Steven Wu
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
"""Function generating 3-d heatmap visualizing gamma-disparity.

The main function in this module, heat_map(), generates and saves a 3-d
heatmap visualizing the gamma-disparity of groups defined by linear
thresholds over two sensitive attributes. It is a heuristic way to visualize
convergence of the algorithm by brute-force checking in low dimensions,
rather than relying on the Auditor. See [KRNW18] for details.
"""

import matplotlib
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
from aif360.algorithms.inprocessing.gerryfair.reg_oracle_class import *


def calc_disp(predictions, X, group_labels, X_prime, group):
    """Return the weighted fp disparity of a group - helper for heat_map.

    Only samples whose label equals 0 are considered. The mean prediction over
    all of them is compared with the mean prediction over those that ``group``
    marks as members, and the difference is weighted by the share of the whole
    sample made up of label-0 group members.

    Args:
        :param predictions: predictions of the classifier, indexed by sample
            position
        :param X: dataframe of covariates (not read by this function; kept in
            the signature for existing callers)
        :param group_labels: labels, one per sample; entries equal to 0 select
            the samples used here
        :param X_prime: dataframe of sensitive covariates, one row per sample
        :param group: object whose ``predict(dataframe)`` returns one
            membership indicator per row, with 1 meaning "member"
            (e.g. the LinearThresh built by heat_map)
    Returns:
        :return: weighted disparity on the group; 0 when there is no label-0
            sample or when no label-0 sample belongs to the group
    """
    # Positions of the label-0 samples, computed once and reused below.
    negative_idx = [i for i, label in enumerate(group_labels) if label == 0]
    if not negative_idx:
        # Nothing to average: bail out before handing group.predict an empty
        # frame and before np.mean([]) produces nan with a RuntimeWarning.
        return 0

    # Single positional selection instead of rebuilding the frame row by row.
    X_0 = X_prime.iloc[negative_idx, :]
    n = len(group_labels)
    # Share of the *whole* sample that is both label-0 and in the group.
    g_size_0 = np.sum(group.predict(X_0)) * 1.0 / n

    FP = np.mean([predictions[i] for i in negative_idx])

    group_members = group.predict(X_prime)
    fp_g = [predictions[i] for i in negative_idx if group_members[i] == 1]
    if not fp_g:
        # The group contains no label-0 sample, so it has no disparity.
        return 0
    return (FP - np.mean(fp_g)) * g_size_0


def heat_map(X, X_prime, y, predictions, eta, plot_path, vmin=None, vmax=None):
    """Generate 3-d heatmap and save it at plot_path.

    A square grid of coefficient pairs ``(b1, b2)`` is built, each coefficient
    taking the values ``-1 + 2 * eta * k`` for ``k = 0 .. int(1 / eta) - 1``.
    For every pair a ``LinearThresh([b1, b2])`` group is created and its
    disparity is computed with calc_disp(); the disparities are drawn as a
    surface over the two coefficient axes.

    Args:
        :param X: dataframe of covariates, forwarded to calc_disp
        :param X_prime: dataframe of sensitive covariates; its first two
            column names label the x and y axes
        :param y: labels, forwarded to calc_disp as the group labels
        :param predictions: predictions of the classifier, forwarded to
            calc_disp
        :param eta: discretization parameter of coefficients defining subgroups
        :param plot_path: the path to save the heatmap at ('.png' is
            appended); if equal to '.', the figure is neither saved nor closed
        :param vmin: Min value to map: see plot_surface documentation in matplotlib
        :param vmax: Max value to map

    Returns:
        :return: the min and max gamma disparities on groups in the plot

    Raises:
        ValueError: if ``int(1 / eta)`` is smaller than 1, i.e. the grid would
            be empty
    """
    n_steps = int(1 / eta)
    if n_steps < 1:
        # Fail early with a clear message instead of letting np.zeros/np.min
        # raise on an empty or negative-sized grid after a figure was created.
        raise ValueError(
            'eta must satisfy 0 < eta <= 1 so that the grid is non-empty, '
            'got {}'.format(eta))

    # Both attributes share the same coefficient grid.
    coefficients = [-1 + 2 * eta * k for k in range(n_steps)]
    attribute_1 = np.array(coefficients)
    attribute_2 = np.array(coefficients)
    disparity = np.zeros((n_steps, n_steps))

    for i, beta_1 in enumerate(coefficients):
        for j, beta_2 in enumerate(coefficients):
            group = LinearThresh([beta_1, beta_2])
            disparity[i, j] = calc_disp(predictions, X, y, X_prime, group)

    # disparity[i, j] belongs to (attribute_1[i], attribute_2[j]); 'ij'
    # indexing makes X_plot[i, j] / Y_plot[i, j] follow the same layout, so
    # the surface is not mirrored with respect to the axis labels.
    X_plot, Y_plot = np.meshgrid(attribute_1, attribute_2, indexing='ij')
    z_min, z_max = np.min(disparity), np.max(disparity)

    columns = [str(c) for c in X_prime.columns]
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1, projection='3d')
    ax.set_xlabel(columns[0] + ' coefficient')
    ax.set_ylabel(columns[1] + ' coefficient')
    ax.set_zlabel('gamma disparity')
    ax.set_zlim3d(z_min, z_max)
    ax.plot_surface(X_plot,
                    Y_plot,
                    disparity,
                    cmap=cm.coolwarm,
                    linewidth=0,
                    antialiased=False,
                    vmin=vmin,
                    vmax=vmax)
    if plot_path != '.':
        fig.savefig('{}.png'.format(plot_path))
        # Close this specific figure rather than whichever one is current.
        plt.close(fig)
    return [z_min, z_max]