"""Calculate and plot the gradients (the mean and std of the mini-batch gradients) of the trained network."""
import matplotlib
# matplotlib.use("TkAgg")  # uncomment for an interactive backend
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import numpy as np
from numpy import linalg as LA
import os
import tkinter as tk
from tkinter import filedialog
import idnns.plots.utils as plt_ut

colors = ['red', 'c', 'blue', 'green', 'orange', 'purple']


def plot_gradients(name_s=None, data_array=None, figures_dir=''):
    """Plot the gradients and the means of the networks over the batches"""
    if data_array is None:
        data_array = plt_ut.get_data(name_s[0][0])
    # plot_loss_figures(data_array, xlim=[0, 7000])
    # The gradients - the dimensions are #epochs X #batches X #layers
    conv_net = False
    if conv_net:
        gradients = data_array['var_grad_val'][0][0][0]
        num_of_epochs = len(gradients)
        num_of_batchs = len(gradients[0])
        num_of_layers = len(gradients[0][0]) // 2
    else:
        gradients = np.squeeze(data_array['var_grad_val'])[:, :, :]
        num_of_epochs, num_of_batchs, num_of_layers = gradients.shape
        num_of_layers = int(num_of_layers / 2)
    # The indexes where we sampled the network
    print(np.squeeze(data_array['var_grad_val'])[0, 0].shape)
    epochsInds = (data_array['params']['epochsInds']).astype(int)
    # The norms of the layers
    # l2_norm = calc_weights_norms(data_array['ws_all'])
    f_log, axes_log, f_norms, axes_norms, f_snr, axes_snr, axes_gaus, f_gaus = create_figs()
    p_1, p_0, sum_y, p_3, p_4 = [], [], [], [], []
    # Go over the layers
    cov_traces_all, means_all = [], []
    all_gradients = np.empty(num_of_layers, dtype=object)
    # print(np.squeeze(data_array['var_grad_val']).shape)
    for layer in range(0, num_of_layers):
        # The traces of the covariance and the means of the gradients for the current layer
        # Go over all the epochs
        cov_traces, means = [], []
        gradients_layer = []
        for epoch_index in range(num_of_epochs):
            # The gradients have dimensions #batches X #output weights, where #output weights is the
            # number of weights that go out from the layer
            gradients_current_epoch_and_layer = flatted_graidnet(gradients, epoch_index, 2 * layer)
            gradients_layer.append(gradients_current_epoch_and_layer)
            num_of_output_weights = gradients_current_epoch_and_layer.shape[1]
            # The average vector over the batches - a vector of size #output weights
            # (the mean gradient over the batches)
            average_vec = np.mean(gradients_current_epoch_and_layer, axis=0)
            # The norm of the mean gradient vector - sqrt of AA^T - a single number
            gradients_mean = LA.norm(average_vec)
            # The covariance matrix has size #output weights X #output weights
            sum_covs_mat = np.zeros((average_vec.shape[0], average_vec.shape[0]))
            # Go over all the batch vectors (each of size #output weights), subtract the mean (over the batches)
            # and accumulate the covariance matrix
            for batch_index in range(num_of_batchs):
                # This vector has size #output weights
                current_vec = gradients_current_epoch_and_layer[batch_index, :] - average_vec
                # The outer product of the centered gradient of this batch with its transpose
                # gives a matrix of size #output weights X #output weights
                current_cov_mat = np.einsum('i,j', current_vec, current_vec)
                # current_cov_mat = np.dot(current_vec[:, None], current_vec[None, :])
                # Sum the covariance matrices over the batches
                sum_covs_mat += current_cov_mat
            # Take the mean of the covariance matrices over the batches - size #output weights X #output weights
            mean_cov_mat = sum_covs_mat / num_of_batchs
            # The sqrt of the trace of the mean covariance matrix - a single number
            trac_cov = np.sqrt(np.trace(mean_cov_mat))
            means.append(gradients_mean)
            cov_traces.append(trac_cov)
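            # Equivalent, cheaper computation (a sketch): the trace of the batch covariance equals the
            # sum of the per-weight variances, so the full matrix need not be built explicitly:
            # trac_cov = np.sqrt(np.sum(np.var(gradients_current_epoch_and_layer, axis=0)))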
            """

                #cov_traces.append(np.mean(grad_norms))
                #means.append(norm_mean)
                c_var,c_mean,total_w = [], [],[]

                for neuron in range(len(grad[epoch_number][0][layer])/10):
                    gradients_list = np.array([grad[epoch_number][i][layer][neuron] for i in range(len(grad[epoch_number]))])
                    total_w.extend(gradients_list.T)
                    grad_norms1 = np.std(gradients_list, axis=0)
                    mean_la = np.abs(np.mean(np.array(gradients_list), axis=0))
                    #mean_la = LA.norm(gradients_list, axis=0)
                    c_var.append(np.mean(grad_norms1))
                    c_mean.append(np.mean(mean_la))
                #total_w is in size [num_of_total_weights, num of epochs]
                total_w = np.array(total_w)
                #c_var.append(np.sqrt(np.trace(np.cov(np.array(total_w).T)))/np.cov(np.array(total_w).T).shape[0])
                #print np.mean(c_mean).shape
                means.append(np.mean(c_mean))
                cov_traces.append(np.mean(c_var))
            """

        gradients_layer = np.array(gradients_layer)
        all_gradients[layer] = gradients_layer
        cov_traces_all.append(np.array(cov_traces))
        means_all.append(np.array(means))
        # The cov_traces and the means are vectors with the dimension of #epochs
        # y_var = np.array(cov_traces)
        # y_mean = np.array(means)
        y_var = np.sum(cov_traces_all, axis=0)
        y_mean = np.sum(means_all, axis=0)
        snr = y_mean ** 2 / y_var
        # Plot the gradients and the means
        c_p1, = axes_log.plot(epochsInds[:], np.sqrt(y_var), markersize=4, linewidth=4, color=colors[layer],
                              linestyle=':', markeredgewidth=0.2, dashes=[4, 4])
        c_p0, = axes_log.plot(epochsInds[:], y_mean, linewidth=2, color=colors[layer])
        c_p3, = axes_snr.plot(epochsInds[:], snr, linewidth=2, color=colors[layer])
        c_p4, = axes_gaus.plot(epochsInds[:], np.log(1 + snr), linewidth=2, color=colors[layer])
        # For the legend
        p_0.append(c_p0), p_1.append(c_p1), sum_y.append(y_mean), p_3.append(c_p3), p_4.append(c_p4)
    plt_ut.adjust_axes(axes_log, axes_norms, p_0, p_1, f_log, f_norms, axes_snr, f_snr, p_3, axes_gaus, f_gaus, p_4,
                       directory_name=figures_dir)
    plt.show()


def calc_mean_var_loss(epochsInds, loss_train):
    # loss_train has dimensions #epochs X #batches
    num_of_epochs = loss_train.shape[0]
    # Average over the batches
    loss_train_mean = np.mean(loss_train, 1)
    # The diff divided by the sampled indexes
    d_mean_loss_to_dt = np.sqrt(np.abs(np.diff(loss_train_mean) / np.diff(epochsInds[:])))
    var_loss = []
    # Go over the epochs
    for epoch_index in range(num_of_epochs):
        # The loss for the specific epoch
        current_loss = loss_train[epoch_index, :]
        # The derivative between the batches
        current_loss_dt = np.diff(current_loss)
        # The mean of its derivative
        average_loss = np.mean(current_loss_dt)
        current_loss_minus_mean = current_loss_dt - average_loss
        # The covariance between the batches
        cov_mat = np.dot(current_loss_minus_mean[:, None], current_loss_minus_mean[None, :])
        # The trace of the cov matrix
        trac_cov = np.trace(cov_mat)
        var_loss.append(trac_cov)
    return np.array(var_loss), d_mean_loss_to_dt
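

# A minimal vectorized sketch of the per-epoch variance computed by the loop above (the trace of the
# outer product equals the sum of squared deviations of the batch-to-batch loss differences). The
# helper name is illustrative only and is not used elsewhere in this module.
def calc_var_loss_vectorized(loss_train):
    loss_dt = np.diff(loss_train, axis=1)
    centered = loss_dt - np.mean(loss_dt, axis=1, keepdims=True)
    return np.sum(centered ** 2, axis=1)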

def plot_loss_figures(data_array, fig_size=(14, 10), xlim=None, y_lim=None):
    epochsInds = (data_array['params']['epochsInds']).astype(int)
    dif_var_loss, diff_mean_loss = calc_mean_var_loss(epochsInds, np.squeeze(data_array['loss_train']))
    f_log1, (axes_log1) = plt.subplots(1, 1, figsize=fig_size)
    axes_log1.set_title('The mean and the variance (between the batches) of the derivative of the train error')
    axes_log1.plot(epochsInds[1:], np.array(diff_mean_loss), color='green', label='Mean of the derivative of the error')
    axes_log1.plot(epochsInds[:], (dif_var_loss), color='blue', label='Variance of the derivative of the error')
    axes_log1.set_xscale('log')
    axes_log1.set_yscale('log')
    axes_log1.set_xlabel('#Epochs')
    axes_log1.legend()

    f_log1, (axes_log1) = plt.subplots(1, 1, figsize=fig_size)
    title = r'The SNR of the error derivatives'
    p_5, = axes_log1.plot(epochsInds[1:], np.array(diff_mean_loss) / np.sqrt(dif_var_loss[1:]), linewidth=3,
                          color='green')
    plt_ut.update_axes(axes_log1, f_log1, '#Epochs', 'SNR', [0, 7000], [0.001, 1], title, 'log', 'log',
                       [1, 10, 100, 1000, 7000], [0.001, 0.01, 0.1, 1])
    # axes_log1.plot(epochsInds[:], (dif_var_loss), color='blue', label='Variance of the derivative of the error')
    axes_log1.legend([r'$\frac{|d\,Error|}{STD\left(Error\right)}$'], loc='best', fontsize=21)

def create_figs(fig_size=(14, 10)):
    f_norms, (axes_norms) = plt.subplots(1, 1, figsize=fig_size)
    f_log, (axes_log) = plt.subplots(1, 1, figsize=fig_size)
    f_snr, (axes_snr) = plt.subplots(1, 1, figsize=fig_size)
    f_gaus, (axes_gaus) = plt.subplots(1, 1, figsize=fig_size)
    f_log.subplots_adjust(left=0.097, bottom=0.11, right=.95, top=0.95, wspace=0.03, hspace=0.03)
    return f_log, axes_log, f_norms, axes_norms, f_snr, axes_snr, axes_gaus, f_gaus


def flatted_graidnet(gradients, epoch_number, layer):
    """Flatten the gradients of the given epoch and layer into a #batches X #weights matrix."""
    gradients_list = []
    # Go over all the batches; for each batch collect the weights of every neuron in the current layer
    for i in range(len(gradients[epoch_number])):
        current_list_inner = []
        for neuron in range(len(gradients[epoch_number][0][layer])):
            c_n = gradients[epoch_number][i][layer][neuron]
            current_list_inner.extend(c_n)
        gradients_list.append(current_list_inner)
    gradients_list = np.array(gradients_list)
    gradients_list = np.reshape(gradients_list, (gradients_list.shape[0], -1))
    return gradients_list
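

# A possible vectorized alternative (a sketch, assuming each gradients[epoch][batch][layer] entry is a
# sequence of per-neuron weight-gradient vectors):
# np.array([np.concatenate([np.ravel(w) for w in batch[layer]]) for batch in gradients[epoch_number]])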


def calc_weights_norms(ws, num_of_layer=6):
    layer_l2_norm = []
    for i in range(num_of_layer):
        # Placeholder - the flattening of the weights below is currently disabled
        flatted_list = [1]
        """
        if type(ws_in[epoch_number][layer_index]) is list:
            flatted_list = [item for sublist in ws_in[epoch_number][layer_index] for item in sublist]
        else:
            flatted_list = ws_in[epoch_number][layer_index]
        """
        layer_l2_norm.append(LA.norm(np.array(flatted_list)))
    # Plot the norms
    # axes_norms.plot(epochsInds[:], np.array(layer_l2_norm), linewidth=2, color=colors[layer_index])
    return layer_l2_norm

def extract_array(data, name):
    results = [[data[j,k][name] for k in range(data.shape[1])] for j in range(data.shape[0])]
    return results


def load_from_memory(data_array):
    plot_gradients(data_array=data_array)


if __name__ == '__main__':
    directory = './figures/'
    if not os.path.exists(directory):
        os.makedirs(directory)
    root = tk.Tk()
    root.withdraw()
    file_path = filedialog.askopenfilename()
    str_names = [[('/').join(file_path.split('/')[:-1]) + '/']]
    plot_gradients(str_names, figures_dir=directory)
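    # When running without a display, load_from_memory(data_array) can be used with an already-loaded
    # data dictionary instead of picking a file through the Tk dialog above.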