# idnns/plots/plot_gradients.py
"""Calculate and plot the gradients (the mean and std of the mini-batch gradients) of the trained network."""
import matplotlib
# matplotlib.use("TkAgg")
matplotlib.use("Agg")
import numpy as np
import idnns.plots.utils as plt_ut
import matplotlib.pyplot as plt
import tkinter as tk
from tkinter import filedialog
from numpy import linalg as LA
import os
import sys
import statsmodels
colors = ['red', 'c', 'blue', 'green', 'orange', 'purple']
def plot_gradients(name_s=None, data_array=None, figures_dir=''):
"""Plot the gradients and the means of the networks over the batches"""
    if data_array is None:
        data_array = plt_ut.get_data(name_s[0][0])
    # plot_loss_figures(data_array, xlim=[0, 7000])
    # The gradients have dimensions #epochs x #batches x #layers
conv_net = False
if conv_net:
        gradients = data_array['var_grad_val'][0][0][0]
        num_of_epochs = len(gradients)
        num_of_batchs = len(gradients[0])
        num_of_layers = len(gradients[0][0]) // 2
else:
        gradients = np.squeeze(data_array['var_grad_val'])[:, :, :]
        num_of_epochs, num_of_batchs, num_of_layers = gradients.shape
        num_of_layers = int(num_of_layers / 2)
    # The indexes where we sampled the network
    print(np.squeeze(data_array['var_grad_val'])[0, 0].shape)
    epochsInds = (data_array['params']['epochsInds']).astype(int)
#The norms of the layers
#l2_norm = calc_weights_norms(data_array['ws_all'])
f_log, axes_log, f_norms, axes_norms, f_snr, axes_snr,axes_gaus, f_gaus = create_figs()
    p_1, p_0, sum_y, p_3, p_4 = [], [], [], [], []
    # Go over the layers
    cov_traces_all, means_all = [], []
    all_gradients = np.empty(num_of_layers, dtype=object)
#print np.squeeze(data_array['var_grad_val']).shape
    for layer in range(num_of_layers):
        # The traces of the covariance matrices and the means of the gradients for the current layer
# Go over all the epochs
cov_traces, means = [], []
gradinets_layer = []
for epoch_index in range(num_of_epochs):
            # The gradients have dimensions #batches x #output-weights, where #output-weights is
            # the number of weights that go out of the layer
gradients_current_epoch_and_layer = flatted_graidnet(gradients, epoch_index, 2 * layer)
gradinets_layer.append(gradients_current_epoch_and_layer)
num_of_output_weights = gradients_current_epoch_and_layer.shape[1]
            # The average vector over the batches - a vector of size #output-weights
            # (the mean gradient vector of the mini-batches)
            average_vec = np.mean(gradients_current_epoch_and_layer, axis=0)
            # The L2 norm of the mean vector (sqrt of the sum of its squared components) - a scalar
gradients_mean = LA.norm(average_vec)
            # The covariance matrix has size #output-weights x #output-weights
sum_covs_mat = np.zeros((average_vec.shape[0], average_vec.shape[0]))
            # Go over all the batch vectors (each of size #output-weights), subtract the mean
            # (over the batches) and accumulate the covariance matrix
for batch_index in range(num_of_batchs):
# This is in the size of the #output weights
current_vec = gradients_current_epoch_and_layer[batch_index, :] - average_vec
                # The outer product of the centred gradient vector of this specific batch with itself
                # gives a matrix of size #output-weights x #output-weights
current_cov_mat = np.einsum('i,j', current_vec, current_vec)
#current_cov_mat = np.dot(current_vec[:,None], current_vec[None,:])
                # Sum the covariance matrices over the batches
                sum_covs_mat += current_cov_mat
            # Take the mean of the covariance matrices over the batches - size #output-weights x #output-weights
mean_cov_mat = sum_covs_mat / num_of_batchs
            # The sqrt of the trace of the mean covariance matrix - a scalar
trac_cov = np.sqrt(np.trace(mean_cov_mat))
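            # Equivalent vectorised form (illustrative sketch, kept as a comment only):
            #   centered = gradients_current_epoch_and_layer - average_vec
            #   trac_cov == np.sqrt(np.trace(centered.T.dot(centered)) / num_of_batchs)
            #            == LA.norm(centered) / np.sqrt(num_of_batchs)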
means.append(gradients_mean)
cov_traces.append(trac_cov)
"""
#cov_traces.append(np.mean(grad_norms))
#means.append(norm_mean)
c_var,c_mean,total_w = [], [],[]
for neuron in range(len(grad[epoch_number][0][layer])/10):
gradients_list = np.array([grad[epoch_number][i][layer][neuron] for i in range(len(grad[epoch_number]))])
total_w.extend(gradients_list.T)
grad_norms1 = np.std(gradients_list, axis=0)
mean_la = np.abs(np.mean(np.array(gradients_list), axis=0))
#mean_la = LA.norm(gradients_list, axis=0)
c_var.append(np.mean(grad_norms1))
c_mean.append(np.mean(mean_la))
#total_w is in size [num_of_total_weights, num of epochs]
total_w = np.array(total_w)
#c_var.append(np.sqrt(np.trace(np.cov(np.array(total_w).T)))/np.cov(np.array(total_w).T).shape[0])
#print np.mean(c_mean).shape
means.append(np.mean(c_mean))
cov_traces.append(np.mean(c_var))
"""
gradinets_layer = np.array(gradinets_layer)
        all_gradients[layer] = gradinets_layer
cov_traces_all.append(np.array(cov_traces))
means_all.append(np.array(means))
#The cov_traces and the means are vectors with the dimension of # epochs
#y_var = np.array(cov_traces)
#y_mean = np.array(means)
y_var = np.sum(cov_traces_all, axis=0)
y_mean = np.sum(means_all, axis=0)
snr = y_mean**2 / y_var
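        # The 'gaus' figure below plots log(1 + SNR), which is proportional to the capacity of a
        # Gaussian channel with this signal-to-noise ratio.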
#Plot the gradients and the means
        c_p1, = axes_log.plot(epochsInds[:], np.sqrt(y_var), markersize=4, linewidth=4, color=colors[layer],
                              linestyle=':', markeredgewidth=0.2, dashes=[4, 4])
        c_p0, = axes_log.plot(epochsInds[:], y_mean, linewidth=2, color=colors[layer])
        c_p3, = axes_snr.plot(epochsInds[:], snr, linewidth=2, color=colors[layer])
        c_p4, = axes_gaus.plot(epochsInds[:], np.log(1 + snr), linewidth=2, color=colors[layer])
#For the legend
        p_0.append(c_p0), p_1.append(c_p1), sum_y.append(y_mean), p_3.append(c_p3), p_4.append(c_p4)
plt_ut.adjust_axes(axes_log, axes_norms, p_0, p_1, f_log, f_norms, axes_snr, f_snr, p_3, axes_gaus, f_gaus, p_4,
directory_name=figures_dir)
plt.show()
def calc_mean_var_loss(epochsInds,loss_train):
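    """Estimate, per epoch, the variance (between the batches) of the loss derivative, and the
    square root of the derivative of the mean training loss between the sampled epochs."""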
    # loss_train has dimensions #epochs x #batches
    num_of_epochs = loss_train.shape[0]
    # Average over the batches
    loss_train_mean = np.mean(loss_train, 1)
    # The sqrt of the absolute derivative of the mean loss with respect to the sampled epoch indexes
    d_mean_loss_to_dt = np.sqrt(np.abs(np.diff(loss_train_mean) / np.diff(epochsInds[:])))
var_loss = []
#Go over the epochs
for epoch_index in range(num_of_epochs):
        # The loss for this specific epoch
        current_loss = loss_train[epoch_index, :]
        # The derivative between the batches
        current_loss_dt = np.diff(current_loss)
        # The mean of this derivative
        average_loss = np.mean(current_loss_dt)
        current_loss_minus_mean = current_loss_dt - average_loss
        # The covariance between the batches
cov_mat = np.dot(current_loss_minus_mean[:, None], current_loss_minus_mean[None, :])
# The trace of the cov matrix
trac_cov = np.trace(cov_mat)
var_loss.append(trac_cov)
return np.array(var_loss), d_mean_loss_to_dt
def plot_loss_figures(data_array, fig_size=(14, 10), xlim = None, y_lim = None):
    epochsInds = (data_array['params']['epochsInds']).astype(int)
dif_var_loss, diff_mean_loss = calc_mean_var_loss(epochsInds, np.squeeze(data_array['loss_train']))
f_log1, (axes_log1) = plt.subplots(1, 1, figsize=fig_size)
    axes_log1.set_title('The mean and the variance (between the batches) of the derivative of the train error')
axes_log1.plot(epochsInds[1:], np.array(diff_mean_loss), color='green', label = 'Mean of the derivative of the error')
axes_log1.plot(epochsInds[:], (dif_var_loss), color='blue', label='Variance of the derivative of the error' )
axes_log1.set_xscale('log')
axes_log1.set_yscale('log')
axes_log1.set_xlabel('#Epochs')
axes_log1.legend()
f_log1, (axes_log1) = plt.subplots(1, 1, figsize=fig_size)
title = r'The SNR of the error derivatives'
    p_5, = axes_log1.plot(epochsInds[1:], np.array(diff_mean_loss) / np.sqrt(dif_var_loss[1:]),
                          linewidth=3, color='green')
plt_ut.update_axes(axes_log1, f_log1, '#Epochs', 'SNR',[0, 7000], [0.001, 1], title, 'log', 'log',
[1, 10, 100, 1000, 7000], [0.001, 0.01, 0.1, 1])
#axes_log1.plot(epochsInds[:], (dif_var_loss), color='blue', label='Variance of the derivative of the error')
    axes_log1.legend([r'$\frac{|d\,Error|}{\mathrm{STD}\left(Error\right)}$'], loc='best', fontsize=21)
def create_figs(fig_size = (14, 10)):
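    """Create the figures and axes for the norms, the gradients (log scale), the SNR and the
    Gaussian-channel (log(1 + SNR)) plots."""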
f_norms, (axes_norms) = plt.subplots(1, 1, figsize=fig_size)
f_log, (axes_log) = plt.subplots(1, 1, figsize=fig_size)
f_snr, (axes_snr) = plt.subplots(1, 1, figsize=fig_size)
f_gaus, (axes_gaus) = plt.subplots(1, 1, figsize=fig_size)
f_log.subplots_adjust(left=0.097, bottom=0.11, right=.95, top=0.95, wspace=0.03, hspace=0.03)
return f_log, axes_log, f_norms, axes_norms, f_snr, axes_snr,axes_gaus, f_gaus
def flatted_graidnet(gradients, epoch_number, layer):
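    """Flatten the gradients of a single layer in a single epoch into an array of shape
    (#batches, #weights-going-out-of-the-layer) - one flattened gradient vector per mini-batch."""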
gradients_list = []
    # For each mini-batch, collect the gradients of all the weights that go out of the layer's neurons
    for i in range(len(gradients[epoch_number])):
        current_list_inner = []
        for neuron in range(len(gradients[epoch_number][0][layer])):
c_n = gradients[epoch_number][i][layer][neuron]
current_list_inner.extend(c_n)
gradients_list.append(current_list_inner)
    gradients_list = np.array(gradients_list)
    gradients_list = np.reshape(gradients_list, (gradients_list.shape[0], -1))
return gradients_list
def calc_weights_norms(ws, num_of_layer = 6):
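    """Return the L2 norm of the weights of each layer. The real flattening of ws is commented
    out below, so this currently returns a placeholder norm of 1 for every layer."""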
layer_l2_norm = []
for i in range(num_of_layer):
flatted_list = [1]
"""
if type(ws_in[epoch_number][layer_index]) is list:
flatted_list = [item for sublist in ws_in[epoch_number][layer_index] for item in sublist]
else:
flatted_list = ws_in[epoch_number][layer_index]
"""
layer_l2_norm.append(LA.norm(np.array(flatted_list)))
# plot the norms
#axes_norms.plot(epochsInds[:], np.array(layer_l2_norm), linewidth=2, color=colors[layer_index])
return layer_l2_norm
def extract_array(data, name):
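    """Collect data[j, k][name] for every entry of the 2-D object array data into a nested list."""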
results = [[data[j,k][name] for k in range(data.shape[1])] for j in range(data.shape[0])]
return results
def load_from_memory(data_array):
plot_gradients(data_array=data_array)
if __name__ == '__main__':
directory = './figures/'
if not os.path.exists(directory):
os.makedirs(directory)
root = tk.Tk()
root.withdraw()
file_path = filedialog.askopenfilename()
    str_names = [['/'.join(file_path.split('/')[:-1]) + '/']]
plot_gradients(str_names, figures_dir=directory)