'Calculate and plot the gradients (the mean and std of the mini-batch gradients) of the trained network' | |
import matplotlib | |
# matplotlib.use("TkAgg") | |
matplotlib.use("Agg") | |
import numpy as np | |
import idnns.plots.utils as plt_ut | |
import matplotlib.pyplot as plt | |
import tkinter as tk | |
from tkinter import filedialog | |
from numpy import linalg as LA | |
import os | |
import sys | |
import statsmodels | |
colors = ['red', 'c', 'blue', 'green', 'orange', 'purple'] | |
def plot_gradients(name_s=None, data_array=None, figures_dir=''):
    """Plot the mean and the std of the mini-batch gradients of the network.

    For every layer and every sampled epoch the gradients of all mini-batches
    are flattened to a (#batches X #weights) matrix. From it two numbers are
    derived: the L2 norm of the mean gradient and the square root of the trace
    of the gradient covariance. The per-layer curves are summed over the layers
    processed so far and drawn on the log, SNR and log(1 + SNR) axes.

    Args:
        name_s: Nested sequence; ``name_s[0][0]`` is handed to
            ``plt_ut.get_data`` when ``data_array`` is not given.
        data_array: Already loaded results. The keys read here are
            ``'var_grad_val'`` and ``['params']['epochsInds']``.
        figures_dir: Forwarded to ``plt_ut.adjust_axes`` as ``directory_name``.
    """
    if data_array is None:
        data_array = plt_ut.get_data(name_s[0][0])
    # The gradients - the dimensions are #epochs X #batches X #layers
    conv_net = False
    if conv_net:
        gradients = data_array['var_grad_val'][0][0][0]
        num_of_epochs = len(gradients)
        # Integer division: the value is used as a range() bound below.
        num_of_layers = len(gradients[0][0]) // 2
    else:
        gradients = np.squeeze(data_array['var_grad_val'])[:, :, :]
        num_of_epochs, _, num_of_layers = gradients.shape
        num_of_layers = num_of_layers // 2
    # Debug output kept from the original implementation.
    print(np.squeeze(data_array['var_grad_val'])[0, 0].shape)
    # The indexes (epochs) where the network was sampled.
    # Builtin int replaces the np.int alias, which current NumPy no longer has.
    epochsInds = (data_array['params']['epochsInds']).astype(int)
    f_log, axes_log, f_norms, axes_norms, f_snr, axes_snr, axes_gaus, f_gaus = create_figs()
    # Line handles collected for the legends built in plt_ut.adjust_axes.
    p_0, p_1, p_3, p_4 = [], [], [], []
    cov_traces_all, means_all = [], []
    # NOTE(review): the per-layer plotting is assumed to live inside this loop
    # (it indexes colors by layer and fills the legend lists) - confirm against
    # the upstream file, the indentation was lost in this copy.
    for layer in range(num_of_layers):
        cov_traces, means = [], []
        for epoch_index in range(num_of_epochs):
            # #batches X #weights; only every second entry of the layer axis is
            # used (presumably the weights, with the biases in between - verify).
            batch_gradients = flatted_graidnet(gradients, epoch_index, 2 * layer)
            mean_norm, std = _gradient_mean_and_std(batch_gradients)
            means.append(mean_norm)
            cov_traces.append(std)
        cov_traces_all.append(np.array(cov_traces))
        means_all.append(np.array(means))
        # Vectors of length #epochs, summed over all the layers handled so far.
        y_var = np.sum(cov_traces_all, axis=0)
        y_mean = np.sum(means_all, axis=0)
        snr = y_mean**2 / y_var
        # Wrap around the palette so networks with more layers than colors still plot.
        color = colors[layer % len(colors)]
        c_p1, = axes_log.plot(epochsInds[:], np.sqrt(y_var), markersize=4, linewidth=4, color=color,
                              linestyle=':', markeredgewidth=0.2, dashes=[4, 4])
        c_p0, = axes_log.plot(epochsInds[:], y_mean, linewidth=2, color=color)
        c_p3, = axes_snr.plot(epochsInds[:], snr, linewidth=2, color=color)
        c_p4, = axes_gaus.plot(epochsInds[:], np.log(1 + snr), linewidth=2, color=color)
        p_0.append(c_p0)
        p_1.append(c_p1)
        p_3.append(c_p3)
        p_4.append(c_p4)
    plt_ut.adjust_axes(axes_log, axes_norms, p_0, p_1, f_log, f_norms, axes_snr, f_snr, p_3, axes_gaus, f_gaus, p_4,
                       directory_name=figures_dir)


def _gradient_mean_and_std(batch_gradients):
    """Return (norm of the mean gradient, sqrt of the covariance trace) over the batches."""
    # Mean vector over the batches - one entry per weight.
    average_vec = np.mean(batch_gradients, axis=0)
    centered = batch_gradients - average_vec
    # trace(mean_b(c_b c_b^T)) equals mean_b(||c_b||^2), so the #weights X #weights
    # covariance matrix never has to be materialised.
    trace_cov = np.sum(np.square(centered), dtype=np.float64) / batch_gradients.shape[0]
    return LA.norm(average_vec), np.sqrt(trace_cov)
def calc_mean_var_loss(epochsInds, loss_train):
    """Return the spread and the mean rate of change of the training loss.

    Args:
        epochsInds: The sampled epoch indexes, one per row of ``loss_train``.
        loss_train: Losses with dimensions #epochs X #batches.

    Returns:
        A pair ``(var_loss, d_mean_loss_to_dt)``:
        ``var_loss`` holds, for every epoch, the sum of squared deviations of the
        batch-to-batch loss differences from their mean (the trace of the outer
        product of the centered differences; it is not divided by the count).
        ``d_mean_loss_to_dt`` is ``sqrt(|d mean_loss / d epoch|)`` between
        consecutive sampled epochs, so it has one entry fewer than ``var_loss``.
    """
    loss_train = np.asarray(loss_train)
    # Average over the batches
    loss_train_mean = np.mean(loss_train, 1)
    # The diff divided by the distance between the sampled indexes
    d_mean_loss_to_dt = np.sqrt(np.abs(np.diff(loss_train_mean) / np.diff(epochsInds[:])))
    # The derivative between consecutive batches, for all the epochs at once
    batch_diffs = np.diff(loss_train, axis=1)
    centered = batch_diffs - np.mean(batch_diffs, axis=1, keepdims=True)
    # trace(v v^T) == sum(v ** 2): no need to build the outer-product matrix
    var_loss = np.sum(centered ** 2, axis=1)
    return var_loss, d_mean_loss_to_dt
def plot_loss_figures(data_array, fig_size=(14, 10), xlim=None, y_lim=None):
    """Plot the mean, the spread and the SNR of the train-loss derivative.

    Two figures are created: one with the mean and the variance curves returned
    by ``calc_mean_var_loss`` on log-log axes, and one with their ratio (SNR).

    Args:
        data_array: Loaded results; the keys read here are
            ``['params']['epochsInds']`` and ``'loss_train'``.
        fig_size: Size passed to ``plt.subplots`` for both figures.
        xlim: Value for the fifth argument of ``plt_ut.update_axes`` (presumably
            the x-limits - verify); ``[0, 7000]`` when omitted.
        y_lim: Value for the sixth argument of ``plt_ut.update_axes`` (presumably
            the y-limits - verify); ``[0.001, 1]`` when omitted.
    """
    # Builtin int replaces the np.int alias, which current NumPy no longer has.
    epochsInds = (data_array['params']['epochsInds']).astype(int)
    dif_var_loss, diff_mean_loss = calc_mean_var_loss(epochsInds, np.squeeze(data_array['loss_train']))

    # First figure: mean and variance of the loss derivative.
    f_log1, axes_log1 = plt.subplots(1, 1, figsize=fig_size)
    axes_log1.set_title('The mean and the varince( between the batchs) of the derivative of the train error')
    # The mean curve comes from a diff over the epochs, hence epochsInds[1:].
    axes_log1.plot(epochsInds[1:], np.array(diff_mean_loss), color='green',
                   label='Mean of the derivative of the error')
    axes_log1.plot(epochsInds[:], dif_var_loss, color='blue', label='Variance of the derivative of the error')
    axes_log1.set_xscale('log')
    axes_log1.set_yscale('log')
    axes_log1.set_xlabel('#Epochs')
    axes_log1.legend()

    # Second figure: the SNR of the loss derivative.
    f_snr, axes_snr = plt.subplots(1, 1, figsize=fig_size)
    title = r'The SNR of the error derivatives'
    axes_snr.plot(epochsInds[1:], np.array(diff_mean_loss) / np.sqrt(dif_var_loss[1:]), linewidth=3, color='green')
    # Fall back to the previously hard-coded ranges when the caller gives none.
    x_limits = [0, 7000] if xlim is None else xlim
    y_limits = [0.001, 1] if y_lim is None else y_lim
    plt_ut.update_axes(axes_snr, f_snr, '#Epochs', 'SNR', x_limits, y_limits, title, 'log', 'log',
                       [1, 10, 100, 1000, 7000], [0.001, 0.01, 0.1, 1])
    axes_snr.legend([r'$\frac{|d Error|}{STD\left(Error)\right)}$'], loc='best', fontsize=21)
def create_figs(fig_size = (14, 10)):
    """Create the four single-axes figures used by the gradient plots.

    Returns the figures and axes in the order
    ``f_log, axes_log, f_norms, axes_norms, f_snr, axes_snr, axes_gaus, f_gaus``
    (note that the last pair is axes first, figure second).
    """
    # The figures are created in the order norms, log, snr, gaus.
    (f_norms, axes_norms), (f_log, axes_log), (f_snr, axes_snr), (f_gaus, axes_gaus) = (
        plt.subplots(1, 1, figsize=fig_size) for _ in range(4)
    )
    # Only the log figure gets custom margins.
    margins = dict(left=0.097, bottom=0.11, right=.95, top=0.95, wspace=0.03, hspace=0.03)
    f_log.subplots_adjust(**margins)
    return f_log, axes_log, f_norms, axes_norms, f_snr, axes_snr, axes_gaus, f_gaus
def flatted_graidnet(gradients, epoch_number, layer):
    """Flatten the gradients of one layer at one epoch to a 2-D array.

    Args:
        gradients: Nested sequence/array indexed as
            ``gradients[epoch][batch][layer]``; each entry holds the gradient
            values of that layer for that mini-batch.
        epoch_number: Index of the epoch to read.
        layer: Index into the layer axis.

    Returns:
        Array with dimensions #batches X #values, where every row is the
        row-major flattening of the layer gradient of one mini-batch.
    """
    # np.ravel flattens in row-major order, which is the order the per-neuron
    # loop used to produce, and it also accepts 1-D (bias-like) entries.
    gradients_list = np.array([np.ravel(batch[layer]) for batch in gradients[epoch_number]])
    return np.reshape(gradients_list, (gradients_list.shape[0], -1))
def calc_weights_norms(ws, num_of_layer = 6):
    """Return a placeholder list with one L2 norm per layer.

    ``ws`` is not read at the moment: every layer contributes the norm of the
    constant vector ``[1]``, so the result is ``num_of_layer`` entries of 1.0.
    """
    # The real per-layer flattening of the weights is disabled in this version;
    # a constant vector stands in for the flattened weights of each layer.
    return [LA.norm(np.array([1])) for _ in range(num_of_layer)]
def extract_array(data, name):
    """Collect ``data[j, k][name]`` for every cell of a 2-D container.

    Returns a list of rows (outer index ``j`` over ``data.shape[0]``), each a
    list over ``k`` in ``data.shape[1]``.
    """
    table = []
    for row in range(data.shape[0]):
        cells = []
        for col in range(data.shape[1]):
            cells.append(data[row, col][name])
        table.append(cells)
    return table
def load_from_memory(data_array):
    """Plot the gradients of results that are already loaded in memory.

    Thin wrapper around ``plot_gradients`` that skips loading by name and uses
    the default figures directory.
    """
    plot_gradients(name_s=None, data_array=data_array)
if __name__ == '__main__':
    # Directory handed to plot_gradients as figures_dir.
    directory = './figures/'
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(directory, exist_ok=True)
    # Hide the empty Tk root window so only the file chooser is shown.
    root = tk.Tk()
    root.withdraw()
    file_path = filedialog.askopenfilename()
    root.destroy()
    if not file_path:
        # A cancelled dialog yields an empty value; without this guard the
        # script would go on and try to load results from '/'.
        sys.exit('No file selected.')
    # plot_gradients reads name_s[0][0]: pass the directory of the chosen file
    # (with a trailing '/').
    str_names = [[('/').join(file_path.split('/')[:-1]) + '/']]
    plot_gradients(str_names, figures_dir=directory)