"""Calculate and plot the gradients (the mean and std of the mini-batch gradients) of the trained network."""
import os

import matplotlib
# matplotlib.use("TkAgg")
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import numpy as np
from numpy import linalg as LA
import tkinter as tk
from tkinter import filedialog

import idnns.plots.utils as plt_ut

colors = ['red', 'c', 'blue', 'green', 'orange', 'purple']


def plot_gradients(name_s=None, data_array=None, figures_dir=''):
    """Plot the mean and the STD (over the mini-batches) of the gradients of the network."""
    if data_array is None:
        data_array = plt_ut.get_data(name_s[0][0])
    # plot_loss_figures(data_array, xlim=[0, 7000])
    # The gradients array has the dimensions #epochs x #batches x #layers
    conv_net = False
    if conv_net:
        gradients = data_array['var_grad_val'][0][0][0]
        num_of_epochs = len(gradients)
        num_of_batches = len(gradients[0])
        num_of_layers = len(gradients[0][0]) // 2
    else:
        gradients = np.squeeze(data_array['var_grad_val'])[:, :, :]
        num_of_epochs, num_of_batches, num_of_layers = gradients.shape
        num_of_layers = int(num_of_layers / 2)
    # The indexes where we sampled the network
    print(np.squeeze(data_array['var_grad_val'])[0, 0].shape)
    epochsInds = (data_array['params']['epochsInds']).astype(int)
    # The norms of the layers
    # l2_norm = calc_weights_norms(data_array['ws_all'])
    f_log, axes_log, f_norms, axes_norms, f_snr, axes_snr, axes_gaus, f_gaus = create_figs()
    p_1, p_0, sum_y, p_3, p_4 = [], [], [], [], []
    cov_traces_all, means_all = [], []
    all_gradients = np.empty(num_of_layers, dtype=object)
    # Go over the layers
    for layer in range(0, num_of_layers):
        # The traces of the covariance matrices and the means of the gradients for the current layer
        cov_traces, means = [], []
        gradients_layer = []
        # Go over all the epochs
        for epoch_index in range(num_of_epochs):
            # The gradients have the dimensions #batches x #output weights, where #output weights
            # is the number of weights that go out from the layer
            gradients_current_epoch_and_layer = flatted_graidnet(gradients, epoch_index, 2 * layer)
            gradients_layer.append(gradients_current_epoch_and_layer)
            num_of_output_weights = gradients_current_epoch_and_layer.shape[1]
            # The average vector over the batches - a vector of size #output weights
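            # Two scalars are accumulated for this epoch and layer:
            #   gradients_mean = || (1/B) * sum_b g_b ||_2  (L2 norm of the batch-averaged gradient)
            #   trac_cov = sqrt(trace((1/B) * sum_b (g_b - mean)(g_b - mean)^T))  (STD of the gradients)
            # where g_b is the flattened gradient of mini-batch b and B = num_of_batches.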
            average_vec = np.mean(gradients_current_epoch_and_layer, axis=0)
            # The sqrt of the sum over all the weights of the squared gradients - sqrt of AA^T - a number
            gradients_mean = LA.norm(average_vec)
            # The covariance matrix has the size #output weights x #output weights
            sum_covs_mat = np.zeros((average_vec.shape[0], average_vec.shape[0]))
            # Go over all the batch vectors (each of size #output weights), subtract the mean
            # (over the batches) and calculate the covariance matrix
            for batch_index in range(num_of_batches):
                # This vector has the size #output weights
                current_vec = gradients_current_epoch_and_layer[batch_index, :] - average_vec
                # The outer product of the current gradient of the weights (in this specific batch)
                # with its transpose gives a matrix of size #output weights x #output weights
                current_cov_mat = np.einsum('i,j', current_vec, current_vec)
                # current_cov_mat = np.dot(current_vec[:, None], current_vec[None, :])
                # Sum the covariance matrices over the batches
                sum_covs_mat += current_cov_mat
            # Take the mean of the covariance matrix over the batches - size #output weights x #output weights
            mean_cov_mat = sum_covs_mat / num_of_batches
            # The sqrt of the trace of the mean covariance matrix - a number
            trac_cov = np.sqrt(np.trace(mean_cov_mat))
            means.append(gradients_mean)
            cov_traces.append(trac_cov)
        gradients_layer = np.array(gradients_layer)
        all_gradients[layer] = gradients_layer
        cov_traces_all.append(np.array(cov_traces))
        means_all.append(np.array(means))
        # The cov_traces and the means are vectors with the dimension #epochs
        y_var = np.sum(cov_traces_all, axis=0)
        y_mean = np.sum(means_all, axis=0)
        snr = y_mean ** 2 / y_var
        # Plot the gradients and the means
        c_p1, = axes_log.plot(epochsInds[:], np.sqrt(y_var), markersize=4, linewidth=4, color=colors[layer],
                              linestyle=':', markeredgewidth=0.2, dashes=[4, 4])
        c_p0, = axes_log.plot(epochsInds[:], y_mean, linewidth=2, color=colors[layer])
        c_p3, = axes_snr.plot(epochsInds[:], snr, linewidth=2, color=colors[layer])
        c_p4, = axes_gaus.plot(epochsInds[:], np.log(1 + snr), linewidth=2, color=colors[layer])
        # For the legend
        p_0.append(c_p0)
        p_1.append(c_p1)
        sum_y.append(y_mean)
        p_3.append(c_p3)
        p_4.append(c_p4)
    plt_ut.adjust_axes(axes_log, axes_norms, p_0, p_1, f_log, f_norms, axes_snr, f_snr, p_3, axes_gaus,
                       f_gaus, p_4, directory_name=figures_dir)
    plt.show()
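
# The per-epoch statistics accumulated in plot_gradients() can also be computed without the
# explicit loop over the batches. This is a minimal, illustrative sketch (the helper name is
# ours and nothing in this module calls it): `layer_gradients` is assumed to be the
# (#batches, #output weights) array returned by flatted_graidnet() for one epoch and layer.
def _batch_gradient_stats_sketch(layer_gradients):
    """Return (mean, std): the L2 norm of the batch-averaged gradient and the square root
    of the trace of the between-batch covariance matrix."""
    mean_vec = np.mean(layer_gradients, axis=0)
    # trace(Cov) equals the sum of the per-weight variances, so the full
    # (#weights x #weights) covariance matrix never has to be formed
    grad_std = np.sqrt(np.sum(np.var(layer_gradients, axis=0)))
    return LA.norm(mean_vec), grad_std
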
def calc_mean_var_loss(epochsInds, loss_train):
    """Calculate the mean and the variance (between the batches) of the derivative of the training loss."""
    # loss_train has the dimensions #epochs x #batches
    num_of_epochs = loss_train.shape[0]
    # Average over the batches
    loss_train_mean = np.mean(loss_train, 1)
    # The diff divided by the sampled indexes
    d_mean_loss_to_dt = np.sqrt(np.abs(np.diff(loss_train_mean) / np.diff(epochsInds[:])))
    var_loss = []
    # Go over the epochs
    for epoch_index in range(num_of_epochs):
        # The loss of the specific epoch
        current_loss = loss_train[epoch_index, :]
        # The derivative between the batches
        current_loss_dt = np.diff(current_loss)
        # The mean of this derivative
        average_loss = np.mean(current_loss_dt)
        current_loss_minus_mean = current_loss_dt - average_loss
        # The covariance between the batches
        cov_mat = np.dot(current_loss_minus_mean[:, None], current_loss_minus_mean[None, :])
        # The trace of the covariance matrix
        trac_cov = np.trace(cov_mat)
        var_loss.append(trac_cov)
    return np.array(var_loss), d_mean_loss_to_dt


def plot_loss_figures(data_array, fig_size=(14, 10), xlim=None, y_lim=None):
    """Plot the mean, the variance and the SNR of the derivative of the training error."""
    epochsInds = (data_array['params']['epochsInds']).astype(int)
    dif_var_loss, diff_mean_loss = calc_mean_var_loss(epochsInds, np.squeeze(data_array['loss_train']))
    f_log1, (axes_log1) = plt.subplots(1, 1, figsize=fig_size)
    axes_log1.set_title('The mean and the variance (between the batches) of the derivative of the train error')
    axes_log1.plot(epochsInds[1:], np.array(diff_mean_loss), color='green', label='Mean of the derivative of the error')
    axes_log1.plot(epochsInds[:], (dif_var_loss), color='blue', label='Variance of the derivative of the error')
    axes_log1.set_xscale('log')
    axes_log1.set_yscale('log')
    axes_log1.set_xlabel('#Epochs')
    axes_log1.legend()
    f_log1, (axes_log1) = plt.subplots(1, 1, figsize=fig_size)
    title = r'The SNR of the error derivatives'
    p_5, = axes_log1.plot(epochsInds[1:], np.array(diff_mean_loss) / np.sqrt(dif_var_loss[1:]),
                          linewidth=3, color='green')
    plt_ut.update_axes(axes_log1, f_log1, '#Epochs', 'SNR', [0, 7000], [0.001, 1], title, 'log', 'log',
                       [1, 10, 100, 1000, 7000], [0.001, 0.01, 0.1, 1])
    axes_log1.legend([r'$\frac{|d\,Error|}{STD\left(Error\right)}$'], loc='best', fontsize=21)


def create_figs(fig_size=(14, 10)):
    """Create the figures and the axes for the gradients plots."""
    f_norms, (axes_norms) = plt.subplots(1, 1, figsize=fig_size)
    f_log, (axes_log) = plt.subplots(1, 1, figsize=fig_size)
    f_snr, (axes_snr) = plt.subplots(1, 1, figsize=fig_size)
    f_gaus, (axes_gaus) = plt.subplots(1, 1, figsize=fig_size)
    f_log.subplots_adjust(left=0.097, bottom=0.11, right=.95, top=0.95, wspace=0.03, hspace=0.03)
    return f_log, axes_log, f_norms, axes_norms, f_snr, axes_snr, axes_gaus, f_gaus


def flatted_graidnet(gradients, epoch_number, layer):
    """Flatten the gradients of one layer in one epoch into a #batches x #output weights array."""
    gradients_list = []
    # Go over the batches; for each batch collect the weights of all the neurons in the layer
    for i in range(len(gradients[epoch_number])):
        current_list_inner = []
        for neuron in range(len(gradients[epoch_number][0][layer])):
            c_n = gradients[epoch_number][i][layer][neuron]
            current_list_inner.extend(c_n)
        gradients_list.append(current_list_inner)
    gradients_list = np.array(gradients_list)
    gradients_list = np.reshape(gradients_list, (gradients_list.shape[0], -1))
    return gradients_list


def calc_weights_norms(ws, num_of_layer=6):
    """Calculate the L2 norms of the weights of the layers."""
    layer_l2_norm = []
    for i in range(num_of_layer):
        flatted_list = [1]
        layer_l2_norm.append(LA.norm(np.array(flatted_list)))
    return layer_l2_norm


def extract_array(data, name):
    """Extract the field `name` from every entry of a 2-D object array of result dicts."""
    results = [[data[j, k][name] for k in range(data.shape[1])] for j in range(data.shape[0])]
    return results
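
# A minimal sketch (the helper name is ours; nothing in this module calls it) of the quantity
# that plot_loss_figures() draws - the per-epoch SNR of the loss derivative. It uses the fact
# that trace(v v^T) == sum(v_i ** 2), so the explicit outer product built in calc_mean_var_loss()
# is not needed. `loss_train` is assumed to have the shape #epochs x #batches and `epochsInds`
# to hold the sampled epoch indexes, as above.
def _loss_derivative_snr_sketch(epochsInds, loss_train):
    # |d mean(loss) / d epoch|, computed between consecutive sampled epochs
    d_mean_loss = np.sqrt(np.abs(np.diff(np.mean(loss_train, axis=1)) / np.diff(epochsInds)))
    # Sum of the squared deviations of the batch-to-batch loss differences (== the trace above)
    loss_dt = np.diff(loss_train, axis=1)
    var_loss = np.sum((loss_dt - np.mean(loss_dt, axis=1, keepdims=True)) ** 2, axis=1)
    return d_mean_loss / np.sqrt(var_loss[1:])
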
def load_from_memory(data_array):
    """Plot the gradients of a run that is already loaded into memory."""
    plot_gradients(data_array=data_array)


if __name__ == '__main__':
    directory = './figures/'
    if not os.path.exists(directory):
        os.makedirs(directory)
    # Ask the user for a file inside the directory of the saved run
    root = tk.Tk()
    root.withdraw()
    file_path = filedialog.askopenfilename()
    str_names = [['/'.join(file_path.split('/')[:-1]) + '/']]
    plot_gradients(str_names, figures_dir=directory)
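
# Example (hypothetical paths): to plot a previously saved run without the tkinter file dialog,
# the data can be loaded directly, assuming plt_ut.get_data() accepts the run directory exactly
# as it is passed above:
#
#     data = plt_ut.get_data('./jobs/my_run/')
#     plot_gradients(data_array=data, figures_dir='./figures/')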