# idnns/plots/plot_gradients.py
"""Calculate and plot the gradients (the mean and std of the mini-batch gradients) of the trained network."""
import matplotlib
# matplotlib.use("TkAgg")
matplotlib.use("Agg")
import numpy as np
import idnns.plots.utils as plt_ut
import matplotlib.pyplot as plt
import tkinter as tk
from tkinter import filedialog
from numpy import linalg as LA
import os
import sys
import statsmodels
colors = ['red', 'c', 'blue', 'green', 'orange', 'purple']
def plot_gradients(name_s=None, data_array=None, figures_dir=''):
"""Plot the gradients and the means of the networks over the batches"""
    if data_array is None:
        data_array = plt_ut.get_data(name_s[0][0])
    # plot_loss_figures(data_array, xlim=[0, 7000])
    # The gradients have dimensions #epochs x #batches x #layers
conv_net = False
if conv_net:
        gradients = data_array['var_grad_val'][0][0][0]
        num_of_epochs = len(gradients)
        num_of_batchs = len(gradients[0])
        num_of_layers = len(gradients[0][0]) // 2
else:
        gradients = np.squeeze(data_array['var_grad_val'])[:, :, :]
        num_of_epochs, num_of_batchs, num_of_layers = gradients.shape
        num_of_layers = int(num_of_layers / 2)
    # The indexes where we sampled the network
    print(np.squeeze(data_array['var_grad_val'])[0, 0].shape)
    epochsInds = (data_array['params']['epochsInds']).astype(int)
#The norms of the layers
#l2_norm = calc_weights_norms(data_array['ws_all'])
f_log, axes_log, f_norms, axes_norms, f_snr, axes_snr,axes_gaus, f_gaus = create_figs()
    p_1, p_0, sum_y, p_3, p_4 = [], [], [], [], []
    # Go over the layers
    cov_traces_all, means_all = [], []
    all_gradients = np.empty(num_of_layers, dtype=object)
#print np.squeeze(data_array['var_grad_val']).shape
    for layer in range(num_of_layers):
        # The traces of the covariance matrices and the means of the gradients for the current layer
# Go over all the epochs
cov_traces, means = [], []
gradinets_layer = []
for epoch_index in range(num_of_epochs):
            # The gradients have dimensions #batches x #output-weights, where #output-weights is
            # the number of weights that go out of the layer
gradients_current_epoch_and_layer = flatted_graidnet(gradients, epoch_index, 2 * layer)
gradinets_layer.append(gradients_current_epoch_and_layer)
num_of_output_weights = gradients_current_epoch_and_layer.shape[1]
            # The average vector over the batches - a vector of size #output-weights
            # (the mean gradient vector of the mini-batches)
            average_vec = np.mean(gradients_current_epoch_and_layer, axis=0)
            # The L2 norm of the mean vector (sqrt of the sum of its squared components) - a scalar
gradients_mean = LA.norm(average_vec)
            # The covariance matrix has size #output-weights x #output-weights
sum_covs_mat = np.zeros((average_vec.shape[0], average_vec.shape[0]))
            # Go over all the batch vectors (each of size #output-weights), subtract the mean
            # (over the batches) and accumulate the covariance matrix
for batch_index in range(num_of_batchs):
# This is in the size of the #output weights
current_vec = gradients_current_epoch_and_layer[batch_index, :] - average_vec
                # The outer product of the centred gradient vector of this specific batch with itself
                # gives a matrix of size #output-weights x #output-weights
current_cov_mat = np.einsum('i,j', current_vec, current_vec)
#current_cov_mat = np.dot(current_vec[:,None], current_vec[None,:])
                # Sum the covariance matrices over the batches
                sum_covs_mat += current_cov_mat
            # Take the mean of the covariance matrices over the batches - size #output-weights x #output-weights
mean_cov_mat = sum_covs_mat / num_of_batchs
            # The sqrt of the trace of the mean covariance matrix - a scalar
trac_cov = np.sqrt(np.trace(mean_cov_mat))
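            # Equivalent vectorised form (illustrative sketch, kept as a comment only):
            #   centered = gradients_current_epoch_and_layer - average_vec
            #   trac_cov == np.sqrt(np.trace(centered.T.dot(centered)) / num_of_batchs)
            #            == LA.norm(centered) / np.sqrt(num_of_batchs)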
means.append(gradients_mean)
cov_traces.append(trac_cov)
"""
#cov_traces.append(np.mean(grad_norms))
#means.append(norm_mean)
c_var,c_mean,total_w = [], [],[]
for neuron in range(len(grad[epoch_number][0][layer])/10):
gradients_list = np.array([grad[epoch_number][i][layer][neuron] for i in range(len(grad[epoch_number]))])
total_w.extend(gradients_list.T)
grad_norms1 = np.std(gradients_list, axis=0)
mean_la = np.abs(np.mean(np.array(gradients_list), axis=0))
#mean_la = LA.norm(gradients_list, axis=0)
c_var.append(np.mean(grad_norms1))
c_mean.append(np.mean(mean_la))
#total_w is in size [num_of_total_weights, num of epochs]
total_w = np.array(total_w)
#c_var.append(np.sqrt(np.trace(np.cov(np.array(total_w).T)))/np.cov(np.array(total_w).T).shape[0])
#print np.mean(c_mean).shape
means.append(np.mean(c_mean))
cov_traces.append(np.mean(c_var))
"""
gradinets_layer = np.array(gradinets_layer)
        all_gradients[layer] = gradinets_layer
cov_traces_all.append(np.array(cov_traces))
means_all.append(np.array(means))
#The cov_traces and the means are vectors with the dimension of # epochs
#y_var = np.array(cov_traces)
#y_mean = np.array(means)
y_var = np.sum(cov_traces_all, axis=0)
y_mean = np.sum(means_all, axis=0)
snr = y_mean**2 / y_var
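        # The 'gaus' figure below plots log(1 + SNR), which is proportional to the capacity of a
        # Gaussian channel with this signal-to-noise ratio.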
#Plot the gradients and the means
        c_p1, = axes_log.plot(epochsInds[:], np.sqrt(y_var), markersize=4, linewidth=4, color=colors[layer],
                              linestyle=':', markeredgewidth=0.2, dashes=[4, 4])
        c_p0, = axes_log.plot(epochsInds[:], y_mean, linewidth=2, color=colors[layer])
        c_p3, = axes_snr.plot(epochsInds[:], snr, linewidth=2, color=colors[layer])
        c_p4, = axes_gaus.plot(epochsInds[:], np.log(1 + snr), linewidth=2, color=colors[layer])
#For the legend
        p_0.append(c_p0), p_1.append(c_p1), sum_y.append(y_mean), p_3.append(c_p3), p_4.append(c_p4)
plt_ut.adjust_axes(axes_log, axes_norms, p_0, p_1, f_log, f_norms, axes_snr, f_snr, p_3, axes_gaus, f_gaus, p_4,
directory_name=figures_dir)
plt.show()
def calc_mean_var_loss(epochsInds,loss_train):
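    """Estimate, per epoch, the variance (between the batches) of the loss derivative, and the
    square root of the derivative of the mean training loss between the sampled epochs."""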
    # loss_train has dimensions #epochs x #batches
    num_of_epochs = loss_train.shape[0]
    # Average over the batches
    loss_train_mean = np.mean(loss_train, 1)
    # The sqrt of the absolute derivative of the mean loss with respect to the sampled epoch indexes
    d_mean_loss_to_dt = np.sqrt(np.abs(np.diff(loss_train_mean) / np.diff(epochsInds[:])))
var_loss = []
#Go over the epochs
for epoch_index in range(num_of_epochs):
        # The loss for this specific epoch
        current_loss = loss_train[epoch_index, :]
        # The derivative between the batches
        current_loss_dt = np.diff(current_loss)
        # The mean of this derivative
        average_loss = np.mean(current_loss_dt)
        current_loss_minus_mean = current_loss_dt - average_loss
        # The covariance between the batches
cov_mat = np.dot(current_loss_minus_mean[:, None], current_loss_minus_mean[None, :])
# The trace of the cov matrix
trac_cov = np.trace(cov_mat)
var_loss.append(trac_cov)
return np.array(var_loss), d_mean_loss_to_dt
def plot_loss_figures(data_array, fig_size=(14, 10), xlim = None, y_lim = None):
    epochsInds = (data_array['params']['epochsInds']).astype(int)
dif_var_loss, diff_mean_loss = calc_mean_var_loss(epochsInds, np.squeeze(data_array['loss_train']))
f_log1, (axes_log1) = plt.subplots(1, 1, figsize=fig_size)
    axes_log1.set_title('The mean and the variance (between the batches) of the derivative of the train error')
axes_log1.plot(epochsInds[1:], np.array(diff_mean_loss), color='green', label = 'Mean of the derivative of the error')
axes_log1.plot(epochsInds[:], (dif_var_loss), color='blue', label='Variance of the derivative of the error' )
axes_log1.set_xscale('log')
axes_log1.set_yscale('log')
axes_log1.set_xlabel('#Epochs')
axes_log1.legend()
f_log1, (axes_log1) = plt.subplots(1, 1, figsize=fig_size)
title = r'The SNR of the error derivatives'
    p_5, = axes_log1.plot(epochsInds[1:], np.array(diff_mean_loss) / np.sqrt(dif_var_loss[1:]),
                          linewidth=3, color='green')
plt_ut.update_axes(axes_log1, f_log1, '#Epochs', 'SNR',[0, 7000], [0.001, 1], title, 'log', 'log',
[1, 10, 100, 1000, 7000], [0.001, 0.01, 0.1, 1])
#axes_log1.plot(epochsInds[:], (dif_var_loss), color='blue', label='Variance of the derivative of the error')
    axes_log1.legend([r'$\frac{|d\,Error|}{\mathrm{STD}\left(Error\right)}$'], loc='best', fontsize=21)
def create_figs(fig_size = (14, 10)):
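    """Create the figures and axes for the norms, the gradients (log scale), the SNR and the
    Gaussian-channel (log(1 + SNR)) plots."""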
f_norms, (axes_norms) = plt.subplots(1, 1, figsize=fig_size)
f_log, (axes_log) = plt.subplots(1, 1, figsize=fig_size)
f_snr, (axes_snr) = plt.subplots(1, 1, figsize=fig_size)
f_gaus, (axes_gaus) = plt.subplots(1, 1, figsize=fig_size)
f_log.subplots_adjust(left=0.097, bottom=0.11, right=.95, top=0.95, wspace=0.03, hspace=0.03)
return f_log, axes_log, f_norms, axes_norms, f_snr, axes_snr,axes_gaus, f_gaus
def flatted_graidnet(gradients, epoch_number, layer):
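    """Flatten the gradients of a single layer in a single epoch into an array of shape
    (#batches, #weights-going-out-of-the-layer) - one flattened gradient vector per mini-batch."""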
gradients_list = []
    # For each mini-batch, collect the gradients of all the weights that go out of the layer's neurons
    for i in range(len(gradients[epoch_number])):
        current_list_inner = []
        for neuron in range(len(gradients[epoch_number][0][layer])):
c_n = gradients[epoch_number][i][layer][neuron]
current_list_inner.extend(c_n)
gradients_list.append(current_list_inner)
    gradients_list = np.array(gradients_list)
    gradients_list = np.reshape(gradients_list, (gradients_list.shape[0], -1))
return gradients_list
def calc_weights_norms(ws, num_of_layer = 6):
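    """Return the L2 norm of the weights of each layer. The real flattening of ws is commented
    out below, so this currently returns a placeholder norm of 1 for every layer."""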
layer_l2_norm = []
for i in range(num_of_layer):
flatted_list = [1]
"""
if type(ws_in[epoch_number][layer_index]) is list:
flatted_list = [item for sublist in ws_in[epoch_number][layer_index] for item in sublist]
else:
flatted_list = ws_in[epoch_number][layer_index]
"""
layer_l2_norm.append(LA.norm(np.array(flatted_list)))
# plot the norms
#axes_norms.plot(epochsInds[:], np.array(layer_l2_norm), linewidth=2, color=colors[layer_index])
return layer_l2_norm
def extract_array(data, name):
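    """Collect data[j, k][name] for every entry of the 2-D object array data into a nested list."""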
results = [[data[j,k][name] for k in range(data.shape[1])] for j in range(data.shape[0])]
return results
def load_from_memory(data_array):
plot_gradients(data_array=data_array)
if __name__ == '__main__':
directory = './figures/'
if not os.path.exists(directory):
os.makedirs(directory)
root = tk.Tk()
root.withdraw()
file_path = filedialog.askopenfilename()
    str_names = [['/'.join(file_path.split('/')[:-1]) + '/']]
plot_gradients(str_names, figures_dir=directory)