''' Calculate the information in the network Can be by the full distribution rule (for small netowrk) or bt diffrenet approximation method ''' import multiprocessing import warnings import numpy as np import tensorflow as tf import idnns.information.information_utilities as inf_ut from idnns.networks import model as mo from idnns.information.mutual_info_estimation import calc_varitional_information warnings.filterwarnings("ignore") from joblib import Parallel, delayed NUM_CORES = multiprocessing.cpu_count() from idnns.information.mutual_information_calculation import * import numpy as np def calc_information_for_layer(data, bins, unique_inverse_x, unique_inverse_y, pxs, pys1): bins = bins.astype(np.float32) digitized = bins[np.digitize(np.squeeze(data.reshape(1, -1)), bins) - 1].reshape(len(data), -1) b2 = np.ascontiguousarray(digitized).view( np.dtype((np.void, digitized.dtype.itemsize * digitized.shape[1]))) unique_array, unique_inverse_t, unique_counts = \ np.unique(b2, return_index=False, return_inverse=True, return_counts=True) p_ts = unique_counts / float(sum(unique_counts)) PXs, PYs = np.asarray(pxs).T, np.asarray(pys1).T local_IXT, local_ITY = calc_information_from_mat(PXs, PYs, p_ts, digitized, unique_inverse_x, unique_inverse_y, unique_array) return local_IXT, local_ITY def calc_information_sampling(data, bins, pys1, pxs, label, b, b1, len_unique_a, p_YgX, unique_inverse_x, unique_inverse_y, calc_DKL=False): bins = bins.astype(np.float32) num_of_bins = bins.shape[0] # bins = stats.mstats.mquantiles(np.squeeze(data.reshape(1, -1)), np.linspace(0,1, num=num_of_bins)) # hist, bin_edges = np.histogram(np.squeeze(data.reshape(1, -1)), normed=True) digitized = bins[np.digitize(np.squeeze(data.reshape(1, -1)), bins) - 1].reshape(len(data), -1) b2 = np.ascontiguousarray(digitized).view( np.dtype((np.void, digitized.dtype.itemsize * digitized.shape[1]))) unique_array, unique_inverse_t, unique_counts = \ np.unique(b2, return_index=False, return_inverse=True, return_counts=True) p_ts = unique_counts / float(sum(unique_counts)) PXs, PYs = np.asarray(pxs).T, np.asarray(pys1).T if calc_DKL: pxy_given_T = np.array( [calc_probs(i, unique_inverse_t, label, b, b1, len_unique_a) for i in range(0, len(unique_array))] ) p_XgT = np.vstack(pxy_given_T[:, 0]) p_YgT = pxy_given_T[:, 1] p_YgT = np.vstack(p_YgT).T DKL_YgX_YgT = np.sum([inf_ut.KL(c_p_YgX, p_YgT.T) for c_p_YgX in p_YgX.T], axis=0) H_Xgt = np.nansum(p_XgT * np.log2(p_XgT), axis=1) local_IXT, local_ITY = calc_information_from_mat(PXs, PYs, p_ts, digitized, unique_inverse_x, unique_inverse_y, unique_array) return local_IXT, local_ITY def calc_information_for_layer_with_other(data, bins, unique_inverse_x, unique_inverse_y, label, b, b1, len_unique_a, pxs, p_YgX, pys1, percent_of_sampling=50): local_IXT, local_ITY = calc_information_sampling(data, bins, pys1, pxs, label, b, b1, len_unique_a, p_YgX, unique_inverse_x, unique_inverse_y) number_of_indexs = int(data.shape[1] * (1. / 100 * percent_of_sampling)) indexs_of_sampls = np.random.choice(data.shape[1], number_of_indexs, replace=False) if percent_of_sampling != 100: sampled_data = data[:, indexs_of_sampls] sampled_local_IXT, sampled_local_ITY = calc_information_sampling( sampled_data, bins, pys1, pxs, label, b, b1, len_unique_a, p_YgX, unique_inverse_x, unique_inverse_y) params = {} params['local_IXT'] = local_IXT params['local_ITY'] = local_ITY return params def calc_by_sampling_neurons(ws_iter_index, num_of_samples, label, sigma, bins, pxs): iter_infomration = [] for j in range(len(ws_iter_index)): data = ws_iter_index[j] new_data = np.zeros((num_of_samples * data.shape[0], data.shape[1])) labels = np.zeros((num_of_samples * label.shape[0], label.shape[1])) x = np.zeros((num_of_samples * data.shape[0], 2)) for i in range(data.shape[0]): cov_matrix = np.eye(data[i, :].shape[0]) * sigma t_i = np.random.multivariate_normal(data[i, :], cov_matrix, num_of_samples) new_data[num_of_samples * i:(num_of_samples * (i + 1)), :] = t_i labels[num_of_samples * i:(num_of_samples * (i + 1)), :] = label[i, :] x[num_of_samples * i:(num_of_samples * (i + 1)), 0] = i b = np.ascontiguousarray(x).view(np.dtype((np.void, x.dtype.itemsize * x.shape[1]))) unique_array, unique_indices, unique_inverse_x, unique_counts = \ np.unique(b, return_index=True, return_inverse=True, return_counts=True) b_y = np.ascontiguousarray(labels).view(np.dtype((np.void, labels.dtype.itemsize * labels.shape[1]))) unique_array_y, unique_indices_y, unique_inverse_y, unique_counts_y = \ np.unique(b_y, return_index=True, return_inverse=True, return_counts=True) pys1 = unique_counts_y / float(np.sum(unique_counts_y)) iter_infomration.append( calc_information_for_layer(data=new_data, bins=bins, unique_inverse_x=unique_inverse_x, unique_inverse_y=unique_inverse_y, pxs=pxs, pys1=pys1)) params = np.array(iter_infomration) return params def calc_information_for_epoch(iter_index, interval_information_display, ws_iter_index, bins, unique_inverse_x, unique_inverse_y, label, b, b1, len_unique_a, pys, pxs, py_x, pys1, model_path, input_size, layerSize, calc_vartional_information=False, calc_information_by_sampling=False, calc_full_and_vartional=False, calc_regular_information=True, num_of_samples=100, sigma=0.5, ss=[], ks=[]): """Calculate the information for all the layers for specific epoch""" np.random.seed(None) if calc_full_and_vartional: # Vartional information params_vartional = [ calc_varitional_information(ws_iter_index[i], label, model_path, i, len(ws_iter_index) - 1, iter_index, input_size, layerSize, ss[i], pys, ks[i], search_sigma=False) for i in range(len(ws_iter_index))] # Full plug-in infomration params_original = np.array( [calc_information_for_layer_with_other(data=ws_iter_index[i], bins=bins, unique_inverse_x=unique_inverse_x, unique_inverse_y=unique_inverse_y, label=label, b=b, b1=b1, len_unique_a=len_unique_a, pxs=pxs, p_YgX=py_x, pys1=pys1) for i in range(len(ws_iter_index))]) # Combine them params = [] for i in range(len(ws_iter_index)): current_params = params_original[i] current_params_vartional = params_vartional[i] current_params['IXT_vartional'] = current_params_vartional['local_IXT'] current_params['ITY_vartional'] = current_params_vartional['local_ITY'] params.append(current_params) elif calc_vartional_information: params = [ calc_varitional_information(ws_iter_index[i], label, model_path, i, len(ws_iter_index) - 1, iter_index, input_size, layerSize, ss[i], pys, ks[i], search_sigma=True) for i in range(len(ws_iter_index))] # Calc infomration of only subset of the neurons elif calc_information_by_sampling: parmas = calc_by_sampling_neurons(ws_iter_index=ws_iter_index, num_of_samples=num_of_samples, label=label, sigma=sigma, bins=bins, pxs=pxs) elif calc_regular_information: params = np.array( [calc_information_for_layer_with_other(data=ws_iter_index[i], bins=bins, unique_inverse_x=unique_inverse_x, unique_inverse_y=unique_inverse_y, label=label, b=b, b1=b1, len_unique_a=len_unique_a, pxs=pxs, p_YgX=py_x, pys1=pys1) for i in range(len(ws_iter_index))]) if np.mod(iter_index, interval_information_display) == 0: print('Calculated The information of epoch number - {0}'.format(iter_index)) return params def extract_probs(label, x): """calculate the probabilities of the given data and labels p(x), p(y) and (y|x)""" pys = np.sum(label, axis=0) / float(label.shape[0]) b = np.ascontiguousarray(x).view(np.dtype((np.void, x.dtype.itemsize * x.shape[1]))) unique_array, unique_indices, unique_inverse_x, unique_counts = \ np.unique(b, return_index=True, return_inverse=True, return_counts=True) unique_a = x[unique_indices] b1 = np.ascontiguousarray(unique_a).view(np.dtype((np.void, unique_a.dtype.itemsize * unique_a.shape[1]))) pxs = unique_counts / float(np.sum(unique_counts)) p_y_given_x = [] for i in range(0, len(unique_array)): indexs = unique_inverse_x == i py_x_current = np.mean(label[indexs, :], axis=0) p_y_given_x.append(py_x_current) p_y_given_x = np.array(p_y_given_x).T b_y = np.ascontiguousarray(label).view(np.dtype((np.void, label.dtype.itemsize * label.shape[1]))) unique_array_y, unique_indices_y, unique_inverse_y, unique_counts_y = \ np.unique(b_y, return_index=True, return_inverse=True, return_counts=True) pys1 = unique_counts_y / float(np.sum(unique_counts_y)) return pys, pys1, p_y_given_x, b1, b, unique_a, unique_inverse_x, unique_inverse_y, pxs def get_information(ws, x, label, num_of_bins, interval_information_display, model, layerSize, calc_parallel=True, py_hats=0): """Calculate the information for the network for all the epochs and all the layers""" print('Start calculating the information...') bins = np.linspace(-1, 1, num_of_bins) label = np.array(label).astype(float) pys, pys1, p_y_given_x, b1, b, unique_a, unique_inverse_x, unique_inverse_y, pxs = extract_probs(label, x) if calc_parallel: params = np.array(Parallel(n_jobs=NUM_CORES )(delayed(calc_information_for_epoch) (i, interval_information_display, ws[i], bins, unique_inverse_x, unique_inverse_y, label, b, b1, len(unique_a), pys, pxs, p_y_given_x, pys1, model.save_file, x.shape[1], layerSize) for i in range(len(ws)))) else: params = np.array([calc_information_for_epoch (i, interval_information_display, ws[i], bins, unique_inverse_x, unique_inverse_y, label, b, b1, len(unique_a), pys, pxs, p_y_given_x, pys1, model.save_file, x.shape[1], layerSize) for i in range(len(ws))]) return params