# AI-Midterm-IDNN — idnns/information/information_process.py
# Author: Ashley Goluoglu (files adapted from pantelis/IDNN, commit 96283ff)
'''
Calculate the information in the network.
Can be computed by the full distribution rule (for a small network) or by different approximation methods.
'''
import multiprocessing
import warnings
import numpy as np
import tensorflow as tf
import idnns.information.information_utilities as inf_ut
from idnns.networks import model as mo
from idnns.information.mutual_info_estimation import calc_varitional_information
warnings.filterwarnings("ignore")
from joblib import Parallel, delayed
NUM_CORES = multiprocessing.cpu_count()
from idnns.information.mutual_information_calculation import *
import numpy as np
def calc_information_for_layer(data, bins, unique_inverse_x, unique_inverse_y, pxs, pys1):
    """Discretize one layer's activations and return (I(X;T), I(T;Y)).

    Each activation is snapped to a bin edge, every distinct binned row is
    treated as one value of the representation T, and the empirical p(t) is
    handed to calc_information_from_mat together with p(x) and p(y).
    """
    edges = bins.astype(np.float32)
    # Snap every activation onto the edge of the bin it falls into.
    flat = np.squeeze(data.reshape(1, -1))
    binned = edges[np.digitize(flat, edges) - 1].reshape(len(data), -1)
    # View each binned row as one opaque byte string so np.unique works row-wise.
    row_bytes = np.ascontiguousarray(binned).view(
        np.dtype((np.void, binned.dtype.itemsize * binned.shape[1])))
    unique_ts, _, counts = np.unique(
        row_bytes, return_index=False, return_inverse=True, return_counts=True)
    p_ts = counts / float(sum(counts))
    px_arr, py_arr = np.asarray(pxs).T, np.asarray(pys1).T
    local_IXT, local_ITY = calc_information_from_mat(px_arr, py_arr, p_ts, binned,
                                                     unique_inverse_x, unique_inverse_y,
                                                     unique_ts)
    return local_IXT, local_ITY
def calc_information_sampling(data, bins, pys1, pxs, label, b, b1, len_unique_a, p_YgX, unique_inverse_x,
                              unique_inverse_y, calc_DKL=False):
    """Bin one layer's activations and return (I(X;T), I(T;Y)).

    Parameters:
        data: activations of a single layer, one row per example.
        bins: 1-D array of bin edges used to discretize the activations.
        pys1, pxs: empirical marginals p(y) and p(x).
        label: one-hot labels, one row per example.
        b, b1, len_unique_a, p_YgX: row-identity helpers from extract_probs;
            only used by the optional DKL computation.
        unique_inverse_x, unique_inverse_y: index of each example's unique
            input row / label row.
        calc_DKL: if True, additionally compute per-T conditionals and a KL
            term (see NOTE below).
    """
    # Fix: removed the unused local `num_of_bins` and stale commented-out code.
    bins = bins.astype(np.float32)
    # Snap every activation to the bin edge below it.
    digitized = bins[np.digitize(np.squeeze(data.reshape(1, -1)), bins) - 1].reshape(len(data), -1)
    # View each digitized row as one opaque byte string so np.unique works row-wise.
    b2 = np.ascontiguousarray(digitized).view(
        np.dtype((np.void, digitized.dtype.itemsize * digitized.shape[1])))
    unique_array, unique_inverse_t, unique_counts = \
        np.unique(b2, return_index=False, return_inverse=True, return_counts=True)
    p_ts = unique_counts / float(sum(unique_counts))
    PXs, PYs = np.asarray(pxs).T, np.asarray(pys1).T
    if calc_DKL:
        # NOTE(review): the quantities below are computed but neither returned
        # nor stored anywhere — confirm whether they were meant to be part of
        # the return value before relying on calc_DKL=True.
        pxy_given_T = np.array(
            [calc_probs(i, unique_inverse_t, label, b, b1, len_unique_a) for i in range(0, len(unique_array))]
        )
        p_XgT = np.vstack(pxy_given_T[:, 0])
        p_YgT = pxy_given_T[:, 1]
        p_YgT = np.vstack(p_YgT).T
        DKL_YgX_YgT = np.sum([inf_ut.KL(c_p_YgX, p_YgT.T) for c_p_YgX in p_YgX.T], axis=0)
        H_Xgt = np.nansum(p_XgT * np.log2(p_XgT), axis=1)
    local_IXT, local_ITY = calc_information_from_mat(PXs, PYs, p_ts, digitized, unique_inverse_x, unique_inverse_y,
                                                     unique_array)
    return local_IXT, local_ITY
def calc_information_for_layer_with_other(data, bins, unique_inverse_x, unique_inverse_y, label,
                                          b, b1, len_unique_a, pxs, p_YgX, pys1,
                                          percent_of_sampling=50):
    """Compute (I(X;T), I(T;Y)) for one layer, optionally re-estimating on a
    random subset of its neurons (columns of `data`).

    Returns:
        dict with keys 'local_IXT' and 'local_ITY' holding the estimates
        computed on the FULL layer.
    """
    local_IXT, local_ITY = calc_information_sampling(data, bins, pys1, pxs, label, b, b1,
                                                     len_unique_a, p_YgX, unique_inverse_x,
                                                     unique_inverse_y)
    if percent_of_sampling != 100:
        # Fix: the random index draw used to happen unconditionally, wasting
        # work (and an RNG call) even when no sub-sampling was requested.
        number_of_indexs = int(data.shape[1] * (1. / 100 * percent_of_sampling))
        indexs_of_sampls = np.random.choice(data.shape[1], number_of_indexs, replace=False)
        sampled_data = data[:, indexs_of_sampls]
        # NOTE(review): the sampled estimates below are computed but discarded —
        # confirm whether they should also be returned in the params dict.
        sampled_local_IXT, sampled_local_ITY = calc_information_sampling(
            sampled_data, bins, pys1, pxs, label, b, b1, len_unique_a, p_YgX, unique_inverse_x, unique_inverse_y)
    params = {}
    params['local_IXT'] = local_IXT
    params['local_ITY'] = local_ITY
    return params
def calc_by_sampling_neurons(ws_iter_index, num_of_samples, label, sigma, bins, pxs):
    """Estimate per-layer information after Gaussian-perturbing each example.

    Every activation row is replaced by `num_of_samples` draws from an
    isotropic Gaussian centered on it (covariance sigma * I), the labels and
    example ids are replicated to match, and the layer information is computed
    on the enlarged sample.
    """
    per_layer_results = []
    for layer_data in ws_iter_index:
        n_examples, n_units = layer_data.shape
        expanded = np.zeros((num_of_samples * n_examples, n_units))
        expanded_labels = np.zeros((num_of_samples * label.shape[0], label.shape[1]))
        origin_ids = np.zeros((num_of_samples * n_examples, 2))
        for row in range(n_examples):
            block = slice(num_of_samples * row, num_of_samples * (row + 1))
            cov = np.eye(n_units) * sigma
            expanded[block, :] = np.random.multivariate_normal(layer_data[row, :], cov, num_of_samples)
            expanded_labels[block, :] = label[row, :]
            # First column records which original example each draw came from.
            origin_ids[block, 0] = row
        # Row-wise uniqueness via the void-view trick.
        id_bytes = np.ascontiguousarray(origin_ids).view(
            np.dtype((np.void, origin_ids.dtype.itemsize * origin_ids.shape[1])))
        _, _, inverse_x, _ = np.unique(id_bytes, return_index=True,
                                       return_inverse=True, return_counts=True)
        label_bytes = np.ascontiguousarray(expanded_labels).view(
            np.dtype((np.void, expanded_labels.dtype.itemsize * expanded_labels.shape[1])))
        _, _, inverse_y, counts_y = np.unique(label_bytes, return_index=True,
                                              return_inverse=True, return_counts=True)
        pys1 = counts_y / float(np.sum(counts_y))
        per_layer_results.append(
            calc_information_for_layer(data=expanded, bins=bins, unique_inverse_x=inverse_x,
                                       unique_inverse_y=inverse_y, pxs=pxs, pys1=pys1))
    return np.array(per_layer_results)
def calc_information_for_epoch(iter_index, interval_information_display, ws_iter_index, bins, unique_inverse_x,
                               unique_inverse_y, label, b, b1,
                               len_unique_a, pys, pxs, py_x, pys1, model_path, input_size, layerSize,
                               calc_vartional_information=False, calc_information_by_sampling=False,
                               calc_full_and_vartional=False, calc_regular_information=True, num_of_samples=100,
                               sigma=0.5, ss=[], ks=[]):
    """Calculate the information for all the layers for a specific epoch.

    Exactly one mode is honored, checked in this order:
    calc_full_and_vartional, calc_vartional_information,
    calc_information_by_sampling, calc_regular_information.

    NOTE: ss and ks are mutable default arguments; they are only indexed,
    never mutated, so the shared-default pitfall is latent here.
    """
    np.random.seed(None)
    if calc_full_and_vartional:
        # Variational information per layer.
        params_vartional = [
            calc_varitional_information(ws_iter_index[i], label, model_path, i, len(ws_iter_index) - 1, iter_index,
                                        input_size, layerSize, ss[i], pys, ks[i], search_sigma=False) for i in
            range(len(ws_iter_index))]
        # Full plug-in information per layer.
        params_original = np.array(
            [calc_information_for_layer_with_other(data=ws_iter_index[i], bins=bins, unique_inverse_x=unique_inverse_x,
                                                   unique_inverse_y=unique_inverse_y, label=label,
                                                   b=b, b1=b1, len_unique_a=len_unique_a, pxs=pxs,
                                                   p_YgX=py_x, pys1=pys1)
             for i in range(len(ws_iter_index))])
        # Merge the variational estimates into the plug-in result dicts.
        params = []
        for i in range(len(ws_iter_index)):
            current_params = params_original[i]
            current_params_vartional = params_vartional[i]
            current_params['IXT_vartional'] = current_params_vartional['local_IXT']
            current_params['ITY_vartional'] = current_params_vartional['local_ITY']
            params.append(current_params)
    elif calc_vartional_information:
        params = [
            calc_varitional_information(ws_iter_index[i], label, model_path, i, len(ws_iter_index) - 1, iter_index,
                                        input_size, layerSize, ss[i], pys, ks[i], search_sigma=True) for i in
            range(len(ws_iter_index))]
    elif calc_information_by_sampling:
        # Information over a Gaussian-perturbed sample of the examples.
        # Bug fix: the result used to be assigned to a misspelled name
        # ('parmas'), leaving 'params' undefined and raising NameError at the
        # return statement whenever this branch ran.
        params = calc_by_sampling_neurons(ws_iter_index=ws_iter_index, num_of_samples=num_of_samples, label=label,
                                          sigma=sigma, bins=bins, pxs=pxs)
    elif calc_regular_information:
        params = np.array(
            [calc_information_for_layer_with_other(data=ws_iter_index[i], bins=bins, unique_inverse_x=unique_inverse_x,
                                                   unique_inverse_y=unique_inverse_y, label=label,
                                                   b=b, b1=b1, len_unique_a=len_unique_a, pxs=pxs,
                                                   p_YgX=py_x, pys1=pys1)
             for i in range(len(ws_iter_index))])
    if np.mod(iter_index, interval_information_display) == 0:
        print('Calculated The information of epoch number - {0}'.format(iter_index))
    return params
def extract_probs(label, x):
    """Compute the empirical probabilities p(x), p(y), p(y|x) of the data.

    Returns (pys, pys1, p_y_given_x, b1, b, unique_a, unique_inverse_x,
    unique_inverse_y, pxs), where b/b1 are void-dtype views used to identify
    equal rows and unique_inverse_* map each example to its unique row id.
    """
    # p(y) from the column means of the one-hot labels.
    pys = np.sum(label, axis=0) / float(label.shape[0])
    # Collapse each input row into a single void scalar so np.unique is row-wise.
    b = np.ascontiguousarray(x).view(np.dtype((np.void, x.dtype.itemsize * x.shape[1])))
    unique_array, unique_indices, unique_inverse_x, unique_counts = np.unique(
        b, return_index=True, return_inverse=True, return_counts=True)
    unique_a = x[unique_indices]
    b1 = np.ascontiguousarray(unique_a).view(
        np.dtype((np.void, unique_a.dtype.itemsize * unique_a.shape[1])))
    pxs = unique_counts / float(np.sum(unique_counts))
    # p(y | x): average the labels over every occurrence of each unique x row.
    p_y_given_x = np.array([np.mean(label[unique_inverse_x == i, :], axis=0)
                            for i in range(len(unique_array))]).T
    # Same row-wise uniqueness trick on the labels to get p(y) per unique label.
    b_y = np.ascontiguousarray(label).view(
        np.dtype((np.void, label.dtype.itemsize * label.shape[1])))
    unique_array_y, unique_indices_y, unique_inverse_y, unique_counts_y = np.unique(
        b_y, return_index=True, return_inverse=True, return_counts=True)
    pys1 = unique_counts_y / float(np.sum(unique_counts_y))
    return pys, pys1, p_y_given_x, b1, b, unique_a, unique_inverse_x, unique_inverse_y, pxs
def get_information(ws, x, label, num_of_bins, interval_information_display, model, layerSize,
                    calc_parallel=True, py_hats=0):
    """Calculate the information for the network for all the epochs and all the layers"""
    print('Start calculating the information...')
    bins = np.linspace(-1, 1, num_of_bins)
    label = np.array(label).astype(float)
    (pys, pys1, p_y_given_x, b1, b, unique_a,
     unique_inverse_x, unique_inverse_y, pxs) = extract_probs(label, x)

    def epoch_args(i):
        # Per-epoch positional arguments for calc_information_for_epoch.
        return (i, interval_information_display, ws[i], bins, unique_inverse_x,
                unique_inverse_y, label, b, b1, len(unique_a), pys, pxs,
                p_y_given_x, pys1, model.save_file, x.shape[1], layerSize)

    if calc_parallel:
        # Fan the epochs out over all available cores.
        results = Parallel(n_jobs=NUM_CORES)(
            delayed(calc_information_for_epoch)(*epoch_args(i)) for i in range(len(ws)))
    else:
        results = [calc_information_for_epoch(*epoch_args(i)) for i in range(len(ws))]
    return np.array(results)