Spaces:
Build error
Build error
import numpy as np | |
from scipy.optimize import minimize | |
import sys | |
import tensorflow as tf | |
from idnns.networks import model as mo | |
import contextlib | |
import idnns.information.entropy_estimators as ee | |
@contextlib.contextmanager
def printoptions(*args, **kwargs):
    """Temporarily apply NumPy print options inside a ``with`` block.

    All positional and keyword arguments are forwarded unchanged to
    ``np.set_printoptions``. The options that were active on entry are
    restored on exit, including when the body of the ``with`` block raises.

    The ``contextlib.contextmanager`` decorator is required: without it this
    is a plain generator function and ``with printoptions(...)`` fails because
    a generator object is not a context manager.
    """
    original = np.get_printoptions()
    np.set_printoptions(*args, **kwargs)
    try:
        yield
    finally:
        # Restore unconditionally so a failure inside the block cannot leak
        # the temporary formatting into the rest of the process.
        np.set_printoptions(**original)
def optimiaze_func(s, diff_mat, d, N):
    """Objective minimised when searching for the kernel width ``s``.

    Each entry of ``diff_mat`` is turned into a kernel weight
    ``exp(-diff_mat / (2 * s**2)) / (sqrt(2 * pi) * (s**2) ** (d / 2))``.
    The diagonal (each point paired with itself) is zeroed, every column is
    summed and scaled by ``1 / (N - 1)``, and the negated sum of the base-2
    logs of those column values is returned.

    Args:
        s: Kernel width (scalar or array, as supplied by the optimiser).
        diff_mat: Square matrix of pairwise differences; it is not modified.
        d: Dimension used in the exponent of the normalising factor.
        N: Number of points; the column sums are scaled by ``1 / (N - 1)``.

    Returns:
        The negated sum of ``log2`` of the scaled column sums.
    """
    variance = s ** 2
    normaliser = 1. / (np.sqrt(2. * np.pi) * variance ** (d / 2.))
    kernel = normaliser * np.exp(-diff_mat / (2. * variance))
    # Leave each point out of its own estimate.
    np.fill_diagonal(kernel, 0)
    column_score = (1. / (N - 1)) * np.sum(kernel, axis=0)
    return -np.sum(np.log2(column_score), axis=0)
def calc_all_sigams(data, sigmas):
    """Compute one batch-averaged information estimate per value in ``sigmas``.

    ``data`` is cut into consecutive batches of 128 rows; rows left over after
    the last full batch are not processed. For every batch the pairwise
    Euclidean distances between rows are computed, a kernel width ``eta`` is
    fitted by minimising ``optimiaze_func`` (Nelder-Mead, at most 6
    iterations, starting from 0.2), and a per-batch estimate is formed from
    ``sigma`` and ``eta``. The per-batch estimates are averaged over the
    batches that were actually processed.

    Args:
        data: 2-D array, one sample per row.
        sigmas: Iterable of noise levels to evaluate.

    Returns:
        A list with one entry per sigma. Entries carry the shape of the
        optimiser result ``res.x``; when ``data`` has fewer than 128 rows no
        batch exists and every entry is ``0.0``.
    """
    batch_size = 128
    batch_points = np.rint(np.arange(0, data.shape[0] + 1, batch_size)).astype(dtype=np.int32)
    # Number of [start, end) slices described by the boundary array.
    num_batches = len(batch_points) - 1
    num_of_rand = min(800, data.shape[1])
    I_XT = []
    for sigma in sigmas:
        I_XT_temp = 0.0
        for i in range(num_batches):
            new_data = data[batch_points[i]:batch_points[i + 1], :]
            # The drawn indices are not used. The call is retained only so
            # the global NumPy random stream consumed by this function stays
            # the same as before.
            np.random.randint(0, new_data.shape[1], num_of_rand)
            N = new_data.shape[0]
            d = new_data.shape[1]
            # Pairwise Euclidean (not squared) distances between batch rows.
            diff_mat = np.linalg.norm(new_data[:, np.newaxis, :] - new_data, axis=2)
            s0 = 0.2
            # TODO: add leave-one-out validation.
            # NOTE(review): newer SciPy releases name the Nelder-Mead
            # tolerance option 'xatol' -- confirm 'xtol' is still honoured by
            # the installed version.
            res = minimize(optimiaze_func, s0, args=(diff_mat, d, N), method='nelder-mead',
                           options={'xtol': 1e-8, 'disp': False, 'maxiter': 6})
            eta = res.x
            diff_mat0 = - 0.5 * (diff_mat / (sigma ** 2 + eta ** 2))
            diff_mat1 = np.sum(np.exp(diff_mat0), axis=0)
            diff_mat2 = -(1.0 / N) * np.sum(np.log2((1.0 / N) * diff_mat1))
            I_XT_temp += diff_mat2 - d * np.log2((sigma ** 2) / (eta ** 2 + sigma ** 2))
        # Average over the batches that were summed. Dividing by
        # len(batch_points) would count one batch too many and bias the mean
        # towards zero.
        if num_batches > 0:
            I_XT_temp /= num_batches
        I_XT.append(I_XT_temp)
    sys.stdout.flush()
    return I_XT
def estimate_IY_by_network(data, labels, from_layer=0):
    """Estimate I(T;Y) from per-sample predictions, optionally via a trained decoder.

    When ``from_layer < 5`` a fresh ``mo.Model`` with hidden sizes
    ``[400, 100, 50]`` is built in its own graph, trained for 50 passes over
    ``data`` in batches of 51 rows, and then evaluated in batches of 256 rows
    to collect ``model.prediction`` and ``model.accuracy``. Otherwise ``data``
    itself is used as the per-sample prediction matrix and the accuracy list
    holds a single 0.

    The estimate is the label entropy plus the mean ``log2`` of the value
    predicted for the true label (non-finite logs are skipped). Any result
    that is not ``>= 0`` is replaced by 0.

    Args:
        data: Array with one sample per row.
        labels: 2-D array with one row per sample; the true class is taken as
            the arg-max of each row (presumably one-hot -- confirm with caller).
        from_layer: Layer index forwarded to ``mo.Model``; values of 5 or more
            skip training entirely.

    Returns:
        Tuple ``(I_TY, acc)`` where ``acc`` is the mean of the collected
        per-batch accuracies.
    """
    # Per-sample input shape handed to the decoder model.
    input_size = data.shape[1:] if len(data.shape) > 2 else data.shape[1]

    def _batch_edges(step):
        # Slice boundaries 0, step, 2 * step, ...; the row count is appended
        # when it is not already a boundary so the trailing rows are kept.
        edges = np.rint(np.arange(0, data.shape[0] + 1, step)).astype(dtype=np.int32)
        if data.shape[0] not in edges:
            edges = np.append(edges, [data.shape[0]])
        return edges

    p_y_given_t_i = data
    acc_all = [0]
    if from_layer < 5:
        acc_all = []
        graph = tf.Graph()
        with graph.as_default():
            # A small decoder network is trained from scratch on every call.
            cov_net = 4
            model = mo.Model(input_size, [400, 100, 50], labels.shape[1], 0.0001, '', cov_net=cov_net,
                             from_layer=from_layer)
            optimizer = model.optimize
            init = tf.global_variables_initializer()
        num_of_ephocs = 50
        train_edges = _batch_edges(51)
        with tf.Session(graph=graph) as sess:
            sess.run(init)
            for _ in range(num_of_ephocs):
                for start, stop in zip(train_edges[:-1], train_edges[1:]):
                    feed_dict = {model.x: data[start:stop, :], model.labels: labels[start:stop, :]}
                    if cov_net == 1:
                        feed_dict[model.drouput] = 0.5
                    optimizer.run(feed_dict)
            # Evaluation pass: gather predictions and accuracy batch by batch.
            eval_edges = _batch_edges(256)
            collected = []
            for start, stop in zip(eval_edges[:-1], eval_edges[1:]):
                feed_dict = {model.x: data[start:stop, :], model.labels: labels[start:stop, :]}
                if cov_net == 1:
                    feed_dict[model.drouput] = 1
                batch_pred, batch_acc = sess.run([model.prediction, model.accuracy],
                                                 feed_dict=feed_dict)
                acc_all.append(batch_acc)
                collected.append(np.array(batch_pred))
            p_y_given_t_i = np.concatenate(collected, axis=0) if collected else []

    num_scored = len(p_y_given_t_i)
    labels_cut = labels[:num_scored, :]
    true_label_index = np.argmax(labels_cut, 1)
    # log2 of the value predicted for each sample's true label.
    log_p_true = np.log2(p_y_given_t_i[np.arange(num_scored), true_label_index])
    mean_log_p = np.mean(log_p_true[np.isfinite(log_p_true)])
    class_freq = np.sum(labels_cut, axis=0) / labels_cut.shape[0]
    # np.spacing(1) keeps log2 finite for classes that never occur.
    label_entropy = np.nansum(-class_freq * np.log2(class_freq + np.spacing(1)))
    I_TY = label_entropy + mean_log_p
    # Written as "not >= 0" so that a NaN estimate is also mapped to 0.
    if not I_TY >= 0:
        I_TY = 0
    acc = np.mean(acc_all)
    sys.stdout.flush()
    return I_TY, acc
def calc_varitional_information(data, labels, model_path, layer_numer, num_of_layers, epoch_index, input_size,
                                layerSize, sigma, pys, ks,
                                search_sigma=False, estimate_y_by_network=False):
    """Estimate the information terms for one layer at one epoch and report them.

    As written, I(X;T) is not computed (it is fixed to 0) and the label
    information is always obtained from ``estimate_IY_by_network``; the
    ``estimate_y_by_network`` argument is overridden to ``True``. One summary
    line is printed to stdout.

    ``model_path``, ``num_of_layers``, ``input_size``, ``layerSize``, ``pys``
    and ``ks`` are accepted but not read by this function.

    Args:
        data: Array of samples; its first axis is the sample axis.
        labels: Label matrix forwarded to ``estimate_IY_by_network``.
        layer_numer: Layer index, forwarded as ``from_layer`` and printed.
        epoch_index: Epoch index, used only in the printed line.
        sigma: Noise level used when ``search_sigma`` is false.
        search_sigma: When true the sigma list is ``[0.2]`` instead of
            ``[sigma]``.
        estimate_y_by_network: Ignored; always treated as ``True``.

    Returns:
        Dict with keys ``'local_IXT'`` and ``'local_ITY'``.
    """
    # Assumption hard-wired here: the decoder-based estimate always runs.
    estimate_y_by_network = True
    flattened = data.reshape(data.shape[0], -1)
    sigmas = [0.2] if search_sigma else [sigma]

    # The I(X;T) estimate is switched off; flip this flag to re-enable it.
    compute_ixt = False
    I_XT = calc_all_sigams(flattened, sigmas) if compute_ixt else 0

    if not estimate_y_by_network:
        I_TY = 0
    else:
        I_TY, acc = estimate_IY_by_network(data, labels, from_layer=layer_numer)

    report = '[{0}:{1}] - I(X;T) - {2}, I(X;Y) - {3}, accuracy - {4}'
    with printoptions(precision=3, suppress=True, formatter={'float': '{: 0.3f}'.format}):
        # Format inside the block so array values pick up the print options.
        print(report.format(epoch_index, layer_numer, np.array(I_XT).flatten(), I_TY, acc))
    sys.stdout.flush()
    return {'local_IXT': I_XT, 'local_ITY': I_TY}
def estimate_Information(Xs, Ys, Ts):
    """Return ``(I(X;T), I(Y;T))`` as estimated by ``ee.mi``.

    ``ee.mi`` is called first on ``(Xs, Ts)`` and then on ``(Ys, Ts)``; both
    results are passed back unchanged.
    """
    return ee.mi(Xs, Ts), ee.mi(Ys, Ts)