Spaces:

aagoluoglu
/

AI-Midterm-IDNN

Build error

AI-Midterm-IDNN / idnns /information /mutual_info_estimation.py

Ashley Goluoglu

add files from pantelis/IDNN

96283ff over 1 year ago

7.44 kB

	import numpy as np
	from scipy.optimize import minimize
	import sys
	import tensorflow as tf
	from idnns.networks import model as mo
	import contextlib
	import idnns.information.entropy_estimators as ee

	@contextlib.contextmanager
	def printoptions(*args, **kwargs):
	    """Temporarily override NumPy's print options inside a ``with`` block.

	    All positional and keyword arguments are forwarded unchanged to
	    ``np.set_printoptions``. The options that were active on entry are
	    captured first and re-applied on exit, including when the body of the
	    ``with`` statement raises.
	    """
	    original = np.get_printoptions()
	    np.set_printoptions(*args, **kwargs)
	    try:
	        yield
	    finally:
	        # Always restore, so a failure inside the block cannot leak the
	        # temporary formatting into the rest of the program.
	        np.set_printoptions(**original)


	def optimiaze_func(s, diff_mat, d, N):
	    """Objective minimised over the kernel width ``s``.

	    For every column of ``diff_mat`` a Gaussian-shaped kernel
	    ``exp(-diff_mat / (2 s^2)) / (sqrt(2 pi) * (s^2)^(d/2))`` is averaged
	    over the other ``N - 1`` rows (the diagonal, i.e. each point paired
	    with itself, is zeroed out first). The function returns the negated sum
	    of the base-2 logarithms of those averages, so minimising it maximises
	    the summed log-density.

	    Args:
	        s: kernel width (a scalar, or the length-1 array that
	            ``scipy.optimize.minimize`` passes in).
	        diff_mat: square matrix of pairwise distances; it is not modified.
	        d: dimensionality used in the normalisation exponent.
	        N: number of points (``diff_mat`` is expected to be N x N).

	    Returns:
	        The negated sum of log2 leave-one-out kernel averages.
	    """
	    variance = s ** 2
	    norm_const = 1. / (np.sqrt(2. * np.pi) * variance ** (d / 2.))
	    kernel = norm_const * np.exp(-diff_mat / (2. * variance))
	    # Exclude each point's contribution to its own density estimate.
	    np.fill_diagonal(kernel, 0)
	    loo_density = (1. / (N - 1)) * np.sum(kernel, axis=0)
	    log_likelihood = np.sum(np.log2(loo_density), axis=0)
	    return -log_likelihood


	def calc_all_sigams(data, sigmas):
	    """Estimate I(X;T) for each noise level in ``sigmas``.

	    ``data`` is processed in consecutive batches of 128 rows; rows beyond
	    the last full multiple of 128 are not used. For every batch the
	    pairwise Euclidean distance matrix is built, a kernel width ``eta`` is
	    fitted by a short Nelder-Mead run on ``optimiaze_func`` (at most 6
	    iterations, starting from 0.2), and a per-batch estimate is accumulated.

	    Args:
	        data: 2-D array, one sample per row.
	        sigmas: iterable of noise standard deviations to evaluate.

	    Returns:
	        A list with one accumulated estimate per entry of ``sigmas``. Each
	        entry has the shape of ``res.x`` from the optimiser when at least
	        one batch was processed, and is the scalar 0.0 otherwise.
	    """
	    batch_size = 128
	    batch_points = np.rint(np.arange(0, data.shape[0] + 1, batch_size)).astype(dtype=np.int32)
	    num_of_rand = min(800, data.shape[1])
	    I_XT = []
	    for sigma in sigmas:
	        I_XT_temp = 0
	        for start, stop in zip(batch_points[:-1], batch_points[1:]):
	            new_data = data[start:stop, :]
	            # The drawn indices are not used, but the draw is kept so that
	            # the global NumPy random stream advances exactly as before.
	            np.random.randint(0, new_data.shape[1], num_of_rand)
	            N = new_data.shape[0]
	            d = new_data.shape[1]
	            diff_mat = np.linalg.norm(new_data[:, np.newaxis, :] - new_data, axis=2)
	            s0 = 0.2
	            # TODO: add leave-one-out validation.
	            # NOTE(review): recent SciPy releases name this tolerance
	            # 'xatol'; confirm 'xtol' is still honoured by the installed
	            # version (with maxiter=6 it is unlikely to matter).
	            res = minimize(optimiaze_func, s0, args=(diff_mat, d, N), method='nelder-mead',
	                           options={'xtol': 1e-8, 'disp': False, 'maxiter': 6})
	            eta = res.x
	            diff_mat0 = - 0.5 * (diff_mat / (sigma ** 2 + eta ** 2))
	            diff_mat1 = np.sum(np.exp(diff_mat0), axis=0)
	            diff_mat2 = -(1.0 / N) * np.sum(np.log2((1.0 / N) * diff_mat1))
	            I_XT_temp += diff_mat2 - d * np.log2((sigma ** 2) / (eta ** 2 + sigma ** 2))
	        # NOTE(review): this divides by the number of batch boundaries,
	        # which is one more than the number of batches summed above --
	        # confirm whether len(batch_points) - 1 was intended.
	        I_XT_temp /= len(batch_points)
	        I_XT.append(I_XT_temp)
	    sys.stdout.flush()
	    return I_XT


	def _batch_boundaries(num_rows, batch_size):
	    """Return row offsets 0, batch_size, 2*batch_size, ... ending at num_rows."""
	    points = np.rint(np.arange(0, num_rows + 1, batch_size)).astype(dtype=np.int32)
	    if num_rows not in points:
	        # Add the tail so the final, shorter batch is not dropped.
	        points = np.append(points, [num_rows])
	    return points


	def estimate_IY_by_network(data, labels, from_layer=0):
	    """Estimate I(T;Y) from per-class probabilities and return it with an accuracy.

	    When ``from_layer < 5`` a fresh ``mo.Model`` decoder (hidden sizes
	    400/100/50) is built in its own TensorFlow graph, trained on
	    ``(data, labels)`` for 50 epochs in batches of 51 rows, and then
	    evaluated in batches of 256 rows to obtain the predicted class
	    probabilities and the per-batch accuracy. Otherwise ``data`` itself is
	    used as the class-probability matrix and the accuracy list is ``[0]``.

	    The estimate is ``H(Y) + mean(log2 p(true class))``, where non-finite
	    log terms are ignored and a negative result is clamped to 0.

	    Args:
	        data: array with one sample per row.
	        labels: 2-D array with one row per sample; the arg-max of each row
	            is taken as the true class (presumably one-hot -- verify
	            against the caller).
	        from_layer: layer index forwarded to ``mo.Model``; values of 5 or
	            more skip the decoder entirely.

	    Returns:
	        ``(I_TY, acc)``: the clamped information estimate and the mean of
	        the collected batch accuracies.
	    """
	    if len(data.shape) > 2:
	        input_size = data.shape[1:]
	    else:
	        input_size = data.shape[1]
	    p_y_given_t_i = data
	    acc_all = [0]
	    if from_layer < 5:
	        acc_all = []
	        cov_net = 4
	        num_of_ephocs = 50
	        g1 = tf.Graph()  # dedicated graph so repeated calls do not share ops
	        with g1.as_default():
	            # For each epoch and each layer the best decoder is fitted from scratch.
	            model = mo.Model(input_size, [400, 100, 50], labels.shape[1], 0.0001, '', cov_net=cov_net,
	                             from_layer=from_layer)
	            # Fetched before the initializer is created, as in the original
	            # statement order.
	            optimizer = model.optimize
	            init = tf.global_variables_initializer()
	        with tf.Session(graph=g1) as sess:
	            sess.run(init)

	            # Training pass.
	            train_points = _batch_boundaries(data.shape[0], 51)
	            for _ in range(num_of_ephocs):
	                for i in range(len(train_points) - 1):
	                    batch_xs = data[train_points[i]:train_points[i + 1], :]
	                    batch_ys = labels[train_points[i]:train_points[i + 1], :]
	                    feed_dict = {model.x: batch_xs, model.labels: batch_ys}
	                    if cov_net == 1:
	                        feed_dict[model.drouput] = 0.5
	                    optimizer.run(feed_dict)

	            # Evaluation pass: collect predictions and accuracy per batch.
	            eval_points = _batch_boundaries(data.shape[0], 256)
	            predictions = []
	            for i in range(len(eval_points) - 1):
	                batch_xs = data[eval_points[i]:eval_points[i + 1], :]
	                batch_ys = labels[eval_points[i]:eval_points[i + 1], :]
	                feed_dict = {model.x: batch_xs, model.labels: batch_ys}
	                if cov_net == 1:
	                    feed_dict[model.drouput] = 1
	                p_y_given_t_i_local, acc = sess.run([model.prediction, model.accuracy],
	                                                    feed_dict=feed_dict)
	                acc_all.append(acc)
	                predictions.append(np.array(p_y_given_t_i_local))
	            # One concatenate at the end instead of re-copying per batch.
	            p_y_given_t_i = np.concatenate(predictions, axis=0) if predictions else []

	    max_indx = len(p_y_given_t_i)
	    labels_cut = labels[:max_indx, :]
	    true_label_index = np.argmax(labels_cut, 1)
	    # log2 of the probability assigned to the true class of every sample.
	    s = np.log2(p_y_given_t_i[np.arange(len(p_y_given_t_i)), true_label_index])
	    I_TY = np.mean(s[np.isfinite(s)])
	    PYs = np.sum(labels_cut, axis=0) / labels_cut.shape[0]
	    # np.spacing(1) keeps log2 finite for classes with zero empirical mass.
	    Hy = np.nansum(-PYs * np.log2(PYs + np.spacing(1)))
	    I_TY = Hy + I_TY
	    I_TY = I_TY if I_TY >= 0 else 0
	    acc = np.mean(acc_all)
	    sys.stdout.flush()
	    return I_TY, acc


	def calc_varitional_information(data, labels, model_path, layer_numer, num_of_layers, epoch_index, input_size,
	                                layerSize, sigma, pys, ks,
	                                search_sigma=False, estimate_y_by_network=False):
	    """Calculate an estimate of the information using variational IB.

	    In its current form only I(T;Y) is estimated, via
	    ``estimate_IY_by_network``; the I(X;T) estimate is switched off and
	    reported as 0. One summary line is printed per call.

	    Args:
	        data: layer activations, one sample along the first axis.
	        labels: label matrix passed through to ``estimate_IY_by_network``.
	        layer_numer: layer index; forwarded as ``from_layer`` and printed.
	        epoch_index: epoch number, used only in the printed summary.
	        sigma: noise level used for I(X;T) when ``search_sigma`` is false.
	        search_sigma: when true, the fixed value 0.2 is used instead of
	            ``sigma``.
	        estimate_y_by_network: accepted for compatibility but overridden
	            to True inside the function.
	        model_path, num_of_layers, input_size, layerSize, pys, ks: accepted
	            for interface compatibility; not read by this function.

	    Returns:
	        dict with keys ``'local_IXT'`` and ``'local_ITY'``.
	    """
	    # Assumption carried over from the original code: the decoder-network
	    # estimate is always used, whatever the caller passed.
	    estimate_y_by_network = True
	    data_x = data.reshape(data.shape[0], -1)
	    sigmas = [0.2] if search_sigma else [sigma]

	    # The kernel-based I(X;T) estimate is disabled; flip this flag to
	    # re-enable it.
	    estimate_ixt = False
	    I_XT = calc_all_sigams(data_x, sigmas) if estimate_ixt else 0

	    if estimate_y_by_network:
	        I_TY, acc = estimate_IY_by_network(data, labels, from_layer=layer_numer)
	    else:
	        # Define acc as well so the summary line below cannot hit a NameError.
	        I_TY, acc = 0, 0

	    # NOTE(review): the label 'I(X;Y)' in the message is filled with I_TY;
	    # the text is left unchanged in case log consumers depend on it.
	    with printoptions(precision=3, suppress=True, formatter={'float': '{: 0.3f}'.format}):
	        print('[{0}:{1}] - I(X;T) - {2}, I(X;Y) - {3}, accuracy - {4}'.format(epoch_index, layer_numer,
	                                                                            np.array(I_XT).flatten(), I_TY, acc))
	    sys.stdout.flush()

	    return {'local_IXT': I_XT, 'local_ITY': I_TY}

	def estimate_Information(Xs, Ys, Ts):
	    """Estimate the mutual information of ``Ts`` with ``Xs`` and with ``Ys``.

	    Both estimates are delegated to ``mi`` from
	    ``idnns.information.entropy_estimators``.

	    NOTE(review): the earlier docstring described this as an estimate
	    "from missing data based on k-means clustering"; that is not visible
	    in this function -- confirm against the ``mi`` implementation.

	    Returns:
	        ``(estimate_IXT, estimate_IYT)``: ``mi(Xs, Ts)`` and ``mi(Ys, Ts)``.
	    """
	    estimate_IXT = ee.mi(Xs, Ts)
	    estimate_IYT = ee.mi(Ys, Ts)
	    return estimate_IXT, estimate_IYT