File size: 8,517 Bytes
96283ff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
import _pickle as cPickle
import multiprocessing
import os
import sys
import numpy as np
from joblib import Parallel, delayed
import idnns.networks.network as nn
from idnns.information import information_process  as inn
from idnns.plots import plot_figures as plt_fig
from idnns.networks import network_paramters as netp
from idnns.networks.utils import load_data

# from idnns.network import utils
# import idnns.plots.plot_gradients as plt_grads
# Number of CPUs reported by the OS; used as the joblib worker count in
# informationNetwork.run_network and recorded under params['CPUs'].
NUM_CORES = multiprocessing.cpu_count()


class informationNetwork():
	"""Hold a grid of networks, train them and collect their information measures.

	Networks are indexed by (repeat, architecture, train-sample size). Per-network
	objects are kept in nested lists addressed as ``[repeat][architecture][sample]``
	and per-epoch scalars in arrays of shape
	``(repeats, architectures, samples, len(epochs_indexes))``.

	NOTE: ``__init__`` stores the ``calc_information`` and ``calc_information_last``
	flags from ``args`` as instance attributes, which shadow the methods of the
	same names on instances. To invoke those methods call them through the class,
	e.g. ``informationNetwork.calc_information(net)``.
	"""

	def __init__(self, rand_int=0, num_of_samples=None, args=None):
		"""Read the run configuration, load the data and allocate result containers.

		Args:
			rand_int: value stored on the instance and forwarded to the training call.
			num_of_samples: only used to build the default arguments when ``args``
				is None (passed to ``netp.get_default_parser``).
			args: parsed run arguments; when None the defaults from
				``netp.get_default_parser(num_of_samples)`` are used.
		"""
		if args is None:
			args = netp.get_default_parser(num_of_samples)
		self.cov_net = args.cov_net
		# NOTE: the next attribute (and calc_information_last below) shadows the
		# method of the same name on this instance.
		self.calc_information = args.calc_information
		self.run_in_parallel = args.run_in_parallel
		self.num_ephocs = args.num_ephocs
		self.learning_rate = args.learning_rate
		self.batch_size = args.batch_size
		self.activation_function = args.activation_function
		self.interval_accuracy_display = args.interval_accuracy_display
		self.save_grads = args.save_grads
		self.num_of_repeats = args.num_of_repeats
		self.calc_information_last = args.calc_information_last
		self.num_of_bins = args.num_of_bins
		self.interval_information_display = args.interval_information_display
		self.save_ws = args.save_ws
		self.name = args.data_dir + args.data_name
		# The architectures of the networks
		self.layers_sizes = netp.select_network_arch(args.net_type)
		# The percents of the train data samples. linspace(1, 100, 199) has a
		# step of 0.5, so position 2 * x - 2 holds the value x.
		self.train_samples = np.linspace(1, 100, 199)[[[x * 2 - 2 for x in index] for index in args.inds]]
		# The epoch indexes for which the information is calculated: unique
		# integers spaced logarithmically (base 2) between start_samples and
		# num_ephocs, shifted to be zero-based.
		self.epochs_indexes = np.unique(
			np.logspace(np.log2(args.start_samples), np.log2(args.num_ephocs), args.num_of_samples, dtype=int,
				base=2)) - 1
		# Load the data
		self.data_sets = load_data(self.name, args.random_labels)
		# Nested lists for per-network objects, addressed as [repeat][architecture][sample]
		self.ws, self.grads, self.information, self.models, self.names, self.networks, self.weights = [
			[[[[None] for k in range(len(self.train_samples))] for j in range(len(self.layers_sizes))]
				for i in range(self.num_of_repeats)] for _ in range(7)]

		# Arrays for the per-epoch measures
		self.loss_train, self.loss_test, self.test_error, self.train_error, self.l1_norms, self.l2_norms = \
			[np.zeros((self.num_of_repeats, len(self.layers_sizes), len(self.train_samples), len(self.epochs_indexes)))
				for _ in range(6)]

		# The insertion order of these keys determines the directory name built below.
		params = {'sampleLen': len(self.train_samples),
			'nDistSmpls': args.nDistSmpls,
			'layerSizes': ",".join(str(i) for i in self.layers_sizes[0]), 'nEpoch': args.num_ephocs, 'batch': args.batch_size,
			'nRepeats': args.num_of_repeats, 'nEpochInds': len(self.epochs_indexes),
			'LastEpochsInds': self.epochs_indexes[-1], 'DataName': args.data_name,
			'lr': args.learning_rate}

		self.name_to_save = args.name + "_" + "_".join([str(i) + '=' + str(params[i]) for i in params])
		# These entries are added after the name is built so they are not part of it.
		params['train_samples'] = self.train_samples
		params['CPUs'] = NUM_CORES
		params['directory'] = self.name_to_save
		params['epochsInds'] = self.epochs_indexes
		self.params = params
		self.rand_int = rand_int
		# Whether the networks were already trained
		self.traind_network = False

	def save_data(self, parent_dir='jobs/', file_to_save='data.pickle'):
		"""Pickle the collected measures and parameters to a file.

		The file is written to ``<cwd>/<parent_dir><params['directory']>/<file_to_save>``
		and the directory is remembered in ``self.dir_saved``.

		Args:
			parent_dir: directory (relative to the current working directory,
				with a trailing slash) under which the run directory is created.
			file_to_save: name of the pickle file.
		"""
		directory = '{0}/{1}{2}/'.format(os.getcwd(), parent_dir, self.params['directory'])

		data = {'information': self.information,
			'test_error': self.test_error, 'train_error': self.train_error, 'var_grad_val': self.grads,
			'loss_test': self.loss_test, 'loss_train': self.loss_train, 'params': self.params,
			'l1_norms': self.l1_norms, 'weights': self.weights, 'ws': self.ws}

		# exist_ok avoids failing when the directory appears between a check and the creation
		os.makedirs(directory, exist_ok=True)
		self.dir_saved = directory
		with open(self.dir_saved + file_to_save, 'wb') as f:
			cPickle.dump(data, f, protocol=2)

	def _grid_indices(self):
		"""Return every (sample, architecture, repeat) index triple in training order."""
		return [(i, j, k)
			for i in range(len(self.train_samples))
			for j in range(len(self.layers_sizes))
			for k in range(self.num_of_repeats)]

	def _store_results(self, results):
		"""Copy the flat list of per-network results into the result containers.

		``results`` must be ordered like ``_grid_indices()``. Each entry is indexed
		with the keys 'ws', 'weights', 'information', 'gradients',
		'test_prediction', 'train_prediction', 'loss_test' and 'loss_train'.
		"""
		for index, (i, j, k) in enumerate(self._grid_indices()):
			current_network = results[index]
			self.networks[k][j][i] = current_network
			self.ws[k][j][i] = current_network['ws']
			self.weights[k][j][i] = current_network['weights']
			self.information[k][j][i] = current_network['information']
			# Indexed [repeat][architecture][sample] like every other container
			self.grads[k][j][i] = current_network['gradients']
			self.test_error[k, j, i, :] = current_network['test_prediction']
			self.train_error[k, j, i, :] = current_network['train_prediction']
			self.loss_test[k, j, i, :] = current_network['loss_test']
			self.loss_train[k, j, i, :] = current_network['loss_train']

	def run_network(self):
		"""Train every network of the grid and store the measures it returns.

		When ``run_in_parallel`` is set, the networks are trained with joblib on
		``NUM_CORES`` workers through ``nn.train_network``; otherwise they are
		trained one after another through ``nn.train_and_calc_inf_network``,
		which additionally receives the (sample, architecture, repeat) indices.
		"""
		# Arguments shared by both training entry points, after the layer sizes
		shared_args = (self.num_ephocs, self.learning_rate, self.batch_size,
			self.epochs_indexes, self.save_grads, self.data_sets,
			self.activation_function,
			self.train_samples, self.interval_accuracy_display,
			self.calc_information,
			self.calc_information_last, self.num_of_bins,
			self.interval_information_display, self.save_ws, self.rand_int,
			self.cov_net)
		grid = self._grid_indices()
		if self.run_in_parallel:
			results = Parallel(n_jobs=NUM_CORES)(
				delayed(nn.train_network)(self.layers_sizes[j], *shared_args)
				for i, j, k in grid)
		else:
			results = [nn.train_and_calc_inf_network(i, j, k, self.layers_sizes[j], *shared_args)
				for i, j, k in grid]

		# Extract all the measures and organize them
		self._store_results(results)
		self.traind_network = True

	def print_information(self):
		"""Print the run parameters, skipping the 'epochsInds' entry."""
		for key, value in self.params.items():
			if key != 'epochsInds':
				print(key, value)

	def calc_information(self):
		"""Calculate the information of the networks for all the epochs.

		Only possible when the networks were trained and the activation values
		were saved (``save_ws``); otherwise a message is printed. The result
		replaces ``self.information`` with an array holding one entry per
		network, ordered by sample, then architecture, then repeat.

		NOTE: on instances this method is shadowed by the ``calc_information``
		attribute set in ``__init__``; call it through the class.
		"""
		if self.traind_network and self.save_ws:
			self.information = np.array(
				[inn.get_information(self.ws[k][j][i], self.data_sets.data, self.data_sets.labels,
					self.num_of_bins, self.interval_information_display, self.epochs_indexes)
					for i, j, k in self._grid_indices()])
		else:
			print('Cant calculate the infomration of the networks!!!')

	def calc_information_last(self):
		"""Calculate the information of the last saved epoch of every network.

		Returns:
			An array with one entry per network (ordered by sample, then
			architecture, then repeat), or None when the networks were not
			trained or the activation values were not saved.

		NOTE: on instances this method is shadowed by the
		``calc_information_last`` attribute set in ``__init__``; call it through
		the class.
		"""
		if self.traind_network and self.save_ws:
			return np.array(
				[inn.get_information([self.ws[k][j][i][-1]], self.data_sets.data, self.data_sets.labels,
					self.num_of_bins, self.interval_information_display,
					self.epochs_indexes)
					for i, j, k in self._grid_indices()])

	def plot_network(self):
		"""Plot the figures of the saved run.

		Reads ``self.dir_saved``, which is only set by ``save_data``, so the data
		has to be saved before plotting.
		"""
		str_names = [[self.dir_saved]]
		mode = 2
		save_name = 'figure'
		plt_fig.plot_figures(str_names, mode, save_name)