File size: 5,908 Bytes
96283ff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import numpy as np
import scipy.io as sio
import os
import sys
import tensorflow as tf
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

# TF2 doesn't include the mnist module in tensorflow.examples.tutorials
# Use tf.keras.datasets.mnist instead
def load_data(name, random_labels=False):
    """Load a dataset by name.

    name - dataset name; a last path component of 'MNIST' selects the Keras
           MNIST loader, anything else is read from '<name>.mat' located
           next to the running script (sys.argv[0])
    random_labels - True to replace the labels with uniformly random one-hot labels
    return an object with .data (samples x features) and .labels (one-hot,
    samples x classes), both NumPy arrays"""
    print('Loading Data...')
    C = type('type_C', (object,), {})
    data_sets = C()
    if name.split('/')[-1] == 'MNIST':
        (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
        # Flatten the 28x28 images and scale pixel values to [0, 1].
        data_sets.data = np.concatenate((x_train, x_test), axis=0).reshape((-1, 28 * 28)) / 255.0  # Normalize
        # BUG FIX: tf.one_hot returns a Tensor — symbolic under the
        # tf.disable_v2_behavior() call at module import — so np.concatenate
        # over two such tensors does not produce a NumPy label array.
        # Build the one-hot encoding directly in NumPy instead.
        all_labels = np.concatenate((y_train, y_test), axis=0)
        data_sets.labels = np.eye(10)[all_labels]
    else:
        d = sio.loadmat(os.path.join(os.path.dirname(sys.argv[0]), name + '.mat'))
        F = d['F']
        y = d['y']
        data_sets = C()
        data_sets.data = F
        # y is a row vector of binary labels; stacking [y, 1-y] and
        # transposing yields a two-class one-hot matrix.
        data_sets.labels = np.squeeze(np.concatenate((y[None, :], 1 - y[None, :]), axis=0).T)
    # If we want to assign random labels to the data
    if random_labels:
        labels = np.zeros(data_sets.labels.shape)
        # Pick one random class index per sample, uniform over the classes.
        labels_index = np.random.randint(low=0, high=labels.shape[1], size=labels.shape[0])
        labels[np.arange(len(labels)), labels_index] = 1
        data_sets.labels = labels
    return data_sets

def shuffle_in_unison_inplace(a, b):
    """Shuffle two equally-long arrays with one shared random permutation,
    so corresponding rows of a and b stay paired; returns the shuffled views."""
    assert len(a) == len(b)
    order = np.random.permutation(len(a))
    shuffled_a, shuffled_b = a[order], b[order]
    return shuffled_a, shuffled_b

def data_shuffle(data_sets_org, percent_of_train, min_test_data=80, shuffle_data=False):
	"""Split the dataset into train/test partitions, optionally shuffling first.

	data_sets_org - object with .data and .labels 2-D arrays (rows = samples)
	percent_of_train - percentage of samples assigned to the training slice
	min_test_data - percent threshold; when percent_of_train exceeds it, the
	                test slice starts at this percentage instead, so the
	                train and test slices deliberately overlap
	shuffle_data - True to permute the samples (in unison) before splitting
	return an object with .train and .test, each carrying .data and .labels"""
	perc = lambda i, t: np.rint((i * t) / 100).astype(np.int32)
	C = type('type_C', (object,), {})
	data_sets = C()
	n_samples = data_sets_org.data.shape[0]
	train_end = int(perc(percent_of_train, n_samples))
	test_start = train_end
	if percent_of_train > min_test_data:
		test_start = int(perc(min_test_data, n_samples))
	data_sets.train = C()
	data_sets.test = C()
	if shuffle_data:
		all_data, all_labels = shuffle_in_unison_inplace(data_sets_org.data, data_sets_org.labels)
	else:
		all_data, all_labels = data_sets_org.data, data_sets_org.labels
	data_sets.train.data = all_data[:train_end, :]
	data_sets.train.labels = all_labels[:train_end, :]
	data_sets.test.data = all_data[test_start:, :]
	data_sets.test.labels = all_labels[test_start:, :]
	return data_sets

# This function was used for dtype conversion, which might not be necessary in the simplified context
# However, if needed, TF2 supports these types directly without conversion



# import numpy as np
# from tensorflow.examples.tutorials.mnist import input_data
# import scipy.io as sio
# import os
# import sys
# import tensorflow as tf


# def load_data(name, random_labels=False):
# 	"""Load the data
# 	name - the name of the dataset
# 	random_labels - True if we want to return random labels to the dataset
# 	return object with data and labels"""
# 	print ('Loading Data...')
# 	C = type('type_C', (object,), {})
# 	data_sets = C()
# 	if name.split('/')[-1] == 'MNIST':
# 		data_sets_temp = input_data.read_data_sets(os.path.dirname(sys.argv[0]) + "/data/MNIST_data/", one_hot=True)
# 		data_sets.data = np.concatenate((data_sets_temp.train.images, data_sets_temp.test.images), axis=0)
# 		data_sets.labels = np.concatenate((data_sets_temp.train.labels, data_sets_temp.test.labels), axis=0)
# 	else:
# 		d = sio.loadmat(os.path.join(os.path.dirname(sys.argv[0]), name + '.mat'))
# 		F = d['F']
# 		y = d['y']
# 		C = type('type_C', (object,), {})
# 		data_sets = C()
# 		data_sets.data = F
# 		data_sets.labels = np.squeeze(np.concatenate((y[None, :], 1 - y[None, :]), axis=0).T)
# 	# If we want to assign random labels to the  data
# 	if random_labels:
# 		labels = np.zeros(data_sets.labels.shape)
# 		labels_index = np.random.randint(low=0, high=labels.shape[1], size=labels.shape[0])
# 		labels[np.arange(len(labels)), labels_index] = 1
# 		data_sets.labels = labels
# 	return data_sets


# def shuffle_in_unison_inplace(a, b):
# 	"""Shuffle the arrays randomly"""
# 	assert len(a) == len(b)
# 	p = np.random.permutation(len(a))
# 	return a[p], b[p]


# def data_shuffle(data_sets_org, percent_of_train, min_test_data=80, shuffle_data=False):
# 	"""Divided the data to train and test and shuffle it"""
# 	perc = lambda i, t: np.rint((i * t) / 100).astype(np.int32)
# 	C = type('type_C', (object,), {})
# 	data_sets = C()
# 	stop_train_index = perc(percent_of_train[0], data_sets_org.data.shape[0])
# 	start_test_index = stop_train_index
# 	if percent_of_train > min_test_data:
# 		start_test_index = perc(min_test_data, data_sets_org.data.shape[0])
# 	data_sets.train = C()
# 	data_sets.test = C()
# 	if shuffle_data:
# 		shuffled_data, shuffled_labels = shuffle_in_unison_inplace(data_sets_org.data, data_sets_org.labels)
# 	else:
# 		shuffled_data, shuffled_labels = data_sets_org.data, data_sets_org.labels
# 	data_sets.train.data = shuffled_data[:stop_train_index, :]
# 	data_sets.train.labels = shuffled_labels[:stop_train_index, :]
# 	data_sets.test.data = shuffled_data[start_test_index:, :]
# 	data_sets.test.labels = shuffled_labels[start_test_index:, :]
# 	return data_sets


def _convert_string_dtype(dtype):
	"""Map a dtype name string to the corresponding TensorFlow dtype.

	dtype - dtype name such as 'float32' or 'int64'
	return the matching tf.DType
	raise ValueError for unsupported names"""
	# A dispatch table is clearer and less error-prone than a long if/elif chain.
	mapping = {
		'float16': tf.float16,
		'float32': tf.float32,
		'float64': tf.float64,
		'int16': tf.int16,
		'int32': tf.int32,
		'int64': tf.int64,
		# BUG FIX: 'uint8' previously mapped to tf.int8 (signed, wrong range);
		# it must map to the unsigned tf.uint8.
		'uint8': tf.uint8,
		'uint16': tf.uint16,
	}
	try:
		return mapping[dtype]
	except KeyError:
		raise ValueError('Unsupported dtype:', dtype)