Ashley Goluoglu
add files from pantelis/IDNN
96283ff
import numpy as np
import scipy.io as sio
import os
import sys
import tensorflow as tf
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
# TF2 doesn't include the mnist module in tensorflow.examples.tutorials
# Use tf.keras.datasets.mnist instead
def load_data(name, random_labels=False):
"""Load the data
name - the name of the dataset
random_labels - True if we want to return random labels to the dataset
return object with data and labels"""
print('Loading Data...')
C = type('type_C', (object,), {})
data_sets = C()
if name.split('/')[-1] == 'MNIST':
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
data_sets.data = np.concatenate((x_train, x_test), axis=0).reshape((-1, 28*28)) / 255.0 # Normalize
data_sets.labels = np.concatenate((tf.one_hot(y_train, depth=10), tf.one_hot(y_test, depth=10)), axis=0)
else:
d = sio.loadmat(os.path.join(os.path.dirname(sys.argv[0]), name + '.mat'))
F = d['F']
y = d['y']
data_sets = C()
data_sets.data = F
data_sets.labels = np.squeeze(np.concatenate((y[None, :], 1 - y[None, :]), axis=0).T)
# If we want to assign random labels to the data
if random_labels:
labels = np.zeros(data_sets.labels.shape)
labels_index = np.random.randint(low=0, high=labels.shape[1], size=labels.shape[0])
labels[np.arange(len(labels)), labels_index] = 1
data_sets.labels = labels
return data_sets
def shuffle_in_unison_inplace(a, b):
"""Shuffle the arrays randomly"""
assert len(a) == len(b)
p = np.random.permutation(len(a))
return a[p], b[p]
def data_shuffle(data_sets_org, percent_of_train, min_test_data=80, shuffle_data=False):
"""Divided the data to train and test and shuffle it"""
perc = lambda i, t: np.rint((i * t) / 100).astype(np.int32)
C = type('type_C', (object,), {})
data_sets = C()
stop_train_index = int(perc(percent_of_train, data_sets_org.data.shape[0]))
start_test_index = int(stop_train_index)
if percent_of_train > min_test_data:
start_test_index = int(perc(min_test_data, data_sets_org.data.shape[0]))
data_sets.train = C()
data_sets.test = C()
if shuffle_data:
shuffled_data, shuffled_labels = shuffle_in_unison_inplace(data_sets_org.data, data_sets_org.labels)
else:
shuffled_data, shuffled_labels = data_sets_org.data, data_sets_org.labels
data_sets.train.data = shuffled_data[:stop_train_index, :]
data_sets.train.labels = shuffled_labels[:stop_train_index, :]
data_sets.test.data = shuffled_data[start_test_index:, :]
data_sets.test.labels = shuffled_labels[start_test_index:, :]
return data_sets
# This function was used for dtype conversion, which might not be necessary in the simplified context
# However, if needed, TF2 supports these types directly without conversion
# import numpy as np
# from tensorflow.examples.tutorials.mnist import input_data
# import scipy.io as sio
# import os
# import sys
# import tensorflow as tf
# def load_data(name, random_labels=False):
# """Load the data
# name - the name of the dataset
# random_labels - True if we want to return random labels to the dataset
# return object with data and labels"""
# print ('Loading Data...')
# C = type('type_C', (object,), {})
# data_sets = C()
# if name.split('/')[-1] == 'MNIST':
# data_sets_temp = input_data.read_data_sets(os.path.dirname(sys.argv[0]) + "/data/MNIST_data/", one_hot=True)
# data_sets.data = np.concatenate((data_sets_temp.train.images, data_sets_temp.test.images), axis=0)
# data_sets.labels = np.concatenate((data_sets_temp.train.labels, data_sets_temp.test.labels), axis=0)
# else:
# d = sio.loadmat(os.path.join(os.path.dirname(sys.argv[0]), name + '.mat'))
# F = d['F']
# y = d['y']
# C = type('type_C', (object,), {})
# data_sets = C()
# data_sets.data = F
# data_sets.labels = np.squeeze(np.concatenate((y[None, :], 1 - y[None, :]), axis=0).T)
# # If we want to assign random labels to the data
# if random_labels:
# labels = np.zeros(data_sets.labels.shape)
# labels_index = np.random.randint(low=0, high=labels.shape[1], size=labels.shape[0])
# labels[np.arange(len(labels)), labels_index] = 1
# data_sets.labels = labels
# return data_sets
# def shuffle_in_unison_inplace(a, b):
# """Shuffle the arrays randomly"""
# assert len(a) == len(b)
# p = np.random.permutation(len(a))
# return a[p], b[p]
# def data_shuffle(data_sets_org, percent_of_train, min_test_data=80, shuffle_data=False):
# """Divided the data to train and test and shuffle it"""
# perc = lambda i, t: np.rint((i * t) / 100).astype(np.int32)
# C = type('type_C', (object,), {})
# data_sets = C()
# stop_train_index = perc(percent_of_train[0], data_sets_org.data.shape[0])
# start_test_index = stop_train_index
# if percent_of_train > min_test_data:
# start_test_index = perc(min_test_data, data_sets_org.data.shape[0])
# data_sets.train = C()
# data_sets.test = C()
# if shuffle_data:
# shuffled_data, shuffled_labels = shuffle_in_unison_inplace(data_sets_org.data, data_sets_org.labels)
# else:
# shuffled_data, shuffled_labels = data_sets_org.data, data_sets_org.labels
# data_sets.train.data = shuffled_data[:stop_train_index, :]
# data_sets.train.labels = shuffled_labels[:stop_train_index, :]
# data_sets.test.data = shuffled_data[start_test_index:, :]
# data_sets.test.labels = shuffled_labels[start_test_index:, :]
# return data_sets
def _convert_string_dtype(dtype):
if dtype == 'float16':
return tf.float16
if dtype == 'float32':
return tf.float32
elif dtype == 'float64':
return tf.float64
elif dtype == 'int16':
return tf.int16
elif dtype == 'int32':
return tf.int32
elif dtype == 'int64':
return tf.int64
elif dtype == 'uint8':
return tf.int8
elif dtype == 'uint16':
return tf.uint16
else:
raise ValueError('Unsupported dtype:', dtype)