# Data loading and train/test splitting utilities (TF1-compat version).
import numpy as np
import scipy.io as sio
import os
import sys
import tensorflow as tf
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
# TF2 doesn't include the mnist module in tensorflow.examples.tutorials
# Use tf.keras.datasets.mnist instead
def load_data(name, random_labels=False):
    """Load a dataset by name.

    name - dataset name; if the last path component is 'MNIST' the
           built-in Keras MNIST download is used, otherwise `name + '.mat'`
           is read from the directory of the running script.
    random_labels - True to replace the real labels with uniformly random
                    one-hot labels of the same shape.
    Returns an object with `.data` (samples x features) and `.labels`
    (samples x classes, one-hot) attributes.
    """
    print('Loading Data...')
    C = type('type_C', (object,), {})
    data_sets = C()
    if name.split('/')[-1] == 'MNIST':
        (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
        # Flatten 28x28 images to 784-vectors and scale pixels to [0, 1].
        data_sets.data = np.concatenate((x_train, x_test), axis=0).reshape((-1, 28 * 28)) / 255.0
        # Build one-hot labels in NumPy. The previous tf.one_hot call
        # returned symbolic graph Tensors (tf.disable_v2_behavior() is in
        # effect), which np.concatenate cannot consume. float32 matches
        # tf.one_hot's default dtype.
        all_labels = np.concatenate((y_train, y_test), axis=0)
        data_sets.labels = np.eye(10, dtype=np.float32)[all_labels]
    else:
        d = sio.loadmat(os.path.join(os.path.dirname(sys.argv[0]), name + '.mat'))
        F = d['F']
        y = d['y']
        data_sets = C()
        data_sets.data = F
        # Stack y and 1-y, then transpose -> (samples, 2) one-hot labels.
        data_sets.labels = np.squeeze(np.concatenate((y[None, :], 1 - y[None, :]), axis=0).T)
    # If we want to assign random labels to the data
    if random_labels:
        labels = np.zeros(data_sets.labels.shape)
        labels_index = np.random.randint(low=0, high=labels.shape[1], size=labels.shape[0])
        labels[np.arange(len(labels)), labels_index] = 1
        data_sets.labels = labels
    return data_sets
def shuffle_in_unison_inplace(a, b):
    """Apply one shared random permutation to both arrays, keeping rows paired."""
    assert len(a) == len(b)
    order = np.random.permutation(len(a))
    return a[order], b[order]
def data_shuffle(data_sets_org, percent_of_train, min_test_data=80, shuffle_data=False):
    """Partition a dataset into train and test subsets.

    data_sets_org - object with `.data` and `.labels` 2-D arrays (rows = samples)
    percent_of_train - percentage of samples assigned to the train split
    min_test_data - when percent_of_train exceeds this, the test split
                    starts at this percentage instead (train/test overlap)
    shuffle_data - permute samples (in unison with labels) before splitting
    Returns an object with `.train` and `.test`, each with `.data`/`.labels`.
    """
    def percent_index(p, total):
        # Row index at p percent of `total`, rounded to the nearest integer.
        return int(np.rint((p * total) / 100).astype(np.int32))

    holder = type('type_C', (object,), {})
    split = holder()
    split.train = holder()
    split.test = holder()

    n_samples = data_sets_org.data.shape[0]
    train_end = percent_index(percent_of_train, n_samples)
    # Test normally starts where train ends; cap its start so the test set
    # keeps at least (100 - min_test_data) percent of the samples.
    test_start = train_end
    if percent_of_train > min_test_data:
        test_start = percent_index(min_test_data, n_samples)

    if shuffle_data:
        all_data, all_labels = shuffle_in_unison_inplace(data_sets_org.data, data_sets_org.labels)
    else:
        all_data, all_labels = data_sets_org.data, data_sets_org.labels

    split.train.data = all_data[:train_end, :]
    split.train.labels = all_labels[:train_end, :]
    split.test.data = all_data[test_start:, :]
    split.test.labels = all_labels[test_start:, :]
    return split
# --- Legacy TF1 implementation, kept below for reference only. ---
# It relied on tensorflow.examples.tutorials.mnist, which was removed in
# TF2; the active code above replaces it. The _convert_string_dtype helper
# that follows the legacy block maps dtype-name strings to TF dtypes and
# may be unnecessary where TF2 accepts dtype strings directly.
# import numpy as np
# from tensorflow.examples.tutorials.mnist import input_data
# import scipy.io as sio
# import os
# import sys
# import tensorflow as tf
# def load_data(name, random_labels=False):
# """Load the data
# name - the name of the dataset
# random_labels - True if we want to return random labels to the dataset
# return object with data and labels"""
# print ('Loading Data...')
# C = type('type_C', (object,), {})
# data_sets = C()
# if name.split('/')[-1] == 'MNIST':
# data_sets_temp = input_data.read_data_sets(os.path.dirname(sys.argv[0]) + "/data/MNIST_data/", one_hot=True)
# data_sets.data = np.concatenate((data_sets_temp.train.images, data_sets_temp.test.images), axis=0)
# data_sets.labels = np.concatenate((data_sets_temp.train.labels, data_sets_temp.test.labels), axis=0)
# else:
# d = sio.loadmat(os.path.join(os.path.dirname(sys.argv[0]), name + '.mat'))
# F = d['F']
# y = d['y']
# C = type('type_C', (object,), {})
# data_sets = C()
# data_sets.data = F
# data_sets.labels = np.squeeze(np.concatenate((y[None, :], 1 - y[None, :]), axis=0).T)
# # If we want to assign random labels to the data
# if random_labels:
# labels = np.zeros(data_sets.labels.shape)
# labels_index = np.random.randint(low=0, high=labels.shape[1], size=labels.shape[0])
# labels[np.arange(len(labels)), labels_index] = 1
# data_sets.labels = labels
# return data_sets
# def shuffle_in_unison_inplace(a, b):
# """Shuffle the arrays randomly"""
# assert len(a) == len(b)
# p = np.random.permutation(len(a))
# return a[p], b[p]
# def data_shuffle(data_sets_org, percent_of_train, min_test_data=80, shuffle_data=False):
# """Divided the data to train and test and shuffle it"""
# perc = lambda i, t: np.rint((i * t) / 100).astype(np.int32)
# C = type('type_C', (object,), {})
# data_sets = C()
# stop_train_index = perc(percent_of_train[0], data_sets_org.data.shape[0])
# start_test_index = stop_train_index
# if percent_of_train > min_test_data:
# start_test_index = perc(min_test_data, data_sets_org.data.shape[0])
# data_sets.train = C()
# data_sets.test = C()
# if shuffle_data:
# shuffled_data, shuffled_labels = shuffle_in_unison_inplace(data_sets_org.data, data_sets_org.labels)
# else:
# shuffled_data, shuffled_labels = data_sets_org.data, data_sets_org.labels
# data_sets.train.data = shuffled_data[:stop_train_index, :]
# data_sets.train.labels = shuffled_labels[:stop_train_index, :]
# data_sets.test.data = shuffled_data[start_test_index:, :]
# data_sets.test.labels = shuffled_labels[start_test_index:, :]
# return data_sets
def _convert_string_dtype(dtype):
    """Map a dtype-name string to the corresponding TensorFlow dtype.

    dtype - dtype name such as 'float32' or 'uint8'
    Returns the matching tf.DType.
    Raises ValueError for unsupported names.
    """
    if dtype == 'float16':
        return tf.float16
    if dtype == 'float32':
        return tf.float32
    elif dtype == 'float64':
        return tf.float64
    elif dtype == 'int16':
        return tf.int16
    elif dtype == 'int32':
        return tf.int32
    elif dtype == 'int64':
        return tf.int64
    elif dtype == 'uint8':
        # Bug fix: previously returned tf.int8 (signed), which would
        # misinterpret pixel values in the range 128-255.
        return tf.uint8
    elif dtype == 'uint16':
        return tf.uint16
    else:
        raise ValueError('Unsupported dtype:', dtype)
|