from __future__ import print_function

import os
import h5py
import json

import numpy as np
import tensorflow as tf
|
|
|
|
|
def log_sum_exp(x_k):
  """Computes log \sum exp in a numerically stable way.
    log ( sum_i exp(x_i) )
    log ( sum_i exp(x_i - m + m) ),  with m = max(x_i)
    log ( sum_i exp(x_i - m)*exp(m) )
    log ( sum_i exp(x_i - m) ) + m

  Args:
    x_k: k-dimensional list of arguments to log_sum_exp.

  Returns:
    log_sum_exp of the arguments.
  """
  m = tf.reduce_max(x_k)
  x1_k = x_k - m
  u_k = tf.exp(x1_k)
  z = tf.reduce_sum(u_k)
  return tf.log(z) + m
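
# Example (an illustrative sketch, assuming TF1 graph mode and a Session):
#
#   x = tf.constant([1000.0, 1000.0])
#   lse = log_sum_exp(x)        # = 1000 + log(2), computed without overflow
#   with tf.Session() as sess:
#     print(sess.run(lse))      # ~1000.6931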
|
|
|
|
|
def linear(x, out_size, do_bias=True, alpha=1.0, identity_if_possible=False,
           normalized=False, name=None, collections=None):
  """Linear (affine) transformation, y = x W + b, for a variety of
  configurations.

  Args:
    x: The input tensor to transform.
    out_size: The integer size of the non-batch output dimension.
    do_bias (optional): Add a learnable bias vector to the operation.
    alpha (optional): A multiplicative scaling for the weight initialization
      of the matrix, in the form \alpha * 1/\sqrt{x.shape[1]}.
    identity_if_possible (optional): Just return identity if
      x.shape[1] == out_size.
    normalized (optional): Option to divide out by the norms of the columns
      of W.
    name (optional): The name prefix to add to variables.
    collections (optional): List of additional collections. (Placed in
      tf.GraphKeys.GLOBAL_VARIABLES already, so no need for that.)

  Returns:
    In the equation, y = x W + b, returns the tensorflow op that yields y.
  """
  in_size = int(x.get_shape()[1])
  wname = (name + "/W") if name else "/W"

  if identity_if_possible and in_size == out_size:
    return tf.identity(x, name=(wname + '_ident'))

  W, b = init_linear(in_size, out_size, do_bias=do_bias, alpha=alpha,
                     normalized=normalized, name=name, collections=collections)

  if do_bias:
    return tf.matmul(x, W) + b
  else:
    return tf.matmul(x, W)
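
# Example (an illustrative sketch; the placeholder shape and variable names
# are arbitrary, not part of the original module):
#
#   x = tf.placeholder(tf.float32, [None, 128])
#   with tf.variable_scope("readout"):
#     y = linear(x, 32, name="out")   # y = x W + b, W: [128, 32], b: [1, 32]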
|
|
|
|
|
def init_linear(in_size, out_size, do_bias=True, mat_init_value=None,
                bias_init_value=None, alpha=1.0, identity_if_possible=False,
                normalized=False, name=None, collections=None, trainable=True):
  """Linear (affine) transformation, y = x W + b, for a variety of
  configurations.

  Args:
    in_size: The integer size of the non-batch input dimension. [(x),y]
    out_size: The integer size of the non-batch output dimension. [x,(y)]
    do_bias (optional): Add a (learnable) bias vector to the operation;
      if False, b will be None.
    mat_init_value (optional): numpy constant for matrix initialization;
      if None, initialize randomly, using the additional parameters.
    bias_init_value (optional): numpy constant for bias initialization;
      if None, initialize the bias to zeros.
    alpha (optional): A multiplicative scaling for the weight initialization
      of the matrix, in the form \alpha * 1/\sqrt{in_size}.
    identity_if_possible (optional): Just return identity if
      in_size == out_size.
    normalized (optional): Option to divide out by the norms of the columns
      of W.
    name (optional): The name prefix to add to variables.
    collections (optional): List of additional collections. (Placed in
      tf.GraphKeys.GLOBAL_VARIABLES already, so no need for that.)
    trainable (optional): Whether the created variables are trainable.

  Returns:
    In the equation, y = x W + b, returns the pair (W, b).
  """
  if mat_init_value is not None and mat_init_value.shape != (in_size, out_size):
    raise ValueError(
        'Provided mat_init_value must have shape [%d, %d].' % (in_size, out_size))
  if bias_init_value is not None and bias_init_value.shape != (1, out_size):
    raise ValueError(
        'Provided bias_init_value must have shape [1, %d].' % out_size)

  if mat_init_value is None:
    stddev = alpha / np.sqrt(float(in_size))
    mat_init = tf.random_normal_initializer(0.0, stddev)

  wname = (name + "/W") if name else "/W"

  if identity_if_possible and in_size == out_size:
    # No variables needed: W is a constant identity and b is all zeros.
    return (tf.constant(np.eye(in_size).astype(np.float32)),
            tf.zeros(in_size))

  if normalized:
    w_collections = [tf.GraphKeys.GLOBAL_VARIABLES, "norm-variables"]
    if collections:
      w_collections += collections
    if mat_init_value is not None:
      w = tf.Variable(mat_init_value, name=wname, collections=w_collections,
                      trainable=trainable)
    else:
      w = tf.get_variable(wname, [in_size, out_size], initializer=mat_init,
                          collections=w_collections, trainable=trainable)
    # dim=0 gives each column of W (each output unit's weights) unit norm.
    w = tf.nn.l2_normalize(w, dim=0)
  else:
    w_collections = [tf.GraphKeys.GLOBAL_VARIABLES]
    if collections:
      w_collections += collections
    if mat_init_value is not None:
      w = tf.Variable(mat_init_value, name=wname, collections=w_collections,
                      trainable=trainable)
    else:
      w = tf.get_variable(wname, [in_size, out_size], initializer=mat_init,
                          collections=w_collections, trainable=trainable)

  b = None
  if do_bias:
    b_collections = [tf.GraphKeys.GLOBAL_VARIABLES]
    if collections:
      b_collections += collections
    bname = (name + "/b") if name else "/b"
    if bias_init_value is None:
      b = tf.get_variable(bname, [1, out_size],
                          initializer=tf.zeros_initializer(),
                          collections=b_collections,
                          trainable=trainable)
    else:
      b = tf.Variable(bias_init_value, name=bname,
                      collections=b_collections,
                      trainable=trainable)

  return (w, b)
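
# Example (an illustrative sketch; the sizes and names are made up):
# warm-start the matrix from a fixed numpy value and freeze it:
#
#   w_init = (np.random.randn(128, 32) / np.sqrt(128)).astype(np.float32)
#   with tf.variable_scope("decoder"):
#     W, b = init_linear(128, 32, mat_init_value=w_init, name="factors",
#                        trainable=False)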
|
|
|
|
|
def write_data(data_fname, data_dict, use_json=False, compression=None):
  """Write data in HDF5 format.

  Args:
    data_fname: The filename of the file in which to write the data.
    data_dict: The dictionary of data to write. The keys are strings
      and the values are numpy arrays.
    use_json (optional): Write in a human-readable JSON format instead,
      for simple items.
    compression (optional): The compression to use for h5py (disabled by
      default because the library borks on scalars, otherwise try 'gzip').
  """
  dir_name = os.path.dirname(data_fname)
  if dir_name and not os.path.exists(dir_name):
    os.makedirs(dir_name)

  if use_json:
    with open(data_fname, 'w') as the_file:
      json.dump(data_dict, the_file)
  else:
    try:
      with h5py.File(data_fname, 'w') as hf:
        for k, v in data_dict.items():
          # h5py treats '/' as a group separator, so it cannot appear in
          # dataset names.
          clean_k = k.replace('/', '_')
          if clean_k != k:
            print('Warning: saving variable with name: ', k, ' as ', clean_k)
          else:
            print('Saving variable with name: ', clean_k)
          hf.create_dataset(clean_k, data=v, compression=compression)
    except IOError:
      print("Cannot open %s for writing." % data_fname)
      raise
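
# Example (an illustrative sketch; the path, keys, and shapes are made up):
#
#   write_data('/tmp/lfads/my_data_train',
#              {'train_data': np.zeros((10, 50, 3)),
#               'valid_data': np.zeros((2, 50, 3))})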
|
|
|
|
|
def read_data(data_fname):
  """Read saved data in HDF5 format.

  Args:
    data_fname: The filename of the file from which to read the data.

  Returns:
    A dictionary whose keys will vary depending on dataset (but should
    always contain the keys 'train_data' and 'valid_data') and whose
    values are numpy arrays.
  """
  try:
    with h5py.File(data_fname, 'r') as hf:
      data_dict = {k: np.array(v) for k, v in hf.items()}
      return data_dict
  except IOError:
    print("Cannot open %s for reading." % data_fname)
    raise
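
# Example (an illustrative sketch): a round trip through write_data and
# read_data, with a made-up path:
#
#   write_data('/tmp/lfads/my_data', {'train_data': np.ones((10, 50, 3))})
#   data_dict = read_data('/tmp/lfads/my_data')
#   data_dict['train_data'].shape   # -> (10, 50, 3)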
|
|
|
|
|
def write_datasets(data_path, data_fname_stem, dataset_dict, compression=None):
  """Write datasets in HDF5 format.

  This function assumes dataset_dict is a mapping (string -> data_dict).
  It calls write_data for each data dictionary, post-fixing the data
  filename with the key of the dataset.

  Args:
    data_path: The path to the save directory.
    data_fname_stem: The filename stem of the file in which to write the data.
    dataset_dict: The dictionary of datasets. The keys are strings
      and the values are data dictionaries (str -> numpy array) associations.
    compression (optional): The compression to use for h5py (disabled by
      default because the library borks on scalars, otherwise try 'gzip').
  """
  full_name_stem = os.path.join(data_path, data_fname_stem)
  for s, data_dict in dataset_dict.items():
    write_data(full_name_stem + "_" + s, data_dict, compression=compression)
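
# Example (an illustrative sketch; 'sess0'/'sess1' and the data dicts are
# placeholders): this writes '/tmp/lfads/my_data_sess0' and
# '/tmp/lfads/my_data_sess1':
#
#   write_datasets('/tmp/lfads', 'my_data',
#                  {'sess0': data_dict0, 'sess1': data_dict1})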
|
|
|
|
|
def read_datasets(data_path, data_fname_stem):
  """Read datasets in HDF5 format.

  This function assumes the datasets were written by write_datasets, i.e.
  that each data filename is the stem post-fixed with the key of the
  dataset. It calls read_data for each matching file.

  Args:
    data_path: The path to the save directory.
    data_fname_stem: The filename stem of the files from which to read the
      data.

  Returns:
    The dictionary of datasets, mapping dataset name to data dictionary,
    with 'data_dim' and 'num_steps' added to each data dictionary.
  """
  dataset_dict = {}
  fnames = os.listdir(data_path)

  print('loading data from ' + data_path + ' with stem ' + data_fname_stem)
  for fname in fnames:
    if fname.startswith(data_fname_stem):
      data_dict = read_data(os.path.join(data_path, fname))
      idx = len(data_fname_stem) + 1  # +1 skips the '_' after the stem.
      key = fname[idx:]
      data_dict['data_dim'] = data_dict['train_data'].shape[2]
      data_dict['num_steps'] = data_dict['train_data'].shape[1]
      dataset_dict[key] = data_dict

  if len(dataset_dict) == 0:
    raise ValueError("Failed to load any datasets, are you sure that the "
                     "'--data_dir' and '--data_filename_stem' flag values "
                     "are correct?")

  print(str(len(dataset_dict)) + ' datasets loaded')
  return dataset_dict
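
# Example (an illustrative sketch): with the two files from the
# write_datasets sketch above on disk, each holding BxTxN 'train_data':
#
#   datasets = read_datasets('/tmp/lfads', 'my_data')
#   sorted(datasets.keys())            # -> ['sess0', 'sess1']
#   datasets['sess0']['data_dim']      # N
#   datasets['sess0']['num_steps']     # T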
|
|
|
|
|
|
|
def list_t_bxn_to_list_b_txn(values_t_bxn):
  """Convert a length T list of BxN numpy tensors to a length B list of TxN
  numpy tensors.

  Args:
    values_t_bxn: The length T list of BxN numpy tensors.

  Returns:
    The length B list of TxN numpy tensors.
  """
  T = len(values_t_bxn)
  B, N = values_t_bxn[0].shape
  values_b_txn = []
  for b in range(B):
    values_pb_txn = np.zeros([T, N])
    for t in range(T):
      values_pb_txn[t, :] = values_t_bxn[t][b, :]
    values_b_txn.append(values_pb_txn)

  return values_b_txn
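
# Example (an illustrative sketch): 3 timesteps of a batch of 2 4-dim values:
#
#   vals_t_bxn = [np.random.randn(2, 4) for _ in range(3)]
#   vals_b_txn = list_t_bxn_to_list_b_txn(vals_t_bxn)
#   len(vals_b_txn), vals_b_txn[0].shape   # -> 2, (3, 4)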
|
|
|
|
|
def list_t_bxn_to_tensor_bxtxn(values_t_bxn):
  """Convert a length T list of BxN numpy tensors to a single numpy tensor
  with shape BxTxN.

  Args:
    values_t_bxn: The length T list of BxN numpy tensors.

  Returns:
    values_bxtxn: The BxTxN numpy tensor.
  """
  T = len(values_t_bxn)
  B, N = values_t_bxn[0].shape
  values_bxtxn = np.zeros([B, T, N])
  for t in range(T):
    values_bxtxn[:, t, :] = values_t_bxn[t]

  return values_bxtxn
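
# Example (an illustrative sketch), with the same list shapes as above:
#
#   vals_t_bxn = [np.random.randn(2, 4) for _ in range(3)]
#   list_t_bxn_to_tensor_bxtxn(vals_t_bxn).shape   # -> (2, 3, 4)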
|
|
|
|
|
def tensor_bxtxn_to_list_t_bxn(tensor_bxtxn):
  """Convert a numpy tensor with shape BxTxN to a length T list of numpy
  tensors with shape BxN.

  Args:
    tensor_bxtxn: The BxTxN numpy tensor.

  Returns:
    A length T list of numpy tensors with shape BxN.
  """
  values_t_bxn = []
  B, T, N = tensor_bxtxn.shape
  for t in range(T):
    # The slice is already BxN; avoid np.squeeze here, which would also drop
    # a batch or feature dimension of size 1.
    values_t_bxn.append(tensor_bxtxn[:, t, :])

  return values_t_bxn
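
# Example (an illustrative sketch): the inverse of list_t_bxn_to_tensor_bxtxn:
#
#   tensor = np.zeros((2, 3, 4))
#   lst = tensor_bxtxn_to_list_t_bxn(tensor)
#   len(lst), lst[0].shape   # -> 3, (2, 4)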
|
|
|
|
|
def flatten(list_of_lists):
  """Takes a list of lists and returns a list of the elements.

  Args:
    list_of_lists: List of lists.

  Returns:
    flat_list: Flattened list.
    flat_list_idxs: Flattened list indices.
  """
  flat_list = []
  flat_list_idxs = []
  start_idx = 0
  for item in list_of_lists:
    if isinstance(item, list):
      flat_list += item
      l = len(item)
      # list() so the indices are concrete lists under Python 3 as well.
      idxs = list(range(start_idx, start_idx + l))
      start_idx = start_idx + l
    else:
      flat_list.append(item)
      idxs = [start_idx]
      start_idx += 1
    flat_list_idxs.append(idxs)

  return flat_list, flat_list_idxs
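
# Example (an illustrative sketch):
#
#   flat, idxs = flatten([[1, 2], 3, [4]])
#   # flat == [1, 2, 3, 4]
#   # idxs == [[0, 1], [2], [3]]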
|
|