NCTCMumbai's picture
Upload 2583 files
97b6013 verified
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Datasets."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
import models
def make_long_chain_dataset(
state_size=1,
num_obs=5,
steps_per_obs=3,
variance=1.,
observation_variance=1.,
batch_size=4,
num_samples=1,
observation_type=models.STANDARD_OBSERVATION,
transition_type=models.STANDARD_TRANSITION,
fixed_observation=None,
dtype="float32"):
"""Creates a long chain data generating process.
Creates a tf.data.Dataset that provides batches of data from a long
chain.
Args:
state_size: The dimension of the state space of the process.
num_obs: The number of observations in the chain.
steps_per_obs: The number of steps between each observation.
variance: The variance of the normal distributions used at each timestep.
batch_size: The number of trajectories to include in each batch.
num_samples: The number of replicas of each trajectory to include in each
batch.
dtype: The datatype of the states and observations.
Returns:
dataset: A tf.data.Dataset that can be iterated over.
"""
num_timesteps = num_obs * steps_per_obs
def data_generator():
"""An infinite generator of latents and observations from the model."""
while True:
states = []
observations = []
# z0 ~ Normal(0, sqrt(variance)).
states.append(
np.random.normal(size=[state_size],
scale=np.sqrt(variance)).astype(dtype))
# start at 1 because we've already generated z0
# go to num_timesteps+1 because we want to include the num_timesteps-th step
for t in xrange(1, num_timesteps+1):
if transition_type == models.ROUND_TRANSITION:
loc = np.round(states[-1])
elif transition_type == models.STANDARD_TRANSITION:
loc = states[-1]
new_state = np.random.normal(size=[state_size],
loc=loc,
scale=np.sqrt(variance))
states.append(new_state.astype(dtype))
if t % steps_per_obs == 0:
if fixed_observation is None:
if observation_type == models.SQUARED_OBSERVATION:
loc = np.square(states[-1])
elif observation_type == models.ABS_OBSERVATION:
loc = np.abs(states[-1])
elif observation_type == models.STANDARD_OBSERVATION:
loc = states[-1]
new_obs = np.random.normal(size=[state_size],
loc=loc,
scale=np.sqrt(observation_variance)).astype(dtype)
else:
new_obs = np.ones([state_size])* fixed_observation
observations.append(new_obs)
yield states, observations
dataset = tf.data.Dataset.from_generator(
data_generator,
output_types=(tf.as_dtype(dtype), tf.as_dtype(dtype)),
output_shapes=([num_timesteps+1, state_size], [num_obs, state_size]))
dataset = dataset.repeat().batch(batch_size)
def tile_batch(state, observation):
state = tf.tile(state, [num_samples, 1, 1])
observation = tf.tile(observation, [num_samples, 1, 1])
return state, observation
dataset = dataset.map(tile_batch, num_parallel_calls=12).prefetch(1024)
return dataset
def make_dataset(bs=None,
state_size=1,
num_timesteps=10,
variance=1.,
prior_type="unimodal",
bimodal_prior_weight=0.5,
bimodal_prior_mean=1,
transition_type=models.STANDARD_TRANSITION,
fixed_observation=None,
batch_size=4,
num_samples=1,
dtype='float32'):
"""Creates a data generating process.
Creates a tf.data.Dataset that provides batches of data.
Args:
bs: The parameters of the data generating process. If None, new bs are
randomly generated.
state_size: The dimension of the state space of the process.
num_timesteps: The length of the state sequences in the process.
variance: The variance of the normal distributions used at each timestep.
batch_size: The number of trajectories to include in each batch.
num_samples: The number of replicas of each trajectory to include in each
batch.
Returns:
bs: The true bs used to generate the data
dataset: A tf.data.Dataset that can be iterated over.
"""
if bs is None:
bs = [np.random.uniform(size=[state_size]).astype(dtype) for _ in xrange(num_timesteps)]
tf.logging.info("data generating processs bs: %s",
np.array(bs).reshape(num_timesteps))
def data_generator():
"""An infinite generator of latents and observations from the model."""
while True:
states = []
if prior_type == "unimodal" or prior_type == "nonlinear":
# Prior is Normal(0, sqrt(variance)).
states.append(np.random.normal(size=[state_size], scale=np.sqrt(variance)).astype(dtype))
elif prior_type == "bimodal":
if np.random.uniform() > bimodal_prior_weight:
loc = bimodal_prior_mean
else:
loc = - bimodal_prior_mean
states.append(np.random.normal(size=[state_size],
loc=loc,
scale=np.sqrt(variance)
).astype(dtype))
for t in xrange(num_timesteps):
if transition_type == models.ROUND_TRANSITION:
loc = np.round(states[-1])
elif transition_type == models.STANDARD_TRANSITION:
loc = states[-1]
loc += bs[t]
new_state = np.random.normal(size=[state_size],
loc=loc,
scale=np.sqrt(variance)).astype(dtype)
states.append(new_state)
if fixed_observation is None:
observation = states[-1]
else:
observation = np.ones_like(states[-1]) * fixed_observation
yield np.array(states[:-1]), observation
dataset = tf.data.Dataset.from_generator(
data_generator,
output_types=(tf.as_dtype(dtype), tf.as_dtype(dtype)),
output_shapes=([num_timesteps, state_size], [state_size]))
dataset = dataset.repeat().batch(batch_size)
def tile_batch(state, observation):
state = tf.tile(state, [num_samples, 1, 1])
observation = tf.tile(observation, [num_samples, 1])
return state, observation
dataset = dataset.map(tile_batch, num_parallel_calls=12).prefetch(1024)
return np.array(bs), dataset