"""A library of tasks. |
This interface is intended to implement a wide variety of navigation |
tasks. See go/navigation_tasks for a list. |
""" |
import abc |
import collections |
import math |
import threading |
import networkx as nx |
import numpy as np |
import tensorflow as tf |
from envs import task_env |
from envs import util as envs_util |
def _pad_or_clip_array(np_arr, arr_len, is_front_clip=True, output_mask=False):
  """Pads or clips `np_arr` so that its first dimension has length `arr_len`.

  If the array is shorter than `arr_len`, zeros are appended at the END of the
  first dimension. If it is longer, entries are dropped either from the front
  or from the back. Only the first dimension is modified.

  Args:
    np_arr: numpy array of rank >= 1.
    arr_len: integer scalar, the desired length of the first dimension.
    is_front_clip: a boolean. If True, clipping drops the leading entries
      (the last `arr_len` entries are kept); otherwise the trailing entries
      are dropped.
    output_mask: If True, additionally outputs a rank 1 integer numpy array of
      length `arr_len` marking which entries are padding (0 - added,
      1 - actual value).

  Returns:
    A tuple (array, pad_size) or, if `output_mask` is True,
    (array, pad_size, mask). `pad_size` is `arr_len - np_arr.shape[0]`; it is
    negative if the array was clipped and zero if it was returned unchanged.
  """
  shape = list(np_arr.shape)
  pad_size = arr_len - shape[0]

  if pad_size < 0:
    # Slice the first axis only, so that arrays of any rank >= 1 work.
    if is_front_clip:
      padded_or_clipped = np_arr[-pad_size:]
    else:
      padded_or_clipped = np_arr[:arr_len]
  elif pad_size > 0:
    padding = np.zeros([pad_size] + shape[1:], dtype=np_arr.dtype)
    padded_or_clipped = np.concatenate([np_arr, padding], axis=0)
  else:
    padded_or_clipped = np_arr

  if not output_mask:
    return padded_or_clipped, pad_size

  # `np.int` was only an alias of the builtin `int` and has been removed from
  # recent NumPy releases; the builtin yields the same dtype.
  mask = np.ones((arr_len,), dtype=int)
  if pad_size > 0:
    mask[-pad_size:] = 0
  return padded_or_clipped, pad_size, mask
def classification_loss(truth, predicted, weights=None, is_one_hot=True):
  """Mean softmax cross entropy between labels and predictions.

  Thin wrapper around `tf.nn.softmax_cross_entropy_with_logits` that first
  flattens its arguments: `predicted` is reshaped to a rank 2 Tensor whose
  last dimension is the number of labels, and `truth` is brought to the same
  shape (either by reshaping one hot labels or by one hot encoding class
  indices). Optionally, entries can be dropped from the mean via `weights`.

  Args:
    truth: Tensor of labels. If `is_one_hot` is True it is reshaped to
      [-1, num_labels]; otherwise it is flattened and treated as class
      indices which are one hot encoded.
    predicted: float Tensor whose last (statically known) dimension is the
      number of labels. It is passed to the cross entropy op as logits.
    weights: optional Tensor with one entry per prediction. It is flattened
      and cast to bool; only losses at True positions enter the mean.
    is_one_hot: a boolean, see `truth`.

  Returns:
    A TF float scalar.
  """
  depth = predicted.get_shape().as_list()[-1]

  if is_one_hot:
    labels = tf.reshape(truth, [-1, depth])
  else:
    class_ids = tf.reshape(truth, [-1])
    labels = tf.one_hot(
        class_ids, depth=depth, on_value=1.0, off_value=0.0, axis=-1)

  logits = tf.reshape(predicted, [-1, depth])
  per_example = tf.nn.softmax_cross_entropy_with_logits(
      labels=labels, logits=logits)

  if weights is None:
    return tf.reduce_mean(per_example)

  keep = tf.cast(tf.reshape(weights, [-1]), dtype=tf.bool)
  return tf.reduce_mean(tf.boolean_mask(per_example, keep))
class UnrolledTaskIOConfig(object):
  """Configuration of task inputs and outputs.

  A task can have multiple inputs, which define the context, and a task query
  which defines what is to be executed in this context. The desired execution
  is encoded in an output. The config defines the types and shapes of the
  inputs, the query and the output.
  """

  # Record type used for every input / query / output specification.
  _IOConfig = collections.namedtuple('IOConfig', ['type', 'shape'])

  @staticmethod
  def _assert_config(config):
    """Raises ValueError unless `config` is a (tf.DType, list) tuple."""
    if not isinstance(config, tuple):
      raise ValueError('config must be a tuple. Received {}'.format(
          type(config)))
    if len(config) != 2:
      raise ValueError('config must have 2 elements, has %d' % len(config))
    if not isinstance(config[0], tf.DType):
      raise ValueError('First element of config must be a tf.DType.')
    if not isinstance(config[1], list):
      raise ValueError('Second element of config must be a list.')

  def __init__(self, inputs, output, query=None):
    """Constructs a Task input/output config.

    Args:
      inputs: a collections.OrderedDict mapping a modality type to a tuple
        (type, shape), where type is a tf.DType and shape is a list.
      output: a (type, shape) tuple representing the configuration of the
        output.
      query: a (type, shape) tuple representing the configuration of the
        query. If no query, then None.

    Raises:
      ValueError: if any of the (type, shape) tuples is malformed.
    """
    assert isinstance(inputs, collections.OrderedDict)
    for modality_type in inputs:
      self._assert_config(inputs[modality_type])
    # `.items()` (rather than `.iteritems()`) works on Python 2 and 3.
    self._inputs = collections.OrderedDict(
        [(k, self._IOConfig(*value)) for k, value in inputs.items()])

    if query is not None:
      self._assert_config(query)
      self._query = self._IOConfig(*query)
    else:
      self._query = None

    self._assert_config(output)
    self._output = self._IOConfig(*output)

  @property
  def inputs(self):
    """OrderedDict of modality type -> IOConfig(type, shape)."""
    return self._inputs

  @property
  def output(self):
    """IOConfig(type, shape) of the output."""
    return self._output

  @property
  def query(self):
    """IOConfig(type, shape) of the query, or None if there is no query."""
    return self._query
class UnrolledTask(object):
  """An interface for a Task which can be unrolled during training.

  Each example is called episode and consists of inputs and target output,
  where the output can be considered as desired unrolled sequence of actions
  for the inputs. For the specified tasks, these action sequences are to be
  unambiguously definable.
  """

  # NOTE: this is the Python 2 way of declaring a metaclass. Python 3 ignores
  # the attribute, so there the abstract methods below are not enforced.
  __metaclass__ = abc.ABCMeta

  def __init__(self, config):
    """Stores the IO config and creates the lock guarding `episode()`.

    Args:
      config: an UnrolledTaskIOConfig.
    """
    assert isinstance(config, UnrolledTaskIOConfig)
    self._config = config
    self.info = {}
    # Held around every call to self.episode() made by episode_batch().
    self._lock = threading.Lock()

  @property
  def config(self):
    """The UnrolledTaskIOConfig passed at construction."""
    return self._config

  @abc.abstractmethod
  def episode(self):
    """Returns data needed to train and test a single episode.

    Each episode consists of inputs, which define the context of the task, a
    query which defines the task, and a target output, which defines a
    sequence of actions to be executed for this query. This sequence should
    not require feedback, i.e. can be predicted purely from input and query.

    Returns:
      inputs, query, output. `inputs` is indexed by modality type (or is
      None), `query` is a numpy array (or None), and `output` is a tuple
      (target, mask) where mask may be None; this is the form consumed by
      episode_batch(). Arrays must be of shape and type as specified in the
      task configuration.
    """
    pass

  def reset(self, observation):
    """Called after the environment is reset."""
    pass

  def episode_batch(self, batch_size):
    """Returns a batch of episodes.

    Args:
      batch_size: size of batch.

    Returns:
      (inputs, query, output, masks) where inputs is a list of numpy arrays
      ordered like config.inputs, and query, output, and masks are numpy
      arrays with one additional preceding dimension corresponding to the
      batch. masks is always float32; it is an empty array when no episode
      provided a mask. query is an empty list when no episode provided one.

    Raises:
      ValueError: if self.episode() returns illegal values.
    """
    per_modality = collections.OrderedDict(
        [(mtype, []) for mtype in self.config.inputs])
    batched_queries = []
    batched_outputs = []
    batched_masks = []

    for _ in range(int(batch_size)):
      with self._lock:
        inputs, query, outputs = self.episode()
      if not isinstance(outputs, tuple):
        raise ValueError('Outputs return value must be tuple.')
      if len(outputs) != 2:
        raise ValueError('Output tuple must be of size 2.')

      if inputs is not None:
        for modality_type in per_modality:
          per_modality[modality_type].append(
              np.expand_dims(inputs[modality_type], axis=0))
      if query is not None:
        batched_queries.append(np.expand_dims(query, axis=0))
      batched_outputs.append(np.expand_dims(outputs[0], axis=0))
      if outputs[1] is not None:
        batched_masks.append(np.expand_dims(outputs[1], axis=0))

    # Stack along the new batch axis, keeping the order of config.inputs.
    batched_inputs = [
        np.concatenate(per_modality[k], axis=0) for k in self._config.inputs
    ]
    if batched_queries:
      batched_queries = np.concatenate(batched_queries, axis=0)
    batched_outputs = np.concatenate(batched_outputs, axis=0)
    if batched_masks:
      batched_masks = np.concatenate(batched_masks, axis=0).astype(np.float32)
    else:
      # No masks were provided.
      batched_masks = np.array([], dtype=np.float32)
    return batched_inputs, batched_queries, batched_outputs, batched_masks

  def tf_episode_batch(self, batch_size):
    """A batch of episodes as TF Tensors.

    Same as episode_batch with the difference that the return values are TF
    Tensors produced by a stateful tf.py_func.

    Args:
      batch_size: the batch size; it is converted with int().

    Returns:
      inputs, query, output, mask where inputs is an OrderedDict of tf.Tensor
      keyed by the modality types specified in config.inputs. query (None if
      the config has no query), output, and mask are TF Tensors. The static
      shapes are those of the task configuration with one additional
      preceding batch dimension; the mask keeps only the first output
      dimension.
    """
    batch_size = int(batch_size)

    touts = []
    shapes = []
    for io in self._config.inputs.values():
      touts.append(io.type)
      shapes.append(io.shape)
    if self._config.query is not None:
      touts.append(self._config.query.type)
      shapes.append(self._config.query.shape)
    # Outputs.
    touts.append(self._config.output.type)
    shapes.append(self._config.output.shape)
    # Masks: episode_batch() always returns them as float32, so the declared
    # py_func output type must be float32 regardless of the output type.
    touts.append(tf.float32)
    shapes.append(self._config.output.shape[0:1])

    def episode_batch_func():
      """Flattens episode_batch() into the tuple layout declared in touts."""
      if self.config.query is None:
        inp, _, output, masks = self.episode_batch(batch_size)
        return tuple(inp) + (output, masks)
      inp, query, output, masks = self.episode_batch(batch_size)
      return tuple(inp) + (query, output, masks)

    tf_episode_batch = tf.py_func(episode_batch_func, [], touts,
                                  stateful=True, name='taskdata')
    for episode, shape in zip(tf_episode_batch, shapes):
      episode.set_shape([batch_size] + shape)

    # zip() stops after the input modalities; the remaining tensors are the
    # optional query, the outputs and the masks, in that order.
    tf_episode_batch_dict = collections.OrderedDict([
        (mtype, episode)
        for mtype, episode in zip(self.config.inputs.keys(), tf_episode_batch)
    ])
    cur_index = len(self.config.inputs)
    tf_query = None
    if self.config.query is not None:
      tf_query = tf_episode_batch[cur_index]
      cur_index += 1
    tf_outputs = tf_episode_batch[cur_index]
    tf_masks = tf_episode_batch[cur_index + 1]

    return tf_episode_batch_dict, tf_query, tf_outputs, tf_masks

  @abc.abstractmethod
  def target_loss(self, true_targets, targets, weights=None):
    """A loss for training a task model.

    This loss measures the discrepancy between the task outputs, the true and
    predicted ones.

    Args:
      true_targets: tf.Tensor of shape and type as defined in the task config
        containing the true outputs.
      targets: tf.Tensor of shape and type as defined in the task config
        containing the predicted outputs.
      weights: a bool tf.Tensor of shape as targets. Only true values are
        considered when formulating the loss.
    """
    pass

  def reward(self, obs, done, info):
    """Returns a reward.

    The tasks has to compute a reward based on the state of the environment.
    The reward computation, though, is task specific. The task is to use the
    environment interface, as defined in task_env.py, to compute the reward.
    If this interface does not expose enough information, it is to be
    updated. This default implementation passes its arguments through with a
    reward of 0.0.

    Args:
      obs: Observation from environment's step function.
      done: Done flag from environment's step function.
      info: Info dict from environment's step function.

    Returns:
      obs: Observation.
      reward: Floating point value.
      done: Done flag.
      info: Info dict.
    """
    return obs, 0.0, done, info
class RandomExplorationBasedTask(UnrolledTask):
  """A Task which starts with a random exploration of the environment."""

  def __init__(self,
               env,
               seed,
               add_query_noise=False,
               query_noise_var=0.0,
               *args,
               **kwargs):
    """Initializes a Task using a random exploration runs.

    Args:
      env: an instance of type TaskEnv and gym.Env.
      seed: a random seed.
      add_query_noise: boolean, if True then whatever queries are generated,
        they are randomly perturbed. The semantics of the queries depends on
        the concrete task implementation.
      query_noise_var: float, the amount of Gaussian noise used for query
        perturbation (see _perturb_state for how it is applied).
      *args: see super class.
      **kwargs: see super class.
    """
    super(RandomExplorationBasedTask, self).__init__(*args, **kwargs)
    assert isinstance(env, task_env.TaskEnv)
    self._env = env
    self._env.set_task(self)
    self._rng = np.random.RandomState(seed)
    self._add_query_noise = add_query_noise
    self._query_noise_var = query_noise_var

    if len(self.config.inputs.keys()) > 1:
      raise NotImplementedError('current implementation supports input '
                                'with only one modality type or less.')

  def _exploration(self):
    """Generates a random exploration run.

    The function uses the environment to generate a run.

    Returns:
      A dict mapping each modality type in config.inputs to a numpy array of
      observations whose first dimension is padded or clipped to the sequence
      length given in the config.
      A list of states along the exploration path.
      A list of vertex indices corresponding to the path of the exploration.
    """
    # list(...) keeps the indexing valid on Python 3, where values() is a
    # view and cannot be subscripted.
    in_seq_len = list(self._config.inputs.values())[0].shape[0]
    path, _, states, step_outputs = self._env.random_step_sequence(
        min_len=in_seq_len)
    obs = {modality_type: [] for modality_type in self._config.inputs}
    for step_output in step_outputs:
      step_obs, _, done, _ = step_output
      for modality_type in self._config.inputs:
        assert modality_type in step_obs, '{}'.format(type(step_obs))
        modality_obs = step_obs[modality_type]
        io = self._config.inputs[modality_type]
        # A single observation has the configured shape minus the leading
        # sequence dimension.
        assert len(modality_obs.shape) == len(io.shape) - 1
        for dim_o, dim_i in zip(modality_obs.shape, io.shape[1:]):
          assert dim_o == dim_i, '{} != {}'.format(dim_o, dim_i)
        obs[modality_type].append(modality_obs)
      if done:
        break

    if not obs:
      return obs, states, path

    # Keep a tail of the path whose length is proportional to the fraction of
    # observations that fit in the input sequence.
    num_obs = len(list(obs.values())[0])
    max_path_len = int(
        round(in_seq_len * float(len(path)) / float(num_obs)))
    path = path[-max_path_len:]
    states = states[-in_seq_len:]

    def regroup(obs, i):
      """Regroups observations.

      Args:
        obs: a dict of modality type to the list of per-step observations,
          each a numpy array.
        i: the modality type to be grouped.

      Returns:
        A numpy array which contains all observations of modality i stacked
        along a new first dimension. If the number of observations differs
        from config.inputs[i].shape[0], the array is zero padded at the back
        or clipped at the front to that length.
      """
      grouped_obs = np.stack(obs[i], axis=0)
      seq_len = self._config.inputs[i].shape[0]
      grouped_obs, _ = _pad_or_clip_array(
          grouped_obs, seq_len, is_front_clip=True)
      return grouped_obs

    all_obs = {i: regroup(obs, i) for i in self._config.inputs}

    return all_obs, states, path

  def _obs_to_state(self, path, states):
    """Computes mapping between path nodes and states.

    Each state is assigned to the path vertex whose pose is closest (in
    squared Euclidean distance) to the first two entries of the state.

    Args:
      path: a list of vertex indices.
      states: a list of states.

    Returns:
      path_to_obs: a defaultdict(list) from vertex index to the indices of
        the states assigned to it.
      obs_to_state: a list with the assigned vertex index for every state.
    """
    path_coordinates = np.concatenate(
        [np.reshape(self._env.vertex_to_pose(v), [1, 2]) for v in path])

    path_to_obs = collections.defaultdict(list)
    obs_to_state = []
    for i, s in enumerate(states):
      location = np.reshape(s[0:2], [1, 2])
      squared_dist = np.sum(np.power(path_coordinates - location, 2), axis=1)
      index = path[np.argmin(np.reshape(squared_dist, [-1]))]
      path_to_obs[index].append(i)
      obs_to_state.append(index)
    return path_to_obs, obs_to_state

  def _perturb_state(self, state, noise_var):
    """Perturbes the state.

    The first two entries are perturbed with zero mean Gaussian noise that is
    truncated to two standard deviations. Note that `noise_var` is passed to
    the sampler as the standard deviation (scale), not as a variance. If the
    state has more than two entries, the third one is replaced by a sample
    drawn uniformly from [-pi, pi).

    Args:
      state: a numpy array containing an env state (x, y locations, and
        optionally an orientation).
      noise_var: float, the scale of the noise; no noise is added if it is
        not positive.

    Returns:
      The perturbed state (a copy; the input is not modified).
    """

    def normal(v, std):
      if std > 0:
        n = self._rng.normal(0.0, std)
        n = min(n, 2.0 * std)
        n = max(n, -2.0 * std)
        return v + n
      else:
        return v

    state = state.copy()
    state[0] = normal(state[0], noise_var)
    state[1] = normal(state[1], noise_var)
    if state.size > 2:
      state[2] = self._rng.uniform(-math.pi, math.pi)
    return state

  def _sample_obs(self,
                  indices,
                  observations,
                  observation_states,
                  path_to_obs,
                  max_obs_index=None,
                  use_exploration_obs=True):
    """Samples one observation which corresponds to vertex_index in path.

    In addition, the sampled observation must have index in observations less
    than max_obs_index. If these two conditions cannot be satisfied the
    function returns (None, None, None).

    Args:
      indices: a list of integers (vertex indices to sample from).
      observations: a list of numpy arrays containing all the observations.
      observation_states: a list of numpy arrays, each array representing the
        state of the observation.
      path_to_obs: a dict of path indices to lists of observation indices.
      max_obs_index: an integer, or None for no limit.
      use_exploration_obs: if True, then the observation is sampled among the
        specified observations, otherwise it is obtained from the
        environment.

    Returns:
      A tuple of:
        -- The sampled observation. It is an element of `observations`, or,
          when query noise is enabled or use_exploration_obs is False,
          whatever self._env.observation returns for the perturbed state.
        -- The index of the sampled observation among the input observations
          (None if the observation was obtained from the environment for a
          vertex pose).
        -- The state at which the observation is captured.

    Raises:
      ValueError: if the observation and observation_states lists are of
        different lengths.
    """
    if len(observations) != len(observation_states):
      raise ValueError('observation and observation_states lists must have '
                       'equal lengths')
    if not indices:
      return None, None, None

    vertex_index = self._rng.choice(indices)

    if not use_exploration_obs:
      # Render a fresh observation at the (perturbed) pose of the vertex.
      xy = self._env.vertex_to_pose(vertex_index)
      xytheta = np.array([xy[0], xy[1], 0.0])
      xytheta = self._perturb_state(xytheta, self._query_noise_var)
      return self._env.observation(xytheta), None, xytheta

    obs_indices = path_to_obs[vertex_index]
    if max_obs_index is not None:
      obs_indices = [i for i in obs_indices if i < max_obs_index]
    if not obs_indices:
      return None, None, None

    index = self._rng.choice(obs_indices)
    if self._add_query_noise:
      xytheta = self._perturb_state(observation_states[index],
                                    self._query_noise_var)
      return self._env.observation(xytheta), index, xytheta
    return observations[index], index, observation_states[index]
class AreNearbyTask(RandomExplorationBasedTask):
  """A task of identifying whether a query is nearby current location or not.

  The query is guaranteed to be in proximity of an already visited location,
  i.e. close to one of the observations. For each observation we have one
  query, which is either close or not to this observation.
  """

  def __init__(
      self,
      max_distance=0,
      *args,
      **kwargs):
    """Initializes an AreNearbyTask.

    Args:
      max_distance: integer, how many steps back along the exploration are
        still considered nearby the current observation.
      *args: for super class.
      **kwargs: for super class.
    """
    super(AreNearbyTask, self).__init__(*args, **kwargs)
    self._max_distance = max_distance

    if len(self.config.inputs.keys()) != 1:
      raise NotImplementedError('current implementation supports input '
                                'with only one modality type')

  def episode(self):
    """Episode data.

    Returns:
      observations: a dict with one entry holding the in_seq_len exploration
        observations.
      query: a numpy array with one query observation per exploration
        observation, stacked along the first dimension.
      A tuple of size two. First element is a in_seq_len x 3 numpy array of
        one hot labels: index 0 - the query is a visited location that is not
        nearby the i-th observation, index 1 - the query is a nearby visited
        location, index 2 - the query is a location that was not visited.
        The second element in the tuple is a mask, a numpy array of size
        in_seq_len x 1 and values 1 or 0 denoting whether the query is
        valid or not (it can happen that the query is not valid, e.g. there
        are not enough observations to have a meaningful queries).
    """
    observations, states, path = self._exploration()
    # list(...) keeps the indexing valid on Python 3.
    obs_seq = list(observations.values())[0]
    assert len(obs_seq) == len(states)

    # The observations are taken along a smoothed trajectory following the
    # path. We compute a mapping between the observations and the map
    # vertices.
    path_to_obs, obs_to_path = self._obs_to_state(path, states)

    # Go over all observations, and sample a query. With probability 0.5 this
    # query is a nearby observation (defined as belonging to the same vertex
    # in path).
    g = self._env.graph
    queries = []
    labels = []
    validity_masks = []
    query_index_in_observations = []
    for i, curr_o in enumerate(obs_seq):
      p = obs_to_path[i]
      low = max(0, i - self._max_distance)
      neighbors = list(g[p].keys())

      # Nearby visited indices, label 1. The second filter must run over the
      # list just computed; filtering an empty placeholder would leave this
      # group permanently empty.
      nearby_visited = [
          ii for ii in list(path[low:i + 1]) + neighbors
          if ii in obs_to_path[:i]
      ]
      nearby_visited = [ii for ii in nearby_visited if ii in path_to_obs]
      # NOT nearby visited indices, label 0.
      not_nearby_visited = [ii for ii in path[:low] if ii not in neighbors]
      not_nearby_visited = [
          ii for ii in not_nearby_visited if ii in path_to_obs
      ]
      # NOT visited indices, label 2.
      not_visited = [
          ii for ii in range(g.number_of_nodes()) if ii not in path[:i + 1]
      ]

      # One list of candidate vertices per label.
      index_groups = [not_nearby_visited, nearby_visited, not_visited]

      # Consider only labels for which there are indices.
      allowed_labels = [ii for ii, group in enumerate(index_groups) if group]
      label = self._rng.choice(allowed_labels)

      indices = list(set(index_groups[label]))
      # Unvisited locations have no exploration observation; they are
      # rendered from the environment instead.
      max_obs_index = None if label == 2 else i
      use_exploration_obs = label != 2
      o, obs_index, _ = self._sample_obs(
          indices=indices,
          observations=obs_seq,
          observation_states=states,
          path_to_obs=path_to_obs,
          max_obs_index=max_obs_index,
          use_exploration_obs=use_exploration_obs)
      query_index_in_observations.append(obs_index)

      # If we cannot sample a valid query, we mark it as not valid in mask.
      if o is None:
        label = 0.0
        o = curr_o
        validity_masks.append(0)
      else:
        validity_masks.append(1)

      # Observations rendered by the environment come as a dict keyed by
      # modality, while exploration observations are plain arrays.
      if isinstance(o, dict):
        o = list(o.values())[0]
      queries.append(o)
      labels.append(label)

    query = np.concatenate([np.expand_dims(q, axis=0) for q in queries], axis=0)

    def one_hot(label, num_labels=3):
      # `np.float` was an alias of the builtin float and has been removed
      # from recent NumPy releases.
      a = np.zeros((num_labels,), dtype=float)
      a[int(label)] = 1.0
      return a

    outputs = np.stack([one_hot(l) for l in labels], axis=0)
    validity_mask = np.reshape(
        np.array(validity_masks, dtype=np.int32), [-1, 1])

    self.info['query_index_in_observations'] = query_index_in_observations
    self.info['observation_states'] = states

    return observations, query, (outputs, validity_mask)

  def target_loss(self, truth, predicted, weights=None):
    """Not implemented for this task; returns None."""
    pass
class NeighboringQueriesTask(RandomExplorationBasedTask):
  """A task of identifying whether two queries are closeby or not.

  The proximity between queries is defined by the length of the shortest path
  between them.
  """

  def __init__(
      self,
      max_distance=1,
      *args,
      **kwargs):
    """Initializes a NeighboringQueriesTask.

    Args:
      max_distance: integer, the maximum distance in terms of number of
        vertices between the two queries, so that they are considered
        neighboring.
      *args: for super class.
      **kwargs: for super class.
    """
    super(NeighboringQueriesTask, self).__init__(*args, **kwargs)
    self._max_distance = max_distance
    if len(self.config.inputs.keys()) != 1:
      raise NotImplementedError('current implementation supports input '
                                'with only one modality type')

  def episode(self):
    """Episode data.

    Returns:
      observations: a dict with one entry holding the in_seq_len exploration
        observations.
      query: a numpy array stacking the pair of query observations along the
        first dimension.
      A tuple of size two. First element is a numpy array of size 2
        containing a one hot vector of whether the two observations are
        neighboring. Second element is a numpy array holding 1; every
        returned episode is marked valid.
    """
    observations, states, path = self._exploration()
    # list(...) keeps the indexing valid on Python 3.
    obs_seq = list(observations.values())[0]
    assert len(obs_seq) == len(states)
    path_to_obs, _ = self._obs_to_state(path, states)
    # Restrict path to ones for which observations have been generated.
    path = [p for p in path if p in path_to_obs]
    # Sample first query.
    query1_index = self._rng.choice(path)
    # Sample label.
    label = self._rng.randint(2)
    # Sample second query: all vertices within max_distance of the first one.
    closest_indices = list(
        nx.single_source_shortest_path(
            self._env.graph, query1_index, self._max_distance).keys())
    if label == 0:
      # Second query is far from the first one. The set is used only for the
      # membership test; candidate order still follows `path`.
      closest_set = set(closest_indices)
      indices = [p for p in path if p not in closest_set]
    else:
      # Second query is nearby the first one.
      indices = [p for p in closest_indices if p in path]
    # NOTE: rng.choice raises ValueError if no candidate vertex exists.
    query2_index = self._rng.choice(indices)
    # Map query vertices to observations.
    query1, query1_index, _ = self._sample_obs(
        [query1_index],
        obs_seq,
        states,
        path_to_obs,
        max_obs_index=None,
        use_exploration_obs=True)
    query2, query2_index, _ = self._sample_obs(
        [query2_index],
        obs_seq,
        states,
        path_to_obs,
        max_obs_index=None,
        use_exploration_obs=True)

    queries = np.concatenate(
        [np.expand_dims(q, axis=0) for q in [query1, query2]])
    labels = np.array([0, 0])
    labels[label] = 1
    is_valid = np.array([1])

    self.info['observation_states'] = states
    self.info['query_indices_in_observations'] = [query1_index, query2_index]

    return observations, queries, (labels, is_valid)

  def target_loss(self, truth, predicted, weights=None):
    """Not implemented for this task; returns None."""
    pass
class GotoStaticXTask(RandomExplorationBasedTask):
  """Task go to a static X.

  If continuous reward is used only one goal is allowed so that the reward can
  be computed as a delta-distance to that goal.
  """

  def __init__(self,
               step_reward=0.0,
               goal_reward=1.0,
               hit_wall_reward=-1.0,
               done_at_target=False,
               use_continuous_reward=False,
               *args,
               **kwargs):
    """Initializes a GotoStaticXTask.

    Args:
      step_reward: float, reward for a step that neither reaches a target nor
        hits a wall (used when continuous reward is off).
      goal_reward: float, reward for reaching a target vertex.
      hit_wall_reward: float, reward when info reports a wall collision.
      done_at_target: boolean, if True the episode is flagged done when a
        target is reached; otherwise the environment is reset.
      use_continuous_reward: boolean, if True the reward off target is the
        decrease in path length to the single target.
      *args: see super class.
      **kwargs: see super class.
    """
    super(GotoStaticXTask, self).__init__(*args, **kwargs)
    if len(self.config.inputs.keys()) > 1:
      raise NotImplementedError('current implementation supports input '
                                'with only one modality type or less.')

    self._step_reward = step_reward
    self._goal_reward = goal_reward
    self._hit_wall_reward = hit_wall_reward
    self._done_at_target = done_at_target
    self._use_continuous_reward = use_continuous_reward

    self._previous_path_length = None

  def episode(self):
    """Episode data.

    Returns:
      observations: the exploration observations.
      query: the first modality of the environment observation rendered at
        the goal vertex.
      A tuple (deltas, mask): the deltas of the shortest start-to-goal
        trajectory, padded or clipped to the output sequence length, and the
        mask of which entries are actual values.

    Raises:
      ValueError: if the exploration path has fewer than two nodes, or no
        node different from the last one.
    """
    observations, _, path = self._exploration()
    if len(path) < 2:
      raise ValueError('The exploration path has only one node.')

    g = self._env.graph
    start = path[-1]
    # The rejection sampling below would never terminate if every earlier
    # node equals the start node.
    if all(p == start for p in path[:-1]):
      raise ValueError('The exploration path has no node different from the '
                       'start node to be used as goal.')
    while True:
      goal = self._rng.choice(path[:-1])
      if goal != start:
        break
    goal_path = nx.shortest_path(g, start, goal)

    init_orientation = self._rng.uniform(0, np.pi, (1,))
    trajectory = np.array(
        [list(self._env.vertex_to_pose(p)) for p in goal_path])
    init_xy = np.reshape(trajectory[0, :], [-1])
    init_state = np.concatenate([init_xy, init_orientation], 0)

    # The first pose is the initial state; deltas are taken over the rest.
    trajectory = trajectory[1:, :]
    deltas = envs_util.trajectory_to_deltas(trajectory, init_state)
    output_seq_len = self._config.output.shape[0]
    arr = _pad_or_clip_array(deltas, output_seq_len, output_mask=True)
    # pylint: disable=unbalanced-tuple-unpacking
    thetas, _, thetas_mask = arr
    # pylint: enable=unbalanced-tuple-unpacking

    # list(...) keeps the indexing valid on Python 3.
    goal_obs = self._env.observation(self._env.vertex_to_pose(goal))
    query = list(goal_obs.values())[0]

    return observations, query, (thetas, thetas_mask)

  def reward(self, obs, done, info):
    """Computes the reward for the current environment state.

    Args:
      obs: Observation from environment's step function.
      done: Done flag from environment's step function.
      info: Info dict from environment's step function.

    Returns:
      obs, reward, done, info. obs is replaced by the observation of a reset
      environment when a target is reached and done_at_target is False.

    Raises:
      ValueError: if continuous reward is used and the environment does not
        have exactly one target.
    """
    if 'wall_collision' in info and info['wall_collision']:
      return obs, self._hit_wall_reward, done, info

    reward = 0.0
    current_vertex = self._env.pose_to_vertex(self._env.state)

    if current_vertex in self._env.targets():
      if self._done_at_target:
        done = True
      else:
        obs = self._env.reset()
      reward = self._goal_reward
    else:
      if self._use_continuous_reward:
        if len(self._env.targets()) != 1:
          raise ValueError(
              'FindX task with continuous reward is assuming only one target.')
        goal_vertex = self._env.targets()[0]
        path_length = self._compute_path_length(goal_vertex)
        # Positive when the agent got closer to the goal since the last call.
        reward = self._previous_path_length - path_length
        self._previous_path_length = path_length
      else:
        reward = self._step_reward

    return obs, reward, done, info

  def _compute_path_length(self, goal_vertex):
    """Length of the remaining path from the current state to goal_vertex.

    The shortest vertex path from the current vertex is used. The distance to
    the next vertex on that path is measured from the current position, and
    every further edge counts as one cell size.

    Args:
      goal_vertex: the target vertex index.

    Returns:
      The path length as a float.
    """
    current_vertex = self._env.pose_to_vertex(self._env.state)
    path = nx.shortest_path(self._env.graph, current_vertex, goal_vertex)
    assert len(path) >= 2
    curr_xy = np.array(self._env.state[:2])
    next_xy = np.array(self._env.vertex_to_pose(path[1]))
    last_step_distance = np.linalg.norm(next_xy - curr_xy)
    return (len(path) - 2) * self._env.cell_size_px + last_step_distance

  def reset(self, observation):
    """Re-initializes the path length used by the continuous reward.

    Args:
      observation: unused.

    Raises:
      ValueError: if continuous reward is used and the environment does not
        have exactly one target.
    """
    if self._use_continuous_reward:
      if len(self._env.targets()) != 1:
        raise ValueError(
            'FindX task with continuous reward is assuming only one target.')
      goal_vertex = self._env.targets()[0]
      self._previous_path_length = self._compute_path_length(goal_vertex)

  def target_loss(self, truth, predicted, weights=None):
    """Action classification loss.

    Args:
      truth: a batch_size x sequence length x number of labels float
        Tensor containing a one hot vector for each label in each batch and
        time.
      predicted: a batch_size x sequence length x number of labels float
        Tensor containing the predicted scores over all actions.
      weights: a batch_size x sequence_length float Tensor of bool
        denoting which actions are valid.

    Returns:
      An average cross entropy over all batches and elements in sequence.
    """
    return classification_loss(
        truth=truth, predicted=predicted, weights=weights, is_one_hot=True)
class RelativeLocationTask(RandomExplorationBasedTask):
  """A task of estimating the relative location of a query w.r.t current.

  It is to be used for debugging. It is designed such that the output is a
  single value, out of a discrete set of values, so that it can be phrased as
  a classification problem.
  """

  def __init__(self, num_labels, *args, **kwargs):
    """Initializes a relative location task.

    Args:
      num_labels: integer, number of orientations to bin the relative
        orientation into.
      *args: see super class.
      **kwargs: see super class.
    """
    super(RelativeLocationTask, self).__init__(*args, **kwargs)
    self._num_labels = num_labels
    if len(self.config.inputs.keys()) != 1:
      raise NotImplementedError('current implementation supports input '
                                'with only one modality type')

  def episode(self):
    """Episode data.

    Returns:
      observations: the exploration observations.
      query: an observation sampled for one of the path vertices other than
        the last one.
      A tuple (output, None), where output is a one hot float32 vector of
        length num_labels encoding the binned orientation of the query
        location relative to the last exploration state.

    Raises:
      ValueError: if the configured output shape is not [num_labels].
    """
    observations, states, path = self._exploration()

    # Select a random element from history.
    path_to_obs, _ = self._obs_to_state(path, states)
    use_exploration_obs = not self._add_query_noise
    # list(...) keeps the indexing valid on Python 3.
    query, _, query_state = self._sample_obs(
        path[:-1],
        list(observations.values())[0],
        states,
        path_to_obs,
        max_obs_index=None,
        use_exploration_obs=use_exploration_obs)

    x, y, theta = tuple(states[-1])
    q_x, q_y, _ = tuple(query_state)
    t_x, t_y = q_x - x, q_y - y
    (rt_x, rt_y) = (np.sin(theta) * t_x - np.cos(theta) * t_y,
                    np.cos(theta) * t_x + np.sin(theta) * t_y)
    # Bins are [a(i), a(i+1)] for a(i) = -pi + 0.5 * bin_size + i * bin_size.
    shift = np.pi * (1 - 1.0 / (2.0 * self._num_labels))
    orientation = np.arctan2(rt_y, rt_x) + shift
    if orientation < 0:
      orientation += 2 * np.pi
    # The modulo guards against the orientation rounding to exactly 2 * pi
    # after the wrap-around above, which would yield an out of range label.
    label = int(
        np.floor(self._num_labels * orientation / (2 * np.pi))
    ) % self._num_labels

    out_shape = self._config.output.shape
    if len(out_shape) != 1:
      raise ValueError('Output shape should be of rank 1.')
    if out_shape[0] != self._num_labels:
      raise ValueError('Output shape must be of size %d' % self._num_labels)
    output = np.zeros(out_shape, dtype=np.float32)
    output[label] = 1

    return observations, query, (output, None)

  def target_loss(self, truth, predicted, weights=None):
    """Mean cross entropy between one hot labels and predictions."""
    return classification_loss(
        truth=truth, predicted=predicted, weights=weights, is_one_hot=True)
class LocationClassificationTask(UnrolledTask):
  """A task of classifying a location as one of several classes.

  The task does not have an input, but just a query and an output. The query
  is an observation of the current location, e.g. an image taken from the
  current state. The output is a label classifying this location in one of
  predefined set of locations (or landmarks).

  The current implementation classifies locations as intersections based on
  the number and directions of bifurcations. It is expected that a location
  can have at most 4 different directions, aligned with the axes. As each of
  these four directions might be present or not, the number of possible
  intersections are 2^4 = 16.
  """

  def __init__(self, env, seed, *args, **kwargs):
    """Initializes a LocationClassificationTask.

    Args:
      env: the environment; its graph, vertex_to_pose, pose_to_vertex and
        observation members are used.
      seed: a random seed.
      *args: see super class.
      **kwargs: see super class.
    """
    super(LocationClassificationTask, self).__init__(*args, **kwargs)
    self._env = env
    self._rng = np.random.RandomState(seed)
    # A location property which can be set. If not set, a random one is
    # generated for every episode.
    self._location = None
    if len(self.config.inputs.keys()) > 1:
      raise NotImplementedError('current implementation supports input '
                                'with only one modality type or less.')

  @property
  def location(self):
    return self._location

  @location.setter
  def location(self, location):
    self._location = location

  def episode(self):
    """Episode data.

    Returns:
      An empty list (this task has no inputs).
      query: the first modality of the environment observation at the
        location.
      A tuple (one_hot_label, None), where one_hot_label is a float32 vector
        of length 16 encoding which of the four direction bins contain a
        graph neighbor of the location's vertex.

    Raises:
      ValueError: if the configured output shape is not [16].
    """
    # Get a location. If not set, sample one at a vertex with a random
    # orientation.
    location = self._location
    if location is None:
      num_nodes = self._env.graph.number_of_nodes()
      vertex = int(math.floor(self._rng.uniform(0, num_nodes)))
      xy = self._env.vertex_to_pose(vertex)
      theta = self._rng.uniform(0, 2 * math.pi)
      location = np.concatenate(
          [np.reshape(xy, [-1]), np.array([theta])], axis=0)
    else:
      vertex = self._env.pose_to_vertex(location)

    neighbors = self._env.graph.neighbors(vertex)
    xy_s = [self._env.vertex_to_pose(n) for n in neighbors]

    def rotate(xy, theta):
      """Rotates a vector around the origin by angle theta.

      Args:
        xy: a numpy darray of shape (2, ) of floats containing the x and y
          coordinates of a vector.
        theta: a python float containing the rotation angle in radians.

      Returns:
        A numpy darray of floats of shape (2,) containing the x and y
          coordinates rotated xy.
      """
      rotated_x = np.cos(theta) * xy[0] - np.sin(theta) * xy[1]
      rotated_y = np.sin(theta) * xy[0] + np.cos(theta) * xy[1]
      return np.array([rotated_x, rotated_y])

    # Rotate all intersection bifurcations by the orientation of the agent as
    # the intersection label is defined in an agent centered fashion.
    xy_s = [
        rotate(xy - location[0:2], -location[2] - math.pi / 4) for xy in xy_s
    ]
    th_s = [np.arctan2(xy[1], xy[0]) for xy in xy_s]

    out_shape = self._config.output.shape
    if len(out_shape) != 1:
      raise ValueError('Output shape should be of rank 1.')
    num_labels = out_shape[0]
    if num_labels != 16:
      raise ValueError('Currently only 16 labels are supported '
                       '(there are 16 different 4 way intersection types).')

    # Quantize each angle into one of four bins. An angle of exactly pi
    # would fall into a fifth bin; it denotes the same direction as -pi, so
    # it is wrapped to bin 0 to keep the label below 16.
    bins = set(
        int(math.floor(4 * (th / (2 * np.pi) + 0.5))) % 4 for th in th_s)
    # The label is the bitmask of occupied bins.
    label = sum(2 ** b for b in bins)
    one_hot_label = np.zeros((num_labels,), dtype=np.float32)
    one_hot_label[int(label)] = 1.0

    # list(...) keeps the indexing valid on Python 3.
    query = list(self._env.observation(location).values())[0]
    return [], query, (one_hot_label, None)

  def reward(self, obs, done, info):
    """Not supported for this task.

    Raises:
      ValueError: always.
    """
    raise ValueError('Do not call.')

  def target_loss(self, truth, predicted, weights=None):
    """Mean cross entropy between one hot labels and predictions."""
    return classification_loss(
        truth=truth, predicted=predicted, weights=weights, is_one_hot=True)
class GotoStaticXNoExplorationTask(UnrolledTask):
  """An interface for findX tasks without exploration.

  The agent is initialized a random location in a random world and a random
  goal and the objective is for the agent to move toward the goal. This class
  generates episode for such task. Each generates a sequence of observations x
  and target outputs y. x is the observations and is an OrderedDict with keys
  provided from config.inputs.keys() and the shapes provided in the
  config.inputs. The output is a numpy arrays with the shape specified in the
  config.output. The shape of the array is (sequence_length x action_size)
  where action is the number of actions that can be done in the environment.
  Note that config.output.shape should be set according to the number of
  actions that can be done in the env.

  target outputs y are the groundtruth value of each action that is computed
  from the environment graph. The target output for each action is
  proportional to the progress that each action makes. Target value of 1 means
  that the action takes the agent one step closer, -1 means the action takes
  the agent one step farther. Value of -2 means that action should not take
  place at all. This can be because the action leads to collision or it wants
  to terminate the episode prematurely.
  """

  def __init__(self, env, *args, **kwargs):
    """Initializes the task and validates its config.

    Args:
      env: the environment used to generate episodes.
      *args: positional arguments forwarded to the parent constructor.
      **kwargs: keyword arguments forwarded to the parent constructor.

    Raises:
      ValueError: if the config has a query, if the output shape is not of
        rank 2, or if the first dimension of any input differs from the first
        dimension of the output.
    """
    super(GotoStaticXNoExplorationTask, self).__init__(*args, **kwargs)

    if self._config.query is not None:
      raise ValueError('query should be None.')
    if len(self._config.output.shape) != 2:
      raise ValueError('output should only have two dimensions:'
                       '(sequence_length x number_of_actions)')
    for input_config in self._config.inputs.values():
      if input_config.shape[0] != self._config.output.shape[0]:
        raise ValueError('the first dimension of the input and output should '
                         'be the same.')

    self._env = env

  def _compute_shortest_path_length(self, vertex, target_vertices):
    """Computes length of the shortest path from vertex to any target vertex.

    The returned value is the number of vertices on the shortest path (that
    is, the number of edges plus one); callers in this class only use
    differences of these values.

    Args:
      vertex: integer, index of the vertex in the environment graph.
      target_vertices: list of the target vertexes.

    Returns:
      integer, minimum over the targets of the number of vertices on the
      shortest path from vertex to that target.

    Raises:
      networkx.NetworkXNoPath: if there is no path between the vertex and at
        least one of the target_vertices.
    """
    return np.min([
        len(nx.shortest_path(self._env.graph, vertex, t))
        for t in target_vertices
    ])

  def _compute_gt_value(self, vertex, target_vertices):
    """Computes groundtruth value of all the actions at the vertex.

    The value of each action is the difference each action makes in the length
    of the shortest path to the goal. If an action takes the agent one step
    closer to the goal the value is 1. In case, it takes the agent one step
    away from the goal it would be -1. If it leads to collision or if the
    agent uses action stop before reaching to the goal it is -2. To avoid
    scale issues the gt_values are multiplied by 0.5.

    Args:
      vertex: integer, the index of current vertex.
      target_vertices: list of the integer indexes of the target views.

    Returns:
      numpy array with shape (action_size,) and each element is the
      groundtruth value of each action based on the progress each action
      makes.
    """
    action_size = self._config.output.shape[1]
    # Actions that do not lead to an adjacent vertex keep the default of -2.
    output_value = np.ones((action_size), dtype=np.float32) * -2
    my_distance = self._compute_shortest_path_length(vertex, target_vertices)
    vertex_pose = self._env.vertex_to_pose(vertex)
    for adj in self._env.graph[vertex]:
      adj_distance = self._compute_shortest_path_length(adj, target_vertices)
      # Defensive guard: the helper above never returns None as written.
      if adj_distance is None:
        continue
      adj_pose = self._env.vertex_to_pose(adj)
      action_index = self._env.action(vertex_pose, adj_pose)
      assert action_index is not None, ('{} is not adjacent to {}. There might '
                                        'be a problem in environment graph '
                                        'connectivity because there is no '
                                        'direct edge between the given '
                                        'vertices').format(vertex_pose, adj_pose)
      output_value[action_index] = my_distance - adj_distance

    return output_value * 0.5

  def episode(self):
    """Returns data needed to train and test a single episode.

    Returns:
      (inputs, None, output) where inputs is a dictionary of modality types to
        numpy arrays. The second element is query but we assume that the goal
        is also given as part of observation so it should be None for this
        task, and the outputs is the tuple of ground truth action values with
        the shape of (sequence_length x action_size) that is coming from
        config.output.shape and a numpy array with the shape of
        (sequence_length,) that is 1 if the corresponding element of the
        input and output should be used in the training optimization.

    Raises:
      ValueError: If the output values for env.random_step_sequence is not
        valid.
      ValueError: If the shape of observations coming from the env is not
        consistent with the config.
      ValueError: If there is a modality type specified in the config but the
        environment does not return that.
    """
    input_configs = self._config.inputs
    # list(...) keeps this working on Python 3, where dict views cannot be
    # indexed.
    sequence_length = list(input_configs.values())[0].shape[0]
    modality_types = list(input_configs.keys())

    path, _, _, step_outputs = self._env.random_step_sequence(
        max_len=sequence_length)
    target_vertices = [self._env.pose_to_vertex(x) for x in self._env.targets()]

    if len(path) != len(step_outputs):
      raise ValueError('path, and step_outputs should have equal length'
                       ' {}!={}'.format(len(path), len(step_outputs)))

    # Collect the per-step observations of every configured modality.
    observations = collections.OrderedDict(
        (modality_type, []) for modality_type in modality_types)
    for step_output in step_outputs:
      obs_dict = step_output[0]
      for modality_type in modality_types:
        if modality_type not in obs_dict:
          raise ValueError('modality type is not returned from the '
                           'environment. {} not in {}'.format(
                               modality_type, obs_dict.keys()))
        obs = obs_dict[modality_type]
        expected_shape = input_configs[modality_type].shape[1:]
        if np.any(obs.shape != tuple(expected_shape)):
          raise ValueError(
              'The observations should have the same size as specified in '
              'config for modality type {}. {} != {}'.format(
                  modality_type, obs.shape, expected_shape))
        observations[modality_type].append(obs)

    gt_value = [self._compute_gt_value(v, target_vertices) for v in path]

    # Pad (or clip from the back) everything to sequence_length. The mask
    # marks which steps hold real data.
    gt_value, _, value_mask = _pad_or_clip_array(
        np.array(gt_value),
        sequence_length,
        is_front_clip=False,
        output_mask=True,
    )
    # Iterate over a snapshot because the values are replaced in the loop.
    for modality_type, obs in list(observations.items()):
      observations[modality_type], _, mask = _pad_or_clip_array(
          np.array(obs), sequence_length, is_front_clip=False, output_mask=True)
      assert np.all(mask == value_mask)

    return observations, None, (gt_value, value_mask)

  def reset(self, observation):
    """Called after the environment is reset."""
    pass

  def target_loss(self, true_targets, targets, weights=None):
    """A loss for training a task model.

    This loss measures the discrepancy between the task outputs, the true and
    predicted ones.

    Args:
      true_targets: tf.Tensor of tf.float32 with the shape of
        (batch_size x sequence_length x action_size).
      targets: tf.Tensor of tf.float32 with the shape of
        (batch_size x sequence_length x action_size).
      weights: tf.Tensor of tf.bool with the shape of
        (batch_size x sequence_length).

    Raises:
      ValueError: if the shapes of the input tensors are not consistent.

    Returns:
      L2 loss between the predicted action values and true action values.
    """
    targets_shape = targets.get_shape().as_list()
    true_targets_shape = true_targets.get_shape().as_list()

    if len(targets_shape) != 3 or len(true_targets_shape) != 3:
      raise ValueError('invalid shape for targets or true_targets_shape')
    if targets_shape != true_targets_shape:
      raise ValueError('the shape of targets and true_targets are not the same '
                       '{} != {}'.format(targets_shape, true_targets_shape))

    if weights is not None:
      weights_shape = weights.get_shape().as_list()
      if weights_shape != targets_shape[0:2]:
        raise ValueError('The first two elements of weights shape should match '
                         'target. {} != {}'.format(weights_shape,
                                                   targets_shape))
      # Keep only the steps selected by the boolean weights.
      true_targets = tf.boolean_mask(true_targets, weights)
      targets = tf.boolean_mask(targets, weights)

    return tf.losses.mean_squared_error(
        labels=tf.reshape(true_targets, [-1]),
        predictions=tf.reshape(targets, [-1]))

  def reward(self, obs, done, info):
    """Not implemented for this task; always raises NotImplementedError."""
    raise NotImplementedError('reward is not implemented for this task')
class NewTask(UnrolledTask):
  """A goal-directed task producing groundtruth action values per step.

  Episodes are random step sequences from the environment. For every step the
  target output holds, per action, the progress that action makes towards the
  nearest target vertex in the environment graph (see _compute_gt_value).
  Unlike the constructor, which performs no config validation, episode() and
  target_loss() check the shapes they receive.
  """

  def __init__(self, env, *args, **kwargs):
    """Initializes the task.

    Args:
      env: the environment used to generate episodes.
      *args: positional arguments forwarded to the parent constructor.
      **kwargs: keyword arguments forwarded to the parent constructor.
    """
    super(NewTask, self).__init__(*args, **kwargs)
    self._env = env

  def _compute_shortest_path_length(self, vertex, target_vertices):
    """Computes length of the shortest path from vertex to any target vertex.

    The returned value is the number of vertices on the shortest path (that
    is, the number of edges plus one); callers in this class only use
    differences of these values.

    Args:
      vertex: integer, index of the vertex in the environment graph.
      target_vertices: list of the target vertexes.

    Returns:
      integer, minimum over the targets of the number of vertices on the
      shortest path from vertex to that target.

    Raises:
      networkx.NetworkXNoPath: if there is no path between the vertex and at
        least one of the target_vertices. The failure is logged before the
        exception is re-raised.
    """
    try:
      return np.min([
          len(nx.shortest_path(self._env.graph, vertex, t))
          for t in target_vertices
      ])
    except nx.NetworkXNoPath:
      # The module header does not import logging, so import it here to make
      # sure the original exception (not a NameError) reaches the caller.
      import logging
      logging.error('there is no path between vertex %d and at least one of '
                    'the targets %r', vertex, target_vertices)
      raise

  def _compute_gt_value(self, vertex, target_vertices):
    """Computes groundtruth value of all the actions at the vertex.

    The value of each action is the difference each action makes in the length
    of the shortest path to the goal. If an action takes the agent one step
    closer to the goal the value is 1. In case, it takes the agent one step
    away from the goal it would be -1. If it leads to collision or if the
    agent uses action stop before reaching to the goal it is -2. To avoid
    scale issues the gt_values are multiplied by 0.5.

    Args:
      vertex: integer, the index of current vertex.
      target_vertices: list of the integer indexes of the target views.

    Returns:
      numpy array with shape (action_size,) and each element is the
      groundtruth value of each action based on the progress each action
      makes.
    """
    action_size = self._config.output.shape[1]
    # Actions that do not lead to an adjacent vertex keep the default of -2.
    output_value = np.ones((action_size), dtype=np.float32) * -2
    my_distance = self._compute_shortest_path_length(vertex, target_vertices)
    vertex_pose = self._env.vertex_to_pose(vertex)
    for adj in self._env.graph[vertex]:
      adj_distance = self._compute_shortest_path_length(adj, target_vertices)
      # Defensive guard: the helper above never returns None as written.
      if adj_distance is None:
        continue
      adj_pose = self._env.vertex_to_pose(adj)
      action_index = self._env.action(vertex_pose, adj_pose)
      assert action_index is not None, ('{} is not adjacent to {}. There might '
                                        'be a problem in environment graph '
                                        'connectivity because there is no '
                                        'direct edge between the given '
                                        'vertices').format(vertex_pose, adj_pose)
      output_value[action_index] = my_distance - adj_distance

    return output_value * 0.5

  def episode(self):
    """Returns data needed to train and test a single episode.

    Returns:
      (inputs, None, output) where inputs is a dictionary of modality types to
        numpy arrays. The second element is query but we assume that the goal
        is also given as part of observation so it should be None for this
        task, and the outputs is the tuple of ground truth action values with
        the shape of (sequence_length x action_size) that is coming from
        config.output.shape and a numpy array with the shape of
        (sequence_length,) that is 1 if the corresponding element of the
        input and output should be used in the training optimization.

    Raises:
      ValueError: If the output values for env.random_step_sequence is not
        valid.
      ValueError: If the shape of observations coming from the env is not
        consistent with the config.
      ValueError: If there is a modality type specified in the config but the
        environment does not return that.
    """
    input_configs = self._config.inputs
    # list(...) keeps this working on Python 3, where dict views cannot be
    # indexed.
    sequence_length = list(input_configs.values())[0].shape[0]
    modality_types = list(input_configs.keys())

    path, _, _, step_outputs = self._env.random_step_sequence(
        max_len=sequence_length)
    target_vertices = [self._env.pose_to_vertex(x) for x in self._env.targets()]

    if len(path) != len(step_outputs):
      raise ValueError('path, and step_outputs should have equal length'
                       ' {}!={}'.format(len(path), len(step_outputs)))

    # Collect the per-step observations of every configured modality.
    observations = collections.OrderedDict(
        (modality_type, []) for modality_type in modality_types)
    for step_output in step_outputs:
      obs_dict = step_output[0]
      for modality_type in modality_types:
        if modality_type not in obs_dict:
          raise ValueError('modality type is not returned from the '
                           'environment. {} not in {}'.format(
                               modality_type, obs_dict.keys()))
        obs = obs_dict[modality_type]
        expected_shape = input_configs[modality_type].shape[1:]
        if np.any(obs.shape != tuple(expected_shape)):
          raise ValueError(
              'The observations should have the same size as specified in '
              'config for modality type {}. {} != {}'.format(
                  modality_type, obs.shape, expected_shape))
        observations[modality_type].append(obs)

    gt_value = [self._compute_gt_value(v, target_vertices) for v in path]

    # Pad (or clip from the back) everything to sequence_length. The mask
    # marks which steps hold real data.
    gt_value, _, value_mask = _pad_or_clip_array(
        np.array(gt_value),
        sequence_length,
        is_front_clip=False,
        output_mask=True,
    )
    # Iterate over a snapshot because the values are replaced in the loop.
    for modality_type, obs in list(observations.items()):
      observations[modality_type], _, mask = _pad_or_clip_array(
          np.array(obs), sequence_length, is_front_clip=False, output_mask=True)
      assert np.all(mask == value_mask)

    return observations, None, (gt_value, value_mask)

  def reset(self, observation):
    """Called after the environment is reset."""
    pass

  def target_loss(self, true_targets, targets, weights=None):
    """A loss for training a task model.

    This loss measures the discrepancy between the task outputs, the true and
    predicted ones.

    Args:
      true_targets: tf.Tensor of tf.float32 with the shape of
        (batch_size x sequence_length x action_size).
      targets: tf.Tensor of tf.float32 with the shape of
        (batch_size x sequence_length x action_size).
      weights: tf.Tensor of tf.bool with the shape of
        (batch_size x sequence_length).

    Raises:
      ValueError: if the shapes of the input tensors are not consistent.

    Returns:
      L2 loss between the predicted action values and true action values.
    """
    targets_shape = targets.get_shape().as_list()
    true_targets_shape = true_targets.get_shape().as_list()

    if len(targets_shape) != 3 or len(true_targets_shape) != 3:
      raise ValueError('invalid shape for targets or true_targets_shape')
    if targets_shape != true_targets_shape:
      raise ValueError('the shape of targets and true_targets are not the same '
                       '{} != {}'.format(targets_shape, true_targets_shape))

    if weights is not None:
      weights_shape = weights.get_shape().as_list()
      if weights_shape != targets_shape[0:2]:
        raise ValueError('The first two elements of weights shape should match '
                         'target. {} != {}'.format(weights_shape,
                                                   targets_shape))
      # Keep only the steps selected by the boolean weights.
      true_targets = tf.boolean_mask(true_targets, weights)
      targets = tf.boolean_mask(targets, weights)

    return tf.losses.mean_squared_error(
        labels=tf.reshape(true_targets, [-1]),
        predictions=tf.reshape(targets, [-1]))

  def reward(self, obs, done, info):
    """Not implemented for this task; always raises NotImplementedError."""
    raise NotImplementedError('reward is not implemented for this task')