# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""An interface representing the topology of an environment.
Allows for high level planning and high level instruction generation for
navigation tasks.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import abc
import enum
import gym
import gin


@gin.config.constants_from_enum
class ModalityTypes(enum.Enum):
"""Types of the modalities that can be used."""
IMAGE = 0
SEMANTIC_SEGMENTATION = 1
OBJECT_DETECTION = 2
DEPTH = 3
GOAL = 4
PREV_ACTION = 5
PREV_SUCCESS = 6
STATE = 7
DISTANCE = 8
  CAN_STEP = 9

def __lt__(self, other):
if self.__class__ is other.__class__:
return self.value < other.value
return NotImplemented
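

# Example (illustrative): since __lt__ is defined, ModalityTypes members can
# be sorted deterministically, e.g.:
#
#   sorted([ModalityTypes.DEPTH, ModalityTypes.GOAL, ModalityTypes.IMAGE])
#   # -> [ModalityTypes.IMAGE, ModalityTypes.DEPTH, ModalityTypes.GOAL]
#
# The constants_from_enum decorator additionally registers each member as a
# gin constant, so gin config files can reference values such as
# %ModalityTypes.IMAGE.
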
class TaskEnvInterface(object):
"""Interface for an environment topology.

  An environment can implement this interface if there is a topological graph
  underlying the environment. All paths below are defined as paths in this
  graph. Using the path_to_actions function, one can translate a topological
  path into a geometric path in the environment.
"""

  __metaclass__ = abc.ABCMeta

@abc.abstractmethod
def random_step_sequence(self, min_len=None, max_len=None):
"""Generates a random sequence of actions and executes them.

    Args:
      min_len: integer, minimum length of a step sequence.
      max_len: integer; if set to a non-None value, the method returns only
        the first max_len steps of a random sequence. If the environment is
        computationally heavy, this argument should be set to speed up
        training and avoid unnecessary computation by the environment.

    Returns:
A path, defined as a list of vertex indices, a list of actions, a list of
states, and a list of step() return tuples.
"""
raise NotImplementedError(
        'Needs implementation as part of TaskEnvInterface.')

@abc.abstractmethod
def targets(self):
"""A list of targets in the environment.

    Returns:
A list of target locations.
"""
raise NotImplementedError(
        'Needs implementation as part of TaskEnvInterface.')

@abc.abstractproperty
def state(self):
"""Returns the position for the current location of agent."""
raise NotImplementedError(
        'Needs implementation as part of TaskEnvInterface.')

@abc.abstractproperty
def graph(self):
"""Returns a graph representing the environment topology.

    Returns:
nx.Graph object.
"""
raise NotImplementedError(
        'Needs implementation as part of TaskEnvInterface.')

@abc.abstractmethod
def vertex_to_pose(self, vertex_index):
"""Maps a vertex index to a pose in the environment.

    The pose of the camera can be represented by (x, y, theta) or
    (x, y, z, theta).

    Args:
      vertex_index: index of a vertex in the topology graph.

    Returns:
A np.array of floats of size 3 or 4 representing the pose of the vertex.
"""
raise NotImplementedError(
        'Needs implementation as part of TaskEnvInterface.')

@abc.abstractmethod
def pose_to_vertex(self, pose):
"""Maps a coordinate in the maze to the closest vertex in topology graph.
Args:
pose: np.array of floats containing a the pose of the view.
Returns:
index of a vertex.
"""
raise NotImplementedError(
        'Needs implementation as part of TaskEnvInterface.')

@abc.abstractmethod
def observation(self, state):
"""Returns observation at location xy and orientation theta.
Args:
state: a np.array of floats containing coordinates of a location and
orientation.
Returns:
Dictionary of observations in the case of multiple observations.
The keys are the modality names and the values are the np.array of float
of observations for corresponding modality.
"""
raise NotImplementedError(
        'Needs implementation as part of TaskEnvInterface.')

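  # Example (illustrative) observation dictionary with two modalities; the
  # image sizes are hypothetical and chosen only for this sketch:
  #   {ModalityTypes.IMAGE: np.zeros((64, 64, 3), np.float32),
  #    ModalityTypes.DEPTH: np.zeros((64, 64, 1), np.float32)}
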
def action(self, init_state, final_state):
"""Computes the transition action from state1 to state2.
If the environment is discrete and the views are not adjacent in the
environment. i.e. it is not possible to move from the first view to the
second view with one action it should return None. In the continuous case,
it will be the continuous difference of first view and second view.
Args:
init_state: numpy array, the initial view of the agent.
final_state: numpy array, the final view of the agent.
"""
raise NotImplementedError(
        'Needs implementation as part of TaskEnvInterface.')
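

# Example (illustrative, assuming a concrete implementation `env` of
# TaskEnvInterface): the vertex/pose mapping is expected to round-trip for
# graph vertices, and a topological path can be translated into actions by
# chaining `action` over consecutive vertex poses:
#
#   pose = env.vertex_to_pose(v)                  # np.array of size 3 or 4.
#   assert env.pose_to_vertex(pose) == v
#   actions = [env.action(env.vertex_to_pose(u), env.vertex_to_pose(w))
#              for u, w in zip(path[:-1], path[1:])]
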
@gin.configurable
class TaskEnv(gym.Env, TaskEnvInterface):
"""An environment which uses a Task to compute reward.

  The environment implements a gym interface, as well as TaskEnvInterface.
  The former makes sure it can be used within an RL training loop, while the
  latter makes sure it can be used by a Task.

  This environment requires _step_no_reward to be implemented, which steps
  through the environment but does not return a reward. Instead, the reward
  calculation is delegated to the Task object, which in turn can access the
  needed properties of the environment. These properties are exposed via
  TaskEnvInterface.
  """

def __init__(self, task=None):
    self._task = task

def set_task(self, task):
    self._task = task

@abc.abstractmethod
def _step_no_reward(self, action):
"""Same as _step without returning reward.

    Args:
      action: see _step.

    Returns:
state, done, info as defined in _step.
"""
    raise NotImplementedError('Implement _step_no_reward.')

@abc.abstractmethod
def _reset_env(self):
"""Resets the environment. Returns initial observation."""
    raise NotImplementedError('Implement _reset_env. Must call super!')

def step(self, action):
obs, done, info = self._step_no_reward(action)
reward = 0.0
if self._task is not None:
obs, reward, done, info = self._task.reward(obs, done, info)
    return obs, reward, done, info

def reset(self):
"""Resets the environment. Gym API."""
obs = self._reset_env()
if self._task is not None:
self._task.reset(obs)
return obs
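

# ------------------------------------------------------------------------------
# Illustrative sketch (hypothetical, not part of the interface above): a toy
# chain environment showing how a concrete TaskEnv delegates reward
# computation to a Task object. The names _ChainTask and _ChainEnv are
# invented for this example, and the 1-D chain topology is an assumption made
# purely for brevity.
# ------------------------------------------------------------------------------
import numpy as np  # Used only by the sketch below.


class _ChainTask(object):
  """Toy task: reward of 1.0 once the agent reaches the goal vertex."""

  def __init__(self, goal_vertex):
    self._goal = goal_vertex

  def reset(self, observation):
    del observation  # This toy task keeps no per-episode state.

  def reward(self, obs, done, info):
    # Reward is computed from the observation; the episode ends at the goal.
    at_goal = int(obs[ModalityTypes.STATE][0]) == self._goal
    return obs, float(at_goal), done or at_goal, info


class _ChainEnv(TaskEnv):
  """Toy environment: vertices 0..n-1 on a line; actions are -1 and +1."""

  def __init__(self, n=5, task=None):
    super(_ChainEnv, self).__init__(task=task)
    self._n = n
    self._agent = 0

  @property
  def state(self):
    return np.array([self._agent], dtype=np.float32)

  @property
  def graph(self):
    import networkx as nx  # Assumed available; used only in this sketch.
    return nx.path_graph(self._n)

  def targets(self):
    return [self._n - 1]

  def vertex_to_pose(self, vertex_index):
    # Toy (x, y, theta) pose: the chain lies along the x axis.
    return np.array([float(vertex_index), 0.0, 0.0], dtype=np.float32)

  def pose_to_vertex(self, pose):
    return int(round(float(pose[0])))

  def observation(self, state):
    # Single-modality observation dictionary, as described in
    # TaskEnvInterface.observation.
    return {ModalityTypes.STATE: np.asarray(state, dtype=np.float32)}

  def random_step_sequence(self, min_len=None, max_len=None):
    raise NotImplementedError('Not needed for this sketch.')

  def _reset_env(self):
    self._agent = 0
    return self.observation(self.state)

  def _step_no_reward(self, action):
    self._agent = int(np.clip(self._agent + action, 0, self._n - 1))
    return self.observation(self.state), False, {}


# Example usage of the sketch:
#   env = _ChainEnv(n=5, task=_ChainTask(goal_vertex=4))
#   obs = env.reset()
#   obs, reward, done, info = env.step(1)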