|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Defines many boolean functions indicating when to step and reset. |
|
""" |
|
|
|
import tensorflow as tf |
|
import gin.tf |
|
|
|
|
|
@gin.configurable
def env_transition(agent, state, action, transition_type, environment_steps,
                   num_episodes):
  """True when the transition is an ordinary (non-restart) transition.

  Args:
    agent: RL agent.
    state: A [num_state_dims] tensor representing a state.
    action: Action performed.
    transition_type: Type of transition after action.
    environment_steps: Number of steps performed by environment.
    num_episodes: Number of episodes.
  Returns:
    cond: A boolean op that is true whenever transition_type is not
      RESTARTING (i.e. it is TRANSITION or FINAL_TRANSITION).
  """
  # Only the transition type matters; every other argument is unused.
  del agent, state, action, environment_steps, num_episodes
  return tf.logical_not(transition_type)
|
|
|
|
|
@gin.configurable
def env_restart(agent, state, action, transition_type, environment_steps,
                num_episodes):
  """True when the transition is a RESTARTING transition.

  Args:
    agent: RL agent.
    state: A [num_state_dims] tensor representing a state.
    action: Action performed.
    transition_type: Type of transition after action.
    environment_steps: Number of steps performed by environment.
    num_episodes: Number of episodes.
  Returns:
    cond: A boolean op that is true whenever transition_type equals
      RESTARTING.
  """
  # Only the transition type matters; every other argument is unused.
  del agent, state, action, environment_steps, num_episodes
  return tf.identity(transition_type)
|
|
|
|
|
@gin.configurable
def every_n_steps(agent,
                  state,
                  action,
                  transition_type,
                  environment_steps,
                  num_episodes,
                  n=150):
  """True once every n environment steps.

  The step counter is incremented before this condition is checked, so no
  off-by-one correction is needed here.

  Args:
    agent: RL agent.
    state: A [num_state_dims] tensor representing a state.
    action: Action performed.
    transition_type: Type of transition after action.
    environment_steps: Number of steps performed by environment.
    num_episodes: Number of episodes.
    n: Return true once every n steps.
  Returns:
    cond: A boolean op that is true whenever environment_steps is a
      multiple of n.
  """
  del agent, state, action, transition_type, num_episodes
  return tf.equal(tf.mod(environment_steps, n), 0)
|
|
|
|
|
@gin.configurable
def every_n_episodes(agent,
                     state,
                     action,
                     transition_type,
                     environment_steps,
                     num_episodes,
                     n=2,
                     steps_per_episode=None):
  """True on the first step of every nth episode (or if the ant fell).

  Evaluates to True on the 0th step of every nth episode. Unlike
  environment_steps, num_episodes starts at 0, so one is added to it to
  ensure this does not fire on the very first call. Additionally fires at
  an episode boundary whenever the agent appears to have fallen over.

  Args:
    agent: RL agent.
    state: A [num_state_dims] tensor representing a state.
    action: Action performed.
    transition_type: Type of transition after action.
    environment_steps: Number of steps performed by environment.
    num_episodes: Number of episodes.
    n: Return true once every n episodes.
    steps_per_episode: How many steps per episode. Needed to determine when
      a new episode starts.
  Returns:
    cond: A boolean op that is true at an episode boundary when either the
      ant fell or (num_episodes + 1) is a multiple of n.
  """
  assert steps_per_episode is not None
  del agent, action, transition_type
  # NOTE(review): state[2] presumably is the torso height of the ant; it is
  # treated as fallen outside [0.2, 1.0] -- confirm against the environment.
  ant_fell = tf.logical_or(state[2] < 0.2, state[2] > 1.0)
  is_nth_episode = tf.equal(tf.mod(num_episodes + 1, n), 0)
  at_episode_boundary = tf.equal(
      tf.mod(environment_steps, steps_per_episode), 0)
  return tf.logical_and(
      tf.logical_or(ant_fell, is_nth_episode), at_episode_boundary)
|
|
|
|
|
@gin.configurable
def failed_reset_after_n_episodes(agent,
                                  state,
                                  action,
                                  transition_type,
                                  environment_steps,
                                  num_episodes,
                                  steps_per_episode=None,
                                  reset_state=None,
                                  max_dist=1.0,
                                  epsilon=1e-10):
  """At each episode boundary, True if the reset agent failed to return.

  Specifically, evaluates to True if the distance between the state and the
  reset state is greater than max_dist at the end of the episode.

  Args:
    agent: RL agent.
    state: A [num_state_dims] tensor representing a state.
    action: Action performed.
    transition_type: Type of transition after action.
    environment_steps: Number of steps performed by environment.
    num_episodes: Number of episodes.
    steps_per_episode: How many steps per episode. Needed to determine when a
      new episode starts.
    reset_state: State to which the reset controller should return.
    max_dist: Agent is considered to have successfully reset if its distance
      from the reset_state is less than max_dist.
    epsilon: Small offset added under the sqrt to keep the op (and its
      gradient) well-defined at zero distance.
  Returns:
    cond: A boolean op that is true at an episode boundary (i.e. when
      environment_steps is a multiple of steps_per_episode) if the distance
      between state and reset_state exceeds max_dist.
  """
  assert steps_per_episode is not None
  assert reset_state is not None
  # Bug fix: the original code also deleted `state` here, which raised a
  # NameError when `state` was read below to compute the distance.
  del agent, action, transition_type, num_episodes
  # Euclidean distance between the current state and the reset state.
  dist = tf.sqrt(
      tf.reduce_sum(tf.squared_difference(state, reset_state)) + epsilon)
  return tf.logical_and(
      tf.greater(dist, tf.constant(max_dist)),
      tf.equal(tf.mod(environment_steps, steps_per_episode), 0))
|
|
|
|
|
@gin.configurable
def q_too_small(agent,
                state,
                action,
                transition_type,
                environment_steps,
                num_episodes,
                q_min=0.5):
  """True if the critic's Q-value for (state, action) is below q_min.

  Args:
    agent: RL agent. Must expose BASE_AGENT_CLASS.critic_net.
    state: A [num_state_dims] tensor representing a state.
    action: Action performed.
    transition_type: Type of transition after action.
    environment_steps: Number of steps performed by environment.
    num_episodes: Number of episodes.
    q_min: Returns true if the qval is less than q_min.
  Returns:
    cond: A boolean op that is true if qval is less than q_min.
  """
  del transition_type, environment_steps, num_episodes
  # Bug fix: the original code called
  #   tf.stack(state[:-1], tf.constant([0], dtype=tf.float))
  # which (a) passed a tensor as the `axis` argument of tf.stack and
  # (b) used the nonexistent dtype `tf.float`. The intent -- replace the
  # final state dimension with 0 before querying the reset agent's critic --
  # is expressed with tf.concat and tf.float32 instead.
  # NOTE(review): assumes `state` is a rank-1 float32 tensor; confirm at the
  # call site (tf.concat requires matching dtypes).
  state_for_reset_agent = tf.concat(
      [state[:-1], tf.constant([0], dtype=tf.float32)], axis=0)
  # Batch the single (state, action) pair, query the critic, then strip the
  # batch dimension.
  qval = agent.BASE_AGENT_CLASS.critic_net(
      tf.expand_dims(state_for_reset_agent, 0), tf.expand_dims(action, 0))[0, :]
  return tf.greater(tf.constant(q_min), qval)
|
|
|
|
|
@gin.configurable
def true_fn(agent, state, action, transition_type, environment_steps,
            num_episodes):
  """Condition that always holds.

  Args:
    agent: RL agent.
    state: A [num_state_dims] tensor representing a state.
    action: Action performed.
    transition_type: Type of transition after action.
    environment_steps: Number of steps performed by environment.
    num_episodes: Number of episodes.
  Returns:
    cond: An op that always evaluates to True.
  """
  # All arguments are ignored; this is a constant predicate.
  del agent, state, action, transition_type, environment_steps, num_episodes
  return tf.constant(True, dtype=tf.bool)
|
|
|
|
|
@gin.configurable
def false_fn(agent, state, action, transition_type, environment_steps,
             num_episodes):
  """Condition that never holds.

  Args:
    agent: RL agent.
    state: A [num_state_dims] tensor representing a state.
    action: Action performed.
    transition_type: Type of transition after action.
    environment_steps: Number of steps performed by environment.
    num_episodes: Number of episodes.
  Returns:
    cond: An op that always evaluates to False.
  """
  # All arguments are ignored; this is a constant predicate.
  del agent, state, action, transition_type, environment_steps, num_episodes
  return tf.constant(False, dtype=tf.bool)
|
|