AdvaitBERT-AI_Explanability
/
models
/research
/efficient-hrl
/context
/configs
/ant_push_single.gin
#-*-Python-*-
# AntPush configuration. The meta-level context in this file is always drawn
# from eval2/ConstantSampler (see the meta/Context.samplers binding below).

# Name of the maze environment to create.
create_maze_env.env_name = "AntPush"
# Range used for the agent-level context. Both bounds are macros that are not
# defined in this file; they must be supplied by another config or binding.
context_range = (%CONTEXT_RANGE_MIN, %CONTEXT_RANGE_MAX)
# Range used for the meta-level context.
# NOTE(review): presumably (per-dimension minimums, per-dimension maximums)
# for a 2-D goal -- confirm against the Context implementation.
meta_context_range = ((-16, -4), (16, 20))
# Consumed below by every_n_steps.n, train_uvf.max_steps_per_episode and
# every_n_episodes.steps_per_episode.
RESET_EPISODE_PERIOD = 500
# Consumed below by every_n_episodes.n.
RESET_ENV_PERIOD = 1
# End episode every N steps (N = RESET_EPISODE_PERIOD).
UvfAgent.reset_episode_cond_fn = @every_n_steps
every_n_steps.n = %RESET_EPISODE_PERIOD
# The training loop's per-episode step cap is bound to the same macro so the
# two values cannot drift apart.
train_uvf.max_steps_per_episode = %RESET_EPISODE_PERIOD
# Do a manual reset every N episodes (N = RESET_ENV_PERIOD).
UvfAgent.reset_env_cond_fn = @every_n_episodes
every_n_episodes.n = %RESET_ENV_PERIOD
every_n_episodes.steps_per_episode = %RESET_EPISODE_PERIOD
## Config defaults
# The single mode listed here, "eval2", matches the "eval2" key of
# meta/Context.samplers below.
EVAL_MODES = ["eval2"]
## Config agent
# Macros referring to the two scoped Context configurables ("agent" and
# "meta") that are configured in the sections below.
CONTEXT = @agent/Context
META_CONTEXT = @meta/Context
## Config agent context
# One context entry whose range is the context_range macro and whose shape is
# %SUBGOAL_DIM (a macro not defined in this file).
agent/Context.context_ranges = [%context_range]
agent/Context.context_shapes = [%SUBGOAL_DIM]
# NOTE(review): presumably the number of low-level steps between meta
# actions -- confirm against the Context implementation.
agent/Context.meta_action_every_n = 10
# "train" and "explore" use the same train-scoped DirectionSampler.
agent/Context.samplers = {
    "train": [@train/DirectionSampler],
    "explore": [@train/DirectionSampler],
}
agent/Context.context_transition_fn = @relative_context_transition_fn
agent/Context.context_multi_transition_fn = @relative_context_multi_transition_fn
# The uvf-scoped negative_distance is not configured in this file; its
# parameters must come from elsewhere.
agent/Context.reward_fn = @uvf/negative_distance
## Config meta context
# One context entry of shape 2 whose range is the meta_context_range macro.
meta/Context.context_ranges = [%meta_context_range]
meta/Context.context_shapes = [2]
# Every mode ("train", "explore", "eval2") uses the same eval2-scoped
# ConstantSampler, so the meta context is the same constant in all of them.
meta/Context.samplers = {
    "train": [@eval2/ConstantSampler],
    "explore": [@eval2/ConstantSampler],
    "eval2": [@eval2/ConstantSampler],
}
# Reward function is the task-scoped negative_distance configured under
# "Config rewards".
meta/Context.reward_fn = @task/negative_distance
## Config rewards
# Parameters of the task-scoped negative_distance, which meta/Context uses as
# its reward_fn.
# NOTE(review): state indices 0 and 1 are presumably the agent's x/y
# position -- confirm against the environment's observation layout.
task/negative_distance.state_indices = [0, 1]
task/negative_distance.relative_context = False
task/negative_distance.diff = False
task/negative_distance.offset = 0.0
## Config samplers
# NOTE(review): no sampler list in this file references train/RandomSampler;
# the binding is kept as-is in case another config or code path uses it.
train/RandomSampler.context_range = %meta_context_range
# Sampler used by agent/Context for the "train" and "explore" modes.
train/DirectionSampler.context_range = %context_range
train/DirectionSampler.k = %SUBGOAL_DIM
# The transition functions and MetaAgent share the same k, bound to the
# %SUBGOAL_DIM macro (not defined in this file).
relative_context_transition_fn.k = %SUBGOAL_DIM
relative_context_multi_transition_fn.k = %SUBGOAL_DIM
MetaAgent.k = %SUBGOAL_DIM
# Constant value returned by the sampler used for every meta/Context mode.
eval2/ConstantSampler.value = [0, 19]