# Gin bindings for the "AntBlock" maze environment.

# Environment constructed by create_maze_env.
create_maze_env.env_name = "AntBlock"
ZERO_OBS = False

# Range for the low-level (agent) context. Both bounds are macros that are not
# defined in this section and must be provided by another config.
context_range = (%CONTEXT_RANGE_MIN, %CONTEXT_RANGE_MAX)
# Range for the meta-level context, given as two 2-D points -- presumably the
# (min, max) corners of the goal region; confirm against the sampler.
meta_context_range = ((-4, -4), (20, 20))

# Reset schedule. Both periods are macros so that every binding below reads
# the same value.
RESET_EPISODE_PERIOD = 500
RESET_ENV_PERIOD = 1

# Episode-reset condition: every_n_steps with n = RESET_EPISODE_PERIOD.
# train_uvf.max_steps_per_episode is bound to the same macro.
UvfAgent.reset_episode_cond_fn = @every_n_steps
every_n_steps.n = %RESET_EPISODE_PERIOD
train_uvf.max_steps_per_episode = %RESET_EPISODE_PERIOD

# Environment-reset condition: every_n_episodes with n = RESET_ENV_PERIOD and
# steps_per_episode = RESET_EPISODE_PERIOD.
UvfAgent.reset_env_cond_fn = @every_n_episodes
every_n_episodes.n = %RESET_ENV_PERIOD
every_n_episodes.steps_per_episode = %RESET_EPISODE_PERIOD

# Evaluation mode names. Each one has a matching key in meta/Context.samplers.
EVAL_MODES = ["eval1", "eval2", "eval3"]

# Macros pointing at the two scoped Context configurables ("agent" and "meta").
CONTEXT = @agent/Context
META_CONTEXT = @meta/Context

# Low-level (agent-scoped) Context.
# A single context whose range is %context_range and whose shape is
# %SUBGOAL_DIM (a macro not defined in this section).
agent/Context.context_ranges = [%context_range]
agent/Context.context_shapes = [%SUBGOAL_DIM]
# NOTE(review): presumably the number of low-level steps between meta actions;
# confirm against the Context implementation.
agent/Context.meta_action_every_n = 10
# Only "train" and "explore" modes are given samplers here; both use the
# train-scoped DirectionSampler.
agent/Context.samplers = {
    "train": [@train/DirectionSampler],
    "explore": [@train/DirectionSampler],
}

# Single-step and multi-step context transition functions (relative variants).
agent/Context.context_transition_fn = @relative_context_transition_fn
agent/Context.context_multi_transition_fn = @relative_context_multi_transition_fn

# Reward uses negative_distance under the "uvf" scope; no uvf/negative_distance
# parameters are bound in this section.
agent/Context.reward_fn = @uvf/negative_distance

# Meta-level (meta-scoped) Context.
# A single 2-element context ranging over %meta_context_range.
meta/Context.context_ranges = [%meta_context_range]
meta/Context.context_shapes = [2]
# "train" and "explore" share the train-scoped RandomSampler. Each eval mode
# gets its own scoped ConstantSampler, so it can be bound to a different value.
meta/Context.samplers = {
    "train": [@train/RandomSampler],
    "explore": [@train/RandomSampler],
    "eval1": [@eval1/ConstantSampler],
    "eval2": [@eval2/ConstantSampler],
    "eval3": [@eval3/ConstantSampler],
}
# Reward uses negative_distance under the "task" scope.
meta/Context.reward_fn = @task/negative_distance

# Parameters of the task-scoped negative_distance (the meta-level reward).
# NOTE(review): state_indices presumably selects the state components compared
# against the 2-D meta context; confirm what indices 3 and 4 hold for AntBlock.
task/negative_distance.state_indices = [3, 4]
task/negative_distance.relative_context = False
task/negative_distance.diff = False
task/negative_distance.offset = 0.0

# Training samplers draw from the same ranges declared for their Context:
# RandomSampler for the meta level, DirectionSampler for the agent level.
train/RandomSampler.context_range = %meta_context_range
train/DirectionSampler.context_range = %context_range

# Every configurable that takes `k` receives the same %SUBGOAL_DIM macro
# (not defined in this section).
train/DirectionSampler.k = %SUBGOAL_DIM
relative_context_transition_fn.k = %SUBGOAL_DIM
relative_context_multi_transition_fn.k = %SUBGOAL_DIM
MetaAgent.k = %SUBGOAL_DIM

# Fixed meta context for each evaluation mode. All three values lie inside
# meta_context_range, which is ((-4, -4), (20, 20)).
eval1/ConstantSampler.value = [16, 0]
eval2/ConstantSampler.value = [16, 16]
eval3/ConstantSampler.value = [0, 16]