# Gin bindings for the "PointMaze" environment: context ranges, the
# episode/environment reset schedule, and the sampler and reward wiring for
# the agent-level and meta-level Context scopes.
#
# NOTE(review): %SUBGOAL_DIM, the uvf/negative_distance scope and the
# uvf_eval1..3/ConstantSampler scopes are referenced below but are not bound
# in this section; presumably another config supplies them -- confirm.

# Environment name handed to create_maze_env.
create_maze_env.env_name = "PointMaze"

# Macros for the agent context range; min and max are combined into one tuple.
context_range_min = -10
context_range_max = 10
context_range = (%context_range_min, %context_range_max)
# Range for the meta context (also used by train/RandomSampler below).
# NOTE(review): looks like ((low_0, low_1), (high_0, high_1)) -- confirm.
meta_context_range = ((-2, -2), (10, 10))

# Reset schedule macros, consumed by the bindings that follow.
RESET_EPISODE_PERIOD = 500
RESET_ENV_PERIOD = 1

# Episode reset condition: every_n_steps with n = RESET_EPISODE_PERIOD.
# The same value is used as train_uvf's per-episode step cap.
UvfAgent.reset_episode_cond_fn = @every_n_steps
every_n_steps.n = %RESET_EPISODE_PERIOD
train_uvf.max_steps_per_episode = %RESET_EPISODE_PERIOD

# Environment reset condition: every_n_episodes with n = RESET_ENV_PERIOD.
UvfAgent.reset_env_cond_fn = @every_n_episodes
every_n_episodes.n = %RESET_ENV_PERIOD
every_n_episodes.steps_per_episode = %RESET_EPISODE_PERIOD

# Evaluation mode names; each one is also a key in both samplers dicts below.
EVAL_MODES = ["eval1", "eval2", "eval3"]

# Macros pointing at the two scoped Context configurables.
CONTEXT = @agent/Context
META_CONTEXT = @meta/Context

# Agent-scope Context: one context entry of shape SUBGOAL_DIM within
# context_range, with one sampler list per mode.
agent/Context.context_ranges = [%context_range]
agent/Context.context_shapes = [%SUBGOAL_DIM]
agent/Context.meta_action_every_n = 10
agent/Context.samplers = {
    "train": [@train/DirectionSampler],
    "explore": [@train/DirectionSampler],
    "eval1": [@uvf_eval1/ConstantSampler],
    "eval2": [@uvf_eval2/ConstantSampler],
    "eval3": [@uvf_eval3/ConstantSampler],
}

agent/Context.context_transition_fn = @relative_context_transition_fn
agent/Context.context_multi_transition_fn = @relative_context_multi_transition_fn

agent/Context.reward_fn = @uvf/negative_distance

# Meta-scope Context: one 2-element context entry within meta_context_range,
# sampled randomly for train/explore and held constant for each eval mode.
meta/Context.context_ranges = [%meta_context_range]
meta/Context.context_shapes = [2]
meta/Context.samplers = {
    "train": [@train/RandomSampler],
    "explore": [@train/RandomSampler],
    "eval1": [@eval1/ConstantSampler],
    "eval2": [@eval2/ConstantSampler],
    "eval3": [@eval3/ConstantSampler],
}
meta/Context.reward_fn = @task/negative_distance

# Parameters of the task-scope negative_distance used as the meta reward.
task/negative_distance.state_indices = [0, 1]
task/negative_distance.relative_context = False
task/negative_distance.diff = False
task/negative_distance.offset = 0.0

# Sampler ranges, plus the SUBGOAL_DIM-sized "k" shared by the direction
# sampler, both relative transition functions and MetaAgent.
train/RandomSampler.context_range = %meta_context_range
train/DirectionSampler.context_range = %context_range
train/DirectionSampler.k = %SUBGOAL_DIM
relative_context_transition_fn.k = %SUBGOAL_DIM
relative_context_multi_transition_fn.k = %SUBGOAL_DIM
MetaAgent.k = %SUBGOAL_DIM

# Constant meta-context values used by the three eval modes.
eval1/ConstantSampler.value = [8, 0]
eval2/ConstantSampler.value = [8, 8]
eval3/ConstantSampler.value = [0, 8]