#-*-Python-*-
# Gin configuration for the "AntBlock" maze environment.
#
# NOTE: the macros %SUBGOAL_DIM, %CONTEXT_RANGE_MIN and %CONTEXT_RANGE_MAX are
# referenced below but are not defined in this section; they must be bound
# elsewhere (another included config or a command-line binding).
# NOTE: each binding must stay on its own line -- anything that follows a '#'
# on the same line is treated as part of the comment.

create_maze_env.env_name = "AntBlock"
ZERO_OBS = False
# Range of the agent (lower-level) context, assembled from externally
# supplied macros.
context_range = (%CONTEXT_RANGE_MIN, %CONTEXT_RANGE_MAX)
# Range of the 2-D meta context.
# NOTE(review): presumably ((min_x, min_y), (max_x, max_y)) -- confirm against
# the sampler implementation.
meta_context_range = ((-4, -4), (20, 20))

RESET_EPISODE_PERIOD = 500
RESET_ENV_PERIOD = 1
# End episode every N steps
UvfAgent.reset_episode_cond_fn = @every_n_steps
every_n_steps.n = %RESET_EPISODE_PERIOD
train_uvf.max_steps_per_episode = %RESET_EPISODE_PERIOD
# Do a manual reset every N episodes
UvfAgent.reset_env_cond_fn = @every_n_episodes
every_n_episodes.n = %RESET_ENV_PERIOD
every_n_episodes.steps_per_episode = %RESET_EPISODE_PERIOD

## Config defaults
# One entry per "evalN" sampler key registered in meta/Context.samplers below.
EVAL_MODES = ["eval1", "eval2", "eval3"]

## Config agent
CONTEXT = @agent/Context
META_CONTEXT = @meta/Context

## Config agent context
agent/Context.context_ranges = [%context_range]
agent/Context.context_shapes = [%SUBGOAL_DIM]
agent/Context.meta_action_every_n = 10
# "train" and "explore" share the same sampler.
agent/Context.samplers = {
    "train": [@train/DirectionSampler],
    "explore": [@train/DirectionSampler],
}

agent/Context.context_transition_fn = @relative_context_transition_fn
agent/Context.context_multi_transition_fn = @relative_context_multi_transition_fn

# The uvf/ scope of negative_distance is not configured in this section.
agent/Context.reward_fn = @uvf/negative_distance

## Config meta context
meta/Context.context_ranges = [%meta_context_range]
meta/Context.context_shapes = [2]
# "train" and "explore" share a random sampler; each eval mode has its own
# constant sampler whose value is set at the end of this file.
meta/Context.samplers = {
    "train": [@train/RandomSampler],
    "explore": [@train/RandomSampler],
    "eval1": [@eval1/ConstantSampler],
    "eval2": [@eval2/ConstantSampler],
    "eval3": [@eval3/ConstantSampler],
}
meta/Context.reward_fn = @task/negative_distance

## Config rewards
# NOTE(review): state_indices presumably selects the state components compared
# against the 2-D meta context -- confirm against negative_distance.
task/negative_distance.state_indices = [3, 4]
task/negative_distance.relative_context = False
task/negative_distance.diff = False
task/negative_distance.offset = 0.0

## Config samplers
train/RandomSampler.context_range = %meta_context_range
train/DirectionSampler.context_range = %context_range
train/DirectionSampler.k = %SUBGOAL_DIM
relative_context_transition_fn.k = %SUBGOAL_DIM
relative_context_multi_transition_fn.k = %SUBGOAL_DIM
MetaAgent.k = %SUBGOAL_DIM

# Constant meta-context values used by the three evaluation modes.
eval1/ConstantSampler.value = [16, 0]
eval2/ConstantSampler.value = [16, 16]
eval3/ConstantSampler.value = [0, 16]