#-*-Python-*-
# Gin bindings for the "AntFall" maze environment: episode/reset schedule,
# agent-level and meta-level Context configuration, reward settings and
# context samplers.
#
# Macros referenced here but not defined in this file (they must be supplied
# by another config that is parsed together with this one):
#   %CONTEXT_RANGE_MIN, %CONTEXT_RANGE_MAX, %SUBGOAL_DIM
#
# Each binding must stay on its own line: a '#' comments out the remainder of
# the physical line, so statements sharing a line with a comment are ignored.

create_maze_env.env_name = "AntFall"

# Range for the agent-level context, built from externally supplied macros.
context_range = (%CONTEXT_RANGE_MIN, %CONTEXT_RANGE_MAX)
# Range for the 3-component meta-level context.
# NOTE(review): presumably (lower bounds, upper bounds) per component -- confirm.
meta_context_range = ((-4, -4, 0), (12, 28, 5))

RESET_EPISODE_PERIOD = 500
RESET_ENV_PERIOD = 1

# End episode every N steps
UvfAgent.reset_episode_cond_fn = @every_n_steps
every_n_steps.n = %RESET_EPISODE_PERIOD
train_uvf.max_steps_per_episode = %RESET_EPISODE_PERIOD

# Do a manual reset every N episodes
UvfAgent.reset_env_cond_fn = @every_n_episodes
every_n_episodes.n = %RESET_ENV_PERIOD
every_n_episodes.steps_per_episode = %RESET_EPISODE_PERIOD

## Config defaults
EVAL_MODES = ["eval1"]

## Config agent
CONTEXT = @agent/Context
META_CONTEXT = @meta/Context

## Config agent context
agent/Context.context_ranges = [%context_range]
agent/Context.context_shapes = [%SUBGOAL_DIM]
agent/Context.meta_action_every_n = 10
# Only "train" and "explore" samplers are configured for the agent scope;
# the "eval1" mode is configured on the meta scope below.
agent/Context.samplers = {
    "train": [@train/DirectionSampler],
    "explore": [@train/DirectionSampler],
}
agent/Context.context_transition_fn = @relative_context_transition_fn
agent/Context.context_multi_transition_fn = @relative_context_multi_transition_fn
agent/Context.reward_fn = @uvf/negative_distance

## Config meta context
meta/Context.context_ranges = [%meta_context_range]
meta/Context.context_shapes = [3]
# The sampler keys include every mode listed in EVAL_MODES ("eval1").
meta/Context.samplers = {
    "train": [@train/RandomSampler],
    "explore": [@train/RandomSampler],
    "eval1": [@eval1/ConstantSampler],
}
meta/Context.reward_fn = @task/negative_distance

## Config rewards
# Three state indices, matching the 3-component meta context shape above.
task/negative_distance.state_indices = [0, 1, 2]
task/negative_distance.relative_context = False
task/negative_distance.diff = False
task/negative_distance.offset = 0.0

## Config samplers
train/RandomSampler.context_range = %meta_context_range
train/DirectionSampler.context_range = %context_range
# The same %SUBGOAL_DIM value is bound to `k` on the sampler, both transition
# functions and MetaAgent, keeping them consistent with
# agent/Context.context_shapes.
train/DirectionSampler.k = %SUBGOAL_DIM
relative_context_transition_fn.k = %SUBGOAL_DIM
relative_context_multi_transition_fn.k = %SUBGOAL_DIM
MetaAgent.k = %SUBGOAL_DIM

# Constant meta context used for the "eval1" mode.
# NOTE(review): presumably the evaluation goal position; each component lies
# between the corresponding entries of meta_context_range -- confirm intent.
eval1/ConstantSampler.value = [0, 27, 4.5]