#-*-Python-*-
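# Gin bindings for training a goal-conditioned (UVF) low-level agent together
# with a higher-level meta agent on the PointMaze environment. The sections
# below configure the environment, episode/reset schedule, agent and meta
# contexts, per-mode samplers, and reward functions.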
# NOTE: For best training, low-level exploration (uvf_add_noise_fn.stddev)
# should be reduced to around 0.1.
create_maze_env.env_name = "PointMaze"
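# Range for low-level subgoals (context) and the box from which high-level
# goals (meta context) are drawn.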
context_range_min = -10
context_range_max = 10
context_range = (%context_range_min, %context_range_max)
meta_context_range = ((-2, -2), (10, 10))

RESET_EPISODE_PERIOD = 500
RESET_ENV_PERIOD = 1
# End episode every N steps
UvfAgent.reset_episode_cond_fn = @every_n_steps
every_n_steps.n = %RESET_EPISODE_PERIOD
train_uvf.max_steps_per_episode = %RESET_EPISODE_PERIOD
# Do a manual reset every N episodes
UvfAgent.reset_env_cond_fn = @every_n_episodes
every_n_episodes.n = %RESET_ENV_PERIOD
every_n_episodes.steps_per_episode = %RESET_EPISODE_PERIOD

## Config defaults
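# Three evaluation modes; each is assigned a fixed goal via the ConstantSamplers
# configured at the end of this file.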
EVAL_MODES = ["eval1", "eval2", "eval3"]

## Config agent
CONTEXT = @agent/Context
META_CONTEXT = @meta/Context

## Config agent context
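# %SUBGOAL_DIM is referenced below but not bound in this file; it is expected
# to be provided by a companion gin config.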
agent/Context.context_ranges = [%context_range]
agent/Context.context_shapes = [%SUBGOAL_DIM]
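# A new subgoal (meta action) is issued every 10 environment steps.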
agent/Context.meta_action_every_n = 10
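# Per-mode subgoal samplers: direction sampling for train/explore, fixed
# subgoals for the eval modes.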
agent/Context.samplers = {
    "train": [@train/DirectionSampler],
    "explore": [@train/DirectionSampler],
    "eval1": [@uvf_eval1/ConstantSampler],
    "eval2": [@uvf_eval2/ConstantSampler],
    "eval3": [@uvf_eval3/ConstantSampler],
}

agent/Context.context_transition_fn = @relative_context_transition_fn
agent/Context.context_multi_transition_fn = @relative_context_multi_transition_fn
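# The relative transition functions keep the subgoal expressed relative to the
# agent's current state as it moves; the low-level reward below is the negative
# distance to that subgoal.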

agent/Context.reward_fn = @uvf/negative_distance

## Config meta context
meta/Context.context_ranges = [%meta_context_range]
meta/Context.context_shapes = [2]
meta/Context.samplers = {
    "train": [@train/RandomSampler],
    "explore": [@train/RandomSampler],
    "eval1": [@eval1/ConstantSampler],
    "eval2": [@eval2/ConstantSampler],
    "eval3": [@eval3/ConstantSampler],
}
meta/Context.reward_fn = @task/negative_distance

## Config rewards
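# Task reward: negative distance from the agent's (x, y) position (state
# indices 0 and 1) to the goal, using absolute (non-relative) coordinates.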
task/negative_distance.state_indices = [0, 1]
task/negative_distance.relative_context = False
task/negative_distance.diff = False
task/negative_distance.offset = 0.0

## Config samplers
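# RandomSampler draws meta goals from meta_context_range; DirectionSampler
# draws %SUBGOAL_DIM-dimensional subgoal directions within context_range.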
train/RandomSampler.context_range = %meta_context_range
train/DirectionSampler.context_range = %context_range
train/DirectionSampler.k = %SUBGOAL_DIM
relative_context_transition_fn.k = %SUBGOAL_DIM
relative_context_multi_transition_fn.k = %SUBGOAL_DIM
MetaAgent.k = %SUBGOAL_DIM

eval1/ConstantSampler.value = [8, 0]
eval2/ConstantSampler.value = [8, 8]
eval3/ConstantSampler.value = [0, 8]
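
## Usage note (sketch)
# A minimal sketch, assuming the gin-config library and that %SUBGOAL_DIM is
# bound by a companion config; the file name and the example binding below are
# hypothetical, not part of this config:
#   import gin
#   gin.parse_config_file("point_maze.gin")
#   gin.parse_config("SUBGOAL_DIM = 2")  # example value only; real value set elsewhere
#   # ...then invoke the train_uvf configurable from the training script.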