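# Gin bindings for hierarchical goal-conditioned (UVF) training on the maze
# environment built by create_maze_env.

# Replay buffers: two scoped CircularBuffer instances of 200k transitions
# each, one for the low-level agent and one for the high-level meta-agent.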
agent/CircularBuffer.buffer_size = 200000
meta/CircularBuffer.buffer_size = 200000
agent/CircularBuffer.scope = "agent"
meta/CircularBuffer.scope = "meta"
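
# Main training binding. In gin, @scope/Class() injects a configured instance
# and %MACRO references a macro bound elsewhere. A run-level config is
# expected to bind the class macros before this file is used; a hypothetical
# example (the class names here are assumptions, not part of this config):
#
#   AGENT_CLASS = @UvfAgent
#   META_CLASS = @MetaAgent
#   STATE_PREPROCESS_CLASS = @StatePreprocess
#   INVERSE_DYNAMICS_CLASS = @InverseDynamics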
train_uvf.environment = @create_maze_env()
train_uvf.agent_class = %AGENT_CLASS
train_uvf.meta_agent_class = %META_CLASS
train_uvf.state_preprocess_class = %STATE_PREPROCESS_CLASS
train_uvf.inverse_dynamics_class = %INVERSE_DYNAMICS_CLASS
train_uvf.replay_buffer = @agent/CircularBuffer()
train_uvf.meta_replay_buffer = @meta/CircularBuffer()
train_uvf.critic_optimizer = @critic/AdamOptimizer()
train_uvf.actor_optimizer = @actor/AdamOptimizer()
train_uvf.meta_critic_optimizer = @meta_critic/AdamOptimizer()
train_uvf.meta_actor_optimizer = @meta_actor/AdamOptimizer()
train_uvf.repr_optimizer = @repr/AdamOptimizer()

train_uvf.num_episodes_train = 25000
train_uvf.batch_size = 100
train_uvf.initial_episodes = 5
train_uvf.gamma = 0.99
train_uvf.meta_gamma = 0.99
train_uvf.reward_scale_factor = 1.0
train_uvf.target_update_period = 2
train_uvf.num_updates_per_observation = 1
train_uvf.num_collect_per_update = 1
train_uvf.num_collect_per_meta_update = 10
train_uvf.debug_summaries = False
train_uvf.log_every_n_steps = 1000
train_uvf.save_policy_every_n_steps = 100000
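
# Schedule implied by the bindings above: one low-level update per collected
# step, one meta-update per 10 collected steps, target networks refreshed
# every 2 updates, and policies checkpointed every 100k steps.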
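
# Optimizers: Adam everywhere with the default betas (0.9, 0.999). Critics
# use a learning rate of 1e-3; actors and the representation learner use 1e-4.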
critic/AdamOptimizer.learning_rate = 0.001
critic/AdamOptimizer.beta1 = 0.9
critic/AdamOptimizer.beta2 = 0.999
actor/AdamOptimizer.learning_rate = 0.0001
actor/AdamOptimizer.beta1 = 0.9
actor/AdamOptimizer.beta2 = 0.999

meta_critic/AdamOptimizer.learning_rate = 0.001
meta_critic/AdamOptimizer.beta1 = 0.9
meta_critic/AdamOptimizer.beta2 = 0.999
meta_actor/AdamOptimizer.learning_rate = 0.0001
meta_actor/AdamOptimizer.beta1 = 0.9
meta_actor/AdamOptimizer.beta2 = 0.999
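
# Optimizer for representation learning, presumably paired with the
# state_preprocess_class bound above.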
repr/AdamOptimizer.learning_rate = 0.0001
repr/AdamOptimizer.beta1 = 0.9
repr/AdamOptimizer.beta2 = 0.999