# File size: 2,360 Bytes
# 9839b09
# Settings for the CartPole-v1 entry.
# The whole mapping is anchored as `cartpole-defaults` so that other entries
# can pull it in with a YAML merge key (`<<: *cartpole-defaults`).
CartPole-v1: &cartpole-defaults
  # Explicit !!float tag: exponent-only scalars such as 5e4 (no decimal point)
  # are not resolved as floats by every YAML loader.
  n_timesteps: !!float 5e4
  env_hyperparams:
    rolling_length: 50
  policy_hyperparams:
    hidden_sizes: [256, 256]
  algo_hyperparams:
    learning_rate: !!float 2.3e-3
    batch_size: 64
    buffer_size: 100000
    learning_starts: 1000
    gamma: 0.99
    target_update_interval: 10
    train_freq: 256
    gradient_steps: 128
    exploration_fraction: 0.16
    exploration_final_eps: 0.04
  eval_params:
    # NOTE(review): tagged as a float (1e4), unlike integer-looking step
    # counts elsewhere; confirm the consumer accepts a float here.
    step_freq: !!float 1e4
# Settings for the CartPole-v0 entry: everything from the `cartpole-defaults`
# anchor, with only n_timesteps replaced. The merge key is shallow, so a key
# written here replaces the merged key of the same name wholesale.
CartPole-v0:
  <<: *cartpole-defaults
  n_timesteps: !!float 4e4
# Settings for the MountainCar-v0 entry (standalone; no anchor or merge).
MountainCar-v0:
  # Explicit !!float tag so the scientific-notation scalar resolves to a float.
  n_timesteps: !!float 1.2e5
  env_hyperparams:
    rolling_length: 50
  policy_hyperparams:
    hidden_sizes: [256, 256]
  algo_hyperparams:
    learning_rate: !!float 4e-3
    batch_size: 128
    buffer_size: 10000
    learning_starts: 1000
    gamma: 0.98
    target_update_interval: 600
    train_freq: 16
    gradient_steps: 8
    exploration_fraction: 0.2
    exploration_final_eps: 0.07
# Settings for the Acrobot-v1 entry (standalone; no anchor or merge).
Acrobot-v1:
  # Explicit !!float tag so the exponent-only scalar resolves to a float.
  n_timesteps: !!float 1e5
  env_hyperparams:
    rolling_length: 50
  policy_hyperparams:
    hidden_sizes: [256, 256]
  algo_hyperparams:
    learning_rate: !!float 6.3e-4
    batch_size: 128
    buffer_size: 50000
    learning_starts: 0
    gamma: 0.99
    target_update_interval: 250
    train_freq: 4
    # NOTE(review): -1 looks like a sentinel rather than a literal step count;
    # confirm its meaning against the code that consumes this file.
    gradient_steps: -1
    exploration_fraction: 0.12
    exploration_final_eps: 0.1
# Settings for the LunarLander-v2 entry (standalone; no anchor or merge).
LunarLander-v2:
  # Explicit !!float tag so the exponent-only scalar resolves to a float.
  n_timesteps: !!float 5e5
  env_hyperparams:
    rolling_length: 50
  policy_hyperparams:
    hidden_sizes: [256, 256]
  algo_hyperparams:
    learning_rate: !!float 1e-4
    batch_size: 256
    buffer_size: 100000
    learning_starts: 10000
    gamma: 0.99
    target_update_interval: 250
    train_freq: 8
    # NOTE(review): -1 looks like a sentinel rather than a literal step count;
    # confirm its meaning against the code that consumes this file.
    gradient_steps: -1
    exploration_fraction: 0.12
    exploration_final_eps: 0.1
    # NOTE(review): placed under algo_hyperparams because it directly follows
    # the other algorithm settings; confirm it is not an entry-level key.
    max_grad_norm: 0.5
  eval_params:
    # Underscore digit separator: read as the integer 25000 by YAML 1.1
    # loaders; a strict YAML 1.2 core-schema loader would yield a string.
    step_freq: 25_000
# Shared settings anchored as `atari-defaults`; entries pull them in with a
# YAML merge key (`<<: *atari-defaults`) and override individual keys.
atari: &atari-defaults
  # Explicit !!float tag so the exponent-only scalar resolves to a float.
  n_timesteps: !!float 1e7
  env_hyperparams:
    frame_stack: 4
    # Underscore digit separator: read as the integer 1000 by YAML 1.1
    # loaders; a strict YAML 1.2 core-schema loader would yield a string.
    no_reward_timeout_steps: 1_000
    n_envs: 8
    vec_env_class: "subproc"
  algo_hyperparams:
    buffer_size: 100000
    learning_rate: !!float 1e-4
    batch_size: 32
    learning_starts: 100000
    target_update_interval: 1000
    train_freq: 8
    gradient_steps: 2
    exploration_fraction: 0.1
    exploration_final_eps: 0.01
  eval_params:
    deterministic: false
# Settings for the PongNoFrameskip-v4 entry: everything from the
# `atari-defaults` anchor, with only n_timesteps replaced. The merge key is
# shallow, so a key written here replaces the merged key of the same name.
PongNoFrameskip-v4:
  <<: *atari-defaults
  n_timesteps: !!float 2.5e6
|