# File size: 4,153 Bytes
# 0ca6846 85e4a43 0ca6846 85e4a43 0ca6846 |
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 |
# Settings for CartPole-v1. The &cartpole-defaults anchor exposes this whole
# mapping so another entry can pull it in with a `<<` merge key.
CartPole-v1: &cartpole-defaults
  # Explicit !!float tag: exponent literals without a decimal point (1e5) may
  # otherwise be read as strings by YAML 1.1 loaders.
  n_timesteps: !!float 1e5
  env_hyperparams:
    n_envs: 8
  algo_hyperparams:
    n_steps: 32
    batch_size: 256
    n_epochs: 20
    gae_lambda: 0.8
    gamma: 0.98
    ent_coef: 0.0
    learning_rate: 0.001
    learning_rate_decay: linear
    clip_range: 0.2
    clip_range_decay: linear
  eval_params:
    step_freq: !!float 2.5e4
# Settings for CartPole-v0: merges the mapping anchored as cartpole-defaults,
# then overrides n_timesteps (an explicit key wins over a merged one).
CartPole-v0:
  <<: *cartpole-defaults
  n_timesteps: !!float 5e4
# Settings for MountainCar-v0.
MountainCar-v0:
  n_timesteps: !!float 1e6
  env_hyperparams:
    normalize: true
    n_envs: 16
  algo_hyperparams:
    n_steps: 16
    n_epochs: 4
    gae_lambda: 0.98
    gamma: 0.99
    ent_coef: 0.0
# Settings for MountainCarContinuous-v0.
MountainCarContinuous-v0:
  n_timesteps: !!float 1e5
  env_hyperparams:
    normalize: true
    n_envs: 4
  # Disabled policy overrides, kept for reference:
  # policy_hyperparams:
  #   init_layers_orthogonal: false
  #   log_std_init: -3.29
  #   use_sde: true
  algo_hyperparams:
    n_steps: 512
    batch_size: 256
    n_epochs: 10
    learning_rate: !!float 7.77e-5
    ent_coef: 0.01 # 0.00429
    ent_coef_decay: linear
    clip_range: 0.1
    gae_lambda: 0.9
    max_grad_norm: 5
    vf_coef: 0.19
  eval_params:
    step_freq: 5000
# Settings for Acrobot-v1.
Acrobot-v1:
  n_timesteps: !!float 1e6
  env_hyperparams:
    n_envs: 16
    normalize: true
  algo_hyperparams:
    n_steps: 256
    n_epochs: 4
    gae_lambda: 0.94
    gamma: 0.99
    ent_coef: 0.0
# Settings for LunarLander-v2.
LunarLander-v2:
  n_timesteps: !!float 1e6
  env_hyperparams:
    n_envs: 16
  algo_hyperparams:
    n_steps: 1024
    batch_size: 64
    n_epochs: 4
    gae_lambda: 0.98
    gamma: 0.999
    ent_coef: 0.01
    ent_coef_decay: linear
    normalize_advantage: false
# Settings for CarRacing-v0.
CarRacing-v0:
  n_timesteps: !!float 4e6
  env_hyperparams:
    n_envs: 8
    frame_stack: 4
  policy_hyperparams:
    use_sde: true
    log_std_init: -2
    init_layers_orthogonal: false
    activation_fn: relu
    share_features_extractor: false
    cnn_feature_dim: 256
    hidden_sizes: [256]
  algo_hyperparams:
    n_steps: 512
    batch_size: 128
    n_epochs: 10
    learning_rate: !!float 1e-4
    learning_rate_decay: linear
    gamma: 0.99
    gae_lambda: 0.95
    ent_coef: 0.0
    sde_sample_freq: 4
    max_grad_norm: 0.5
    vf_coef: 0.5
    clip_range: 0.2
# BreakoutNoFrameskip-v4
# PongNoFrameskip-v4
# SpaceInvadersNoFrameskip-v4
# QbertNoFrameskip-v4
# NOTE(review): the environment names above are presumably the ones meant to
# use this entry — confirm against the loader. The anchors &atari-defaults and
# &atari-env-defaults are declared here; no alias to them appears in this part
# of the file.
atari: &atari-defaults
  n_timesteps: !!float 1e7
  policy_hyperparams:
    activation_fn: relu
  env_hyperparams: &atari-env-defaults
    n_envs: 8
    frame_stack: 4
    no_reward_timeout_steps: 1000
    no_reward_fire_steps: 500
    vec_env_class: subproc
  algo_hyperparams:
    n_steps: 128
    batch_size: 256
    n_epochs: 4
    learning_rate: !!float 2.5e-4
    learning_rate_decay: linear
    clip_range: 0.1
    clip_range_decay: linear
    vf_coef: 0.5
    ent_coef: 0.01
  eval_params:
    deterministic: false
# Settings for HalfCheetahBulletEnv-v0. The whole entry is anchored as
# pybullet-defaults, and each sub-section carries its own anchor so other
# entries can merge the entry or a single section.
HalfCheetahBulletEnv-v0: &pybullet-defaults
  n_timesteps: !!float 2e6
  env_hyperparams: &pybullet-env-defaults
    n_envs: 16
    normalize: true
  policy_hyperparams: &pybullet-policy-defaults
    pi_hidden_sizes: [256, 256]
    v_hidden_sizes: [256, 256]
    activation_fn: relu
  algo_hyperparams: &pybullet-algo-defaults
    n_steps: 512
    batch_size: 128
    n_epochs: 20
    gamma: 0.99
    gae_lambda: 0.9
    ent_coef: 0.0
    sde_sample_freq: 4
    max_grad_norm: 0.5
    vf_coef: 0.5
    learning_rate: !!float 3e-5
    clip_range: 0.4
# Settings for AntBulletEnv-v0: merges pybullet-defaults, then re-declares the
# policy and algo sections as merges of their own defaults (no extra overrides).
AntBulletEnv-v0:
  <<: *pybullet-defaults
  policy_hyperparams:
    <<: *pybullet-policy-defaults
  algo_hyperparams:
    <<: *pybullet-algo-defaults
# Settings for Walker2DBulletEnv-v0: pybullet-defaults plus clip_range_decay.
Walker2DBulletEnv-v0:
  <<: *pybullet-defaults
  algo_hyperparams:
    # Re-merge the algo defaults: an explicit algo_hyperparams key replaces the
    # merged section wholesale rather than deep-merging into it.
    <<: *pybullet-algo-defaults
    clip_range_decay: linear
# Settings for HopperBulletEnv-v0: pybullet-defaults plus clip_range_decay.
HopperBulletEnv-v0:
  <<: *pybullet-defaults
  algo_hyperparams:
    # Re-merge the algo defaults: an explicit algo_hyperparams key replaces the
    # merged section wholesale rather than deep-merging into it.
    <<: *pybullet-algo-defaults
    clip_range_decay: linear
# Settings for HumanoidBulletEnv-v0: pybullet-defaults with a longer
# n_timesteps, fewer envs, and several algo overrides.
HumanoidBulletEnv-v0:
  <<: *pybullet-defaults
  n_timesteps: !!float 1e7
  env_hyperparams:
    <<: *pybullet-env-defaults
    n_envs: 8
  policy_hyperparams:
    <<: *pybullet-policy-defaults
    # log_std_init: -1
  algo_hyperparams:
    <<: *pybullet-algo-defaults
    n_steps: 2048
    batch_size: 64
    n_epochs: 10
    gae_lambda: 0.95
    learning_rate: !!float 2.5e-4
    clip_range: 0.2
|