DQN playing LunarLander-v2 from https://github.com/sgoodfriend/rl-algo-impls/tree/1d4094fbcc9082de7f53f4348dd4c7c354152907
9839b09
# CartPole-v1 settings. The mapping is anchored as `cartpole-defaults` so that
# other entries can reuse it through a `<<` merge key.
# NOTE(review): indentation was restored from flattened text - confirm nesting.
CartPole-v1: &cartpole-defaults
  # Explicit `!!float` tag keeps exponent-only literals such as 4e5 numeric.
  n_timesteps: !!float 4e5
  policy_hyperparams:
    hidden_sizes: [32]
  algo_hyperparams:
    steps_per_epoch: 4096
    pi_lr: 0.01
    gamma: 0.99
    lam: 1
    val_lr: 0.01
    train_v_iters: 80
  eval_params:
    step_freq: !!float 2.5e4
    n_episodes: 10
    save_best: true
# CartPole-v0 starts from `cartpole-defaults` and overrides `n_timesteps` and
# `algo_hyperparams`. A `<<` merge is shallow, so the whole `algo_hyperparams`
# map is restated here; only `steps_per_epoch` differs from the anchored values.
# NOTE(review): indentation was restored from flattened text - confirm nesting.
CartPole-v0:
  <<: *cartpole-defaults
  n_timesteps: !!float 1e5
  algo_hyperparams:
    steps_per_epoch: 1024
    pi_lr: 0.01
    gamma: 0.99
    lam: 1
    val_lr: 0.01
    train_v_iters: 80
# Acrobot-v1 settings (standalone entry; no anchor or merge key involved).
# NOTE(review): indentation was restored from flattened text - confirm nesting.
Acrobot-v1:
  n_timesteps: !!float 2e5
  policy_hyperparams:
    hidden_sizes: [32, 32]
  algo_hyperparams:
    steps_per_epoch: 2048
    pi_lr: 0.005
    gamma: 0.99
    lam: 0.97
    val_lr: 0.01
    train_v_iters: 80
    max_grad_norm: 0.5
  eval_params:
    step_freq: !!float 4e4
    n_episodes: 10
    save_best: true
# LunarLander-v2 settings (standalone entry; no anchor or merge key involved).
# NOTE(review): indentation was restored from flattened text - confirm nesting.
LunarLander-v2:
  n_timesteps: !!float 4e6
  policy_hyperparams:
    hidden_sizes: [256, 256]
  algo_hyperparams:
    steps_per_epoch: 2048
    pi_lr: 0.0001
    gamma: 0.999
    lam: 0.97
    val_lr: 0.0001
    train_v_iters: 80
    max_grad_norm: 0.5
  eval_params:
    step_freq: !!float 5e4
    n_episodes: 10
    save_best: true
# CarRacing-v0 settings. Unlike the entries above it, this one also carries an
# `env_hyperparams` section.
# NOTE(review): indentation was restored from flattened text - confirm nesting.
CarRacing-v0:
  n_timesteps: !!float 4e6
  env_hyperparams:
    frame_stack: 4
    n_envs: 4
    vec_env_class: "dummy"
  policy_hyperparams:
    hidden_sizes: [256, 256]
  algo_hyperparams:
    steps_per_epoch: 4000
    pi_lr: !!float 7e-5
    gamma: 0.99
    lam: 0.95
    val_lr: !!float 1e-4
    train_v_iters: 40
    max_grad_norm: 0.5
  eval_params:
    step_freq: !!float 5e4
    n_episodes: 10
    save_best: true
# HalfCheetahBulletEnv-v0 settings. The mapping is anchored as
# `pybullet-defaults` so that other entries can reuse it through a `<<` merge key.
# NOTE(review): indentation was restored from flattened text - confirm nesting.
HalfCheetahBulletEnv-v0: &pybullet-defaults
  n_timesteps: !!float 2e6
  policy_hyperparams:
    hidden_sizes: [64, 64]
    init_layers_orthogonal: true
  algo_hyperparams:
    steps_per_epoch: 4000
    pi_lr: !!float 3e-4
    gamma: 0.99
    lam: 0.97
    val_lr: !!float 1e-3
    train_v_iters: 80
    max_grad_norm: 0.5
  eval_params:
    step_freq: !!float 1e5
    n_episodes: 10
    save_best: true
# HopperBulletEnv-v0 reuses the `pybullet-defaults` mapping with no overrides.
HopperBulletEnv-v0:
  <<: *pybullet-defaults
# AntBulletEnv-v0 starts from `pybullet-defaults` and replaces
# `policy_hyperparams` and `algo_hyperparams`; `n_timesteps` and `eval_params`
# still come from the merged mapping.
# NOTE(review): a `<<` merge is shallow, so the two maps below replace the
# inherited ones wholesale. `init_layers_orthogonal` and `steps_per_epoch` from
# `pybullet-defaults` are therefore NOT carried over - confirm that is intended.
# NOTE(review): indentation was restored from flattened text - confirm nesting.
AntBulletEnv-v0:
  <<: *pybullet-defaults
  policy_hyperparams:
    hidden_sizes: [400, 300]
  algo_hyperparams:
    pi_lr: !!float 7e-4
    gamma: 0.99
    lam: 0.97
    val_lr: !!float 7e-3
    train_v_iters: 80
    max_grad_norm: 0.5
# FrozenLake-v1 settings, including `make_kwargs` for the environment
# (8x8 map, slippery).
# NOTE(review): this entry uses `env_params`, while the other entries that
# configure the environment use `env_hyperparams` - confirm which key the
# loader actually reads.
# NOTE(review): indentation was restored from flattened text - confirm nesting.
FrozenLake-v1:
  n_timesteps: !!float 8e5
  env_params:
    make_kwargs:
      map_name: 8x8
      is_slippery: true
  policy_hyperparams:
    hidden_sizes: [64]
  algo_hyperparams:
    steps_per_epoch: 2048
    pi_lr: 0.01
    gamma: 0.99
    lam: 0.98
    val_lr: 0.01
    train_v_iters: 80
    max_grad_norm: 0.5
  eval_params:
    step_freq: !!float 5e4
    n_episodes: 10
    save_best: true
# SpaceInvadersNoFrameskip-v4 settings. The mapping is anchored as
# `atari-defaults` so that other entries can reuse it through a `<<` merge key.
# NOTE(review): indentation was restored from flattened text - confirm nesting.
SpaceInvadersNoFrameskip-v4: &atari-defaults
  n_timesteps: !!float 1e7
  env_hyperparams:
    frame_stack: 4
    # Written without the `_` digit separator (was `1_000`) so that loaders
    # which do not accept underscores in integers still read a number.
    no_reward_timeout_steps: 1000
    n_envs: 8
    vec_env_class: "subproc"
  policy_hyperparams:
    hidden_sizes: [256, 256]
  algo_hyperparams:
    steps_per_epoch: 4096
    pi_lr: !!float 1e-4
    gamma: 0.99
    lam: 0.95
    val_lr: !!float 2e-4
    train_v_iters: 80
    max_grad_norm: 0.5
  eval_params:
    step_freq: !!float 1e5
    n_episodes: 10
    save_best: true