|
!!python/object/apply:collections.OrderedDict |
|
- - - batch_size |
|
- 128 |
|
- - clip_range |
|
- 0.2 |
|
- - ent_coef |
|
- 0.0 |
|
- - env_wrapper |
|
- - utils.wrappers.FrameSkip: |
|
skip: 2 |
|
- gym.wrappers.resize_observation.ResizeObservation: |
|
shape: 64 |
|
- gym.wrappers.gray_scale_observation.GrayScaleObservation: |
|
keep_dim: true |
|
- - frame_stack |
|
- 2 |
|
- - gae_lambda |
|
- 0.95 |
|
- - gamma |
|
- 0.99 |
|
- - learning_rate |
|
- lin_1e-4 |
|
- - max_grad_norm |
|
- 0.5 |
|
- - n_envs |
|
- 8 |
|
- - n_epochs |
|
- 10 |
|
- - n_steps |
|
- 512 |
|
- - n_timesteps |
|
- 4000000.0 |
|
- - normalize |
|
- '{''norm_obs'': False, ''norm_reward'': True}' |
|
- - policy |
|
- CnnPolicy |
|
- - policy_kwargs |
|
- dict(log_std_init=-2, ortho_init=False, activation_fn=nn.GELU, net_arch=[dict(pi=[256], |
|
vf=[256])], ) |
|
- - sde_sample_freq |
|
- 4 |
|
- - use_sde |
|
- true |
|
- - vf_coef |
|
- 0.5 |
|
|