# Hyperparameter set stored as an ordered list of [key, value] pairs that is
# handed to collections.OrderedDict when the document is loaded.
#
# SECURITY NOTE: the `!!python/object/apply` tag is PyYAML-specific and makes
# the loader call a Python callable. yaml.safe_load() rejects it; it is only
# constructed by PyYAML's unsafe/full loaders. Load this file only from a
# trusted source.
!!python/object/apply:collections.OrderedDict
- - - batch_size
    - 128
  - - clip_range
    - 0.2
  - - ent_coef
    - 0.0
  # Each env_wrapper entry is a single-key mapping: dotted class path -> kwargs.
  # NOTE(review): presumably applied in the order listed -- confirm against the consumer.
  - - env_wrapper
    - - rl_zoo3.wrappers.FrameSkip:
          skip: 2
      - gymnasium.wrappers.resize_observation.ResizeObservation:
          shape: 64
      - gymnasium.wrappers.gray_scale_observation.GrayScaleObservation:
          keep_dim: true
  - - frame_stack
    - 2
  - - gae_lambda
    - 0.95
  - - gamma
    - 0.99
  # Stored as the string "lin_1e-4", not a float.
  # NOTE(review): presumably a linear schedule starting at 1e-4 -- confirm against the consumer.
  - - learning_rate
    - lin_1e-4
  - - max_grad_norm
    - 0.5
  - - n_envs
    - 8
  - - n_epochs
    - 10
  - - n_steps
    - 512
  - - n_timesteps
    - 1000000
  # String holding a Python dict literal (note the doubled '' escapes inside the
  # single-quoted scalar); YAML itself does not parse it into a mapping.
  - - normalize
    - '{''norm_obs'': False, ''norm_reward'': True}'
  - - policy
    - CnnPolicy
  # String holding a Python expression; `nn` is not defined in this file.
  # NOTE(review): presumably evaluated by the consumer with `nn` in scope -- confirm.
  - - policy_kwargs
    - dict(log_std_init=-2, ortho_init=False, activation_fn=nn.GELU, net_arch=dict(pi=[256], vf=[256]), )
  - - sde_sample_freq
    - 4
  - - use_sde
    - true
  - - vf_coef
    - 0.5