from typing import Dict

import gym
import numpy as np
from ditk import logging

from ding.envs import ObsNormWrapper, StaticObsNormWrapper, RewardNormWrapper

try:
    import d4rl  # register d4rl environments with open ai gym
except ImportError:
    # d4rl is optional: without it, only non-d4rl gym env ids can be created.
    logging.warning("not found d4rl env, please install it, refer to https://github.com/rail-berkeley/d4rl")
def wrap_d4rl(
        env_id,
        norm_obs: Dict = dict(use_norm=False, offline_stats=dict(use_offline_stats=False, )),
        norm_reward: Dict = dict(use_norm=False, ),
        only_info=False
) -> gym.Env:
    """
    Overview:
        Wrap Mujoco Env to preprocess env step's return info, e.g. observation normalization, reward normalization, etc.
    Arguments:
        - env_id (:obj:`str`): Mujoco environment id, for example "HalfCheetah-v3"
        - norm_obs (:obj:`EasyDict`): Whether to normalize observation or not
        - norm_reward (:obj:`EasyDict`): Whether to normalize reward or not. For evaluator, environment's reward \
            should not be normalized: Either ``norm_reward`` is None or ``norm_reward.use_norm`` is False can do this.
        - only_info (:obj:`bool`): If True, do not construct the env; instead return a ``str`` listing the \
            wrapper class names (one per line) that would be applied with the given config.
    Returns:
        - wrapped_env (:obj:`gym.Env`): The wrapped mujoco environment, or a ``str`` of wrapper names \
            when ``only_info`` is True.
    """
    # NOTE(fix): use dict-style access (``.get`` / ``[]``) instead of attribute access.
    # ``EasyDict`` is a ``dict`` subclass, so behavior is identical for real configs,
    # while the plain-``dict`` default arguments no longer raise ``AttributeError``.
    use_obs_norm = norm_obs is not None and norm_obs.get('use_norm', False)
    use_reward_norm = norm_reward is not None and norm_reward.get('use_norm', False)
    if not only_info:
        env = gym.make(env_id)
        if use_obs_norm:
            offline_stats = norm_obs.get('offline_stats', dict(use_offline_stats=False))
            if offline_stats.get('use_offline_stats', False):
                # Normalize observations with pre-computed (offline) statistics.
                env = StaticObsNormWrapper(env, offline_stats['mean'], offline_stats['std'])
            else:
                # Normalize observations with running (online) statistics.
                env = ObsNormWrapper(env)
        if use_reward_norm:
            env = RewardNormWrapper(env, norm_reward['reward_discount'])
        return env
    else:
        # Dry-run mode: report which wrapper classes would be applied, one name per line.
        wrapper_info = ''
        if use_obs_norm:
            offline_stats = norm_obs.get('offline_stats', dict(use_offline_stats=False))
            if offline_stats.get('use_offline_stats', False):
                wrapper_info = StaticObsNormWrapper.__name__ + '\n'
            else:
                wrapper_info = ObsNormWrapper.__name__ + '\n'
        if use_reward_norm:
            wrapper_info += RewardNormWrapper.__name__ + '\n'
        return wrapper_info