Spaces:
Sleeping
Sleeping
from easydict import EasyDict | |
import os | |
import gym | |
from ding.envs import BaseEnv, DingEnvWrapper | |
from ding.envs.env_wrappers import MaxAndSkipWrapper, WarpFrameWrapper, ScaledFloatFrameWrapper, FrameStackWrapper, \ | |
EvalEpisodeReturnWrapper, TransposeWrapper, TimeLimitWrapper, FlatObsWrapper, GymToGymnasiumWrapper | |
from ding.policy import PPOFPolicy | |
def get_instance_config(env_id: str, algorithm: str) -> EasyDict:
    """Build the policy config for an ``(env_id, algorithm)`` pair.

    The config starts from ``PPOFPolicy.default_config()`` and then has the
    per-environment overrides from the preset table below assigned onto it,
    attribute by attribute.

    Args:
        env_id: Name of the environment preset (e.g. ``'LunarLander-v2'``).
        algorithm: Name of the algorithm; only ``'PPOF'`` is handled.

    Returns:
        The default PPOF config with the preset's overrides applied.

    Raises:
        KeyError: If ``algorithm`` is not ``'PPOF'``, or if ``env_id`` has no
            preset in the table.
    """
    if algorithm != 'PPOF':
        raise KeyError("not supported algorithm type: {}".format(algorithm))

    cfg = PPOFPolicy.default_config()

    # The table is rebuilt on every call so that the nested ``model`` dicts and
    # lists are never shared between two returned configs.
    presets = {
        'LunarLander-v2': dict(
            n_sample=512,
            value_norm='popart',
            entropy_weight=1e-3,
        ),
        'LunarLanderContinuous-v2': dict(
            action_space='continuous',
            n_sample=400,
        ),
        'BipedalWalker-v3': dict(
            learning_rate=1e-3,
            action_space='continuous',
            n_sample=1024,
        ),
        'Pendulum-v1': dict(
            action_space='continuous',
            n_sample=400,
        ),
        'acrobot': dict(
            learning_rate=1e-4,
            n_sample=400,
        ),
        'rocket_landing': dict(
            n_sample=2048,
            adv_norm=False,
            model=dict(
                encoder_hidden_size_list=[64, 64, 128],
                actor_head_hidden_size=128,
                critic_head_hidden_size=128,
            ),
        ),
        'drone_fly': dict(
            action_space='continuous',
            adv_norm=False,
            epoch_per_collect=5,
            learning_rate=5e-5,
            n_sample=640,
        ),
        'hybrid_moving': dict(
            action_space='hybrid',
            n_sample=3200,
            entropy_weight=0.03,
            batch_size=320,
            adv_norm=False,
            model=dict(
                encoder_hidden_size_list=[256, 128, 64, 64],
                sigma_type='fixed',
                fixed_sigma_value=0.3,
                bound_type='tanh',
            ),
        ),
        'evogym_carrier': dict(
            action_space='continuous',
            n_sample=2048,
            batch_size=256,
            epoch_per_collect=10,
            learning_rate=3e-3,
        ),
        'mario': dict(
            n_sample=256,
            batch_size=64,
            epoch_per_collect=2,
            learning_rate=1e-3,
            model=dict(
                encoder_hidden_size_list=[64, 64, 128],
                critic_head_hidden_size=128,
                actor_head_hidden_size=128,
            ),
        ),
        'di_sheep': dict(
            n_sample=3200,
            batch_size=320,
            epoch_per_collect=10,
            learning_rate=3e-4,
            adv_norm=False,
            entropy_weight=0.001,
        ),
        'procgen_bigfish': dict(
            n_sample=16384,
            batch_size=16384,
            epoch_per_collect=10,
            learning_rate=5e-4,
            model=dict(
                encoder_hidden_size_list=[64, 128, 256],
                critic_head_hidden_size=256,
                actor_head_hidden_size=256,
            ),
        ),
        'KangarooNoFrameskip-v4': dict(
            n_sample=1024,
            batch_size=128,
            epoch_per_collect=10,
            learning_rate=0.0001,
            model=dict(
                encoder_hidden_size_list=[32, 64, 64, 128],
                actor_head_hidden_size=128,
                critic_head_hidden_size=128,
                critic_head_layer_num=2,
            ),
        ),
        'PongNoFrameskip-v4': dict(
            n_sample=3200,
            batch_size=320,
            epoch_per_collect=10,
            learning_rate=3e-4,
            model=dict(
                encoder_hidden_size_list=[64, 64, 128],
                actor_head_hidden_size=128,
                critic_head_hidden_size=128,
            ),
        ),
        'SpaceInvadersNoFrameskip-v4': dict(
            n_sample=320,
            batch_size=320,
            epoch_per_collect=1,
            learning_rate=1e-3,
            entropy_weight=0.01,
            lr_scheduler=(2000, 0.1),
            model=dict(
                encoder_hidden_size_list=[64, 64, 128],
                actor_head_hidden_size=128,
                critic_head_hidden_size=128,
            ),
        ),
        'QbertNoFrameskip-v4': dict(
            n_sample=3200,
            batch_size=320,
            epoch_per_collect=10,
            learning_rate=5e-4,
            lr_scheduler=(1000, 0.1),
            model=dict(
                encoder_hidden_size_list=[64, 64, 128],
                actor_head_hidden_size=128,
                critic_head_hidden_size=128,
            ),
        ),
        'minigrid_fourroom': dict(
            n_sample=3200,
            batch_size=320,
            learning_rate=3e-4,
            epoch_per_collect=10,
            entropy_weight=0.001,
        ),
        # The previous code assigned learning_rate twice for this preset
        # (3e-4, then 0.0001); only the later, effective value is kept.
        'metadrive': dict(
            action_space='continuous',
            entropy_weight=0.001,
            n_sample=3000,
            epoch_per_collect=10,
            learning_rate=0.0001,
            model=dict(
                encoder_hidden_size_list=[32, 64, 64, 128],
                actor_head_hidden_size=128,
                critic_head_hidden_size=128,
                critic_head_layer_num=2,
            ),
        ),
    }
    # Bowling uses exactly the same overrides as Kangaroo.
    presets['BowlingNoFrameskip-v4'] = presets['KangarooNoFrameskip-v4']
    # The three MuJoCo presets use identical overrides.
    for mujoco_id in ('Hopper-v3', 'HalfCheetah-v3', 'Walker2d-v3'):
        presets[mujoco_id] = dict(
            action_space="continuous",
            n_sample=3200,
            batch_size=320,
            epoch_per_collect=10,
            learning_rate=3e-4,
        )

    if env_id not in presets:
        raise KeyError("not supported env type: {}".format(env_id))

    # setattr mirrors the original ``cfg.<name> = value`` assignments.
    for name, value in presets[env_id].items():
        setattr(cfg, name, value)
    return cfg
def get_instance_env(env_id: str) -> BaseEnv:
    """Create the environment instance registered under ``env_id``.

    Optional third-party or ``dizoo`` packages are imported inside the branch
    that needs them, so only the dependencies of the requested environment
    have to be installed.

    Args:
        env_id: Name of the environment preset (e.g. ``'LunarLander-v2'``,
            ``'mario'``, ``'PongNoFrameskip-v4'``).

    Returns:
        The constructed environment. Most presets are wrapped in
        ``DingEnvWrapper``; a few return an environment class from ``dizoo``.

    Raises:
        KeyError: If ``env_id`` is not one of the supported presets.
    """
    if env_id == 'LunarLander-v2':
        return DingEnvWrapper(gym.make('LunarLander-v2'))

    if env_id == 'LunarLanderContinuous-v2':
        return DingEnvWrapper(gym.make('LunarLanderContinuous-v2', continuous=True))

    if env_id == 'BipedalWalker-v3':
        return DingEnvWrapper(gym.make('BipedalWalker-v3'), cfg={'act_scale': True, 'rew_clip': True})

    if env_id == 'Pendulum-v1':
        return DingEnvWrapper(gym.make('Pendulum-v1'), cfg={'act_scale': True})

    if env_id == 'acrobot':
        return DingEnvWrapper(gym.make('Acrobot-v1'))

    if env_id == 'rocket_landing':
        from dizoo.rocket.envs import RocketEnv
        cfg = EasyDict({
            'task': 'landing',
            'max_steps': 800,
        })
        return RocketEnv(cfg)

    if env_id == 'drone_fly':
        from dizoo.gym_pybullet_drones.envs import GymPybulletDronesEnv
        cfg = EasyDict({
            'env_id': 'flythrugate-aviary-v0',
            'action_type': 'VEL',
        })
        return GymPybulletDronesEnv(cfg)

    if env_id == 'hybrid_moving':
        # Imported only for its side effects; presumably this registers
        # 'Moving-v0' with gym -- TODO confirm.
        import gym_hybrid  # noqa: F401
        return DingEnvWrapper(gym.make('Moving-v0'))

    if env_id == 'evogym_carrier':
        # Imported only for its side effects; presumably this registers
        # 'Carrier-v0' with gym -- TODO confirm.
        import evogym.envs  # noqa: F401
        from evogym import WorldObject
        # The robot description is resolved relative to this file's location.
        path = os.path.join(os.path.dirname(__file__), '../../dizoo/evogym/envs/world_data/carry_bot.json')
        robot_object = WorldObject.from_json(path)
        body = robot_object.get_structure()
        return DingEnvWrapper(
            gym.make('Carrier-v0', body=body),
            cfg={
                'env_wrapper': [
                    lambda env: TimeLimitWrapper(env, max_limit=300),
                    lambda env: EvalEpisodeReturnWrapper(env),
                ]
            }
        )

    if env_id == 'mario':
        import gym_super_mario_bros
        from nes_py.wrappers import JoypadSpace
        return DingEnvWrapper(
            JoypadSpace(gym_super_mario_bros.make("SuperMarioBros-1-1-v1"), [["right"], ["right", "A"]]),
            cfg={
                'env_wrapper': [
                    lambda env: MaxAndSkipWrapper(env, skip=4),
                    lambda env: WarpFrameWrapper(env, size=84),
                    lambda env: ScaledFloatFrameWrapper(env),
                    lambda env: FrameStackWrapper(env, n_frames=4),
                    lambda env: TimeLimitWrapper(env, max_limit=200),
                    lambda env: EvalEpisodeReturnWrapper(env),
                ]
            }
        )

    if env_id == 'di_sheep':
        from sheep_env import SheepEnv
        return DingEnvWrapper(SheepEnv(level=9))

    if env_id == 'procgen_bigfish':
        return DingEnvWrapper(
            gym.make('procgen:procgen-bigfish-v0', start_level=0, num_levels=1),
            cfg={
                'env_wrapper': [
                    lambda env: TransposeWrapper(env),
                    lambda env: ScaledFloatFrameWrapper(env),
                    lambda env: EvalEpisodeReturnWrapper(env),
                ]
            },
            seed_api=False,
        )

    if env_id in ('Hopper-v3', 'HalfCheetah-v3', 'Walker2d-v3'):
        # All three MuJoCo presets share the same wrapper settings; only the
        # gym id differs.
        cfg = EasyDict(
            env_id=env_id,
            env_wrapper='mujoco_default',
            act_scale=True,
            rew_clip=True,
        )
        return DingEnvWrapper(gym.make(env_id), cfg=cfg)

    # NOTE: every entry needs its trailing comma. Adjacent string literals are
    # concatenated by Python, which previously merged the Gopher and Kangaroo
    # ids into one bogus name and made both of them unsupported.
    atari_env_ids = (
        'BowlingNoFrameskip-v4',
        'BreakoutNoFrameskip-v4',
        'GopherNoFrameskip-v4',
        'KangarooNoFrameskip-v4',
        'PongNoFrameskip-v4',
        'QbertNoFrameskip-v4',
        'SpaceInvadersNoFrameskip-v4',
    )
    if env_id in atari_env_ids:
        cfg = EasyDict({
            'env_id': env_id,
            'env_wrapper': 'atari_default',
        })
        return DingEnvWrapper(gym.make(env_id), cfg=cfg)

    if env_id == 'minigrid_fourroom':
        import gymnasium
        return DingEnvWrapper(
            gymnasium.make('MiniGrid-FourRooms-v0'),
            cfg={
                'env_wrapper': [
                    lambda env: GymToGymnasiumWrapper(env),
                    lambda env: FlatObsWrapper(env),
                    lambda env: TimeLimitWrapper(env, max_limit=300),
                    lambda env: EvalEpisodeReturnWrapper(env),
                ]
            }
        )

    if env_id == 'metadrive':
        from dizoo.metadrive.env.drive_env import MetaDrivePPOOriginEnv
        from dizoo.metadrive.env.drive_wrapper import DriveEnvWrapper
        cfg = EasyDict(
            dict(
                map='XSOS',
                horizon=4000,
                out_of_road_penalty=40.0,
                crash_vehicle_penalty=40.0,
                out_of_route_done=True,
            )
        )
        return DriveEnvWrapper(MetaDrivePPOOriginEnv(cfg))

    raise KeyError("not supported env type: {}".format(env_id))
def get_hybrid_shape(action_space) -> EasyDict:
    """Collect the shape information of a two-part (hybrid) action space.

    Args:
        action_space: Indexable object whose element ``0`` exposes ``.n`` and
            whose element ``1`` exposes ``.shape``.

    Returns:
        An ``EasyDict`` with ``action_type_shape`` (taken from
        ``action_space[0].n``) and ``action_args_shape`` (taken from
        ``action_space[1].shape``).
    """
    type_shape = action_space[0].n
    args_shape = action_space[1].shape
    return EasyDict(action_type_shape=type_shape, action_args_shape=args_shape)