import numpy as np
import gym
from typing import Any, Union, List, Optional
import copy
import slimevolleygym
from gym.envs.registration import registry

from ding.envs import BaseEnv, BaseEnvTimestep
from ding.utils import ENV_REGISTRY
from ding.torch_utils import to_ndarray
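
# SlimeVolleyEnv adapts the slimevolleygym environment to DI-engine's BaseEnv interface.
# Two modes are supported:
#   * agent_vs_bot: a single agent plays against the built-in bot (single obs/action/reward).
#   * agent_vs_agent: both sides are controlled; observations are stacked per agent and the
#     two rewards are opposite in sign, while done remains a single flag.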

@ENV_REGISTRY.register('slime_volley')
class SlimeVolleyEnv(BaseEnv):

    def __init__(self, cfg) -> None:
        self._cfg = cfg
        self._init_flag = False
        self._replay_path = None
        # The agent_vs_bot env is a single-agent env: obs, action, done and info are all single.
        # The agent_vs_agent env is a double-agent env: obs, action and info are paired, while done is still single.
        self._agent_vs_agent = cfg.agent_vs_agent

    def seed(self, seed: int, dynamic_seed: bool = True) -> None:
        self._seed = seed
        self._dynamic_seed = dynamic_seed
        np.random.seed(self._seed)

    def close(self) -> None:
        if self._init_flag:
            self._env.close()
        self._init_flag = False

    def step(self, action: Union[np.ndarray, List[np.ndarray]]) -> BaseEnvTimestep:
        if self._agent_vs_agent:
            assert isinstance(action, List) and all([isinstance(e, np.ndarray) for e in action])
            action1, action2 = action[0], action[1]
        else:
            assert isinstance(action, np.ndarray)
            action1, action2 = action, None
        assert isinstance(action1, np.ndarray), type(action1)
        assert action2 is None or isinstance(action2, np.ndarray), type(action2)
        if action1.shape == (1, ):
            action1 = action1.squeeze()  # 0-dim array
        if action2 is not None and action2.shape == (1, ):
            action2 = action2.squeeze()  # 0-dim array
        action1 = SlimeVolleyEnv._process_action(action1)
        action2 = SlimeVolleyEnv._process_action(action2)
        # gym version >= 0.22.0 only supports passing the action as a single variable,
        # so the two actions are packed into one tuple.
        obs1, rew, done, info = self._env.step((action1, action2))
        obs1 = to_ndarray(obs1).astype(np.float32)
        self._eval_episode_return += rew
        # info keys: 'ale.lives', 'ale.otherLives', 'otherObs', 'state', 'otherState'
        if self._agent_vs_agent:
            info = [
                {
                    'ale.lives': info['ale.lives'],
                    'state': info['state']
                }, {
                    'ale.lives': info['ale.otherLives'],
                    'state': info['otherState'],
                    'obs': info['otherObs']
                }
            ]
            if done:
                info[0]['eval_episode_return'] = self._eval_episode_return
                info[1]['eval_episode_return'] = -self._eval_episode_return
                info[0]['result'] = self.get_episode_result(self._eval_episode_return)
                info[1]['result'] = self.get_episode_result(-self._eval_episode_return)
        else:
            if done:
                info['eval_episode_return'] = self._eval_episode_return
                info['result'] = self.get_episode_result(self._eval_episode_return)
        reward = to_ndarray([rew]).astype(np.float32)
        if self._agent_vs_agent:
            obs2 = info[1]['obs']
            obs2 = to_ndarray(obs2).astype(np.float32)
            observations = np.stack([obs1, obs2], axis=0)
            rewards = to_ndarray([rew, -rew]).astype(np.float32)
            rewards = rewards[..., np.newaxis]
            return BaseEnvTimestep(observations, rewards, done, info)
        else:
            return BaseEnvTimestep(obs1, reward, done, info)

    def get_episode_result(self, eval_episode_return: float):
        # Each episode consists of 5 games (lives) in this env, so eval_episode_return can never be zero.
        if eval_episode_return > 0:
            return "wins"
        else:
            return "losses"

    def reset(self):
        if not self._init_flag:
            self._env = gym.make(self._cfg.env_id)
            if self._replay_path is not None:
                if gym.version.VERSION > '0.22.0':
                    # Gym removed classic control rendering in favour of pygame,
                    # so slime_volleyball currently does not support rendering.
                    self._env.metadata.update({'render_modes': ["human"]})
                else:
                    self._env.metadata.update({'render.modes': ["human"]})
                self._env = gym.wrappers.RecordVideo(
                    self._env,
                    video_folder=self._replay_path,
                    episode_trigger=lambda episode_id: True,
                    name_prefix='rl-video-{}'.format(id(self))
                )
                self._env.start_video_recorder()
            ori_shape = self._env.observation_space.shape
            self._observation_space = gym.spaces.Box(
                low=float("-inf"),
                high=float("inf"),
                shape=(len(self.agents), ) + ori_shape if len(self.agents) >= 2 else ori_shape,
                dtype=np.float32
            )
            self._action_space = gym.spaces.Discrete(6)
            self._reward_space = gym.spaces.Box(low=-5, high=5, shape=(1, ), dtype=np.float32)
            self._init_flag = True
        if hasattr(self, '_seed') and hasattr(self, '_dynamic_seed') and self._dynamic_seed:
            np_seed = 100 * np.random.randint(1, 1000)
            self._env.seed(self._seed + np_seed)
        elif hasattr(self, '_seed'):
            self._env.seed(self._seed)
        self._eval_episode_return = 0
        obs = self._env.reset()
        obs = to_ndarray(obs).astype(np.float32)
        if self._agent_vs_agent:
            # Both agents start from the same initial observation.
            obs = np.stack([obs, obs], axis=0)
            return obs
        else:
            return obs

    @property
    def observation_space(self) -> gym.spaces.Space:
        return self._observation_space

    @property
    def action_space(self) -> gym.spaces.Space:
        return self._action_space

    @property
    def reward_space(self) -> gym.spaces.Space:
        return self._reward_space

    @property
    def agents(self) -> List[str]:
        if self._agent_vs_agent:
            return ['home', 'away']
        else:
            return ['home']

    def random_action(self) -> np.ndarray:
        high = self.action_space.n
        if self._agent_vs_agent:
            return [np.random.randint(0, high, size=(1, )) for _ in range(2)]
        else:
            return np.random.randint(0, high, size=(1, ))

    def __repr__(self):
        return "DI-engine Slime Volley Env"

    def enable_save_replay(self, replay_path: Optional[str] = None) -> None:
        if replay_path is None:
            replay_path = './video'
        self._replay_path = replay_path

    @staticmethod
    def _process_action(action: np.ndarray, _type: str = "binary") -> np.ndarray:
        if action is None:
            return None
        action = action.item()
        # The env receives an action in [0, 5] (int type), which can be translated into:
        # 1) "binary" type: e.g. np.array([0, 1, 0])
        # 2) "atari" type: NOOP, LEFT, UPLEFT, UP, UPRIGHT, RIGHT
        to_atari_action = {
            0: 0,  # NOOP
            1: 4,  # LEFT
            2: 7,  # UPLEFT
            3: 2,  # UP
            4: 6,  # UPRIGHT
            5: 3,  # RIGHT
        }
        to_binary_action = {
            0: [0, 0, 0],  # NOOP
            1: [1, 0, 0],  # LEFT (forward)
            2: [1, 0, 1],  # UPLEFT (forward jump)
            3: [0, 0, 1],  # UP (jump)
            4: [0, 1, 1],  # UPRIGHT (backward jump)
            5: [0, 1, 0],  # RIGHT (backward)
        }
        if _type == "binary":
            return to_ndarray(to_binary_action[action])
        elif _type == "atari":
            return to_atari_action[action]
        else:
            raise NotImplementedError
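
# Minimal usage sketch (illustrative, not part of the wrapper itself). It assumes the
# slimevolleygym package registers the 'SlimeVolley-v0' env id and that the config is an
# EasyDict-like object carrying the two fields read in __init__ (env_id, agent_vs_agent).
if __name__ == '__main__':
    from easydict import EasyDict

    # agent_vs_bot mode: single-agent interface, the opponent is the built-in bot.
    bot_env = SlimeVolleyEnv(EasyDict(env_id='SlimeVolley-v0', agent_vs_agent=False))
    bot_env.seed(0)
    obs = bot_env.reset()  # 12-dim state observation for 'SlimeVolley-v0'
    timestep = bot_env.step(bot_env.random_action())
    print(obs.shape, timestep.reward, timestep.done)
    bot_env.close()

    # agent_vs_agent mode: both sides controlled, obs and rewards are stacked per agent.
    selfplay_env = SlimeVolleyEnv(EasyDict(env_id='SlimeVolley-v0', agent_vs_agent=True))
    selfplay_env.seed(0)
    obs = selfplay_env.reset()  # shape (2, 12): one row per agent
    timestep = selfplay_env.step(selfplay_env.random_action())  # a list of two actions
    print(obs.shape, timestep.reward.shape, timestep.done)
    selfplay_env.close()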