GameServerX

Running

File size: 16,415 Bytes

e11e4fe

from mlagents_envs.base_env import (
    ActionSpec,
    ObservationSpec,
    DimensionProperty,
    BehaviorSpec,
    DecisionSteps,
    TerminalSteps,
    ObservationType,
)
from mlagents_envs.exception import UnityObservationException
from mlagents_envs.timers import hierarchical_timer, timed
from mlagents_envs.communicator_objects.agent_info_pb2 import AgentInfoProto
from mlagents_envs.communicator_objects.observation_pb2 import (
    ObservationProto,
    NONE as COMPRESSION_TYPE_NONE,
)
from mlagents_envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto
import numpy as np
import io
from typing import cast, List, Tuple, Collection, Optional, Iterable
from PIL import Image


# Fixed 8-byte signature that starts every PNG stream. process_pixels searches
# for it to find where the next image begins inside concatenated PNG bytes.
PNG_HEADER = b"\x89PNG\r\n\x1a\n"


def behavior_spec_from_proto(
    brain_param_proto: BrainParametersProto, agent_info: AgentInfoProto
) -> BehaviorSpec:
    """
    Builds a BehaviorSpec from a brain parameter proto and one agent info proto.
    The observation specs are derived from the observations carried by
    agent_info; the action spec is derived from brain_param_proto.
    :param brain_param_proto: protobuf object describing the action space.
    :param agent_info: protobuf object whose observations define the specs.
    :return: BehaviorSpec object.
    """
    observation_specs = []
    for obs_proto in agent_info.observations:
        obs_shape = tuple(obs_proto.shape)
        obs_type = ObservationType(obs_proto.observation_type)
        if len(obs_proto.dimension_properties) > 0:
            dim_props = tuple(
                DimensionProperty(prop) for prop in obs_proto.dimension_properties
            )
        else:
            # No per-dimension info was sent: mark every dimension unspecified.
            dim_props = (DimensionProperty.UNSPECIFIED,) * len(obs_proto.shape)
        observation_specs.append(
            ObservationSpec(
                name=obs_proto.name,
                shape=obs_shape,
                observation_type=obs_type,
                dimension_property=dim_props,
            )
        )

    proto_action_spec = brain_param_proto.action_spec
    has_action_spec = (
        proto_action_spec.num_continuous_actions != 0
        or proto_action_spec.num_discrete_actions != 0
    )
    if has_action_spec:
        action_spec = ActionSpec(
            proto_action_spec.num_continuous_actions,
            tuple(proto_action_spec.discrete_branch_sizes),
        )
    elif brain_param_proto.vector_action_space_type_deprecated == 1:
        # proto from communicator < v1.3 does not set action spec, use deprecated
        # fields instead: here the first size entry is the continuous action count.
        action_spec = ActionSpec(
            brain_param_proto.vector_action_size_deprecated[0], ()
        )
    else:
        # Deprecated fields again: the size entries are the discrete branch sizes.
        action_spec = ActionSpec(
            0, tuple(brain_param_proto.vector_action_size_deprecated)
        )
    return BehaviorSpec(observation_specs, action_spec)


class OffsetBytesIO:
    """
    Minimal file-like wrapper around a bytes object whose logical "start" can be
    shifted by assigning to ``offset``. It exists only to read concatenated
    PNGs: Pillow always calls seek(0) before reading, so the start has to be
    moved to the beginning of the image that should be decoded next.
    """

    __slots__ = ["fp", "offset"]

    def __init__(self, data: bytes):
        # The logical start coincides with the real start until offset is changed.
        self.offset = 0
        self.fp = io.BytesIO(data)

    def seek(self, offset: int, whence: int = io.SEEK_SET) -> int:
        """
        Moves to ``offset`` relative to the logical start and returns the new
        position relative to it. Only io.SEEK_SET is supported.
        """
        if whence != io.SEEK_SET:
            raise NotImplementedError()
        absolute_position = self.fp.seek(self.offset + offset)
        return absolute_position - self.offset

    def tell(self) -> int:
        """
        Returns the current position relative to the logical start
        """
        absolute_position = self.fp.tell()
        return absolute_position - self.offset

    def read(self, size: int = -1) -> bytes:
        """
        Reads up to ``size`` bytes from the current position (everything if -1)
        """
        return self.fp.read(size)

    def original_tell(self) -> int:
        """
        Returns the current position as an offset into the original byte array
        """
        return self.fp.tell()


@timed
def process_pixels(
    image_bytes: bytes, expected_channels: int, mappings: Optional[List[int]] = None
) -> np.ndarray:
    """
    Decodes one or more concatenated PNG images from a byte array, scales the
    pixel values by 1/255 into float32 arrays, and combines their channels into
    a single observation array.
    :param image_bytes: input byte array holding the PNG image(s)
    :param expected_channels: Expected output channels, used when no mapping is given
    :param mappings: optional channel mapping; when non-empty it decides how the
        decoded channels are combined instead of expected_channels
    :return: processed numpy array of observation from environment
    """
    stream = OffsetBytesIO(image_bytes)
    decoded_images = []

    # The image sizes are unknown up front, so decode one image, then search for
    # the next PNG signature after the point where the decoder stopped reading.
    more_images = True
    while more_images:
        with hierarchical_timer("image_decompress"):
            picture = Image.open(stream)
            # Image normally loads lazily; load() makes the decode happen while
            # the timer is still running.
            picture.load()
        decoded_images.append(np.array(picture, dtype=np.float32) / 255.0)

        try:
            # Pillow will seek(0) again, so shift the logical start of the stream.
            stream.offset = image_bytes.index(PNG_HEADER, stream.original_tell())
        except ValueError:
            # No further signature: the last image has been decoded.
            more_images = False

    if mappings is None or len(mappings) == 0:
        return _process_images_num_channels(decoded_images, expected_channels)
    return _process_images_mapping(decoded_images, mappings)


def _process_images_mapping(image_arrays, mappings):
    """
    Helper function for processing decompressed images with compressed channel mappings.
    """
    image_arrays = np.concatenate(image_arrays, axis=2).transpose((2, 0, 1))

    if len(mappings) != len(image_arrays):
        raise UnityObservationException(
            f"Compressed observation and its mapping had different number of channels - "
            f"observation had {len(image_arrays)} channels but its mapping had {len(mappings)} channels"
        )
    if len({m for m in mappings if m > -1}) != max(mappings) + 1:
        raise UnityObservationException(
            f"Invalid Compressed Channel Mapping: the mapping {mappings} does not have the correct format."
        )
    if max(mappings) >= len(image_arrays):
        raise UnityObservationException(
            f"Invalid Compressed Channel Mapping: the mapping has index larger than the total "
            f"number of channels in observation - mapping index {max(mappings)} is"
            f"invalid for input observation with {len(image_arrays)} channels."
        )

    processed_image_arrays: List[np.array] = [[] for _ in range(max(mappings) + 1)]
    for mapping_idx, img in zip(mappings, image_arrays):
        if mapping_idx > -1:
            processed_image_arrays[mapping_idx].append(img)

    for i, img_array in enumerate(processed_image_arrays):
        processed_image_arrays[i] = np.mean(img_array, axis=0)
    img = np.stack(processed_image_arrays, axis=2)
    return img


def _process_images_num_channels(image_arrays, expected_channels):
    """
    Helper function for processing decompressed images with number of expected channels.
    This is for old API without mapping provided. Use the first n channel, n=expected_channels.
    """
    if expected_channels == 1:
        # Convert to grayscale
        img = np.mean(image_arrays[0], axis=2)
        img = np.reshape(img, [img.shape[0], img.shape[1], 1])
    else:
        img = np.concatenate(image_arrays, axis=2)
        # We can drop additional channels since they may need to be added to include
        # numbers of observation channels not divisible by 3.
        actual_channels = list(img.shape)[2]
        if actual_channels > expected_channels:
            img = img[..., 0:expected_channels]
    return img


def _check_observations_match_spec(
    obs_index: int,
    observation_spec: ObservationSpec,
    agent_info_list: Collection[AgentInfoProto],
) -> None:
    """
    Verify that the observation at obs_index has the spec's shape for every agent.
    Raising here produces a clearer message than the numpy error that a
    mismatched shape would otherwise trigger later.
    :raises UnityObservationException: for the first agent whose shape differs.
    """
    expected_obs_shape = tuple(observation_spec.shape)
    for agent_info in agent_info_list:
        agent_obs_shape = tuple(agent_info.observations[obs_index].shape)
        if agent_obs_shape == expected_obs_shape:
            continue
        raise UnityObservationException(
            f"Observation at index={obs_index} for agent with "
            f"id={agent_info.id} didn't match the ObservationSpec. "
            f"Expected shape {expected_obs_shape} but got {agent_obs_shape}."
        )


@timed
def _observation_to_np_array(
    obs: ObservationProto, expected_shape: Optional[Iterable[int]] = None
) -> np.ndarray:
    """
    Converts observation proto into numpy array of the appropriate size.
    Uncompressed observations are read from the proto's float data and reshaped
    to obs.shape; compressed ones are decoded with process_pixels.
    :param obs: observation proto to be converted
    :param expected_shape: optional shape information, used for sanity checks.
    :return: processed numpy array of observation from environment
    :raises UnityObservationException: if obs.shape differs from expected_shape,
        or if a decompressed image does not have the shape obs.shape.
    """
    if expected_shape is not None and list(obs.shape) != list(expected_shape):
        raise UnityObservationException(
            f"Observation did not have the expected shape - got {obs.shape} but expected {expected_shape}"
        )

    if obs.compression_type == COMPRESSION_TYPE_NONE:
        img = np.array(obs.float_data.data, dtype=np.float32)
        return np.reshape(img, obs.shape)

    # The channel count is only needed for decoding, so it is read here rather
    # than up front: the uncompressed path must not require a third dimension.
    expected_channels = obs.shape[2]
    img = process_pixels(
        obs.compressed_data, expected_channels, list(obs.compressed_channel_mapping)
    )
    # Compare decompressed image size to observation shape and make sure they match
    if list(obs.shape) != list(img.shape):
        raise UnityObservationException(
            f"Decompressed observation did not have the expected shape - "
            f"decompressed had {img.shape} but expected {obs.shape}"
        )
    return img


@timed
def _process_maybe_compressed_observation(
    obs_index: int,
    observation_spec: ObservationSpec,
    agent_info_list: Collection[AgentInfoProto],
) -> np.ndarray:
    """
    Converts the observation at obs_index of every agent into one float32 batch
    with the agents along the first axis.
    :param obs_index: position of the observation within each agent's observations
    :param observation_spec: spec providing the expected three-element shape
    :param agent_info_list: agents whose observations are batched
    :return: float32 array; its first axis has length zero when there are no agents
    """
    shape = cast(Tuple[int, int, int], observation_spec.shape)
    if len(agent_info_list) == 0:
        empty_batch_shape = (0, shape[0], shape[1], shape[2])
        return np.zeros(empty_batch_shape, dtype=np.float32)

    batched_visual = []
    try:
        for agent_obs in agent_info_list:
            batched_visual.append(
                _observation_to_np_array(agent_obs.observations[obs_index], shape)
            )
    except ValueError:
        # Prefer the clearer shape-mismatch error when one applies.
        _check_observations_match_spec(obs_index, observation_spec, agent_info_list)
        # The shapes were fine, so surface the original error unchanged.
        raise
    return np.array(batched_visual, dtype=np.float32)


def _raise_on_nan_and_inf(data: np.array, source: str) -> np.array:
    # Check for NaNs or Infinite values in the observation or reward data.
    # If there's a NaN in the observations, the np.mean() result will be NaN
    # If there's an Infinite value (either sign) then the result will be Inf
    # See https://stackoverflow.com/questions/6736590/fast-check-for-nan-in-numpy for background
    # Note that a very large values (larger than sqrt(float_max)) will result in an Inf value here
    # Raise a Runtime error in the case that NaNs or Infinite values make it into the data.
    if data.size == 0:
        return data

    d = np.mean(data)
    has_nan = np.isnan(d)
    has_inf = not np.isfinite(d)

    if has_nan:
        raise RuntimeError(f"The {source} provided had NaN values.")
    if has_inf:
        raise RuntimeError(f"The {source} provided had Infinite values.")


@timed
def _process_rank_one_or_two_observation(
    obs_index: int,
    observation_spec: ObservationSpec,
    agent_info_list: Collection[AgentInfoProto],
) -> np.ndarray:
    """
    Batches the float data of the observation at obs_index for every agent into
    one float32 array with the agents along the first axis, followed by the
    spec's shape.
    :param obs_index: position of the observation within each agent's observations
    :param observation_spec: spec providing the per-agent shape
    :param agent_info_list: agents whose observations are batched
    :return: float32 array; its first axis has length zero when there are no agents
    :raises RuntimeError: if the batch fails the NaN / Infinite value check.
    """
    agent_count = len(agent_info_list)
    batch_shape = (agent_count,) + observation_spec.shape
    if agent_count == 0:
        return np.zeros(batch_shape, dtype=np.float32)

    try:
        per_agent_data = [
            agent_obs.observations[obs_index].float_data.data
            for agent_obs in agent_info_list
        ]
        np_obs = np.array(per_agent_data, dtype=np.float32).reshape(batch_shape)
    except ValueError:
        # Prefer the clearer shape-mismatch error when one applies.
        _check_observations_match_spec(obs_index, observation_spec, agent_info_list)
        # The shapes were fine, so surface the original error unchanged.
        raise
    _raise_on_nan_and_inf(np_obs, "observations")
    return np_obs


@timed
def steps_from_proto(
    agent_info_list: Collection[AgentInfoProto], behavior_spec: BehaviorSpec
) -> Tuple[DecisionSteps, TerminalSteps]:
    """
    Converts a collection of agent info protos into batched step data.
    Agents whose proto has ``done`` set go into the TerminalSteps; all other
    agents go into the DecisionSteps.
    :param agent_info_list: agent info protos to convert
    :param behavior_spec: spec providing the observation specs and action spec
    :return: tuple (DecisionSteps, TerminalSteps)
    :raises RuntimeError: if observations, rewards or group rewards fail the
        NaN / Infinite value check.
    """
    decision_agent_info_list = [
        agent_info for agent_info in agent_info_list if not agent_info.done
    ]
    terminal_agent_info_list = [
        agent_info for agent_info in agent_info_list if agent_info.done
    ]

    decision_obs_list: List[np.ndarray] = []
    terminal_obs_list: List[np.ndarray] = []
    for obs_index, observation_spec in enumerate(behavior_spec.observation_specs):
        # Three-dimensional observations are treated as visual and may be compressed.
        is_visual = len(observation_spec.shape) == 3
        process_observation = (
            _process_maybe_compressed_observation
            if is_visual
            else _process_rank_one_or_two_observation
        )
        decision_obs_list.append(
            process_observation(obs_index, observation_spec, decision_agent_info_list)
        )
        terminal_obs_list.append(
            process_observation(obs_index, observation_spec, terminal_agent_info_list)
        )

    decision_rewards = np.array(
        [agent_info.reward for agent_info in decision_agent_info_list], dtype=np.float32
    )
    terminal_rewards = np.array(
        [agent_info.reward for agent_info in terminal_agent_info_list], dtype=np.float32
    )

    decision_group_rewards = np.array(
        [agent_info.group_reward for agent_info in decision_agent_info_list],
        dtype=np.float32,
    )
    terminal_group_rewards = np.array(
        [agent_info.group_reward for agent_info in terminal_agent_info_list],
        dtype=np.float32,
    )

    _raise_on_nan_and_inf(decision_rewards, "rewards")
    _raise_on_nan_and_inf(terminal_rewards, "rewards")
    _raise_on_nan_and_inf(decision_group_rewards, "group_rewards")
    _raise_on_nan_and_inf(terminal_group_rewards, "group_rewards")

    decision_group_id = [agent_info.group_id for agent_info in decision_agent_info_list]
    terminal_group_id = [agent_info.group_id for agent_info in terminal_agent_info_list]

    max_step = np.array(
        [agent_info.max_step_reached for agent_info in terminal_agent_info_list],
        dtype=bool,
    )
    decision_agent_id = np.array(
        [agent_info.id for agent_info in decision_agent_info_list], dtype=np.int32
    )
    terminal_agent_id = np.array(
        [agent_info.id for agent_info in terminal_agent_info_list], dtype=np.int32
    )

    action_mask = None
    if behavior_spec.action_spec.discrete_size > 0:
        # any() has to receive plain booleans: wrapping each test in a list
        # would make every element truthy regardless of the test's result.
        if any(
            agent_info.action_mask is not None
            for agent_info in decision_agent_info_list
        ):
            n_agents = len(decision_agent_info_list)
            a_size = np.sum(behavior_spec.action_spec.discrete_branches)
            # One row per agent, one column per discrete action across all
            # branches. An entry is True where the agent's action_mask flag is set.
            mask_matrix = np.zeros((n_agents, a_size), dtype=bool)
            for agent_index, agent_info in enumerate(decision_agent_info_list):
                agent_mask = agent_info.action_mask
                # A mask of the wrong length is ignored and the row stays all False.
                if agent_mask is not None and len(agent_mask) == a_size:
                    mask_matrix[agent_index, :] = [bool(flag) for flag in agent_mask]
            # Cut the flat matrix into one array per discrete branch.
            indices = _generate_split_indices(
                behavior_spec.action_spec.discrete_branches
            )
            action_mask = np.split(mask_matrix, indices, axis=1)
    return (
        DecisionSteps(
            decision_obs_list,
            decision_rewards,
            decision_agent_id,
            action_mask,
            decision_group_id,
            decision_group_rewards,
        ),
        TerminalSteps(
            terminal_obs_list,
            terminal_rewards,
            max_step,
            terminal_agent_id,
            terminal_group_id,
            terminal_group_rewards,
        ),
    )


def _generate_split_indices(dims):
    if len(dims) <= 1:
        return ()
    result = (dims[0],)
    for i in range(len(dims) - 2):
        result += (dims[i + 1] + result[i],)
    return result