Spaces:
Running
Running
"""Implementation of a space that represents closed boxes in euclidean space.""" | |
from typing import Dict, List, Optional, Sequence, SupportsFloat, Tuple, Type, Union | |
import numpy as np | |
import gym.error | |
from gym import logger | |
from gym.spaces.space import Space | |
def _short_repr(arr: np.ndarray) -> str: | |
"""Create a shortened string representation of a numpy array. | |
If arr is a multiple of the all-ones vector, return a string representation of the multiplier. | |
Otherwise, return a string representation of the entire array. | |
Args: | |
arr: The array to represent | |
Returns: | |
A short representation of the array | |
""" | |
if arr.size != 0 and np.min(arr) == np.max(arr): | |
return str(np.min(arr)) | |
return str(arr) | |
def is_float_integer(var) -> bool:
    """Tell whether ``var`` is an integer or floating point scalar.

    The check is made on ``type(var)``, so both Python and numpy scalar types
    are recognised.
    """
    scalar_type = type(var)
    return any(
        np.issubdtype(scalar_type, abstract_kind)
        for abstract_kind in (np.integer, np.floating)
    )
class Box(Space[np.ndarray]):
    r"""A (possibly unbounded) box in :math:`\mathbb{R}^n`.

    Specifically, a Box represents the Cartesian product of n closed intervals.
    Each interval has the form of one of :math:`[a, b]`, :math:`(-\infty, b]`,
    :math:`[a, \infty)`, or :math:`(-\infty, \infty)`.

    There are two common use cases:

    * Identical bound for each dimension::

        >>> Box(low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32)
        Box(-1.0, 2.0, (3, 4), float32)

    * Independent bound for each dimension::

        >>> Box(low=np.array([-1.0, -2.0]), high=np.array([2.0, 4.0]), dtype=np.float32)
        Box([-1. -2.], [2. 4.], (2,), float32)
    """

    def __init__(
        self,
        low: Union[SupportsFloat, np.ndarray],
        high: Union[SupportsFloat, np.ndarray],
        shape: Optional[Sequence[int]] = None,
        dtype: Type = np.float32,
        seed: Optional[Union[int, np.random.Generator]] = None,
    ):
        r"""Constructor of :class:`Box`.

        The argument ``low`` specifies the lower bound of each dimension and ``high`` specifies the upper bounds.
        I.e., the space that is constructed will be the product of the intervals :math:`[\text{low}[i], \text{high}[i]]`.

        If ``low`` (or ``high``) is a scalar, the lower bound (or upper bound, respectively) will be assumed to be
        this value across all dimensions.

        Args:
            low (Union[SupportsFloat, np.ndarray]): Lower bounds of the intervals.
            high (Union[SupportsFloat, np.ndarray]): Upper bounds of the intervals.
            shape (Optional[Sequence[int]]): The shape of the space. When omitted it is taken from ``low`` if that is
                an ``np.ndarray``, otherwise from ``high`` if that is an ``np.ndarray``; if both are integer/float
                scalars the shape defaults to ``(1,)``.
            dtype: The dtype of the elements of the space. If this is an integer type, the :class:`Box` is essentially a discrete space.
            seed: Optionally, you can use this argument to seed the RNG that is used to sample from the space.

        Raises:
            ValueError: If ``shape`` is None and it cannot be inferred because ``low`` and ``high`` are neither
                ``np.ndarray`` nor both integer/float scalars.
        """
        assert (
            dtype is not None
        ), "Box dtype must be explicitly provided, cannot be None."
        self.dtype = np.dtype(dtype)

        # determine shape if it isn't provided directly
        if shape is not None:
            assert all(
                np.issubdtype(type(dim), np.integer) for dim in shape
            ), f"Expect all shape elements to be an integer, actual type: {tuple(type(dim) for dim in shape)}"
            shape = tuple(int(dim) for dim in shape)  # This changes any np types to int
        elif isinstance(low, np.ndarray):
            shape = low.shape
        elif isinstance(high, np.ndarray):
            shape = high.shape
        elif is_float_integer(low) and is_float_integer(high):
            shape = (1,)
        else:
            raise ValueError(
                f"Box shape is inferred from low and high, expect their types to be np.ndarray, an integer or a float, actual type low: {type(low)}, high: {type(high)}"
            )

        # Capture the boundedness information before replacing np.inf with get_inf
        _low = np.full(shape, low, dtype=float) if is_float_integer(low) else low
        self.bounded_below = -np.inf < _low
        _high = np.full(shape, high, dtype=float) if is_float_integer(high) else high
        self.bounded_above = np.inf > _high

        low = _broadcast(low, dtype, shape, inf_sign="-")  # type: ignore
        high = _broadcast(high, dtype, shape, inf_sign="+")  # type: ignore

        assert isinstance(low, np.ndarray)
        assert (
            low.shape == shape
        ), f"low.shape doesn't match provided shape, low.shape: {low.shape}, shape: {shape}"
        assert isinstance(high, np.ndarray)
        assert (
            high.shape == shape
        ), f"high.shape doesn't match provided shape, high.shape: {high.shape}, shape: {shape}"

        self._shape: Tuple[int, ...] = shape

        # Warn only when both bounds carry more precision than the target dtype.
        low_precision = get_precision(low.dtype)
        high_precision = get_precision(high.dtype)
        dtype_precision = get_precision(self.dtype)
        if min(low_precision, high_precision) > dtype_precision:  # type: ignore
            logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
        self.low = low.astype(self.dtype)
        self.high = high.astype(self.dtype)

        # Cached so that __repr__ does not rescan the bound arrays.
        self.low_repr = _short_repr(self.low)
        self.high_repr = _short_repr(self.high)

        super().__init__(self.shape, self.dtype, seed)

    @property
    def shape(self) -> Tuple[int, ...]:
        """Has stricter type than gym.Space - never None."""
        # Exposed as a property: the rest of this class reads ``self.shape`` as a
        # value (np.empty(self.shape), x.shape == self.shape, ...), not a callable.
        return self._shape

    def is_np_flattenable(self):
        """Checks whether this space can be flattened to a :class:`spaces.Box`."""
        # NOTE(review): if the base Space exposes this as a property, this override
        # presumably needs @property as well - confirm against gym.spaces.space.Space.
        return True

    def is_bounded(self, manner: str = "both") -> bool:
        """Checks whether the box is bounded in some sense.

        Args:
            manner (str): One of ``"both"``, ``"below"``, ``"above"``.

        Returns:
            If the space is bounded

        Raises:
            ValueError: If `manner` is neither ``"both"`` nor ``"below"`` or ``"above"``
        """
        below = bool(np.all(self.bounded_below))
        above = bool(np.all(self.bounded_above))
        if manner == "both":
            return below and above
        elif manner == "below":
            return below
        elif manner == "above":
            return above
        else:
            raise ValueError(
                f"manner is not in {{'below', 'above', 'both'}}, actual value: {manner}"
            )

    def sample(self, mask: None = None) -> np.ndarray:
        r"""Generates a single random sample inside the Box.

        In creating a sample of the box, each coordinate is sampled (independently) from a distribution
        that is chosen according to the form of the interval:

        * :math:`[a, b]` : uniform distribution
        * :math:`[a, \infty)` : shifted exponential distribution
        * :math:`(-\infty, b]` : shifted negative exponential distribution
        * :math:`(-\infty, \infty)` : normal distribution

        Args:
            mask: A mask for sampling values from the Box space, currently unsupported.

        Returns:
            A sampled value from the Box

        Raises:
            gym.error.Error: If ``mask`` is not None.
        """
        if mask is not None:
            raise gym.error.Error(
                f"Box.sample cannot be provided a mask, actual value: {mask}"
            )

        # For non-float dtypes the upper bound is pushed up by one so that, after
        # the sample is cast back to the dtype, ``high`` itself can be drawn.
        high = self.high if self.dtype.kind == "f" else self.high.astype("int64") + 1
        sample = np.empty(self.shape)

        # Masking arrays which classify the coordinates according to interval
        # type
        unbounded = ~self.bounded_below & ~self.bounded_above
        upp_bounded = ~self.bounded_below & self.bounded_above
        low_bounded = self.bounded_below & ~self.bounded_above
        bounded = self.bounded_below & self.bounded_above

        # Vectorized sampling by interval type
        sample[unbounded] = self.np_random.normal(size=unbounded[unbounded].shape)

        sample[low_bounded] = (
            self.np_random.exponential(size=low_bounded[low_bounded].shape)
            + self.low[low_bounded]
        )

        sample[upp_bounded] = (
            -self.np_random.exponential(size=upp_bounded[upp_bounded].shape)
            + self.high[upp_bounded]
        )

        sample[bounded] = self.np_random.uniform(
            low=self.low[bounded], high=high[bounded], size=bounded[bounded].shape
        )
        if self.dtype.kind == "i":
            sample = np.floor(sample)

        return sample.astype(self.dtype)

    def contains(self, x) -> bool:
        """Return boolean specifying if x is a valid member of this space."""
        if not isinstance(x, np.ndarray):
            logger.warn("Casting input x to numpy array.")
            try:
                x = np.asarray(x, dtype=self.dtype)
            except (ValueError, TypeError):
                # Anything that cannot be converted to this dtype is not a member.
                return False

        return bool(
            np.can_cast(x.dtype, self.dtype)
            and x.shape == self.shape
            and np.all(x >= self.low)
            and np.all(x <= self.high)
        )

    def to_jsonable(self, sample_n):
        """Convert a batch of samples from this space to a JSONable data type."""
        return np.array(sample_n).tolist()

    def from_jsonable(self, sample_n: Sequence[Union[float, int]]) -> List[np.ndarray]:
        """Convert a JSONable data type to a batch of samples from this space."""
        return [np.asarray(sample) for sample in sample_n]

    def __repr__(self) -> str:
        """A string representation of this space.

        The representation will include bounds, shape and dtype.
        If a bound is uniform, only the corresponding scalar will be given to avoid redundant and ugly strings.

        Returns:
            A representation of the space
        """
        return f"Box({self.low_repr}, {self.high_repr}, {self.shape}, {self.dtype})"

    def __eq__(self, other) -> bool:
        """Check whether `other` is equivalent to this instance. Doesn't check dtype equivalence."""
        return (
            isinstance(other, Box)
            and (self.shape == other.shape)
            # and (self.dtype == other.dtype)
            and np.allclose(self.low, other.low)
            and np.allclose(self.high, other.high)
        )

    def __setstate__(self, state: Dict):
        """Sets the state of the box for unpickling a box with legacy support."""
        super().__setstate__(state)

        # legacy support through re-adding "low_repr" and "high_repr" if missing from pickled state
        if not hasattr(self, "low_repr"):
            self.low_repr = _short_repr(self.low)

        if not hasattr(self, "high_repr"):
            self.high_repr = _short_repr(self.high)
def get_inf(dtype, sign: str) -> SupportsFloat:
    """Return a stand-in for infinity that is representable in ``dtype``.

    Floating point dtypes get a true ``np.inf`` / ``-np.inf``. Signed integer
    dtypes get a value two steps inside the dtype's representable range.

    Args:
        dtype: An `np.dtype`
        sign (str): must be either `"+"` or `"-"`

    Returns:
        The "infinite" value with the requested sign for the dtype

    Raises:
        TypeError: Unknown sign, use either '+' or '-'
        ValueError: Unknown dtype for infinite bounds
    """
    kind = np.dtype(dtype).kind
    # The dtype is validated first, so an unsupported dtype always wins over a bad sign.
    if kind not in ("f", "i"):
        raise ValueError(f"Unknown dtype {dtype} for infinite bounds")
    if sign not in ("+", "-"):
        raise TypeError(f"Unknown sign {sign}, use either '+' or '-'")

    positive = sign == "+"
    if kind == "f":
        return np.inf if positive else -np.inf

    limits = np.iinfo(dtype)
    return limits.max - 2 if positive else limits.min + 2
def get_precision(dtype) -> SupportsFloat:
    """Return the decimal precision of ``dtype``.

    Floating point dtypes report ``np.finfo(dtype).precision``; every other
    dtype reports ``np.inf``.
    """
    if not np.issubdtype(dtype, np.floating):
        return np.inf
    return np.finfo(dtype).precision
def _broadcast(
    value: Union[SupportsFloat, np.ndarray],
    dtype,
    shape: Tuple[int, ...],
    inf_sign: str,
) -> np.ndarray:
    """Swap infinite bounds for dtype-safe values and broadcast scalars to ``shape``.

    Args:
        value: A scalar bound or an array of bounds.
        dtype: The dtype used for the filled array and for the infinity substitute.
        shape: The target shape used when ``value`` is a scalar.
        inf_sign: ``"+"`` or ``"-"``, forwarded to ``get_inf``.

    Returns:
        An array of bounds. An array input without any infinite entry is returned
        unchanged (not cast and not copied).
    """
    if not is_float_integer(value):
        assert isinstance(value, np.ndarray)
        inf_mask = np.isinf(value)
        if not np.any(inf_mask):
            return value
        # Cast a copy and patch only the infinite slots; the input array keeps its np.inf.
        patched = value.astype(dtype)
        patched[inf_mask] = get_inf(dtype, inf_sign)
        return patched

    fill_value = get_inf(dtype, inf_sign) if np.isinf(value) else value  # type: ignore
    return np.full(shape, fill_value, dtype=dtype)