|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
""" |
|
High-level access to HDF5 dataspace selections |
|
""" |
|
|
|
import numpy as np |
|
|
|
from .base import product |
|
from .. import h5s, h5r, _selector |
|
|
|
def select(shape, args, dataset=None):
    """ Turn the arguments passed to __getitem__ into a selection object.

    shape
        Shape of the "source" dataspace.

    args
        Either a single indexing argument or a tuple of them.  A lone
        argument is wrapped in a 1-tuple before anything else happens.

    dataset
        A h5py.Dataset instance representing the source dataset, or None.
        It is required to resolve a region reference; when supplied, its
        ``_selector`` attribute handles ordinary indexing.

    When exactly one argument is given, these kinds are handled here:

    Selection instance
        Returned as-is, provided its shape equals ``shape``.

    Boolean numpy.ndarray
        Must have exactly ``shape``.  Returns a PointSelection instance
        built with PointSelection.from_mask().

    RegionReference
        Resolved against ``dataset``.  Returns a Selection instance.

    Anything else (indices, slices, ellipses, MultiBlockSlices, lists,
    per-axis boolean arrays, or several arguments) is delegated to a
    selector's make_selection(), which is expected to return a
    SimpleSelection or FancySelection instance.

    Raises TypeError if a shape check fails or if a region reference is
    given without a dataset.
    """
    index = args if isinstance(args, tuple) else (args,)

    # "Special" indexing objects are only recognised when they stand alone
    if len(index) == 1:
        sole = index[0]

        if isinstance(sole, Selection):
            if sole.shape != shape:
                raise TypeError("Mismatched selection shape")
            return sole

        if isinstance(sole, np.ndarray) and sole.dtype.kind == 'b':
            if sole.shape != shape:
                raise TypeError("Boolean indexing array has incompatible shape")
            return PointSelection.from_mask(sole)

        if isinstance(sole, h5r.RegionReference):
            if dataset is None:
                raise TypeError("Cannot apply a region reference without a dataset")
            region_space = h5r.get_region(sole, dataset.id)
            if shape != region_space.shape:
                raise TypeError("Reference shape does not match dataset shape")
            return Selection(shape, spaceid=region_space)

    # General case: use the dataset's selector, or build a throwaway one
    if dataset is None:
        selector = _selector.Selector(h5s.create_simple(shape))
    else:
        selector = dataset._selector

    return selector.make_selection(index)
|
|
|
|
|
class Selection:

    """
        Base class for HDF5 dataspace selections.  Subclasses implement the
        "selection protocol", i.e. they provide at least these members:

        __init__(shape)   => Create a new selection on "shape"-tuple
        __getitem__(args) => Perform a selection with the range specified.
                             What args are allowed depends on the
                             particular subclass in use.

        id (read-only) =>      h5py.h5s.SpaceID instance
        shape (read-only) =>   The shape of the dataspace.
        mshape  (read-only) => The shape of the selection region.
                               Not guaranteed to fit within "shape", although
                               the total number of points is less than
                               product(shape).
        nselect (read-only) => Number of selected points.  Always equal to
                               product(mshape).

        broadcast(target_shape) => Return an iterable which yields dataspaces
                                   for read, based on target_shape.

        This base class itself models "unshaped" selections, which are
        always reported as 1-D.
    """

    def __init__(self, shape, spaceid=None):
        """ Create a selection.

        When ``spaceid`` is supplied it is adopted directly and the shape is
        read from it, so ``shape`` may be None.  Otherwise a new dataspace of
        the given shape is created with every element selected.
        """
        if spaceid is None:
            dims = tuple(shape)
            self._shape = dims
            # Maximum extent is unlimited along every axis
            self._id = h5s.create_simple(dims, (h5s.UNLIMITED,) * len(dims))
            self._id.select_all()
        else:
            self._id = spaceid
            self._shape = spaceid.shape

    @property
    def id(self):
        """ The underlying SpaceID instance """
        return self._id

    @property
    def shape(self):
        """ Shape of the whole dataspace """
        return self._shape

    @property
    def nselect(self):
        """ How many elements are currently selected """
        return self._id.get_select_npoints()

    @property
    def mshape(self):
        """ Shape of the selection (always 1-D for this class) """
        npoints = self.nselect
        return (npoints,)

    @property
    def array_shape(self):
        """ Shape of the array to read/write (always 1-D for this class) """
        return self.mshape

    # expand_shape and broadcast only really make sense for SimpleSelection

    def expand_shape(self, source_shape):
        """ Return ``source_shape`` unchanged if its element count matches.

        Raises TypeError when product(source_shape) differs from the number
        of selected points.
        """
        source_size = product(source_shape)
        if source_size != self.nselect:
            raise TypeError("Broadcasting is not supported for point-wise selections")
        return source_shape

    def broadcast(self, source_shape):
        """ Get an iterable for broadcasting.

        Yields this selection's dataspace exactly once.  Being a generator,
        the size check (and its TypeError) only runs once iteration starts.
        """
        source_size = product(source_shape)
        if source_size != self.nselect:
            raise TypeError("Broadcasting is not supported for point-wise selections")
        yield self._id

    def __getitem__(self, args):
        """ Indexing is left to subclasses; the base class always raises. """
        raise NotImplementedError("This class does not support indexing")
|
|
|
class PointSelection(Selection):

    """
        Represents a point-wise selection.  You can supply sequences of
        points to the three methods append(), prepend() and set(), or
        instantiate it with a single boolean array using from_mask().
    """

    def __init__(self, shape, spaceid=None, points=None):
        """ Create the selection, optionally selecting ``points`` right away.

        ``shape`` and ``spaceid`` are passed to Selection.__init__().  If
        ``points`` is not None it replaces whatever selection the dataspace
        currently has.
        """
        super().__init__(shape, spaceid)
        if points is not None:
            self._perform_selection(points, h5s.SELECT_SET)

    def _perform_selection(self, points, op):
        """ Internal method which actually performs the selection.

        points
            Sequence of coordinates, converted to a C-ordered uint64 array.
            A 1-D sequence is treated as a single point.

        op
            One of the h5s.SELECT_* operators.  It is overridden with
            SELECT_SET when the dataspace does not currently hold a point
            selection.
        """
        points = np.asarray(points, order='C', dtype='u8')
        if points.ndim == 1:
            # np.asarray hands back the caller's own array when it already
            # has the right dtype and layout.  Assigning to ``.shape`` would
            # then reshape the caller's data in place, so take a reshaped
            # view instead.
            points = points.reshape((1, points.shape[0]))

        if self._id.get_select_type() != h5s.SEL_POINTS:
            op = h5s.SELECT_SET

        if len(points) == 0:
            self._id.select_none()
        else:
            self._id.select_elements(points, op)

    @classmethod
    def from_mask(cls, mask, spaceid=None):
        """ Create a point-wise selection from a NumPy boolean array.

        Every True element of ``mask`` becomes one selected point, and the
        dataspace shape is taken from ``mask.shape``.  Raises TypeError if
        ``mask`` is not a boolean numpy.ndarray.
        """
        if not (isinstance(mask, np.ndarray) and mask.dtype.kind == 'b'):
            raise TypeError("PointSelection.from_mask only works with bool arrays")

        # nonzero() gives one index array per axis; transpose to one row per point
        points = np.transpose(mask.nonzero())
        return cls(mask.shape, spaceid, points=points)

    def append(self, points):
        """ Add the sequence of points to the end of the current selection """
        self._perform_selection(points, h5s.SELECT_APPEND)

    def prepend(self, points):
        """ Add the sequence of points to the beginning of the current selection """
        self._perform_selection(points, h5s.SELECT_PREPEND)

    def set(self, points):
        """ Replace the current selection with the given sequence of points """
        self._perform_selection(points, h5s.SELECT_SET)
|
|
|
|
|
class SimpleSelection(Selection):

    """ A single "rectangular" (regular) selection composed of only slices
        and integer arguments.  Can participate in broadcasting.

        The selection is described by ``self._sel``, a 4-tuple
        ``(start, count, step, scalar)`` with one entry per dataspace axis.
        ``scalar`` flags the axes whose length-1 extent is dropped from
        ``array_shape``.
    """

    @property
    def mshape(self):
        """ Shape of current selection (the per-axis counts) """
        return self._sel[1]

    @property
    def array_shape(self):
        """ Shape of the array to read/write: mshape without the scalar axes """
        scalar = self._sel[3]
        return tuple(x for x, s in zip(self.mshape, scalar) if not s)

    def __init__(self, shape, spaceid=None, hyperslab=None):
        """ Create the selection.

        ``hyperslab`` is the ``(start, count, step, scalar)`` description of
        the selected region.  When omitted, the description covers the whole
        dataspace with unit steps and no scalar axes.
        """
        super().__init__(shape, spaceid)
        if hyperslab is not None:
            self._sel = hyperslab
        else:
            # No hyperslab specified - select all
            rank = len(self.shape)
            self._sel = ((0,)*rank, self.shape, (1,)*rank, (False,)*rank)

    def expand_shape(self, source_shape):
        """Match the dimensions of an array to be broadcast to the selection

        The returned shape describes an array of the same size as the input
        shape, but its dimensions line up one-to-one with the axes of the
        selection: a length-1 axis is inserted for every axis selected with
        a scalar index and for every leading axis the source does not cover.

        E.g. with a dataset shape (10, 5, 4, 2), writing like this::

            ds[..., 0] = np.ones((5, 4))

        The source shape (5, 4) will expand to (1, 5, 4, 1).
        Then the broadcast method below repeats that chunk 10
        times to write to an effective shape of (10, 5, 4, 1).

        Raises TypeError if a source dimension is neither 1 nor equal to the
        matching selection count, or if unmatched source dimensions larger
        than 1 are left over.
        """
        start, count, step, scalar = self._sel

        rank = len(count)
        remaining_src_dims = list(source_shape)

        # Walk the axes from last to first, consuming source dimensions
        eshape = []
        for idx in range(1, rank + 1):
            if len(remaining_src_dims) == 0 or scalar[-idx]:  # Skip scalar axes
                eshape.append(1)
            else:
                t = remaining_src_dims.pop()
                if t == 1 or count[-idx] == t:
                    eshape.append(t)
                else:
                    raise TypeError("Can't broadcast %s -> %s" % (source_shape, self.array_shape))

        if any(n > 1 for n in remaining_src_dims):
            # All dimensions from source_shape should either have been popped
            # to match the selection shape, or be 1.
            raise TypeError("Can't broadcast %s -> %s" % (source_shape, self.array_shape))

        # We have built eshape backwards, so now reverse it
        return tuple(eshape[::-1])

    def broadcast(self, source_shape):
        """ Return an iterator over target dataspaces for broadcasting.

        Follows the standard NumPy broadcasting rules against the current
        selection shape (self.mshape).

        When the source fills the selection in one go, this selection's own
        dataspace is yielded once.  Otherwise a single copied dataspace is
        yielded repeatedly with a different offset each time, so each
        yielded value must be used before the iterator is advanced.

        Raises TypeError (once iteration starts) if ``source_shape`` cannot
        be broadcast to the selection.
        """
        if self.shape == ():
            if product(source_shape) != 1:
                # Wrap in a 1-tuple: source_shape is itself a tuple, and
                # "%s" % (2, 3) would fail with an unrelated formatting error
                raise TypeError("Can't broadcast %s to scalar" % (source_shape,))
            self._id.select_all()
            yield self._id
            return

        start, count, step, scalar = self._sel

        rank = len(count)
        tshape = self.expand_shape(source_shape)

        # Number of times the source block repeats along each axis
        chunks = tuple(x//y for x, y in zip(count, tshape))
        nchunks = product(chunks)

        if nchunks == 1:
            yield self._id
        else:
            sid = self._id.copy()
            sid.select_hyperslab((0,)*rank, tshape, step)
            for idx in range(nchunks):
                # Chunk index along each axis -> element offset in the dataspace
                offset = tuple(x*y*z + s for x, y, z, s in zip(np.unravel_index(idx, chunks), tshape, step, start))
                sid.offset_simple(offset)
                yield sid
|
|
|
|
|
class FancySelection(Selection):

    """
        Adds advanced NumPy-style selection operations on top of the
        standard slice-and-int behavior.

        Indexing arguments may be ints, slices, lists of indices, or
        per-axis (1D) boolean arrays.

        Broadcasting is not supported for these selections.
    """

    @property
    def mshape(self):
        """ Shape of the selection, as stored at construction time """
        return self._mshape

    @property
    def array_shape(self):
        """ Shape of the array to read/write, as stored at construction time """
        return self._array_shape

    def __init__(self, shape, spaceid=None, mshape=None, array_shape=None):
        """ Create the selection.

        ``mshape`` defaults to the dataspace shape, and ``array_shape``
        defaults to ``mshape``.
        """
        super().__init__(shape, spaceid)
        selection_shape = self.shape if mshape is None else mshape
        self._mshape = selection_shape
        self._array_shape = selection_shape if array_shape is None else array_shape

    def expand_shape(self, source_shape):
        """ Return ``source_shape`` if it equals array_shape, else raise TypeError """
        if source_shape == self.array_shape:
            return source_shape
        raise TypeError("Broadcasting is not supported for complex selections")

    def broadcast(self, source_shape):
        """ Yield this selection's dataspace once.

        Raises TypeError (once iteration starts) unless ``source_shape``
        equals array_shape.
        """
        if source_shape == self.array_shape:
            yield self._id
            return
        raise TypeError("Broadcasting is not supported for complex selections")
|
|
|
|
|
def guess_shape(sid):
    """ Given a dataspace, try to deduce the shape of the selection.

    sid
        The dataspace (h5s.SpaceID) whose current selection is inspected.

    Returns one of:
        * A tuple with the selection shape, same length as the dataspace
        * A 1D selection shape for point-based and multiple-hyperslab selections
        * None, for unselected scalars and for NULL dataspaces

    Raises TypeError for an unrecognized dataspace class or selection type.
    """

    sel_class = sid.get_simple_extent_type()    # Dataspace class
    sel_type = sid.get_select_type()            # Flavor of selection in use

    if sel_class == h5s.NULL:
        # Nothing to describe for a NULL dataspace
        return None

    elif sel_class == h5s.SCALAR:
        # An unselected scalar is reported as None, a selected one as ()
        if sel_type == h5s.SEL_NONE:
            return None
        if sel_type == h5s.SEL_ALL:
            return tuple()

    elif sel_class != h5s.SIMPLE:
        raise TypeError("Unrecognized dataspace class %s" % sel_class)

    # We have a "simple" (rank >= 1) dataspace

    N = sid.get_select_npoints()
    rank = len(sid.shape)

    if sel_type == h5s.SEL_NONE:
        return (0,)*rank

    elif sel_type == h5s.SEL_ALL:
        return sid.shape

    elif sel_type == h5s.SEL_POINTS:
        # Point selections are always reported as 1-D, matching the
        # documented return values above
        return (N,)

    elif sel_type != h5s.SEL_HYPERSLABS:
        raise TypeError("Unrecognized selection method %s" % sel_type)

    # We have a hyperslab-based selection

    if N == 0:
        return (0,)*rank

    bottomcorner, topcorner = (np.array(x) for x in sid.get_select_bounds())

    # Shape of the bounding box around the selection.  Adding the integer 1
    # (rather than a float array of ones) keeps the counts handed to
    # select_hyperslab() integral.
    boxshape = topcorner - bottomcorner + 1

    def get_n_axis(sid, axis):
        """ Determine the number of elements selected along a particular axis.

        To do this, we "mask off" the axis by making a hyperslab selection
        which leaves only the first point along the axis.  For a 2D dataset
        with selection box shape (X, Y), for axis 1, this would leave a
        selection of shape (X, 1).  We count the number of points N_leftover
        remaining in the selection and compute the axis selection length by
        N_axis = N/N_leftover.
        """

        if boxshape[axis] == 1:
            return 1

        # Block covering the whole bounding box except its first plane
        # along ``axis``
        start = bottomcorner.copy()
        start[axis] += 1
        count = boxshape.copy()
        count[axis] -= 1

        # Work on a copy so the caller's selection is left untouched.
        # SELECT_NOTB keeps the existing selection minus the new block.
        masked_sid = sid.copy()
        masked_sid.select_hyperslab(tuple(start), tuple(count), op=h5s.SELECT_NOTB)

        N_leftover = masked_sid.get_select_npoints()

        return N//N_leftover

    shape = tuple(get_n_axis(sid, x) for x in range(rank))

    # np.prod replaces np.product, which was removed in NumPy 2.0
    if np.prod(shape) != N:
        # The per-axis counts don't multiply out to the number of selected
        # points, so the selection is not a single regular block; fall back
        # to a 1D shape.
        return (N,)

    return shape
|
|