Spaces:
Running
Running
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License:  Standard 3-clause BSD; see "license.txt" for full license terms
#           and contributor agreement.
"""
    High-level access to HDF5 dataspace selections
"""
import numpy as np | |
from .base import product | |
from .. import h5s, h5r, _selector | |
def select(shape, args, dataset=None):
    """ Build a selection object from the arguments given to __getitem__.

    shape
        Shape of the "source" dataspace.

    args
        A single indexing argument, or a tuple of them.  The supported
        kinds are listed below.

    dataset
        Optional h5py.Dataset instance for the source dataset.  Required
        when applying a region reference; when given, its ``_selector``
        is used for ordinary indexing.

    Argument kinds:

    Single Selection instance
        Returned unchanged, after checking its shape against ``shape``.

    numpy.ndarray
        Must be a boolean mask with the same shape as the dataspace.
        Returns a PointSelection instance.

    RegionReference
        Returns a Selection instance wrapping the referenced region.

    Indices, slices, ellipses, MultiBlockSlices only
        Returns a SimpleSelection instance

    Indices, slices, ellipses, lists or boolean index arrays
        Returns a FancySelection instance.

    Raises TypeError when a special argument does not match ``shape``, or
    when a region reference is supplied without a dataset.
    """
    index_args = args if isinstance(args, tuple) else (args,)

    # A lone argument may be one of the "special" indexing objects
    if len(index_args) == 1:
        (single,) = index_args

        if isinstance(single, Selection):
            if single.shape != shape:
                raise TypeError("Mismatched selection shape")
            return single

        if isinstance(single, np.ndarray) and single.dtype.kind == 'b':
            if single.shape != shape:
                raise TypeError("Boolean indexing array has incompatible shape")
            return PointSelection.from_mask(single)

        if isinstance(single, h5r.RegionReference):
            if dataset is None:
                raise TypeError("Cannot apply a region reference without a dataset")
            region_space = h5r.get_region(single, dataset.id)
            if shape != region_space.shape:
                raise TypeError("Reference shape does not match dataset shape")
            return Selection(shape, spaceid=region_space)

    # Everything else is delegated to a Selector: the dataset's own one if
    # we have a dataset, otherwise one made for a fresh dataspace of `shape`.
    if dataset is None:
        selector = _selector.Selector(h5s.create_simple(shape))
    else:
        selector = dataset._selector

    return selector.make_selection(index_args)
class Selection:

    """
        Base class for HDF5 dataspace selections.  Subclasses support the
        "selection protocol", which means they have at least the following
        members:

        __init__(shape)   => Create a new selection on "shape"-tuple
        __getitem__(args) => Perform a selection with the range specified.
                             What args are allowed depends on the
                             particular subclass in use.

        id (read-only) =>      h5py.h5s.SpaceID instance
        shape (read-only) =>   The shape of the dataspace.
        mshape  (read-only) => The shape of the selection region.
                               Not guaranteed to fit within "shape", although
                               the total number of points is no greater than
                               product(shape).
        nselect (read-only) => Number of selected points.  Always equal to
                               product(mshape).

        broadcast(target_shape) => Return an iterable which yields dataspaces
                                   for read, based on target_shape.

        The base class represents "unshaped" selections (1-D).
    """

    def __init__(self, shape, spaceid=None):
        """ Create a selection.  Shape may be None if spaceid is given.

        When spaceid is given it is used as-is and the shape is taken from
        it; otherwise a new dataspace of the given shape is created and
        fully selected.
        """
        if spaceid is not None:
            self._id = spaceid
            self._shape = spaceid.shape
        else:
            shape = tuple(shape)
            self._shape = shape
            self._id = h5s.create_simple(shape, (h5s.UNLIMITED,)*len(shape))
            self._id.select_all()

    # The accessors below are read as plain attributes throughout this
    # module (e.g. ``len(self.shape)``, ``(self.nselect,)``), so they must
    # be properties rather than ordinary methods.

    @property
    def id(self):
        """ SpaceID instance """
        return self._id

    @property
    def shape(self):
        """ Shape of whole dataspace """
        return self._shape

    @property
    def nselect(self):
        """ Number of elements currently selected """
        return self._id.get_select_npoints()

    @property
    def mshape(self):
        """ Shape of selection (always 1-D for this class) """
        return (self.nselect,)

    @property
    def array_shape(self):
        """Shape of array to read/write (always 1-D for this class)"""
        return self.mshape

    # expand_shape and broadcast only really make sense for SimpleSelection
    def expand_shape(self, source_shape):
        """ Return source_shape unchanged if it holds exactly nselect
        elements; raise TypeError otherwise. """
        if product(source_shape) != self.nselect:
            raise TypeError("Broadcasting is not supported for point-wise selections")
        return source_shape

    def broadcast(self, source_shape):
        """ Get an iterable for broadcasting

        This is a generator: the element-count check (and its TypeError)
        only happens once iteration starts.  It yields the selection's own
        dataspace exactly once.
        """
        if product(source_shape) != self.nselect:
            raise TypeError("Broadcasting is not supported for point-wise selections")
        yield self._id

    def __getitem__(self, args):
        """ Indexing is not supported; always raises NotImplementedError. """
        raise NotImplementedError("This class does not support indexing")
class PointSelection(Selection):

    """
        Represents a point-wise selection.  You can supply sequences of
        points to the three methods append(), prepend() and set(), or
        instantiate it with a single boolean array using from_mask().
    """

    def __init__(self, shape, spaceid=None, points=None):
        """ Create the selection; if points is given, select exactly those
        points (same as calling set(points)). """
        super().__init__(shape, spaceid)
        if points is not None:
            self._perform_selection(points, h5s.SELECT_SET)

    def _perform_selection(self, points, op):
        """ Internal method which actually performs the selection

        points
            Sequence of point coordinates; a 1-D sequence is treated as the
            coordinates of a single point.
        op
            One of the h5s.SELECT_* operators.  It is forced to SELECT_SET
            when the dataspace does not currently hold a point selection.
        """
        points = np.asarray(points, order='C', dtype='u8')
        if points.ndim == 1:
            # np.asarray may hand back the caller's own array, so build a
            # reshaped view instead of assigning to .shape in place.
            points = points.reshape((1, points.shape[0]))

        if self._id.get_select_type() != h5s.SEL_POINTS:
            op = h5s.SELECT_SET

        if len(points) == 0:
            self._id.select_none()
        else:
            self._id.select_elements(points, op)

    @classmethod
    def from_mask(cls, mask, spaceid=None):
        """Create a point-wise selection from a NumPy boolean array

        The dataspace shape is taken from mask.shape, and the selected
        points are the coordinates where mask is True.  Raises TypeError
        if mask is not a boolean ndarray.
        """
        if not (isinstance(mask, np.ndarray) and mask.dtype.kind == 'b'):
            raise TypeError("PointSelection.from_mask only works with bool arrays")

        # nonzero() gives one index array per axis; transpose to get one
        # row of coordinates per selected point.
        points = np.transpose(mask.nonzero())
        return cls(mask.shape, spaceid, points=points)

    def append(self, points):
        """ Add the sequence of points to the end of the current selection """
        self._perform_selection(points, h5s.SELECT_APPEND)

    def prepend(self, points):
        """ Add the sequence of points to the beginning of the current selection """
        self._perform_selection(points, h5s.SELECT_PREPEND)

    def set(self, points):
        """ Replace the current selection with the given sequence of points"""
        self._perform_selection(points, h5s.SELECT_SET)
class SimpleSelection(Selection):

    """ A single "rectangular" (regular) selection composed of only slices
        and integer arguments.  Can participate in broadcasting.

        The selection is stored in ``self._sel`` as a 4-tuple
        ``(start, count, step, scalar)`` with one entry per dataspace axis;
        ``scalar`` flags the axes that are dropped from the array shape.
    """

    def __init__(self, shape, spaceid=None, hyperslab=None):
        """ Create the selection.  hyperslab, if given, is the
        (start, count, step, scalar) tuple describing it; otherwise the
        whole dataspace is selected. """
        super().__init__(shape, spaceid)
        if hyperslab is not None:
            self._sel = hyperslab
        else:
            # No hyperslab specified - select all
            rank = len(self.shape)
            self._sel = ((0,)*rank, self.shape, (1,)*rank, (False,)*rank)

    # mshape / array_shape are read as attributes (e.g. in zip() and in
    # error messages below), so they must be properties.

    @property
    def mshape(self):
        """ Shape of current selection """
        return self._sel[1]

    @property
    def array_shape(self):
        """ Shape of the array to read/write: mshape without scalar axes """
        scalar = self._sel[3]
        return tuple(x for x, s in zip(self.mshape, scalar) if not s)

    def expand_shape(self, source_shape):
        """Match the dimensions of an array to be broadcast to the selection

        The returned shape describes an array of the same size as the input
        shape, but with one entry per axis of the selection: scalar axes
        and missing leading axes are filled in with 1.

        E.g. with a dataset shape (10, 5, 4, 2), writing like this::

            ds[..., 0] = np.ones((5, 4))

        The source shape (5, 4) will expand to (1, 5, 4, 1).
        Then the broadcast method below repeats that chunk 10
        times to write to an effective shape of (10, 5, 4, 1).

        Raises TypeError if source_shape cannot be broadcast to the
        selection.
        """
        start, count, step, scalar = self._sel

        rank = len(count)
        remaining_src_dims = list(source_shape)

        # Walk the axes from last to first, consuming source dimensions
        # from the end, as in NumPy broadcasting.
        eshape = []
        for idx in range(1, rank + 1):
            if len(remaining_src_dims) == 0 or scalar[-idx]:  # Skip scalar axes
                eshape.append(1)
            else:
                t = remaining_src_dims.pop()
                if t == 1 or count[-idx] == t:
                    eshape.append(t)
                else:
                    raise TypeError("Can't broadcast %s -> %s" % (source_shape, self.array_shape))  # array shape

        if any(n > 1 for n in remaining_src_dims):
            # All dimensions from target_shape should either have been popped
            # to match the selection shape, or be 1.
            raise TypeError("Can't broadcast %s -> %s" % (source_shape, self.array_shape))  # array shape

        # We have built eshape backwards, so now reverse it
        return tuple(eshape[::-1])

    def broadcast(self, source_shape):
        """ Return an iterator over target dataspaces for broadcasting.

        Follows the standard NumPy broadcasting rules against the current
        selection shape (self.mshape).

        When more than one chunk is needed, the same copied dataspace
        object is yielded each time, with its offset moved to the next
        chunk; consume each yielded value before advancing the iterator.
        """
        if self.shape == ():
            if product(source_shape) != 1:
                # Wrap in a 1-tuple: source_shape is itself a tuple, and
                # passing it bare to % would spread it over the format.
                raise TypeError("Can't broadcast %s to scalar" % (source_shape,))
            self._id.select_all()
            yield self._id
            return

        start, count, step, scalar = self._sel

        rank = len(count)
        tshape = self.expand_shape(source_shape)

        # Number of source-sized chunks needed along each axis
        chunks = tuple(x//y for x, y in zip(count, tshape))
        nchunks = product(chunks)

        if nchunks == 1:
            yield self._id
        else:
            sid = self._id.copy()
            sid.select_hyperslab((0,)*rank, tshape, step)
            for idx in range(nchunks):
                # Position of this chunk: chunk index * chunk size * step,
                # shifted by the start of the selection.
                offset = tuple(x*y*z + s for x, y, z, s in zip(np.unravel_index(idx, chunks), tshape, step, start))
                sid.offset_simple(offset)
                yield sid
class FancySelection(Selection):

    """
        Implements advanced NumPy-style selection operations in addition to
        the standard slice-and-int behavior.

        Indexing arguments may be ints, slices, lists of indices, or
        per-axis (1D) boolean arrays.

        Broadcasting is not supported for these selections.
    """

    def __init__(self, shape, spaceid=None, mshape=None, array_shape=None):
        """ Create the selection.

        mshape defaults to the dataspace shape, and array_shape defaults
        to mshape.
        """
        super().__init__(shape, spaceid)
        if mshape is None:
            mshape = self.shape
        if array_shape is None:
            array_shape = mshape
        self._mshape = mshape
        self._array_shape = array_shape

    # Read as attributes (compared directly against shape tuples below),
    # so these must be properties.

    @property
    def mshape(self):
        """ Shape of the selection, as given at construction """
        return self._mshape

    @property
    def array_shape(self):
        """ Shape of the array to read/write, as given at construction """
        return self._array_shape

    def expand_shape(self, source_shape):
        """ Return source_shape unchanged if it equals array_shape; raise
        TypeError otherwise. """
        if source_shape != self.array_shape:
            raise TypeError("Broadcasting is not supported for complex selections")
        return source_shape

    def broadcast(self, source_shape):
        """ Yield the selection's dataspace once if source_shape equals
        array_shape; raise TypeError (on iteration) otherwise. """
        if source_shape != self.array_shape:
            raise TypeError("Broadcasting is not supported for complex selections")
        yield self._id
def guess_shape(sid):
    """ Work out the shape of the selection held by a dataspace.

    The result is one of:

    * A tuple giving the selection shape, with as many entries as the
      dataspace has dimensions
    * A 1D shape ``(npoints,)`` for point-based and multiple-hyperslab
      selections
    * None, for unselected scalars and for NULL dataspaces

    Raises TypeError for an unrecognized dataspace class or selection
    method.
    """
    space_class = sid.get_simple_extent_type()    # Dataspace class
    selection_kind = sid.get_select_type()        # Flavor of selection in use

    # NULL dataspaces don't support selections
    if space_class == h5s.NULL:
        return None

    if space_class == h5s.SCALAR:
        # There is no NumPy shape for an empty 0-rank selection, hence None
        if selection_kind == h5s.SEL_NONE:
            return None
        if selection_kind == h5s.SEL_ALL:
            return tuple()
        # Any other selection kind continues into the code below
    elif space_class != h5s.SIMPLE:
        raise TypeError("Unrecognized dataspace class %s" % space_class)

    # From here on: a "simple" (rank >= 1) dataspace
    npoints = sid.get_select_npoints()
    rank = len(sid.shape)

    if selection_kind == h5s.SEL_NONE:
        return (0,)*rank

    if selection_kind == h5s.SEL_ALL:
        return sid.shape

    if selection_kind == h5s.SEL_POINTS:
        # As in NumPy, a point-based selection gives a 1D result whatever
        # the rank of the dataspace
        return (npoints,)

    if selection_kind != h5s.SEL_HYPERSLABS:
        raise TypeError("Unrecognized selection method %s" % selection_kind)

    # Hyperslab-based selection
    if npoints == 0:
        return (0,)*rank

    lower, upper = (np.array(corner) for corner in sid.get_select_bounds())

    # Extent of the bounding box around the whole selection
    box = upper - lower + np.ones((rank,))

    def axis_length(axis):
        """ Count the elements selected along one axis.

        A copy of the dataspace has everything in the bounding box beyond
        the first position along `axis` removed from its selection (via
        SELECT_NOTB).  For a 2D box of shape (X, Y) and axis 1, that leaves
        a selection of shape (X, 1).  The axis length is then the total
        point count divided by the number of points left over.
        """
        if box[axis] == 1:
            return 1

        mask_start = lower.copy()
        mask_start[axis] += 1
        mask_count = box.copy()
        mask_count[axis] -= 1

        # Drop every point along this axis except the first
        trimmed = sid.copy()
        trimmed.select_hyperslab(tuple(mask_start), tuple(mask_count), op=h5s.SELECT_NOTB)

        return npoints // trimmed.get_select_npoints()

    per_axis = tuple(axis_length(axis) for axis in range(rank))

    # If the per-axis lengths don't account for every point, several
    # hyperslab selections are in effect; fall back to a 1D shape.
    return per_axis if product(per_axis) == npoints else (npoints,)