|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
""" |
|
High-level interface for creating HDF5 virtual datasets |
|
""" |
|
|
|
from copy import deepcopy as copy |
|
from collections import namedtuple |
|
|
|
import numpy as np |
|
|
|
from .compat import filename_encode |
|
from .datatype import Datatype |
|
from .selections import SimpleSelection, select |
|
from .. import h5d, h5p, h5s, h5t, h5 |
|
from .. import version |
|
|
|
|
|
class VDSmap(namedtuple('VDSmap', ('vspace', 'file_name', |
|
'dset_name', 'src_space'))): |
|
'''Defines a region in a virtual dataset mapping to part of a source dataset |
|
''' |
|
|
|
|
|
vds_support = False |
|
hdf5_version = version.hdf5_version_tuple[0:3] |
|
|
|
if hdf5_version >= h5.get_config().vds_min_hdf5_version: |
|
vds_support = True |
|
|
|
|
|
def _convert_space_for_key(space, key): |
|
""" |
|
Converts the space with the given key. Mainly used to allow unlimited |
|
dimensions in virtual space selection. |
|
""" |
|
key = key if isinstance(key, tuple) else (key,) |
|
type_code = space.get_select_type() |
|
|
|
|
|
|
|
|
|
if type_code == h5s.SEL_HYPERSLABS and space.is_regular_hyperslab(): |
|
rank = space.get_simple_extent_ndims() |
|
nargs = len(key) |
|
|
|
idx_offset = 0 |
|
start, stride, count, block = space.get_regular_hyperslab() |
|
|
|
|
|
|
|
for i, sl in enumerate(key): |
|
if isinstance(sl, slice): |
|
if sl.stop == h5s.UNLIMITED: |
|
counts = list(count) |
|
idx = i + idx_offset |
|
counts[idx] = h5s.UNLIMITED |
|
count = tuple(counts) |
|
elif sl is Ellipsis: |
|
idx_offset = rank - nargs |
|
|
|
space.select_hyperslab(start, count, stride, block) |
|
|
|
|
|
class VirtualSource: |
|
"""Source definition for virtual data sets. |
|
|
|
Instantiate this class to represent an entire source dataset, and then |
|
slice it to indicate which regions should be used in the virtual dataset. |
|
|
|
path_or_dataset |
|
The path to a file, or an h5py dataset. If a dataset is given, |
|
no other parameters are allowed, as the relevant values are taken from |
|
the dataset instead. |
|
name |
|
The name of the source dataset within the file. |
|
shape |
|
A tuple giving the shape of the dataset. |
|
dtype |
|
Numpy dtype or string. |
|
maxshape |
|
The source dataset is resizable up to this shape. Use None for |
|
axes you want to be unlimited. |
|
""" |
|
def __init__(self, path_or_dataset, name=None, |
|
shape=None, dtype=None, maxshape=None): |
|
from .dataset import Dataset |
|
if isinstance(path_or_dataset, Dataset): |
|
failed = {k: v |
|
for k, v in |
|
{'name': name, 'shape': shape, |
|
'dtype': dtype, 'maxshape': maxshape}.items() |
|
if v is not None} |
|
if failed: |
|
raise TypeError("If a Dataset is passed as the first argument " |
|
"then no other arguments may be passed. You " |
|
"passed {failed}".format(failed=failed)) |
|
ds = path_or_dataset |
|
path = ds.file.filename |
|
name = ds.name |
|
shape = ds.shape |
|
dtype = ds.dtype |
|
maxshape = ds.maxshape |
|
else: |
|
path = path_or_dataset |
|
if name is None: |
|
raise TypeError("The name parameter is required when " |
|
"specifying a source by path") |
|
if shape is None: |
|
raise TypeError("The shape parameter is required when " |
|
"specifying a source by path") |
|
elif isinstance(shape, int): |
|
shape = (shape,) |
|
|
|
if isinstance(maxshape, int): |
|
maxshape = (maxshape,) |
|
|
|
self.path = path |
|
self.name = name |
|
self.dtype = dtype |
|
|
|
if maxshape is None: |
|
self.maxshape = shape |
|
else: |
|
self.maxshape = tuple([h5s.UNLIMITED if ix is None else ix |
|
for ix in maxshape]) |
|
self.sel = SimpleSelection(shape) |
|
|
|
@property |
|
def shape(self): |
|
return self.sel.array_shape |
|
|
|
def __getitem__(self, key): |
|
tmp = copy(self) |
|
tmp.sel = select(self.shape, key, dataset=None) |
|
_convert_space_for_key(tmp.sel.id, key) |
|
return tmp |
|
|
|
class VirtualLayout: |
|
"""Object for building a virtual dataset. |
|
|
|
Instantiate this class to define a virtual dataset, assign to slices of it |
|
(using VirtualSource objects), and then pass it to |
|
group.create_virtual_dataset() to add the virtual dataset to a file. |
|
|
|
This class does not allow access to the data; the virtual dataset must |
|
be created in a file before it can be used. |
|
|
|
shape |
|
A tuple giving the shape of the dataset. |
|
dtype |
|
Numpy dtype or string. |
|
maxshape |
|
The virtual dataset is resizable up to this shape. Use None for |
|
axes you want to be unlimited. |
|
filename |
|
The name of the destination file, if known in advance. Mappings from |
|
data in the same file will be stored with filename '.', allowing the |
|
file to be renamed later. |
|
""" |
|
def __init__(self, shape, dtype, maxshape=None, filename=None): |
|
self.shape = (shape,) if isinstance(shape, int) else shape |
|
self.dtype = dtype |
|
self.maxshape = (maxshape,) if isinstance(maxshape, int) else maxshape |
|
self._filename = filename |
|
self._src_filenames = set() |
|
self.dcpl = h5p.create(h5p.DATASET_CREATE) |
|
|
|
def __setitem__(self, key, source): |
|
sel = select(self.shape, key, dataset=None) |
|
_convert_space_for_key(sel.id, key) |
|
src_filename = self._source_file_name(source.path, self._filename) |
|
|
|
self.dcpl.set_virtual( |
|
sel.id, src_filename, source.name.encode('utf-8'), source.sel.id |
|
) |
|
if self._filename is None: |
|
self._src_filenames.add(src_filename) |
|
|
|
@staticmethod |
|
def _source_file_name(src_filename, dst_filename) -> bytes: |
|
src_filename = filename_encode(src_filename) |
|
if dst_filename and (src_filename == filename_encode(dst_filename)): |
|
|
|
|
|
|
|
return b'.' |
|
return filename_encode(src_filename) |
|
|
|
def _get_dcpl(self, dst_filename): |
|
"""Get the property list containing virtual dataset mappings |
|
|
|
If the destination filename wasn't known when the VirtualLayout was |
|
created, it is handled here. |
|
""" |
|
dst_filename = filename_encode(dst_filename) |
|
if self._filename is not None: |
|
|
|
if dst_filename != filename_encode(self._filename): |
|
raise Exception(f"{dst_filename!r} != {self._filename!r}") |
|
return self.dcpl |
|
|
|
|
|
if dst_filename in self._src_filenames: |
|
|
|
|
|
|
|
new_dcpl = h5p.create(h5p.DATASET_CREATE) |
|
for i in range(self.dcpl.get_virtual_count()): |
|
src_filename = self.dcpl.get_virtual_filename(i) |
|
new_dcpl.set_virtual( |
|
self.dcpl.get_virtual_vspace(i), |
|
self._source_file_name(src_filename, dst_filename), |
|
self.dcpl.get_virtual_dsetname(i).encode('utf-8'), |
|
self.dcpl.get_virtual_srcspace(i), |
|
) |
|
return new_dcpl |
|
else: |
|
return self.dcpl |
|
|
|
def make_dataset(self, parent, name, fillvalue=None): |
|
""" Return a new low-level dataset identifier for a virtual dataset """ |
|
dcpl = self._get_dcpl(parent.file.filename) |
|
|
|
if fillvalue is not None: |
|
dcpl.set_fill_value(np.array([fillvalue])) |
|
|
|
maxshape = self.maxshape |
|
if maxshape is not None: |
|
maxshape = tuple(m if m is not None else h5s.UNLIMITED for m in maxshape) |
|
|
|
virt_dspace = h5s.create_simple(self.shape, maxshape) |
|
|
|
if isinstance(self.dtype, Datatype): |
|
|
|
tid = self.dtype.id |
|
else: |
|
dtype = np.dtype(self.dtype) |
|
tid = h5t.py_create(dtype, logical=1) |
|
|
|
return h5d.create(parent.id, name=name, tid=tid, space=virt_dspace, |
|
dcpl=dcpl) |
|
|