File size: 10,694 Bytes
096c926 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 |
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.
"""
Implements high-level operations for attributes.
Provides the AttributeManager class, available on high-level objects
as <obj>.attrs.
"""
import numpy
import uuid
from .. import h5, h5s, h5t, h5a, h5p
from . import base
from .base import phil, with_phil, Empty, is_empty_dataspace, product
from .datatype import Datatype
class AttributeManager(base.MutableMappingHDF5, base.CommonStateObject):
    """
    Allows dictionary-style access to an HDF5 object's attributes.

    These are created exclusively by the library and are available as
    a Python attribute at <object>.attrs

    Like Group objects, attributes provide a minimal dictionary-
    style interface.  Anything which can be reasonably converted to a
    Numpy array or Numpy scalar can be stored.

    Attributes are automatically created on assignment with the
    syntax <obj>.attrs[name] = value, with the HDF5 type automatically
    deduced from the value.  Existing attributes are overwritten.

    To modify an existing attribute while preserving its type, use the
    method modify().  To specify an attribute of a particular type and
    shape, use create().

    Note: names are passed through ``self._e`` before being handed to the
    low-level ``h5a`` calls and through ``self._d`` when read back; both
    helpers are inherited (presumably name encode/decode -- they are not
    defined in this file).
    """

    def __init__(self, parent):
        """ Private constructor.

        parent
            High-level object whose low-level identifier (``parent.id``)
            the attributes are attached to.
        """
        self._id = parent.id

    @with_phil
    def __getitem__(self, name):
        """ Read the value of an attribute.

        Returns an Empty instance when the attribute's shape is None, the
        bare element (``arr[()]``) for a zero-dimensional attribute, and a
        NumPy array otherwise.
        """
        attr = h5a.open(self._id, self._e(name))
        shape = attr.shape

        # shape is None for empty dataspaces
        if shape is None:
            return Empty(attr.dtype)

        dtype = attr.dtype

        # Do this first, as we'll be fiddling with the dtype for top-level
        # array types
        htype = h5t.py_create(dtype)

        # NumPy doesn't support top-level array types, so we have to "fake"
        # the correct type and shape for the array.  For example, consider
        # attr.shape == (5,) and attr.dtype == '(3,)f'. Then:
        if dtype.subdtype is not None:
            subdtype, subshape = dtype.subdtype
            shape = attr.shape + subshape   # (5, 3)
            dtype = subdtype                # 'f'

        arr = numpy.zeros(shape, dtype=dtype, order='C')
        attr.read(arr, mtype=htype)

        string_info = h5t.check_string_dtype(dtype)
        if string_info and (string_info.length is None):
            # Vlen strings: convert bytes to Python str
            arr = numpy.array([
                b.decode('utf-8', 'surrogateescape') for b in arr.flat
            ], dtype=dtype).reshape(arr.shape)

        # Zero-dimensional results are unwrapped to a scalar element.
        if arr.ndim == 0:
            return arr[()]
        return arr

    def get_id(self, name):
        """Get a low-level AttrID object for the named attribute.
        """
        return h5a.open(self._id, self._e(name))

    @with_phil
    def __setitem__(self, name, value):
        """ Set a new attribute, overwriting any existing attribute.

        The type and shape of the attribute are determined from the data.  To
        use a specific type or shape, or to preserve the type of an attribute,
        use the methods create() and modify().
        """
        self.create(name, data=value)

    @with_phil
    def __delitem__(self, name):
        """ Delete an attribute (which must already exist). """
        h5a.delete(self._id, self._e(name))

    def create(self, name, data, shape=None, dtype=None):
        """ Create a new attribute, overwriting any existing attribute.

        name
            Name of the new attribute (required)
        data
            An array to initialize the attribute (required)
        shape
            Shape of the attribute.  Overrides data.shape if both are
            given, in which case the total number of points must be unchanged.
            A bare int is treated as a one-element tuple.
        dtype
            Data type of the attribute.  Overrides data.dtype if both
            are given.  May be a Datatype instance, in which case its
            ``id`` is used as the HDF5 type of the new attribute.

        Raises ValueError if an array dtype's sub-shape does not match the
        trailing axes of the shape, or if the requested shape and the data
        contain a different number of elements.
        """
        with phil:
            # First, make sure we have a NumPy array.  We leave the data type
            # conversion for HDF5 to perform.
            if not isinstance(data, Empty):
                data = base.array_for_new_object(data, specified_dtype=dtype)

            if shape is None:
                shape = data.shape
            elif isinstance(shape, int):
                shape = (shape,)

            # If a committed type is given, we must use it in the call to
            # h5a.create.
            use_htype = None

            if isinstance(dtype, Datatype):
                use_htype = dtype.id
                dtype = dtype.dtype
            elif dtype is None:
                dtype = data.dtype
            else:
                # In case a string, e.g. 'i8' is passed
                dtype = numpy.dtype(dtype)

            # We'll need this for top-level array types
            original_dtype = dtype

            # Where a top-level array type is requested, we have to do some
            # fiddling around to present the data as a smaller array of
            # subarrays.
            if dtype.subdtype is not None:

                subdtype, subshape = dtype.subdtype

                # Make sure the subshape matches the last N axes' sizes.
                if shape[-len(subshape):] != subshape:
                    raise ValueError("Array dtype shape %s is incompatible with data shape %s" % (subshape, shape))

                # New "advertised" shape and dtype
                shape = shape[0:len(shape)-len(subshape)]
                dtype = subdtype

            # Not an array type; make sure to check the number of elements
            # is compatible, and reshape if needed.
            else:
                # numpy.prod replaces the numpy.product alias, which was
                # removed in NumPy 2.0; the arguments are unchanged.
                if shape is not None and (
                    numpy.prod(shape, dtype=numpy.ulonglong)
                    != numpy.prod(data.shape, dtype=numpy.ulonglong)
                ):
                    raise ValueError("Shape of new attribute conflicts with shape of data")

                if shape != data.shape:
                    data = data.reshape(shape)

            # We need this to handle special string types.
            if not isinstance(data, Empty):
                data = numpy.asarray(data, dtype=dtype)

            # Make HDF5 datatype and dataspace for the H5A calls
            if use_htype is None:
                htype = h5t.py_create(original_dtype, logical=True)
                # Must be bit-for-bit representation rather than logical
                htype2 = h5t.py_create(original_dtype)
            else:
                htype = use_htype
                htype2 = None

            if isinstance(data, Empty):
                space = h5s.create(h5s.NULL)
            else:
                space = h5s.create_simple(shape)

            # This mess exists because you can't overwrite attributes in HDF5.
            # So we write to a temporary attribute first, and then rename.
            # see issue 1385
            # if track_order is enabled new attributes (which exceed the
            # max_compact range, 8 is default) cannot be created as temporary
            # attributes with subsequent rename, doing that would trigger
            # the error discussed in the above issue
            attr_exists = False
            if h5a.exists(self._id, self._e(name)):
                attr_exists = True
                tempname = uuid.uuid4().hex
            else:
                tempname = name

            attr = h5a.create(self._id, self._e(tempname), htype, space)
            try:
                if not isinstance(data, Empty):
                    attr.write(data, mtype=htype2)
                if attr_exists:
                    # Rename temp attribute to proper name
                    # No atomic rename in HDF5 :(
                    h5a.delete(self._id, self._e(name))
                    h5a.rename(self._id, self._e(tempname), self._e(name))
            except BaseException:
                # Catch everything (the error is always re-raised) so the
                # attribute created under tempname is removed on failure.
                # NOTE(review): close() also runs in the finally clause, so
                # it is called twice on this path -- confirm AttrID.close()
                # is safe to repeat.
                attr.close()
                h5a.delete(self._id, self._e(tempname))
                raise
            finally:
                attr.close()

    def modify(self, name, value):
        """ Change the value of an attribute while preserving its type.

        Differs from __setitem__ in that if the attribute already exists, its
        type is preserved.  This can be very useful for interacting with
        externally generated files.

        If the attribute doesn't exist, it will be automatically created.

        Raises OSError if the existing attribute has an empty dataspace, and
        TypeError if the shape of the new value is incompatible with the
        existing attribute (single-element values of differing shape, e.g.
        () and (1,), are accepted).
        """
        with phil:
            if name not in self:
                self[name] = value
            else:
                attr = h5a.open(self._id, self._e(name))

                if is_empty_dataspace(attr):
                    raise OSError("Empty attributes can't be modified")

                # If the input data is already an array, let HDF5 do the conversion.
                # If it's a list or similar, don't make numpy guess a dtype for it.
                dt = None if isinstance(value, numpy.ndarray) else attr.dtype
                value = numpy.asarray(value, order='C', dtype=dt)

                # Allow the case of () <-> (1,)
                if (value.shape != attr.shape) and not \
                   (value.size == 1 and product(attr.shape) == 1):
                    raise TypeError("Shape of data is incompatible with existing attribute")
                attr.write(value)

    @with_phil
    def __len__(self):
        """ Number of attributes attached to the object. """
        # I expect we will not have more than 2**32 attributes
        return h5a.get_num_attrs(self._id)

    def __iter__(self):
        """ Iterate over the names of attributes.

        All names are collected while holding the lock and yielded
        afterwards.  Creation order is used when the object's creation
        property list has CRT_ORDER_TRACKED set for attributes; otherwise
        the name index is used.
        """
        with phil:

            attrlist = []

            def iter_cb(name, *args):
                """ Callback to gather attribute names """
                attrlist.append(self._d(name))

            cpl = self._id.get_create_plist()
            crt_order = cpl.get_attr_creation_order()
            cpl.close()
            if crt_order & h5p.CRT_ORDER_TRACKED:
                idx_type = h5.INDEX_CRT_ORDER
            else:
                idx_type = h5.INDEX_NAME

            h5a.iterate(self._id, iter_cb, index_type=idx_type)

        for name in attrlist:
            yield name

    @with_phil
    def __contains__(self, name):
        """ Determine if an attribute exists, by name. """
        return h5a.exists(self._id, self._e(name))

    @with_phil
    def __repr__(self):
        """ Short description; reports when the underlying id is falsy. """
        if not self._id:
            return "<Attributes of closed HDF5 object>"
        return "<Attributes of HDF5 object at %s>" % id(self._id)
|