Spaces:
Running
Running
import h5py | |
from .common import TestCase | |
def is_aligned(dataset, offset=4096):
    """Tell whether the dataset's offset is an exact multiple of ``offset``.

    The offset is whatever ``dataset.id.get_offset()`` returns
    (presumably the byte position of the raw data in the file; confirm
    against the h5py low-level API docs).
    """
    remainder = dataset.id.get_offset() % offset
    return remainder == 0
def dataset_name(i):
    """Build the dataset name for index ``i``: ``data`` + zero-padded number.

    The number is padded to at least three characters (e.g. ``data007``).
    """
    padded_index = format(i, "03")
    return "data" + padded_index
class TestFileAlignment(TestCase):
    """
    Ensure that setting the file alignment has the desired effect
    in the internal structure.

    Every test writes up to 1000 one-dimensional ``uint8`` datasets into a
    file created at the path returned by ``self.mktemp()`` and checks the
    offset of each dataset with ``is_aligned``.
    """

    def test_no_alignment_set(self):
        """Without alignment options, at least one dataset is misaligned."""
        fname = self.mktemp()
        # 881 is a prime number, so hopefully this helps randomize the
        # alignment enough.
        # A nice even number might give a pathological case where, while we
        # don't want the data to be aligned, it ends up aligned anyway.
        shape = (881,)

        with h5py.File(fname, 'w') as h5file:
            # Create up to 1000 datasets.
            # At least one of them should be misaligned.
            # This isn't a perfect check: the only case where all 1000
            # datasets get created is the one where every dataset is
            # aligned. Therefore, during correct operation, this test is
            # expected to finish quickly.
            for i in range(1000):
                dataset = h5file.create_dataset(
                    dataset_name(i), shape, dtype='uint8')
                # Assign data so that the dataset is instantiated in
                # the file. ``i`` can exceed 255, so wrap it to stay
                # within the uint8 range (same as the above-threshold test).
                dataset[...] = (i % 256)  # Truncate to uint8
                if not is_aligned(dataset):
                    # Break early asserting that the file is not aligned
                    break
            else:
                # The loop never hit ``break``: every dataset was aligned.
                raise RuntimeError("Data was all found to be aligned to 4096")

    def test_alignment_set_above_threshold(self):
        """Datasets at or above the threshold size are all aligned."""
        # 2022/01/19 hmaarrfk
        # UnitTest (TestCase) doesn't play well with pytest parametrization.
        alignment_threshold = 1000
        alignment_interval = 4096

        for shape in [
            (1033,),  # A prime number above the threshold
            (1000,),  # Exactly equal to the threshold
            (1001,),  # one above the threshold
        ]:
            fname = self.mktemp()
            with h5py.File(fname, 'w',
                           alignment_threshold=alignment_threshold,
                           alignment_interval=alignment_interval) as h5file:
                # Create up to 1000 datasets
                # They are all expected to be aligned
                for i in range(1000):
                    name = dataset_name(i)
                    dataset = h5file.create_dataset(
                        name, shape, dtype='uint8')
                    # Assign data so that the dataset is instantiated in
                    # the file
                    dataset[...] = (i % 256)  # Truncate to uint8
                    assert is_aligned(dataset, offset=alignment_interval), (
                        f"{name} with shape {shape} is not aligned to "
                        f"{alignment_interval}")

    def test_alignment_set_below_threshold(self):
        """Datasets below the threshold size are not forced to align."""
        # 2022/01/19 hmaarrfk
        # UnitTest (TestCase) doesn't play well with pytest parametrization.
        alignment_threshold = 1000
        alignment_interval = 1024

        for shape in [
            (881,),  # A prime number below the threshold
            (999,),  # Exactly one below the threshold
        ]:
            fname = self.mktemp()
            with h5py.File(fname, 'w',
                           alignment_threshold=alignment_threshold,
                           alignment_interval=alignment_interval) as h5file:
                # Create up to 1000 datasets.
                # At least one of them should be misaligned.
                # This isn't a perfect check: the only case where all 1000
                # datasets get created is the one where every dataset is
                # aligned. Therefore, during correct operation, this test
                # is expected to finish quickly.
                for i in range(1000):
                    dataset = h5file.create_dataset(
                        dataset_name(i), shape, dtype='uint8')
                    # Assign data so that the dataset is instantiated in
                    # the file. ``i`` can exceed 255, so wrap it to stay
                    # within the uint8 range.
                    dataset[...] = (i % 256)  # Truncate to uint8
                    if not is_aligned(dataset, offset=alignment_interval):
                        # Break early asserting that the file is not aligned
                        break
                else:
                    # The loop never hit ``break``: every dataset was
                    # aligned.
                    raise RuntimeError(
                        "Data was all found to be aligned to "
                        f"{alignment_interval}. This is highly unlikely.")