File size: 4,509 Bytes
122d3ff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import h5py
from .common import TestCase


def is_aligned(dataset, offset=4096):
    """Return True when the dataset's file offset is a multiple of ``offset``.

    The offset is the value reported by ``dataset.id.get_offset()``.
    """
    file_offset = dataset.id.get_offset()
    remainder = file_offset % offset
    return remainder == 0


def dataset_name(i):
    """Build a dataset name: ``data`` followed by ``i`` zero-padded to 3 digits."""
    return "data{:03}".format(i)


class TestFileAlignment(TestCase):
    """
    Ensure that setting the file alignment has the desired effect
    in the internal structure.
    """

    def test_no_alignment_set(self):
        """With no alignment options, some dataset must miss a 4096 boundary."""
        fname = self.mktemp()
        # 881 is a prime number, which hopefully helps randomize the
        # offsets enough. A nice even size might give a pathological case
        # where, even though no alignment was requested, every dataset
        # ends up aligned anyway.
        shape = (881,)

        with h5py.File(fname, 'w') as h5file:
            # Create up to 1000 datasets; at least one of them should be
            # misaligned. This check is not perfect, but the only way all
            # 1000 datasets get created is if every one of them turned
            # out to be aligned. During correct operation the loop is
            # therefore expected to finish quickly.
            for i in range(1000):
                dataset = h5file.create_dataset(
                    dataset_name(i), shape, dtype='uint8')
                # Assign data so that the dataset is instantiated in
                # the file
                dataset[...] = (i % 256)  # Wrap so the value fits in uint8
                if not is_aligned(dataset):
                    # Break early asserting that the file is not aligned
                    break
            else:
                raise RuntimeError("Data was all found to be aligned to 4096")

    def test_alignment_set_above_threshold(self):
        """Datasets at or above the threshold size must all be aligned."""
        # 2022/01/19 hmaarrfk
        # UnitTest (TestCase) doesn't play well with pytest parametrization.
        alignment_threshold = 1000
        alignment_interval = 4096

        for shape in [
            (1033,),  # A prime number above the threshold
            (1000,),  # Exactly equal to the threshold
            (1001,),  # one above the threshold
        ]:
            fname = self.mktemp()
            with h5py.File(fname, 'w',
                           alignment_threshold=alignment_threshold,
                           alignment_interval=alignment_interval) as h5file:
                # Create up to 1000 datasets
                # They are all expected to be aligned
                for i in range(1000):
                    dataset = h5file.create_dataset(
                        dataset_name(i), shape, dtype='uint8')
                    # Assign data so that the dataset is instantiated in
                    # the file
                    dataset[...] = (i % 256)  # Wrap so the value fits in uint8
                    assert is_aligned(dataset, offset=alignment_interval), (
                        f"{dataset_name(i)} with shape {shape} is not "
                        f"aligned to {alignment_interval}")

    def test_alignment_set_below_threshold(self):
        """Datasets below the threshold size must not all end up aligned."""
        # 2022/01/19 hmaarrfk
        # UnitTest (TestCase) doesn't play well with pytest parametrization.
        alignment_threshold = 1000
        alignment_interval = 1024

        for shape in [
            (881,),  # A prime number below the threshold
            (999,),  # Exactly one below the threshold
        ]:
            fname = self.mktemp()
            with h5py.File(fname, 'w',
                           alignment_threshold=alignment_threshold,
                           alignment_interval=alignment_interval) as h5file:
                # Create up to 1000 datasets; at least one of them should
                # be misaligned. This check is not perfect, but the only
                # way all 1000 datasets get created is if every one of
                # them turned out to be aligned. During correct operation
                # the loop is therefore expected to finish quickly.
                for i in range(1000):
                    dataset = h5file.create_dataset(
                        dataset_name(i), shape, dtype='uint8')
                    # Assign data so that the dataset is instantiated in
                    # the file
                    dataset[...] = (i % 256)  # Wrap so the value fits in uint8
                    if not is_aligned(dataset, offset=alignment_interval):
                        # Break early asserting that the file is not aligned
                        break
                else:
                    raise RuntimeError(
                        "Data was all found to be aligned to "
                        f"{alignment_interval}. This is highly unlikely.")