File size: 4,213 Bytes
7885a28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
from scipy.datasets._registry import registry
from scipy.datasets._fetchers import data_fetcher
from scipy.datasets._utils import _clear_cache
from scipy.datasets import ascent, face, electrocardiogram, download_all
from numpy.testing import assert_equal, assert_almost_equal
import os
from threading import get_ident
import pytest

try:
    import pooch
except ImportError:
    raise ImportError("Missing optional dependency 'pooch' required "
                      "for scipy.datasets module. Please use pip or "
                      "conda to install 'pooch'.")


data_dir = data_fetcher.path  # type: ignore


def _has_hash(path, expected_hash):
    """Check if the provided path has the expected hash."""
    if not os.path.exists(path):
        return False
    return pooch.file_hash(path) == expected_hash


class TestDatasets:

    @pytest.fixture(scope='module', autouse=True)
    def test_download_all(self):
        # This fixture requires INTERNET CONNECTION

        # test_setup phase
        download_all()

        yield

    @pytest.mark.fail_slow(10)
    def test_existence_all(self):
        assert len(os.listdir(data_dir)) >= len(registry)

    def test_ascent(self):
        assert_equal(ascent().shape, (512, 512))

        # hash check
        assert _has_hash(os.path.join(data_dir, "ascent.dat"),
                         registry["ascent.dat"])

    def test_face(self):
        assert_equal(face().shape, (768, 1024, 3))

        # hash check
        assert _has_hash(os.path.join(data_dir, "face.dat"),
                         registry["face.dat"])

    def test_electrocardiogram(self):
        # Test shape, dtype and stats of signal
        ecg = electrocardiogram()
        assert_equal(ecg.dtype, float)
        assert_equal(ecg.shape, (108000,))
        assert_almost_equal(ecg.mean(), -0.16510875)
        assert_almost_equal(ecg.std(), 0.5992473991177294)

        # hash check
        assert _has_hash(os.path.join(data_dir, "ecg.dat"),
                         registry["ecg.dat"])


def test_clear_cache(tmp_path):
    # Note: `tmp_path` is a pytest fixture, it handles cleanup
    thread_basepath = tmp_path / str(get_ident())
    thread_basepath.mkdir()

    dummy_basepath = thread_basepath / "dummy_cache_dir"
    dummy_basepath.mkdir()

    # Create three dummy dataset files for dummy dataset methods
    dummy_method_map = {}
    for i in range(4):
        dummy_method_map[f"data{i}"] = [f"data{i}.dat"]
        data_filepath = dummy_basepath / f"data{i}.dat"
        data_filepath.write_text("")

    # clear files associated to single dataset method data0
    # also test callable argument instead of list of callables
    def data0():
        pass
    _clear_cache(datasets=data0, cache_dir=dummy_basepath,
                 method_map=dummy_method_map)
    assert not os.path.exists(dummy_basepath/"data0.dat")

    # clear files associated to multiple dataset methods "data3" and "data4"
    def data1():
        pass

    def data2():
        pass
    _clear_cache(datasets=[data1, data2], cache_dir=dummy_basepath,
                 method_map=dummy_method_map)
    assert not os.path.exists(dummy_basepath/"data1.dat")
    assert not os.path.exists(dummy_basepath/"data2.dat")

    # clear multiple dataset files "data3_0.dat" and "data3_1.dat"
    # associated with dataset method "data3"
    def data4():
        pass
    # create files
    (dummy_basepath / "data4_0.dat").write_text("")
    (dummy_basepath / "data4_1.dat").write_text("")

    dummy_method_map["data4"] = ["data4_0.dat", "data4_1.dat"]
    _clear_cache(datasets=[data4], cache_dir=dummy_basepath,
                 method_map=dummy_method_map)
    assert not os.path.exists(dummy_basepath/"data4_0.dat")
    assert not os.path.exists(dummy_basepath/"data4_1.dat")

    # wrong dataset method should raise ValueError since it
    # doesn't exist in the dummy_method_map
    def data5():
        pass
    with pytest.raises(ValueError):
        _clear_cache(datasets=[data5], cache_dir=dummy_basepath,
                     method_map=dummy_method_map)

    # remove all dataset cache
    _clear_cache(datasets=None, cache_dir=dummy_basepath)
    assert not os.path.exists(dummy_basepath)