File size: 7,623 Bytes
7885a28 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 |
import numpy as np
from numpy.testing import assert_array_almost_equal, assert_, assert_array_equal
from scipy.sparse import csr_matrix, csc_matrix, csr_array, csc_array, hstack
from scipy import sparse
import pytest
def _check_csr_rowslice(i, sl, X, Xcsr):
    """Compare one row slice of a CSR matrix with the same slice of a dense array.

    Parameters
    ----------
    i : int
        Row index applied to both ``X`` and ``Xcsr``.
    sl : slice
        Column slice applied within row ``i``.
    X : array_like
        Dense reference data.
    Xcsr : csr_matrix
        Sparse counterpart of ``X``.

    Raises
    ------
    AssertionError
        If the values differ or the sparse result is not exactly a
        ``csr_matrix``.
    """
    expected = X[i, sl]
    sliced = Xcsr[i, sl]
    # Row 0 of the densified sparse slice is what gets compared with the
    # dense slice.
    densified = sliced.toarray()[0]
    assert_array_almost_equal(expected, densified)
    # Exact type check: a subclass or another sparse format must fail.
    assert_(type(sliced) is csr_matrix)
def test_csr_rowslice():
    """Row slices of a CSR matrix agree with dense slicing.

    Every row of a 10 x 10 matrix is combined with a full slice, a reversed
    slice, and two strided slices (positive and negative step). The actual
    comparison is delegated to ``_check_csr_rowslice``.
    """
    N = 10
    # Use a local Generator: the test stays deterministic without reseeding
    # NumPy's global RNG, which would leak state into other tests.
    rng = np.random.default_rng(0)
    X = rng.random((N, N))
    # Zero out the entries above 0.7 before converting to CSR.
    X[X > 0.7] = 0
    Xcsr = csr_matrix(X)

    slices = [
        slice(None, None, None),
        slice(None, None, -1),
        slice(1, -2, 2),
        slice(-2, 1, -2),
    ]

    for i in range(N):
        for sl in slices:
            _check_csr_rowslice(i, sl, X, Xcsr)
def test_csr_getrow():
    """``getrow(i)`` matches the dense slice ``X[i:i + 1, :]`` and stays CSR."""
    N = 10
    # Use a local Generator: the test stays deterministic without reseeding
    # NumPy's global RNG, which would leak state into other tests.
    rng = np.random.default_rng(0)
    X = rng.random((N, N))
    # Zero out the entries above 0.7 before converting to CSR.
    X[X > 0.7] = 0
    Xcsr = csr_matrix(X)

    for i in range(N):
        arr_row = X[i:i + 1, :]
        csr_row = Xcsr.getrow(i)

        assert_array_almost_equal(arr_row, csr_row.toarray())
        # Exact type check: a subclass or another sparse format must fail.
        assert type(csr_row) is csr_matrix
def test_csr_getcol():
    """``getcol(i)`` matches the dense slice ``X[:, i:i + 1]`` and stays CSR."""
    N = 10
    # Use a local Generator: the test stays deterministic without reseeding
    # NumPy's global RNG, which would leak state into other tests.
    rng = np.random.default_rng(0)
    X = rng.random((N, N))
    # Zero out the entries above 0.7 before converting to CSR.
    X[X > 0.7] = 0
    Xcsr = csr_matrix(X)

    for i in range(N):
        arr_col = X[:, i:i + 1]
        csr_col = Xcsr.getcol(i)

        assert_array_almost_equal(arr_col, csr_col.toarray())
        # Exact type check: a subclass or another sparse format must fail.
        assert type(csr_col) is csr_matrix
@pytest.mark.parametrize(
    "matrix_input, axis, expected_shape",
    [
        (
            csr_matrix([[1, 0, 0, 0],
                        [0, 0, 0, 0],
                        [0, 2, 3, 0]]),
            0,
            (0, 4),
        ),
        (
            csr_matrix([[1, 0, 0, 0],
                        [0, 0, 0, 0],
                        [0, 2, 3, 0]]),
            1,
            (3, 0),
        ),
        (
            csr_matrix([[1, 0, 0, 0],
                        [0, 0, 0, 0],
                        [0, 2, 3, 0]]),
            'both',
            (0, 0),
        ),
        (
            csr_matrix([[0, 1, 0, 0, 0],
                        [0, 0, 0, 0, 0],
                        [0, 0, 2, 3, 0]]),
            0,
            (0, 5),
        ),
    ],
)
def test_csr_empty_slices(matrix_input, axis, expected_shape):
    """Empty and reversed slice ranges give the expected degenerate shape.

    ``axis`` chooses where the degenerate slice is applied: ``0`` (rows),
    ``1`` (columns) or ``'both'``. A ``k:k`` slice and a ``k:k-1`` slice must
    produce the same dense shape, equal to ``expected_shape``.
    """
    # see gh-11127 for related discussion
    start = matrix_input.toarray().shape[0] - 1
    whole = slice(None)
    # One empty range (stop == start) and one reversed range (stop < start).
    degenerate_spans = (slice(start, start), slice(start, start - 1))

    shapes = []
    for span in degenerate_spans:
        if axis == 0:
            key = (span, whole)
        elif axis == 1:
            key = (whole, span)
        elif axis == 'both':
            key = (span, span)
        shapes.append(matrix_input[key].toarray().shape)

    empty_shape, reversed_shape = shapes
    assert empty_shape == expected_shape
    assert empty_shape == reversed_shape
def test_csr_bool_indexing():
    """Boolean masks index a CSR matrix the same as lists or as ndarrays.

    Three mask layouts are exercised: a 1-D mask, a 3 x 3 mask, and a tuple
    holding one 1-D mask per axis.
    """
    data = csr_matrix([[0, 1, 2], [3, 4, 5], [6, 7, 8]])

    mask_1d = [False, True, False]
    mask_2d = [[False, True, False] for _ in range(3)]
    mask_pair = ([False, True, False], [False, True, False])

    array_1d = np.array(mask_1d)
    array_2d = np.array(mask_2d)
    array_pair = tuple(np.array(mask) for mask in mask_pair)

    # Only the 1-D case is densified before comparing; the other results are
    # compared exactly as the indexing returned them.
    selections = [
        (data[mask_1d].toarray(), data[array_1d].toarray()),
        (data[mask_2d], data[array_2d]),
        (data[mask_pair], data[array_pair]),
    ]

    for from_list, from_array in selections:
        assert (from_list == from_array).all()
def test_csr_hstack_int64():
    """
    Tests if hstack properly promotes the indices and indptr arrays to
    np.int64 when using np.int32 during concatenation would result in either
    array overflowing.
    """
    int32_max = np.iinfo(np.int32).max
    values = [1.0]
    rows = [0]

    # First case: indices would overflow with int32
    span_a = int32_max - 1
    span_b = 3
    last_col_a = span_a - 1
    last_col_b = span_b - 1

    # Individual indices arrays are representable with int32
    left = csr_matrix((values, (rows, [last_col_a])))
    right = csr_matrix((values, (rows, [last_col_b])))
    assert max(last_col_a, last_col_b) < int32_max
    assert left.indices.dtype == left.indptr.dtype == np.int32
    assert right.indices.dtype == right.indptr.dtype == np.int32

    # ... but when concatenating their CSR matrices, the resulting indices
    # array can't be represented with int32 and must be promoted to int64.
    stacked = hstack([left, right], format="csr")
    largest_index = span_a + span_b - 1
    assert stacked.indices.max() == largest_index
    assert largest_index > int32_max
    assert stacked.indices.dtype == stacked.indptr.dtype == np.int64

    # Even if the matrices are empty, we must account for their size
    # contribution so that we may safely set the final elements.
    empty_operands = [csr_matrix(left.shape), csr_matrix(right.shape)]
    stacked_empty = hstack(empty_operands, format="csr")
    assert stacked_empty.shape == stacked.shape
    assert stacked_empty.indices.dtype == np.int64

    # Should be just small enough to stay in int32 after stack. Note that
    # we theoretically could support indices.max() == max_int32, but due to an
    # edge-case in the underlying sparsetools code
    # (namely the `coo_tocsr` routine),
    # we require that max(X_hs_32.shape) < max_int32 as well.
    # Hence we can only support max_int32 - 1.
    narrow = csr_matrix((values, (rows, [int32_max - span_a - 1])))
    stacked_32 = hstack([left, narrow], format="csr")
    assert stacked_32.indices.dtype == np.int32
    assert stacked_32.indices.max() == int32_max - 1
@pytest.mark.parametrize("cls", [csr_matrix, csr_array, csc_matrix, csc_array])
def test_mixed_index_dtype_int_indexing(cls):
# https://github.com/scipy/scipy/issues/20182
rng = np.random.default_rng(0)
base_mtx = cls(sparse.random(50, 50, random_state=rng, density=0.1))
indptr_64bit = base_mtx.copy()
indices_64bit = base_mtx.copy()
indptr_64bit.indptr = base_mtx.indptr.astype(np.int64)
indices_64bit.indices = base_mtx.indices.astype(np.int64)
for mtx in [base_mtx, indptr_64bit, indices_64bit]:
np.testing.assert_array_equal(
mtx[[1,2], :].toarray(),
base_mtx[[1, 2], :].toarray()
)
np.testing.assert_array_equal(
mtx[:, [1, 2]].toarray(),
base_mtx[:, [1, 2]].toarray()
)
def test_broadcast_to():
a = np.array([1, 0, 2])
b = np.array([3])
e = np.zeros((0,))
res_a = csr_array(a)._broadcast_to((2,3))
res_b = csr_array(b)._broadcast_to((4,))
res_c = csr_array(b)._broadcast_to((2,4))
res_d = csr_array(b)._broadcast_to((1,))
res_e = csr_array(e)._broadcast_to((4,0))
assert_array_equal(res_a.toarray(), np.broadcast_to(a, (2,3)))
assert_array_equal(res_b.toarray(), np.broadcast_to(b, (4,)))
assert_array_equal(res_c.toarray(), np.broadcast_to(b, (2,4)))
assert_array_equal(res_d.toarray(), np.broadcast_to(b, (1,)))
assert_array_equal(res_e.toarray(), np.broadcast_to(e, (4,0)))
with pytest.raises(ValueError, match="cannot be broadcast"):
csr_matrix([[1, 2, 0], [3, 0, 1]])._broadcast_to(shape=(2, 1))
with pytest.raises(ValueError, match="cannot be broadcast"):
csr_matrix([[0, 1, 2]])._broadcast_to(shape=(3, 2))
with pytest.raises(ValueError, match="cannot be broadcast"):
csr_array([0, 1, 2])._broadcast_to(shape=(3, 2))
|