# Sam Chaudry
# Upload folder using huggingface_hub
# 7885a28 verified
# raw
# history blame
# 7.62 kB
import numpy as np
from numpy.testing import assert_array_almost_equal, assert_, assert_array_equal
from scipy.sparse import csr_matrix, csc_matrix, csr_array, csc_array, hstack
from scipy import sparse
import pytest
def _check_csr_rowslice(i, sl, X, Xcsr):
np_slice = X[i, sl]
csr_slice = Xcsr[i, sl]
assert_array_almost_equal(np_slice, csr_slice.toarray()[0])
assert_(type(csr_slice) is csr_matrix)
def test_csr_rowslice():
    """Row slices of a CSR matrix agree with NumPy for several step slices."""
    N = 10
    # A local RandomState(0) produces the same stream as np.random.seed(0)
    # followed by np.random.random, without reseeding the global generator.
    rng = np.random.RandomState(0)
    X = rng.random_sample((N, N))
    X[X > 0.7] = 0  # zero out the largest entries so the matrix is sparse
    Xcsr = csr_matrix(X)

    # Full range, reversed, forward strided and backward strided slices.
    slices = [
        slice(None, None, None),
        slice(None, None, -1),
        slice(1, -2, 2),
        slice(-2, 1, -2),
    ]

    for i in range(N):
        for sl in slices:
            _check_csr_rowslice(i, sl, X, Xcsr)
def test_csr_getrow():
    """``csr_matrix.getrow(i)`` matches the dense row and stays a csr_matrix."""
    N = 10
    # A local RandomState(0) produces the same stream as np.random.seed(0)
    # followed by np.random.random, without reseeding the global generator.
    rng = np.random.RandomState(0)
    X = rng.random_sample((N, N))
    X[X > 0.7] = 0  # zero out the largest entries so the matrix is sparse
    Xcsr = csr_matrix(X)

    for i in range(N):
        # Slice with i:i + 1 so the dense reference keeps two dimensions,
        # matching the array produced from the sparse row.
        arr_row = X[i:i + 1, :]
        csr_row = Xcsr.getrow(i)

        assert_array_almost_equal(arr_row, csr_row.toarray())
        assert_(type(csr_row) is csr_matrix,
                f"expected csr_matrix, got {type(csr_row).__name__}")
def test_csr_getcol():
    """``csr_matrix.getcol(i)`` matches the dense column and stays a csr_matrix."""
    N = 10
    # A local RandomState(0) produces the same stream as np.random.seed(0)
    # followed by np.random.random, without reseeding the global generator.
    rng = np.random.RandomState(0)
    X = rng.random_sample((N, N))
    X[X > 0.7] = 0  # zero out the largest entries so the matrix is sparse
    Xcsr = csr_matrix(X)

    for i in range(N):
        # Slice with i:i + 1 so the dense reference keeps two dimensions,
        # matching the array produced from the sparse column.
        arr_col = X[:, i:i + 1]
        csr_col = Xcsr.getcol(i)

        assert_array_almost_equal(arr_col, csr_col.toarray())
        assert_(type(csr_col) is csr_matrix,
                f"expected csr_matrix, got {type(csr_col).__name__}")
@pytest.mark.parametrize(
    "matrix_input, axis, expected_shape",
    [
        (csr_matrix([[1, 0, 0, 0],
                     [0, 0, 0, 0],
                     [0, 2, 3, 0]]),
         0, (0, 4)),
        (csr_matrix([[1, 0, 0, 0],
                     [0, 0, 0, 0],
                     [0, 2, 3, 0]]),
         1, (3, 0)),
        (csr_matrix([[1, 0, 0, 0],
                     [0, 0, 0, 0],
                     [0, 2, 3, 0]]),
         'both', (0, 0)),
        (csr_matrix([[0, 1, 0, 0, 0],
                     [0, 0, 0, 0, 0],
                     [0, 0, 2, 3, 0]]),
         0, (0, 5)),
    ],
)
def test_csr_empty_slices(matrix_input, axis, expected_shape):
    """Empty slices (start == stop and start > stop) give the same shape.

    Parameters
    ----------
    matrix_input : csr_matrix
        Matrix to slice.
    axis : {0, 1, 'both'}
        Which axis (or both) receives the empty slice.
    expected_shape : tuple of int
        Shape expected from the dense conversion of the empty slice.
    """
    # see gh-11127 for related discussion
    # Bounds are derived from the row count and reused for column slices too.
    slice_1 = matrix_input.shape[0] - 1
    slice_2 = slice_1       # start == stop
    slice_3 = slice_2 - 1   # start > stop

    if axis == 0:
        actual_shape_1 = matrix_input[slice_1:slice_2, :].toarray().shape
        actual_shape_2 = matrix_input[slice_1:slice_3, :].toarray().shape
    elif axis == 1:
        actual_shape_1 = matrix_input[:, slice_1:slice_2].toarray().shape
        actual_shape_2 = matrix_input[:, slice_1:slice_3].toarray().shape
    elif axis == 'both':
        actual_shape_1 = matrix_input[slice_1:slice_2, slice_1:slice_2].toarray().shape
        actual_shape_2 = matrix_input[slice_1:slice_3, slice_1:slice_3].toarray().shape
    else:
        # Fail with a clear message instead of an unbound-local error below.
        raise ValueError(f"unexpected axis parameter: {axis!r}")

    assert actual_shape_1 == expected_shape
    assert actual_shape_1 == actual_shape_2
def test_csr_bool_indexing():
    """Boolean indices given as lists and as ndarrays select the same data."""
    data = csr_matrix([[0, 1, 2], [3, 4, 5], [6, 7, 8]])

    # Case 1: a single 1-D boolean mask.
    list_indices1 = [False, True, False]
    array_indices1 = np.array(list_indices1)
    # Case 2: a 2-D boolean mask with the same shape as ``data``.
    list_indices2 = [[False, True, False], [False, True, False], [False, True, False]]
    array_indices2 = np.array(list_indices2)
    # Case 3: a tuple of two 1-D boolean masks, one per axis.
    list_indices3 = ([False, True, False], [False, True, False])
    array_indices3 = (np.array(list_indices3[0]), np.array(list_indices3[1]))

    slice_list1 = data[list_indices1].toarray()
    slice_array1 = data[array_indices1].toarray()
    slice_list2 = data[list_indices2]
    slice_array2 = data[array_indices2]
    slice_list3 = data[list_indices3]
    slice_array3 = data[array_indices3]

    assert (slice_list1 == slice_array1).all()
    assert (slice_list2 == slice_array2).all()
    assert (slice_list3 == slice_array3).all()
def test_csr_hstack_int64():
    """
    Tests if hstack properly promotes the indices and indptr arrays to
    np.int64 when using np.int32 during concatenation would result in either
    array overflowing.
    """
    max_int32 = np.iinfo(np.int32).max

    # First case: indices would overflow with int32
    data = [1.0]
    row = [0]

    max_indices_1 = max_int32 - 1
    max_indices_2 = 3

    # Individual indices arrays are representable with int32
    col_1 = [max_indices_1 - 1]
    col_2 = [max_indices_2 - 1]

    X_1 = csr_matrix((data, (row, col_1)))
    X_2 = csr_matrix((data, (row, col_2)))

    assert max(max_indices_1 - 1, max_indices_2 - 1) < max_int32
    assert X_1.indices.dtype == X_1.indptr.dtype == np.int32
    assert X_2.indices.dtype == X_2.indptr.dtype == np.int32

    # ... but when concatenating their CSR matrices, the resulting indices
    # array can't be represented with int32 and must be promoted to int64.
    X_hs = hstack([X_1, X_2], format="csr")

    assert X_hs.indices.max() == max_indices_1 + max_indices_2 - 1
    assert max_indices_1 + max_indices_2 - 1 > max_int32
    assert X_hs.indices.dtype == X_hs.indptr.dtype == np.int64

    # Even if the matrices are empty, we must account for their size
    # contribution so that we may safely set the final elements.
    X_1_empty = csr_matrix(X_1.shape)
    X_2_empty = csr_matrix(X_2.shape)
    X_hs_empty = hstack([X_1_empty, X_2_empty], format="csr")

    assert X_hs_empty.shape == X_hs.shape
    assert X_hs_empty.indices.dtype == np.int64

    # Should be just small enough to stay in int32 after stack. Note that
    # we theoretically could support indices.max() == max_int32, but due to an
    # edge-case in the underlying sparsetools code
    # (namely the `coo_tocsr` routine),
    # we require that max(X_hs_32.shape) < max_int32 as well.
    # Hence we can only support max_int32 - 1.
    col_3 = [max_int32 - max_indices_1 - 1]
    X_3 = csr_matrix((data, (row, col_3)))
    X_hs_32 = hstack([X_1, X_3], format="csr")
    assert X_hs_32.indices.dtype == np.int32
    assert X_hs_32.indices.max() == max_int32 - 1
@pytest.mark.parametrize("cls", [csr_matrix, csr_array, csc_matrix, csc_array])
def test_mixed_index_dtype_int_indexing(cls):
    """Integer-list indexing works when only one index array is int64.

    Parameters
    ----------
    cls : type
        Compressed sparse container class under test.
    """
    # https://github.com/scipy/scipy/issues/20182
    rng = np.random.default_rng(0)
    base_mtx = cls(sparse.random(50, 50, random_state=rng, density=0.1))

    # Two copies, each with exactly one of the index arrays cast to int64,
    # so that indptr and indices may end up with different dtypes.
    indptr_64bit = base_mtx.copy()
    indices_64bit = base_mtx.copy()
    indptr_64bit.indptr = base_mtx.indptr.astype(np.int64)
    indices_64bit.indices = base_mtx.indices.astype(np.int64)

    for mtx in [base_mtx, indptr_64bit, indices_64bit]:
        # Row selection with an integer list.
        np.testing.assert_array_equal(
            mtx[[1, 2], :].toarray(),
            base_mtx[[1, 2], :].toarray()
        )
        # Column selection with an integer list.
        np.testing.assert_array_equal(
            mtx[:, [1, 2]].toarray(),
            base_mtx[:, [1, 2]].toarray()
        )
def test_broadcast_to():
    """``_broadcast_to`` agrees with ``np.broadcast_to`` and rejects bad shapes."""
    a = np.array([1, 0, 2])
    b = np.array([3])
    e = np.zeros((0,))  # zero-length input

    res_a = csr_array(a)._broadcast_to((2, 3))
    res_b = csr_array(b)._broadcast_to((4,))
    res_c = csr_array(b)._broadcast_to((2, 4))
    res_d = csr_array(b)._broadcast_to((1,))
    res_e = csr_array(e)._broadcast_to((4, 0))

    # Valid targets: the dense result must equal NumPy's broadcast.
    assert_array_equal(res_a.toarray(), np.broadcast_to(a, (2, 3)))
    assert_array_equal(res_b.toarray(), np.broadcast_to(b, (4,)))
    assert_array_equal(res_c.toarray(), np.broadcast_to(b, (2, 4)))
    assert_array_equal(res_d.toarray(), np.broadcast_to(b, (1,)))
    assert_array_equal(res_e.toarray(), np.broadcast_to(e, (4, 0)))

    # Incompatible targets must raise ValueError with the expected message.
    with pytest.raises(ValueError, match="cannot be broadcast"):
        csr_matrix([[1, 2, 0], [3, 0, 1]])._broadcast_to(shape=(2, 1))

    with pytest.raises(ValueError, match="cannot be broadcast"):
        csr_matrix([[0, 1, 2]])._broadcast_to(shape=(3, 2))

    with pytest.raises(ValueError, match="cannot be broadcast"):
        csr_array([0, 1, 2])._broadcast_to(shape=(3, 2))