|
"""Sparse DIAgonal format""" |
|
|
|
__docformat__ = "restructuredtext en" |
|
|
|
__all__ = ['dia_array', 'dia_matrix', 'isspmatrix_dia'] |
|
|
|
import numpy as np |
|
|
|
from .._lib._util import copy_if_needed |
|
from ._matrix import spmatrix |
|
from ._base import issparse, _formats, _spbase, sparray |
|
from ._data import _data_matrix |
|
from ._sputils import ( |
|
isshape, upcast_char, getdtype, get_sum_dtype, validateaxis, check_shape |
|
) |
|
from ._sparsetools import dia_matvec |
|
|
|
|
|
class _dia_base(_data_matrix): |
|
_format = 'dia' |
|
|
|
def __init__(self, arg1, shape=None, dtype=None, copy=False, *, maxprint=None): |
|
_data_matrix.__init__(self, arg1, maxprint=maxprint) |
|
|
|
if issparse(arg1): |
|
if arg1.format == "dia": |
|
if copy: |
|
arg1 = arg1.copy() |
|
self.data = arg1.data |
|
self.offsets = arg1.offsets |
|
self._shape = check_shape(arg1.shape) |
|
else: |
|
if arg1.format == self.format and copy: |
|
A = arg1.copy() |
|
else: |
|
A = arg1.todia() |
|
self.data = A.data |
|
self.offsets = A.offsets |
|
self._shape = check_shape(A.shape) |
|
elif isinstance(arg1, tuple): |
|
if isshape(arg1): |
|
|
|
|
|
self._shape = check_shape(arg1) |
|
self.data = np.zeros((0,0), getdtype(dtype, default=float)) |
|
idx_dtype = self._get_index_dtype(maxval=max(self.shape)) |
|
self.offsets = np.zeros((0), dtype=idx_dtype) |
|
else: |
|
try: |
|
|
|
data, offsets = arg1 |
|
except Exception as e: |
|
message = 'unrecognized form for dia_array constructor' |
|
raise ValueError(message) from e |
|
else: |
|
if shape is None: |
|
raise ValueError('expected a shape argument') |
|
if not copy: |
|
copy = copy_if_needed |
|
self.data = np.atleast_2d(np.array(arg1[0], dtype=dtype, copy=copy)) |
|
offsets = np.array(arg1[1], |
|
dtype=self._get_index_dtype(maxval=max(shape)), |
|
copy=copy) |
|
self.offsets = np.atleast_1d(offsets) |
|
self._shape = check_shape(shape) |
|
else: |
|
|
|
try: |
|
arg1 = np.asarray(arg1) |
|
except Exception as e: |
|
raise ValueError("unrecognized form for " |
|
f"{self.format}_matrix constructor") from e |
|
if isinstance(self, sparray) and arg1.ndim != 2: |
|
raise ValueError(f"DIA arrays don't support {arg1.ndim}D input. Use 2D") |
|
A = self._coo_container(arg1, dtype=dtype, shape=shape).todia() |
|
self.data = A.data |
|
self.offsets = A.offsets |
|
self._shape = check_shape(A.shape) |
|
|
|
if dtype is not None: |
|
newdtype = getdtype(dtype) |
|
self.data = self.data.astype(newdtype) |
|
|
|
|
|
if self.offsets.ndim != 1: |
|
raise ValueError('offsets array must have rank 1') |
|
|
|
if self.data.ndim != 2: |
|
raise ValueError('data array must have rank 2') |
|
|
|
if self.data.shape[0] != len(self.offsets): |
|
raise ValueError('number of diagonals (%d) ' |
|
'does not match the number of offsets (%d)' |
|
% (self.data.shape[0], len(self.offsets))) |
|
|
|
if len(np.unique(self.offsets)) != len(self.offsets): |
|
raise ValueError('offset array contains duplicate values') |
|
|
|
def __repr__(self): |
|
_, fmt = _formats[self.format] |
|
sparse_cls = 'array' if isinstance(self, sparray) else 'matrix' |
|
d = self.data.shape[0] |
|
return ( |
|
f"<{fmt} sparse {sparse_cls} of dtype '{self.dtype}'\n" |
|
f"\twith {self.nnz} stored elements ({d} diagonals) and shape {self.shape}>" |
|
) |
|
|
|
def _data_mask(self): |
|
"""Returns a mask of the same shape as self.data, where |
|
mask[i,j] is True when data[i,j] corresponds to a stored element.""" |
|
num_rows, num_cols = self.shape |
|
offset_inds = np.arange(self.data.shape[1]) |
|
row = offset_inds - self.offsets[:,None] |
|
mask = (row >= 0) |
|
mask &= (row < num_rows) |
|
mask &= (offset_inds < num_cols) |
|
return mask |
|
|
|
def count_nonzero(self, axis=None): |
|
if axis is not None: |
|
raise NotImplementedError( |
|
"count_nonzero over an axis is not implemented for DIA format" |
|
) |
|
mask = self._data_mask() |
|
return np.count_nonzero(self.data[mask]) |
|
|
|
count_nonzero.__doc__ = _spbase.count_nonzero.__doc__ |
|
|
|
def _getnnz(self, axis=None): |
|
if axis is not None: |
|
raise NotImplementedError("_getnnz over an axis is not implemented " |
|
"for DIA format") |
|
M,N = self.shape |
|
nnz = 0 |
|
for k in self.offsets: |
|
if k > 0: |
|
nnz += min(M,N-k) |
|
else: |
|
nnz += min(M+k,N) |
|
return int(nnz) |
|
|
|
_getnnz.__doc__ = _spbase._getnnz.__doc__ |
|
|
|
def sum(self, axis=None, dtype=None, out=None): |
|
validateaxis(axis) |
|
|
|
if axis is not None and axis < 0: |
|
axis += 2 |
|
|
|
res_dtype = get_sum_dtype(self.dtype) |
|
num_rows, num_cols = self.shape |
|
ret = None |
|
|
|
if axis == 0: |
|
mask = self._data_mask() |
|
x = (self.data * mask).sum(axis=0) |
|
if x.shape[0] == num_cols: |
|
res = x |
|
else: |
|
res = np.zeros(num_cols, dtype=x.dtype) |
|
res[:x.shape[0]] = x |
|
ret = self._ascontainer(res, dtype=res_dtype) |
|
|
|
else: |
|
row_sums = np.zeros((num_rows, 1), dtype=res_dtype) |
|
one = np.ones(num_cols, dtype=res_dtype) |
|
dia_matvec(num_rows, num_cols, len(self.offsets), |
|
self.data.shape[1], self.offsets, self.data, one, row_sums) |
|
|
|
row_sums = self._ascontainer(row_sums) |
|
|
|
if axis is None: |
|
return row_sums.sum(dtype=dtype, out=out) |
|
|
|
ret = self._ascontainer(row_sums.sum(axis=axis)) |
|
|
|
if out is not None and out.shape != ret.shape: |
|
raise ValueError("dimensions do not match") |
|
|
|
return ret.sum(axis=(), dtype=dtype, out=out) |
|
|
|
sum.__doc__ = _spbase.sum.__doc__ |
|
|
|
def _add_sparse(self, other): |
|
|
|
if not isinstance(other, _dia_base): |
|
return other._add_sparse(self) |
|
|
|
|
|
if np.array_equal(self.offsets, other.offsets): |
|
return self._with_data(self.data + other.data) |
|
|
|
|
|
new_offsets = np.union1d(self.offsets, other.offsets) |
|
self_idx = np.searchsorted(new_offsets, self.offsets) |
|
other_idx = np.searchsorted(new_offsets, other.offsets) |
|
|
|
self_d = self.data.shape[1] |
|
other_d = other.data.shape[1] |
|
|
|
|
|
if self_d == other_d and len(new_offsets) == len(self.offsets): |
|
new_data = self.data[_invert_index(self_idx)] |
|
new_data[other_idx, :] += other.data |
|
elif self_d == other_d and len(new_offsets) == len(other.offsets): |
|
new_data = other.data[_invert_index(other_idx)] |
|
new_data[self_idx, :] += self.data |
|
else: |
|
|
|
d = min(self.shape[0] + new_offsets[-1], self.shape[1]) |
|
|
|
|
|
new_data = np.zeros( |
|
(len(new_offsets), d), |
|
dtype=np.result_type(self.data, other.data), |
|
) |
|
new_data[self_idx, :self_d] += self.data[:, :d] |
|
new_data[other_idx, :other_d] += other.data[:, :d] |
|
return self._dia_container((new_data, new_offsets), shape=self.shape) |
|
|
|
def _mul_scalar(self, other): |
|
return self._with_data(self.data * other) |
|
|
|
def _matmul_vector(self, other): |
|
x = other |
|
|
|
y = np.zeros(self.shape[0], dtype=upcast_char(self.dtype.char, |
|
x.dtype.char)) |
|
|
|
L = self.data.shape[1] |
|
|
|
M,N = self.shape |
|
|
|
dia_matvec(M,N, len(self.offsets), L, self.offsets, self.data, |
|
x.ravel(), y.ravel()) |
|
|
|
return y |
|
|
|
def _setdiag(self, values, k=0): |
|
M, N = self.shape |
|
|
|
if values.ndim == 0: |
|
|
|
values_n = np.inf |
|
else: |
|
values_n = len(values) |
|
|
|
if k < 0: |
|
n = min(M + k, N, values_n) |
|
min_index = 0 |
|
max_index = n |
|
else: |
|
n = min(M, N - k, values_n) |
|
min_index = k |
|
max_index = k + n |
|
|
|
if values.ndim != 0: |
|
|
|
values = values[:n] |
|
|
|
data_rows, data_cols = self.data.shape |
|
if k in self.offsets: |
|
if max_index > data_cols: |
|
data = np.zeros((data_rows, max_index), dtype=self.data.dtype) |
|
data[:, :data_cols] = self.data |
|
self.data = data |
|
self.data[self.offsets == k, min_index:max_index] = values |
|
else: |
|
self.offsets = np.append(self.offsets, self.offsets.dtype.type(k)) |
|
m = max(max_index, data_cols) |
|
data = np.zeros((data_rows + 1, m), dtype=self.data.dtype) |
|
data[:-1, :data_cols] = self.data |
|
data[-1, min_index:max_index] = values |
|
self.data = data |
|
|
|
def todia(self, copy=False): |
|
if copy: |
|
return self.copy() |
|
else: |
|
return self |
|
|
|
todia.__doc__ = _spbase.todia.__doc__ |
|
|
|
def transpose(self, axes=None, copy=False): |
|
if axes is not None and axes != (1, 0): |
|
raise ValueError("Sparse arrays/matrices do not support " |
|
"an 'axes' parameter because swapping " |
|
"dimensions is the only logical permutation.") |
|
|
|
num_rows, num_cols = self.shape |
|
max_dim = max(self.shape) |
|
|
|
|
|
offsets = -self.offsets |
|
|
|
|
|
r = np.arange(len(offsets), dtype=np.intc)[:, None] |
|
c = np.arange(num_rows, dtype=np.intc) - (offsets % max_dim)[:, None] |
|
pad_amount = max(0, max_dim-self.data.shape[1]) |
|
data = np.hstack((self.data, np.zeros((self.data.shape[0], pad_amount), |
|
dtype=self.data.dtype))) |
|
data = data[r, c] |
|
return self._dia_container((data, offsets), shape=( |
|
num_cols, num_rows), copy=copy) |
|
|
|
transpose.__doc__ = _spbase.transpose.__doc__ |
|
|
|
def diagonal(self, k=0): |
|
rows, cols = self.shape |
|
if k <= -rows or k >= cols: |
|
return np.empty(0, dtype=self.data.dtype) |
|
idx, = np.nonzero(self.offsets == k) |
|
first_col = max(0, k) |
|
last_col = min(rows + k, cols) |
|
result_size = last_col - first_col |
|
if idx.size == 0: |
|
return np.zeros(result_size, dtype=self.data.dtype) |
|
result = self.data[idx[0], first_col:last_col] |
|
padding = result_size - len(result) |
|
if padding > 0: |
|
result = np.pad(result, (0, padding), mode='constant') |
|
return result |
|
|
|
diagonal.__doc__ = _spbase.diagonal.__doc__ |
|
|
|
def tocsc(self, copy=False): |
|
if self.nnz == 0: |
|
return self._csc_container(self.shape, dtype=self.dtype) |
|
|
|
num_rows, num_cols = self.shape |
|
num_offsets, offset_len = self.data.shape |
|
offset_inds = np.arange(offset_len) |
|
|
|
row = offset_inds - self.offsets[:,None] |
|
mask = (row >= 0) |
|
mask &= (row < num_rows) |
|
mask &= (offset_inds < num_cols) |
|
mask &= (self.data != 0) |
|
|
|
idx_dtype = self._get_index_dtype(maxval=max(self.shape)) |
|
indptr = np.zeros(num_cols + 1, dtype=idx_dtype) |
|
indptr[1:offset_len+1] = np.cumsum(mask.sum(axis=0)[:num_cols]) |
|
if offset_len < num_cols: |
|
indptr[offset_len+1:] = indptr[offset_len] |
|
indices = row.T[mask.T].astype(idx_dtype, copy=False) |
|
data = self.data.T[mask.T] |
|
return self._csc_container((data, indices, indptr), shape=self.shape, |
|
dtype=self.dtype) |
|
|
|
tocsc.__doc__ = _spbase.tocsc.__doc__ |
|
|
|
def tocoo(self, copy=False): |
|
num_rows, num_cols = self.shape |
|
num_offsets, offset_len = self.data.shape |
|
offset_inds = np.arange(offset_len) |
|
|
|
row = offset_inds - self.offsets[:,None] |
|
mask = (row >= 0) |
|
mask &= (row < num_rows) |
|
mask &= (offset_inds < num_cols) |
|
mask &= (self.data != 0) |
|
row = row[mask] |
|
col = np.tile(offset_inds, num_offsets)[mask.ravel()] |
|
idx_dtype = self._get_index_dtype( |
|
arrays=(self.offsets,), maxval=max(self.shape) |
|
) |
|
row = row.astype(idx_dtype, copy=False) |
|
col = col.astype(idx_dtype, copy=False) |
|
data = self.data[mask] |
|
|
|
|
|
return self._coo_container( |
|
(data, (row, col)), shape=self.shape, dtype=self.dtype, copy=False |
|
) |
|
|
|
tocoo.__doc__ = _spbase.tocoo.__doc__ |
|
|
|
|
|
def _with_data(self, data, copy=True): |
|
"""Returns a matrix with the same sparsity structure as self, |
|
but with different data. By default the structure arrays are copied. |
|
""" |
|
if copy: |
|
return self._dia_container( |
|
(data, self.offsets.copy()), shape=self.shape |
|
) |
|
else: |
|
return self._dia_container( |
|
(data, self.offsets), shape=self.shape |
|
) |
|
|
|
def resize(self, *shape): |
|
shape = check_shape(shape) |
|
M, N = shape |
|
|
|
self.data = self.data[:, :N] |
|
|
|
if (M > self.shape[0] and |
|
np.any(self.offsets + self.shape[0] < self.data.shape[1])): |
|
|
|
mask = (self.offsets[:, None] + self.shape[0] <= |
|
np.arange(self.data.shape[1])) |
|
self.data[mask] = 0 |
|
|
|
self._shape = shape |
|
|
|
resize.__doc__ = _spbase.resize.__doc__ |
|
|
|
|
|
def _invert_index(idx): |
|
"""Helper function to invert an index array.""" |
|
inv = np.zeros_like(idx) |
|
inv[idx] = np.arange(len(idx)) |
|
return inv |
|
|
|
|
|
def isspmatrix_dia(x): |
|
"""Is `x` of dia_matrix type? |
|
|
|
Parameters |
|
---------- |
|
x |
|
object to check for being a dia matrix |
|
|
|
Returns |
|
------- |
|
bool |
|
True if `x` is a dia matrix, False otherwise |
|
|
|
Examples |
|
-------- |
|
>>> from scipy.sparse import dia_array, dia_matrix, coo_matrix, isspmatrix_dia |
|
>>> isspmatrix_dia(dia_matrix([[5]])) |
|
True |
|
>>> isspmatrix_dia(dia_array([[5]])) |
|
False |
|
>>> isspmatrix_dia(coo_matrix([[5]])) |
|
False |
|
""" |
|
return isinstance(x, dia_matrix) |
|
|
|
|
|
|
|
class dia_array(_dia_base, sparray): |
|
""" |
|
Sparse array with DIAgonal storage. |
|
|
|
This can be instantiated in several ways: |
|
dia_array(D) |
|
where D is a 2-D ndarray |
|
|
|
dia_array(S) |
|
with another sparse array or matrix S (equivalent to S.todia()) |
|
|
|
dia_array((M, N), [dtype]) |
|
to construct an empty array with shape (M, N), |
|
dtype is optional, defaulting to dtype='d'. |
|
|
|
dia_array((data, offsets), shape=(M, N)) |
|
where the ``data[k,:]`` stores the diagonal entries for |
|
diagonal ``offsets[k]`` (See example below) |
|
|
|
Attributes |
|
---------- |
|
dtype : dtype |
|
Data type of the array |
|
shape : 2-tuple |
|
Shape of the array |
|
ndim : int |
|
Number of dimensions (this is always 2) |
|
nnz |
|
size |
|
data |
|
DIA format data array of the array |
|
offsets |
|
DIA format offset array of the array |
|
T |
|
|
|
Notes |
|
----- |
|
|
|
Sparse arrays can be used in arithmetic operations: they support |
|
addition, subtraction, multiplication, division, and matrix power. |
|
Sparse arrays with DIAgonal storage do not support slicing. |
|
|
|
Examples |
|
-------- |
|
|
|
>>> import numpy as np |
|
>>> from scipy.sparse import dia_array |
|
>>> dia_array((3, 4), dtype=np.int8).toarray() |
|
array([[0, 0, 0, 0], |
|
[0, 0, 0, 0], |
|
[0, 0, 0, 0]], dtype=int8) |
|
|
|
>>> data = np.array([[1, 2, 3, 4]]).repeat(3, axis=0) |
|
>>> offsets = np.array([0, -1, 2]) |
|
>>> dia_array((data, offsets), shape=(4, 4)).toarray() |
|
array([[1, 0, 3, 0], |
|
[1, 2, 0, 4], |
|
[0, 2, 3, 0], |
|
[0, 0, 3, 4]]) |
|
|
|
>>> from scipy.sparse import dia_array |
|
>>> n = 10 |
|
>>> ex = np.ones(n) |
|
>>> data = np.array([ex, 2 * ex, ex]) |
|
>>> offsets = np.array([-1, 0, 1]) |
|
>>> dia_array((data, offsets), shape=(n, n)).toarray() |
|
array([[2., 1., 0., ..., 0., 0., 0.], |
|
[1., 2., 1., ..., 0., 0., 0.], |
|
[0., 1., 2., ..., 0., 0., 0.], |
|
..., |
|
[0., 0., 0., ..., 2., 1., 0.], |
|
[0., 0., 0., ..., 1., 2., 1.], |
|
[0., 0., 0., ..., 0., 1., 2.]]) |
|
""" |
|
|
|
|
|
class dia_matrix(spmatrix, _dia_base): |
|
""" |
|
Sparse matrix with DIAgonal storage. |
|
|
|
This can be instantiated in several ways: |
|
dia_matrix(D) |
|
where D is a 2-D ndarray |
|
|
|
dia_matrix(S) |
|
with another sparse array or matrix S (equivalent to S.todia()) |
|
|
|
dia_matrix((M, N), [dtype]) |
|
to construct an empty matrix with shape (M, N), |
|
dtype is optional, defaulting to dtype='d'. |
|
|
|
dia_matrix((data, offsets), shape=(M, N)) |
|
where the ``data[k,:]`` stores the diagonal entries for |
|
diagonal ``offsets[k]`` (See example below) |
|
|
|
Attributes |
|
---------- |
|
dtype : dtype |
|
Data type of the matrix |
|
shape : 2-tuple |
|
Shape of the matrix |
|
ndim : int |
|
Number of dimensions (this is always 2) |
|
nnz |
|
size |
|
data |
|
DIA format data array of the matrix |
|
offsets |
|
DIA format offset array of the matrix |
|
T |
|
|
|
Notes |
|
----- |
|
|
|
Sparse matrices can be used in arithmetic operations: they support |
|
addition, subtraction, multiplication, division, and matrix power. |
|
Sparse matrices with DIAgonal storage do not support slicing. |
|
|
|
Examples |
|
-------- |
|
|
|
>>> import numpy as np |
|
>>> from scipy.sparse import dia_matrix |
|
>>> dia_matrix((3, 4), dtype=np.int8).toarray() |
|
array([[0, 0, 0, 0], |
|
[0, 0, 0, 0], |
|
[0, 0, 0, 0]], dtype=int8) |
|
|
|
>>> data = np.array([[1, 2, 3, 4]]).repeat(3, axis=0) |
|
>>> offsets = np.array([0, -1, 2]) |
|
>>> dia_matrix((data, offsets), shape=(4, 4)).toarray() |
|
array([[1, 0, 3, 0], |
|
[1, 2, 0, 4], |
|
[0, 2, 3, 0], |
|
[0, 0, 3, 4]]) |
|
|
|
>>> from scipy.sparse import dia_matrix |
|
>>> n = 10 |
|
>>> ex = np.ones(n) |
|
>>> data = np.array([ex, 2 * ex, ex]) |
|
>>> offsets = np.array([-1, 0, 1]) |
|
>>> dia_matrix((data, offsets), shape=(n, n)).toarray() |
|
array([[2., 1., 0., ..., 0., 0., 0.], |
|
[1., 2., 1., ..., 0., 0., 0.], |
|
[0., 1., 2., ..., 0., 0., 0.], |
|
..., |
|
[0., 0., 0., ..., 2., 1., 0.], |
|
[0., 0., 0., ..., 1., 2., 1.], |
|
[0., 0., 0., ..., 0., 1., 2.]]) |
|
""" |
|
|