|
import numpy as np |
|
import scipy as sp |
|
|
|
__all__ = ['save_npz', 'load_npz'] |
|
|
|
|
|
|
|
PICKLE_KWARGS = dict(allow_pickle=False) |
|
|
|
|
|
def save_npz(file, matrix, compressed=True): |
|
""" Save a sparse matrix or array to a file using ``.npz`` format. |
|
|
|
Parameters |
|
---------- |
|
file : str or file-like object |
|
Either the file name (string) or an open file (file-like object) |
|
where the data will be saved. If file is a string, the ``.npz`` |
|
extension will be appended to the file name if it is not already |
|
there. |
|
matrix: spmatrix or sparray |
|
The sparse matrix or array to save. |
|
Supported formats: ``csc``, ``csr``, ``bsr``, ``dia`` or ``coo``. |
|
compressed : bool, optional |
|
Allow compressing the file. Default: True |
|
|
|
See Also |
|
-------- |
|
scipy.sparse.load_npz: Load a sparse matrix from a file using ``.npz`` format. |
|
numpy.savez: Save several arrays into a ``.npz`` archive. |
|
numpy.savez_compressed : Save several arrays into a compressed ``.npz`` archive. |
|
|
|
Examples |
|
-------- |
|
Store sparse matrix to disk, and load it again: |
|
|
|
>>> import numpy as np |
|
>>> import scipy as sp |
|
>>> sparse_matrix = sp.sparse.csc_matrix([[0, 0, 3], [4, 0, 0]]) |
|
>>> sparse_matrix |
|
<Compressed Sparse Column sparse matrix of dtype 'int64' |
|
with 2 stored elements and shape (2, 3)> |
|
>>> sparse_matrix.toarray() |
|
array([[0, 0, 3], |
|
[4, 0, 0]], dtype=int64) |
|
|
|
>>> sp.sparse.save_npz('/tmp/sparse_matrix.npz', sparse_matrix) |
|
>>> sparse_matrix = sp.sparse.load_npz('/tmp/sparse_matrix.npz') |
|
|
|
>>> sparse_matrix |
|
<Compressed Sparse Column sparse matrix of dtype 'int64' |
|
with 2 stored elements and shape (2, 3)> |
|
>>> sparse_matrix.toarray() |
|
array([[0, 0, 3], |
|
[4, 0, 0]], dtype=int64) |
|
""" |
|
arrays_dict = {} |
|
if matrix.format in ('csc', 'csr', 'bsr'): |
|
arrays_dict.update(indices=matrix.indices, indptr=matrix.indptr) |
|
elif matrix.format == 'dia': |
|
arrays_dict.update(offsets=matrix.offsets) |
|
elif matrix.format == 'coo': |
|
arrays_dict.update(row=matrix.row, col=matrix.col) |
|
else: |
|
msg = f'Save is not implemented for sparse matrix of format {matrix.format}.' |
|
raise NotImplementedError(msg) |
|
arrays_dict.update( |
|
format=matrix.format.encode('ascii'), |
|
shape=matrix.shape, |
|
data=matrix.data |
|
) |
|
if isinstance(matrix, sp.sparse.sparray): |
|
arrays_dict.update(_is_array=True) |
|
if compressed: |
|
np.savez_compressed(file, **arrays_dict) |
|
else: |
|
np.savez(file, **arrays_dict) |
|
|
|
|
|
def load_npz(file): |
|
""" Load a sparse array/matrix from a file using ``.npz`` format. |
|
|
|
Parameters |
|
---------- |
|
file : str or file-like object |
|
Either the file name (string) or an open file (file-like object) |
|
where the data will be loaded. |
|
|
|
Returns |
|
------- |
|
result : csc_array, csr_array, bsr_array, dia_array or coo_array |
|
A sparse array/matrix containing the loaded data. |
|
|
|
Raises |
|
------ |
|
OSError |
|
If the input file does not exist or cannot be read. |
|
|
|
See Also |
|
-------- |
|
scipy.sparse.save_npz: Save a sparse array/matrix to a file using ``.npz`` format. |
|
numpy.load: Load several arrays from a ``.npz`` archive. |
|
|
|
Examples |
|
-------- |
|
Store sparse array/matrix to disk, and load it again: |
|
|
|
>>> import numpy as np |
|
>>> import scipy as sp |
|
>>> sparse_array = sp.sparse.csc_array([[0, 0, 3], [4, 0, 0]]) |
|
>>> sparse_array |
|
<Compressed Sparse Column sparse array of dtype 'int64' |
|
with 2 stored elements and shape (2, 3)> |
|
>>> sparse_array.toarray() |
|
array([[0, 0, 3], |
|
[4, 0, 0]], dtype=int64) |
|
|
|
>>> sp.sparse.save_npz('/tmp/sparse_array.npz', sparse_array) |
|
>>> sparse_array = sp.sparse.load_npz('/tmp/sparse_array.npz') |
|
|
|
>>> sparse_array |
|
<Compressed Sparse Column sparse array of dtype 'int64' |
|
with 2 stored elements and shape (2, 3)> |
|
>>> sparse_array.toarray() |
|
array([[0, 0, 3], |
|
[4, 0, 0]], dtype=int64) |
|
|
|
In this example we force the result to be csr_array from csr_matrix |
|
>>> sparse_matrix = sp.sparse.csc_matrix([[0, 0, 3], [4, 0, 0]]) |
|
>>> sp.sparse.save_npz('/tmp/sparse_matrix.npz', sparse_matrix) |
|
>>> tmp = sp.sparse.load_npz('/tmp/sparse_matrix.npz') |
|
>>> sparse_array = sp.sparse.csr_array(tmp) |
|
""" |
|
with np.load(file, **PICKLE_KWARGS) as loaded: |
|
sparse_format = loaded.get('format') |
|
if sparse_format is None: |
|
raise ValueError(f'The file {file} does not contain ' |
|
f'a sparse array or matrix.') |
|
sparse_format = sparse_format.item() |
|
|
|
if not isinstance(sparse_format, str): |
|
|
|
|
|
sparse_format = sparse_format.decode('ascii') |
|
|
|
if loaded.get('_is_array'): |
|
sparse_type = sparse_format + '_array' |
|
else: |
|
sparse_type = sparse_format + '_matrix' |
|
|
|
try: |
|
cls = getattr(sp.sparse, f'{sparse_type}') |
|
except AttributeError as e: |
|
raise ValueError(f'Unknown format "{sparse_type}"') from e |
|
|
|
if sparse_format in ('csc', 'csr', 'bsr'): |
|
return cls((loaded['data'], loaded['indices'], loaded['indptr']), |
|
shape=loaded['shape']) |
|
elif sparse_format == 'dia': |
|
return cls((loaded['data'], loaded['offsets']), |
|
shape=loaded['shape']) |
|
elif sparse_format == 'coo': |
|
return cls((loaded['data'], (loaded['row'], loaded['col'])), |
|
shape=loaded['shape']) |
|
else: |
|
raise NotImplementedError(f'Load is not implemented for ' |
|
f'sparse matrix of format {sparse_format}.') |
|
|