|
''' Classes for read / write of matlab (TM) 4 files |
|
''' |
|
import sys |
|
import warnings |
|
import math |
|
from operator import mul |
|
|
|
import numpy as np |
|
|
|
import scipy.sparse |
|
|
|
from ._miobase import (MatFileReader, docfiller, matdims, read_dtype, |
|
convert_dtypes, arr_to_chars, arr_dtype_number) |
|
|
|
from ._mio_utils import squeeze_element, chars_to_strings |
|
from functools import reduce |
|
|
|
|
|
# Names exported by ``from <this module> import *``.
__all__ = [
    'MatFile4Reader',
    'MatFile4Writer',
    'SYS_LITTLE_ENDIAN',
    'VarHeader4',
    'VarReader4',
    'VarWriter4',
    'arr_to_2d',
    'mclass_info',
    'mdtypes_template',
    'miDOUBLE',
    'miINT16',
    'miINT32',
    'miSINGLE',
    'miUINT16',
    'miUINT8',
    'mxCHAR_CLASS',
    'mxFULL_CLASS',
    'mxSPARSE_CLASS',
    'np_to_mtypes',
    'order_codes',
]
|
|
|
|
|
# True when the running interpreter reports little-endian byte order.
SYS_LITTLE_ENDIAN = (sys.byteorder == 'little')

# Mat4 data type codes (the ``P`` digit of the header ``mopt`` field).
miDOUBLE = 0
miSINGLE = 1
miINT32 = 2
miINT16 = 3
miUINT16 = 4
miUINT8 = 5

# Byte-order-free dtype specifications, keyed by data type code, plus the
# layout of the fixed-size variable header.  The reader turns this template
# into concrete dtypes for the byte order of the file being read.
mdtypes_template = {
    miDOUBLE: 'f8',
    miSINGLE: 'f4',
    miINT32: 'i4',
    miINT16: 'i2',
    miUINT16: 'u2',
    miUINT8: 'u1',
    'header': [
        ('mopt', 'i4'),
        ('mrows', 'i4'),
        ('ncols', 'i4'),
        ('imagf', 'i4'),
        ('namlen', 'i4'),
    ],
    'U1': 'U1',
}

# Maps a NumPy dtype string (``dtype.str`` without its leading byte-order
# character) to the Mat4 data type code used when writing.
np_to_mtypes = {
    # stored as doubles
    'f8': miDOUBLE,
    'c32': miDOUBLE,
    'c24': miDOUBLE,
    'c16': miDOUBLE,
    # stored as singles
    'f4': miSINGLE,
    'c8': miSINGLE,
    # integer types
    'i4': miINT32,
    'i2': miINT16,
    'u2': miUINT16,
    'u1': miUINT8,
    # single bytes
    'S1': miUINT8,
}
|
|
|
|
|
# Mat4 matrix class codes (the ``T`` digit of the header ``mopt`` field).
mxFULL_CLASS = 0
mxCHAR_CLASS = 1
mxSPARSE_CLASS = 2

# Byte-order / number-format codes (the ``M`` digit of ``mopt``).  Only the
# first two are NumPy byte-order characters; the rest are descriptive names
# used in the "unsupported byte ordering" warning.
order_codes = {
    0: '<',
    1: '>',
    2: 'VAX D-float',
    3: 'VAX G-float',
    4: 'Cray',
}

# Human-readable class names reported when listing the variables of a file.
mclass_info = {
    mxFULL_CLASS: 'double',
    mxCHAR_CLASS: 'char',
    mxSPARSE_CLASS: 'sparse',
}

# Largest value of NumPy's ``intp`` type; used as the upper bound on the
# byte length of an array read from a file.
_MAX_INTP = np.iinfo(np.intp).max
|
|
|
|
|
class VarHeader4:
    ''' Record holding the header fields of one Mat4 variable

    Attributes
    ----------
    name : object
        Variable name as passed to the constructor.
    dtype : object
        Data type of the stored values.
    mclass : int
        Matrix class code.
    dims : sequence
        Dimensions of the stored matrix.
    is_complex : bool
        Whether the header marks the data as complex.
    '''
    # Class-level flags; nothing in this class ever changes them.
    is_logical = False
    is_global = False

    def __init__(self, name, dtype, mclass, dims, is_complex):
        self.name, self.dtype = name, dtype
        self.mclass, self.dims = mclass, dims
        self.is_complex = is_complex
|
|
|
|
|
class VarReader4:
    ''' Class to read matlab 4 variables

    Parameters
    ----------
    file_reader : object
        Reader providing the attributes ``mat_stream``, ``dtypes``,
        ``chars_as_strings`` and ``squeeze_me``; each is copied onto this
        instance.
    '''

    def __init__(self, file_reader):
        self.file_reader = file_reader
        self.mat_stream = file_reader.mat_stream
        self.dtypes = file_reader.dtypes
        self.chars_as_strings = file_reader.chars_as_strings
        self.squeeze_me = file_reader.squeeze_me

    def read_header(self):
        ''' Read and return header for variable

        Returns
        -------
        hdr : ``VarHeader4`` instance

        Raises
        ------
        ValueError
            If ``mopt`` is outside 0..5000 or its ``O`` digit is not 0.
        '''
        data = read_dtype(self.mat_stream, self.dtypes['header'])
        mopt = int(data['mopt'])
        # Reject a garbage (e.g. byteswapped) header before acting on its
        # ``namlen`` field, which would be garbage as well.
        if mopt < 0 or mopt > 5000:
            raise ValueError('Mat 4 mopt wrong format, byteswapping problem?')
        name = self.mat_stream.read(int(data['namlen'])).strip(b'\x00')
        # mopt is read as the decimal digits M O P T
        M, rest = divmod(mopt, 1000)
        if M not in (0, 1):
            # mopt == 5000 passes the range check with M == 5, which has no
            # entry in order_codes; fall back to a generic description
            # rather than failing with KeyError while building the warning.
            order_name = order_codes.get(M, f'unknown (code {M})')
            warnings.warn(f"We do not support byte ordering '{order_name}';"
                          " returned data may be corrupt",
                          UserWarning, stacklevel=3)
        O, rest = divmod(rest, 100)
        if O != 0:
            raise ValueError('O in MOPT integer should be 0, wrong format?')
        P, T = divmod(rest, 10)
        dims = (data['mrows'], data['ncols'])
        is_complex = data['imagf'] == 1
        dtype = self.dtypes[P]
        return VarHeader4(
            name,
            dtype,
            T,
            dims,
            is_complex)

    def array_from_header(self, hdr, process=True):
        ''' Read the array described by `hdr` from the stream

        Parameters
        ----------
        hdr : ``VarHeader4`` instance
        process : bool, optional
            If True (default), apply the ``chars_as_strings`` conversion to
            char arrays and the ``squeeze_me`` option to the result.

        Returns
        -------
        arr : ndarray or sparse array

        Raises
        ------
        TypeError
            If ``hdr.mclass`` is not a known matrix class code.
        '''
        mclass = hdr.mclass
        if mclass == mxFULL_CLASS:
            arr = self.read_full_array(hdr)
        elif mclass == mxCHAR_CLASS:
            arr = self.read_char_array(hdr)
            if process and self.chars_as_strings:
                arr = chars_to_strings(arr)
        elif mclass == mxSPARSE_CLASS:
            # sparse results are returned as read; the squeeze step below
            # is not applied to them
            return self.read_sparse_array(hdr)
        else:
            raise TypeError(f'No reader for class code {mclass}')
        if process and self.squeeze_me:
            return squeeze_element(arr)
        return arr

    def read_sub_array(self, hdr, copy=True):
        ''' Mat4 read using header `hdr` dtype and dims

        Parameters
        ----------
        hdr : object
           object with attributes ``dtype``, ``dims``. dtype is assumed to be
           the correct endianness
        copy : bool, optional
           copies array before return if True (default True)
           (buffer is usually read only)

        Returns
        -------
        arr : ndarray
            of dtype given by `hdr` ``dtype`` and shape given by `hdr` ``dims``

        Raises
        ------
        ValueError
            If a dimension is negative, the byte length exceeds the largest
            NumPy array size on this platform, or the stream holds fewer
            bytes than the header promises.
        '''
        dt = hdr.dtype
        # Work in Python integers: the header fields are fixed-width NumPy
        # integers whose product could silently wrap around.
        shape = tuple(int(d) for d in hdr.dims)
        if any(d < 0 for d in shape):
            # A negative length would otherwise be handed to stream.read()
            raise ValueError(
                f"Variable '{hdr.name.decode('latin1')}' has negative "
                f"dimensions {shape}; is this a badly-formed file?")
        num_bytes = math.prod(shape) * dt.itemsize
        if num_bytes > _MAX_INTP:
            raise ValueError(
                f"Variable '{hdr.name.decode('latin1')}' has byte length "
                f"longer than largest possible NumPy array on this platform.")
        buffer = self.mat_stream.read(num_bytes)
        if len(buffer) != num_bytes:
            raise ValueError(
                f"Not enough bytes to read matrix "
                f"'{hdr.name.decode('latin1')}'; is this a badly-formed file? "
                f"Consider listing matrices with `whosmat` and loading named "
                f"matrices with `variable_names` kwarg to `loadmat`")
        arr = np.ndarray(shape=shape,
                         dtype=dt,
                         buffer=buffer,
                         order='F')
        if copy:
            arr = arr.copy()
        return arr

    def read_full_array(self, hdr):
        ''' Full (rather than sparse) matrix getter

        Read matrix (array) can be real or complex

        Parameters
        ----------
        hdr : ``VarHeader4`` instance

        Returns
        -------
        arr : ndarray
            complex array if ``hdr.is_complex`` is True, otherwise a real
            numeric array
        '''
        if hdr.is_complex:
            # Real part is stored first, imaginary part second.  No copies
            # are needed because the sum below creates a new array.
            res = self.read_sub_array(hdr, copy=False)
            res_j = self.read_sub_array(hdr, copy=False)
            return res + (res_j * 1j)
        return self.read_sub_array(hdr)

    def read_char_array(self, hdr):
        ''' latin-1 text matrix (char matrix) reader

        Parameters
        ----------
        hdr : ``VarHeader4`` instance

        Returns
        -------
        arr : ndarray
            with dtype 'U1', shape given by `hdr` ``dims``
        '''
        arr = self.read_sub_array(hdr).astype(np.uint8)
        # Decode the character codes as latin-1 and view the resulting
        # string as an array of single characters.
        S = arr.tobytes().decode('latin-1')
        return np.ndarray(shape=hdr.dims,
                          dtype=np.dtype('U1'),
                          buffer=np.array(S)).copy()

    def read_sparse_array(self, hdr):
        ''' Read and return sparse matrix type

        Parameters
        ----------
        hdr : ``VarHeader4`` instance

        Returns
        -------
        arr : coo_array
            with dtype ``float`` (``complex`` when the data have four storage
            columns) and shape read from the sparse array data

        Notes
        -----
        MATLAB 4 real sparse arrays are saved in a N+1 by 3 array format, where
        N is the number of non-zero values. Column 1 values [0:N] are the
        (1-based) row indices of each non-zero value, column 2 [0:N] are the
        column indices, column 3 [0:N] are the (real) values. The last values
        [-1,0:2] of the rows, column indices are shape[0] and shape[1]
        respectively of the output matrix. The last value for the values column
        is a padding 0. mrows and ncols values from the header give the shape of
        the stored matrix, here [N+1, 3]. Complex data are saved as a 4 column
        matrix, where the fourth column contains the imaginary component; the
        last value is again 0. Complex sparse data do *not* have the header
        ``imagf`` field set to True; the fact that the data are complex is only
        detectable because there are 4 storage columns.
        '''
        res = self.read_sub_array(hdr)
        tmp = res[:-1, :]
        # The final row carries the shape of the matrix being described.
        dims = (int(res[-1, 0]), int(res[-1, 1]))
        I = np.ascontiguousarray(tmp[:, 0], dtype='intc')
        J = np.ascontiguousarray(tmp[:, 1], dtype='intc')
        # Stored indices are 1-based.
        I -= 1
        J -= 1
        if res.shape[1] == 3:
            V = np.ascontiguousarray(tmp[:, 2], dtype='float')
        else:
            V = np.ascontiguousarray(tmp[:, 2], dtype='complex')
            V.imag = tmp[:, 3]
        return scipy.sparse.coo_array((V, (I, J)), dims)

    def shape_from_header(self, hdr):
        '''Read the shape of the array described by the header.
        The file position after this call is unspecified.

        Parameters
        ----------
        hdr : ``VarHeader4`` instance

        Returns
        -------
        shape : tuple of int
            Empty tuple for a sparse header whose stored dims are not a pair
            of values that are each at least 1.

        Raises
        ------
        TypeError
            If ``hdr.mclass`` is not a known matrix class code.
        '''
        mclass = hdr.mclass
        if mclass == mxFULL_CLASS:
            shape = tuple(map(int, hdr.dims))
        elif mclass == mxCHAR_CLASS:
            shape = tuple(map(int, hdr.dims))
            if self.chars_as_strings:
                shape = shape[:-1]
        elif mclass == mxSPARSE_CLASS:
            dt = hdr.dtype
            dims = hdr.dims

            if not (len(dims) == 2 and dims[0] >= 1 and dims[1] >= 1):
                return ()

            # The matrix shape sits in the last entry of each of the first
            # two stored columns, so skip ``dims[0] - 1`` entries, read one,
            # and repeat.  The offset is a Python int so the multiplication
            # cannot wrap in a fixed-width integer type.
            column_skip = dt.itemsize * (int(dims[0]) - 1)
            self.mat_stream.seek(column_skip, 1)
            rows = np.ndarray(shape=(), dtype=dt,
                              buffer=self.mat_stream.read(dt.itemsize))
            self.mat_stream.seek(column_skip, 1)
            cols = np.ndarray(shape=(), dtype=dt,
                              buffer=self.mat_stream.read(dt.itemsize))

            shape = (int(rows), int(cols))
        else:
            raise TypeError(f'No reader for class code {mclass}')

        if self.squeeze_me:
            shape = tuple(x for x in shape if x != 1)
        return shape
|
|
|
|
|
class MatFile4Reader(MatFileReader):
    ''' Reader for Mat4 files '''
    @docfiller
    def __init__(self, mat_stream, *args, **kwargs):
        ''' Initialize matlab 4 file reader

    %(matstream_arg)s
    %(load_args)s
        '''
        super().__init__(mat_stream, *args, **kwargs)
        self._matrix_reader = None

    def guess_byte_order(self):
        ''' Guess the byte order of the stream from its first ``mopt`` field

        The first four bytes are read as a native-order ``i4`` and the
        stream is rewound to position 0 afterwards.

        Returns
        -------
        byte_order : {'<', '>'}
        '''
        self.mat_stream.seek(0)
        mopt = read_dtype(self.mat_stream, np.dtype('i4'))
        self.mat_stream.seek(0)
        if mopt == 0:
            # zero reads the same in either byte order
            return '<'
        if mopt < 0 or mopt > 5000:
            # Out of range in native order: the file has the other order
            return '>' if SYS_LITTLE_ENDIAN else '<'
        # In range: the file is in native byte order
        return '<' if SYS_LITTLE_ENDIAN else '>'

    def initialize_read(self):
        ''' Run when beginning read of variables

        Sets up readers from parameters in `self`
        '''
        self.dtypes = convert_dtypes(mdtypes_template, self.byte_order)
        self._matrix_reader = VarReader4(self)

    def read_var_header(self):
        ''' Read and return header, next position

        Parameters
        ----------
        None

        Returns
        -------
        header : object
           object that can be passed to self.read_var_array, and that
           has attributes ``name`` and ``is_global``
        next_position : int
           position in stream of next variable
        '''
        hdr = self._matrix_reader.read_header()
        # Use Python integers: the header dims are fixed-width NumPy
        # integers whose product could silently wrap around.
        n_elements = math.prod(int(d) for d in hdr.dims)
        remaining_bytes = hdr.dtype.itemsize * n_elements
        # Full complex data are stored as two consecutive blocks; sparse
        # data are not doubled here.
        if hdr.is_complex and hdr.mclass != mxSPARSE_CLASS:
            remaining_bytes *= 2
        next_position = self.mat_stream.tell() + remaining_bytes
        return hdr, next_position

    def read_var_array(self, header, process=True):
        ''' Read array, given `header`

        Parameters
        ----------
        header : header object
           object with fields defining variable header
        process : {True, False}, optional
           If True, apply recursive post-processing during loading of array.

        Returns
        -------
        arr : array
           array with post-processing applied or not according to
           `process`.
        '''
        return self._matrix_reader.array_from_header(header, process)

    def get_variables(self, variable_names=None):
        ''' get variables from stream as dictionary

        Parameters
        ----------
        variable_names : None or str or sequence of str, optional
            variable name, or sequence of variable names to get from Mat file /
            file stream. If None, then get all variables in file.

        Returns
        -------
        mdict : dict
            Maps each variable name that was read to its array.
        '''
        if isinstance(variable_names, str):
            variable_names = [variable_names]
        elif variable_names is not None:
            # private copy: names are removed from it as they are found
            variable_names = list(variable_names)
        self.mat_stream.seek(0)
        # set up variable reader
        self.initialize_read()
        mdict = {}
        while not self.end_of_stream():
            hdr, next_position = self.read_var_header()
            name = 'None' if hdr.name is None else hdr.name.decode('latin1')
            if variable_names is not None and name not in variable_names:
                self.mat_stream.seek(next_position)
                continue
            mdict[name] = self.read_var_array(hdr)
            self.mat_stream.seek(next_position)
            if variable_names is not None:
                variable_names.remove(name)
                if not variable_names:
                    # every requested variable has been read
                    break
        return mdict

    def list_variables(self):
        ''' list variables from stream

        Returns
        -------
        var_info : list of tuple
            One ``(name, shape, class name)`` tuple per variable; the class
            name is 'unknown' for unrecognised class codes.
        '''
        self.mat_stream.seek(0)
        # set up variable reader
        self.initialize_read()
        var_info = []
        while not self.end_of_stream():
            hdr, next_position = self.read_var_header()
            name = 'None' if hdr.name is None else hdr.name.decode('latin1')
            shape = self._matrix_reader.shape_from_header(hdr)
            info = mclass_info.get(hdr.mclass, 'unknown')
            var_info.append((name, shape, info))

            self.mat_stream.seek(next_position)
        return var_info
|
|
|
|
|
def arr_to_2d(arr, oned_as='row'):
    ''' Return `arr` reshaped so that it has exactly two dimensions

    The target shape is whatever ``matdims`` reports for `arr`.

    Parameters
    ----------
    arr : array
    oned_as : {'row', 'column'}, optional
       Whether to reshape 1-D vectors as row vectors or column vectors.
       See documentation for ``matdims`` for more detail

    Returns
    -------
    arr2d : array
       2-D version of the array

    Raises
    ------
    ValueError
        If ``matdims`` reports more than two dimensions for `arr`.
    '''
    target_shape = matdims(arr, oned_as)
    if len(target_shape) <= 2:
        return arr.reshape(target_shape)
    raise ValueError('Matlab 4 files cannot save arrays with more than '
                     '2 dimensions')
|
|
|
|
|
class VarWriter4:
    ''' Class to write matlab 4 variables

    Parameters
    ----------
    file_writer : object
        Writer providing the attributes ``file_stream`` and ``oned_as``;
        both are copied onto this instance.
    '''

    def __init__(self, file_writer):
        self.file_stream = file_writer.file_stream
        self.oned_as = file_writer.oned_as

    def write_bytes(self, arr):
        ''' Write the bytes of array `arr` to the stream in Fortran order '''
        self.file_stream.write(arr.tobytes(order='F'))

    def write_string(self, s):
        ''' Write `s` to the stream unchanged '''
        self.file_stream.write(s)

    def write_header(self, name, shape, P=miDOUBLE, T=mxFULL_CLASS, imagf=0):
        ''' Write header for given data options

        Parameters
        ----------
        name : str
            name of variable
        shape : sequence
            Shape of array as it will be read in matlab
        P : int, optional
            code for mat4 data type, one of ``miDOUBLE, miSINGLE, miINT32,
            miINT16, miUINT16, miUINT8``
        T : int, optional
            code for mat4 matrix class, one of ``mxFULL_CLASS, mxCHAR_CLASS,
            mxSPARSE_CLASS``
        imagf : int, optional
            flag indicating complex

        Raises
        ------
        UnicodeEncodeError
            If `name` cannot be encoded as latin-1; nothing is written to
            the stream in that case.
        '''
        # Encode first so that an unencodable name fails before any part
        # of the header has reached the stream.
        name_bytes = (name + '\0').encode('latin1')
        header = np.empty((), mdtypes_template['header'])
        # The header is written in native byte order; M records which.
        M = 0 if SYS_LITTLE_ENDIAN else 1
        O = 0
        header['mopt'] = (M * 1000 +
                          O * 100 +
                          P * 10 +
                          T)
        header['mrows'] = shape[0]
        header['ncols'] = shape[1]
        header['imagf'] = imagf
        header['namlen'] = len(name_bytes)
        self.write_bytes(header)
        self.write_string(name_bytes)

    def write(self, arr, name):
        ''' Write matrix `arr`, with name `name`

        Parameters
        ----------
        arr : array_like
            array to write
        name : str
            name in matlab workspace

        Raises
        ------
        TypeError
            If `arr` converts to an object or void dtype array.
        '''
        # Sparse input must be detected before np.asarray is applied to it.
        if scipy.sparse.issparse(arr):
            self.write_sparse(arr, name)
            return
        arr = np.asarray(arr)
        dt = arr.dtype
        if not dt.isnative:
            # headers are written in native order, so the data must be too
            arr = arr.astype(dt.newbyteorder('='))
        dtt = dt.type
        if dtt is np.object_:
            raise TypeError('Cannot save object arrays in Mat4')
        elif dtt is np.void:
            raise TypeError('Cannot save void type arrays')
        elif dtt in (np.str_, np.bytes_):
            self.write_char(arr, name)
            return
        self.write_numeric(arr, name)

    def write_numeric(self, arr, name):
        ''' Write numeric array `arr` as a full matrix named `name`

        Dtypes without an entry in ``np_to_mtypes`` are converted to double
        precision (real or complex) before writing.  Complex data are
        written as the real part followed by the imaginary part.
        '''
        arr = arr_to_2d(arr, self.oned_as)
        imagf = arr.dtype.kind == 'c'
        try:
            P = np_to_mtypes[arr.dtype.str[1:]]
        except KeyError:
            # complex128 is spelled 'c16' as a dtype string; 'c128' is not
            # a valid NumPy dtype, so name the type explicitly.
            arr = arr.astype(np.complex128 if imagf else 'f8')
            P = miDOUBLE
        else:
            # Extended-precision complex dtypes are tagged miDOUBLE but
            # their components are wider than 8 bytes; convert so that the
            # bytes written match the data type declared in the header.
            if imagf and P == miDOUBLE and arr.dtype.itemsize != 16:
                arr = arr.astype(np.complex128)
        self.write_header(name,
                          arr.shape,
                          P=P,
                          T=mxFULL_CLASS,
                          imagf=imagf)
        if imagf:
            self.write_bytes(arr.real)
            self.write_bytes(arr.imag)
        else:
            self.write_bytes(arr)

    def write_char(self, arr, name):
        ''' Write string array `arr` as a char matrix named `name`

        Unicode data are re-encoded as latin-1, one byte per character.
        '''
        if (arr.dtype.type == np.str_
                and arr.dtype.itemsize != np.dtype('U1').itemsize):
            # split multi-character strings into single characters
            arr = arr_to_chars(arr)
        arr = arr_to_2d(arr, self.oned_as)
        dims = arr.shape
        self.write_header(
            name,
            dims,
            P=miUINT8,
            T=mxCHAR_CLASS)
        if arr.dtype.kind == 'U':
            # Recode unicode to latin1: view the whole array as a single
            # string, encode it, then view the bytes with the same shape.
            n_chars = math.prod(dims)
            st_arr = np.ndarray(shape=(),
                                dtype=arr_dtype_number(arr, n_chars),
                                buffer=arr)
            st = st_arr.item().encode('latin-1')
            arr = np.ndarray(shape=dims, dtype='S1', buffer=st)
        self.write_bytes(arr)

    def write_sparse(self, arr, name):
        ''' Sparse matrices are 2-D

        See docstring for VarReader4.read_sparse_array
        '''
        A = arr.tocoo()  # coordinate (row, column, value) form
        imagf = A.dtype.kind == 'c'
        # One row per stored value plus a final row holding the shape;
        # complex data get a fourth column for the imaginary parts.
        ijv = np.zeros((A.nnz + 1, 3 + imagf), dtype='f8')
        ijv[:-1, 0] = A.row
        ijv[:-1, 1] = A.col
        ijv[:-1, 0:2] += 1  # stored indices are 1-based
        if imagf:
            ijv[:-1, 2] = A.data.real
            ijv[:-1, 3] = A.data.imag
        else:
            ijv[:-1, 2] = A.data
        ijv[-1, 0:2] = A.shape
        self.write_header(
            name,
            ijv.shape,
            P=miDOUBLE,
            T=mxSPARSE_CLASS)
        self.write_bytes(ijv)
|
|
|
|
|
class MatFile4Writer:
    ''' Class for writing matlab 4 format files

    Parameters
    ----------
    file_stream : object
        Stream the variables are written to.
    oned_as : {None, 'row', 'column'}, optional
        How 1-D arrays are reshaped for writing; None means 'row'.
    '''

    def __init__(self, file_stream, oned_as=None):
        self.file_stream = file_stream
        self.oned_as = 'row' if oned_as is None else oned_as
        # created on each call to put_variables
        self._matrix_writer = None

    def put_variables(self, mdict, write_header=None):
        ''' Write variables in `mdict` to stream

        Parameters
        ----------
        mdict : mapping
           mapping whose ``items`` method returns (name, contents) pairs,
           where ``name`` is the name the variable will appear under in the
           matlab workspace on file load, and ``contents`` is something
           writeable to a matlab file, such as a NumPy array.
        write_header : {None, True, False}
           Accepted but not used by this method: nothing other than the
           variables themselves is written, whatever value is passed.
        '''
        writer = VarWriter4(self)
        self._matrix_writer = writer
        for var_name, contents in mdict.items():
            writer.write(contents, var_name)
|
|