Sam Chaudry

Upload folder using huggingface_hub

7885a28 verified about 1 month ago

13 kB

	# Authors: Travis Oliphant, Matthew Brett

	"""
	Base classes for MATLAB file stream reading.

	MATLAB is a registered trademark of The MathWorks, Inc.
	"""

	from typing import Final

	import numpy as np
	from scipy._lib import doccer

	from . import _byteordercodes as boc

	__all__ = [
	'MatReadError', 'MatReadWarning', 'MatWriteError',
	]

	class MatReadError(Exception):
	    """Raised when a MAT-file cannot be read."""


	class MatWriteError(Exception):
	    """Raised when a MAT-file cannot be written."""


	class MatReadWarning(UserWarning):
	    """Warning category for non-fatal problems met while reading."""


	# Reusable parameter descriptions, keyed by the name used in ``%(name)s``
	# placeholders inside docstrings (for example ``%(load_args)s``).
	doc_dict = \
	{'file_arg':
	'''file_name : str
	Name of the mat file (do not need .mat extension if
	appendmat==True) Can also pass open file-like object.''',
	'append_arg':
	'''appendmat : bool, optional
	True to append the .mat extension to the end of the given
	filename, if not already present. Default is True.''',
	'load_args':
	'''byte_order : str or None, optional
	None by default, implying byte order guessed from mat
	file. Otherwise can be one of ('native', '=', 'little', '<',
	'BIG', '>').
	mat_dtype : bool, optional
	If True, return arrays in same dtype as would be loaded into
	MATLAB (instead of the dtype with which they are saved).
	squeeze_me : bool, optional
	Whether to squeeze unit matrix dimensions or not.
	chars_as_strings : bool, optional
	Whether to convert char arrays to string arrays.
	matlab_compatible : bool, optional
	Returns matrices as would be loaded by MATLAB (implies
	squeeze_me=False, chars_as_strings=False, mat_dtype=True,
	struct_as_record=True).''',
	'struct_arg':
	'''struct_as_record : bool, optional
	Whether to load MATLAB structs as NumPy record arrays, or as
	old-style NumPy arrays with dtype=object. Setting this flag to
	False replicates the behavior of SciPy version 0.7.x (returning
	numpy object arrays). The default setting is True, because it
	allows easier round-trip load and save of MATLAB files.''',
	'matstream_arg':
	'''mat_stream : file-like
	Object with file API, open for reading.''',
	'long_fields':
	'''long_field_names : bool, optional
	* False - maximum field name length in a structure is 31 characters
	which is the documented maximum length. This is the default.
	* True - maximum field name length in a structure is 63 characters
	which works for MATLAB 7.6''',
	'do_compression':
	'''do_compression : bool, optional
	Whether to compress matrices on write. Default is False.''',
	'oned_as':
	'''oned_as : {'row', 'column'}, optional
	If 'column', write 1-D NumPy arrays as column vectors.
	If 'row', write 1D NumPy arrays as row vectors.''',
	'unicode_strings':
	'''unicode_strings : bool, optional
	If True, write strings as Unicode, else MATLAB usual encoding.'''}

	# Decorator produced by ``doccer.filldoc`` from ``doc_dict``; it is applied
	# to callables whose docstrings contain the placeholders above.
	docfiller: Final = doccer.filldoc(doc_dict)

	'''

	Note on architecture
	======================

	There are three sets of parameters relevant for reading files. The
	first are file read parameters - containing options that are common
	for reading the whole file, and therefore every variable within that
	file. At the moment these are:

	* mat_stream
	* dtypes (derived from byte code)
	* byte_order
	* chars_as_strings
	* squeeze_me
	* struct_as_record (MATLAB 5 files)
	* class_dtypes (derived from order code, MATLAB 5 files)
	* codecs (MATLAB 5 files)
	* uint16_codec (MATLAB 5 files)

	Another set of parameters are those that apply only to the current
	variable being read - the header:

	* header related variables (different for v4 and v5 mat files)
	* is_complex
	* mclass
	* var_stream

	With the header, we need ``next_position`` to tell us where the next
	variable in the stream is.

	Then, for each element in a matrix, there can be *element read
	parameters*. An element is, for example, one element in a MATLAB cell
	array. At the moment, these are:

	* mat_dtype

	The file-reading object contains the file read parameters. The
	header is passed around as a data object, or may be read and discarded
	in a single function. The element read parameters - here just
	``mat_dtype`` - are passed into a general post-processing function - see
	``mio_utils`` for details.
	'''


	def convert_dtypes(dtype_template, order_code):
	    """Build a copy of a dtype mapping with a given byte order applied.

	    Parameters
	    ----------
	    dtype_template : mapping
	        Mapping whose values are accepted by ``np.dtype(val)``. It is
	        copied, not modified.
	    order_code : str
	        Byte-order code understood by ``dtype.newbyteorder()``.

	    Returns
	    -------
	    dtypes : mapping
	        Copy of `dtype_template` (made with its own ``copy()``) in which
	        every value is ``np.dtype(val).newbyteorder(order_code)``.

	    """
	    converted = dtype_template.copy()
	    for key, spec in dtype_template.items():
	        converted[key] = np.dtype(spec).newbyteorder(order_code)
	    return converted


	def read_dtype(mat_stream, a_dtype):
	    """
	    Read one item of a known dtype from a byte stream.

	    Parameters
	    ----------
	    mat_stream : file_like object
	        MATLAB (tm) mat file stream; ``a_dtype.itemsize`` bytes are read
	        from its current position.
	    a_dtype : dtype
	        dtype of the item to read. `a_dtype` is assumed to already have
	        the correct endianness.

	    Returns
	    -------
	    arr : ndarray
	        0-d array of dtype `a_dtype` backed by the bytes just read.

	    """
	    raw = mat_stream.read(a_dtype.itemsize)
	    return np.ndarray(shape=(), dtype=a_dtype, buffer=raw, order='F')


	def matfile_version(file_name, *, appendmat=True):
	    """
	    Report the (major, minor) format version a mat file appears to use.

	    The major number identifies the file format:

	    #. 0,x -> version 4 format mat files
	    #. 1,x -> version 5 format mat files
	    #. 2,x -> version 7.3 format mat files (HDF format)

	    Parameters
	    ----------
	    file_name : str
	        Name of the mat file (do not need .mat extension if
	        appendmat==True). Can also pass open file-like object.
	    appendmat : bool, optional
	        True to append the .mat extension to the end of the given
	        filename, if not already present. Default is True.

	    Returns
	    -------
	    major_version : {0, 1, 2}
	        major MATLAB File format version
	    minor_version : int
	        minor MATLAB file format version

	    Raises
	    ------
	    MatReadError
	        If the file is too short to hold a header, or its leading
	        header bytes are all zero.
	    ValueError
	        The matfile version is unknown.

	    Notes
	    -----
	    Has the side effect of setting the file read pointer to 0
	    """
	    # Deferred import, kept as in the original; presumably this avoids a
	    # circular import with ``_mio`` - confirm before hoisting.
	    from ._mio import _open_file_context

	    with _open_file_context(file_name, appendmat=appendmat) as stream:
	        version = _get_matfile_version(stream)
	    return version


	# Alternative name bound to the same function.
	get_matfile_version = matfile_version


	# Number of leading bytes inspected to reject truncated / all-zero files.
	_HDR_N_BYTES = 20


	def _get_matfile_version(fileobj):
	    """Sniff the MAT-file format version from an open binary stream.

	    Parameters
	    ----------
	    fileobj : file-like
	        Seekable object whose ``read`` returns bytes.

	    Returns
	    -------
	    version : tuple of int
	        ``(0, 0)`` when any of the first four bytes is zero (treated as a
	        version 4 file); otherwise ``(major, minor)`` decoded from the
	        four bytes at offset 124, with ``major`` in ``{1, 2}``.

	    Raises
	    ------
	    MatReadError
	        If fewer than ``_HDR_N_BYTES`` bytes are available, if the first
	        ``_HDR_N_BYTES`` bytes are all zero, or if the file ends before
	        the version field at offsets 124-127.
	    ValueError
	        If the decoded major version is neither 1 nor 2.

	    Notes
	    -----
	    The stream position is reset to 0 on every path except the two
	    leading-header errors.
	    """
	    fileobj.seek(0)
	    hdr_bytes = fileobj.read(_HDR_N_BYTES)
	    if len(hdr_bytes) < _HDR_N_BYTES:
	        raise MatReadError("Mat file appears to be truncated")
	    if hdr_bytes.count(0) == _HDR_N_BYTES:
	        raise MatReadError("Mat file appears to be corrupt "
	                           f"(first {_HDR_N_BYTES} bytes == 0)")
	    # Mat4 files have a zero somewhere in first 4 bytes
	    if 0 in hdr_bytes[:4]:
	        fileobj.seek(0)
	        return (0, 0)
	    # For 5 format or 7.3 format we need to read an integer in the
	    # header. Bytes 124 through 127 hold a two-byte version followed by a
	    # two-byte endian test string.
	    fileobj.seek(124)
	    tst_str = fileobj.read(4)
	    fileobj.seek(0)
	    if len(tst_str) < 4:
	        # Without this check a short file fails below with IndexError.
	        raise MatReadError("Mat file appears to be truncated")
	    # The third byte is 'I' when the version bytes are stored minor-first,
	    # so the major byte sits at index 1; otherwise it sits at index 0.
	    maj_ind = int(tst_str[2] == ord('I'))
	    maj_val = int(tst_str[maj_ind])
	    min_val = int(tst_str[1 - maj_ind])
	    if maj_val in (1, 2):
	        return (maj_val, min_val)
	    raise ValueError(
	        f'Unknown mat file type, version {maj_val}, {min_val}')


	def matdims(arr, oned_as='column'):
	    """
	    Determine equivalent MATLAB dimensions for given array

	    Parameters
	    ----------
	    arr : ndarray
	        Input array
	    oned_as : {'column', 'row'}, optional
	        Whether 1-D arrays are returned as MATLAB row or column matrices.
	        Default is 'column'. Only consulted for non-empty 1-D input.

	    Returns
	    -------
	    dims : tuple
	        Shape tuple, in the form MATLAB expects it. Arrays with two or
	        more dimensions keep their shape unchanged, including empty ones.

	    Raises
	    ------
	    ValueError
	        If `arr` is a non-empty 1-D array and `oned_as` is neither
	        'column' nor 'row'.

	    Notes
	    -----
	    We had to decide what shape a 1 dimensional array would be by
	    default. ``np.atleast_2d`` thinks it is a row vector. The
	    default for a vector in MATLAB (e.g., ``>> 1:12``) is a row vector.

	    Versions of scipy up to and including 0.11 resulted (accidentally)
	    in 1-D arrays being read as column vectors. For the moment, we
	    maintain the same tradition here.

	    Examples
	    --------
	    >>> import numpy as np
	    >>> from scipy.io.matlab._miobase import matdims
	    >>> matdims(np.array(1)) # NumPy scalar
	    (1, 1)
	    >>> matdims(np.array([1])) # 1-D array, 1 element
	    (1, 1)
	    >>> matdims(np.array([1,2])) # 1-D array, 2 elements
	    (2, 1)
	    >>> matdims(np.array([[2],[3]])) # 2-D array, column vector
	    (2, 1)
	    >>> matdims(np.array([[2,3]])) # 2-D array, row vector
	    (1, 2)
	    >>> matdims(np.array([[[2,3]]])) # 3-D array, rowish vector
	    (1, 1, 2)
	    >>> matdims(np.array([])) # empty 1-D array
	    (0, 0)
	    >>> matdims(np.array([[]])) # empty 2-D array
	    (1, 0)
	    >>> matdims(np.array([[[]]])) # empty 3-D array
	    (1, 1, 0)

	    Optional argument flips 1-D shape behavior.

	    >>> matdims(np.array([1,2]), 'row') # 1-D array, 2 elements
	    (1, 2)

	    The argument has to make sense though

	    >>> matdims(np.array([1,2]), 'bizarre')
	    Traceback (most recent call last):
	    ...
	    ValueError: 1-D option "bizarre" is strange

	    """
	    shape = arr.shape
	    if shape == ():  # scalar
	        return (1, 1)
	    if len(shape) != 1:  # 2-D and higher: shape is used as is
	        return shape
	    if shape[0] == 0:  # empty 1-D, checked before `oned_as` is validated
	        return (0, 0)
	    if oned_as == 'column':
	        return shape + (1,)
	    if oned_as == 'row':
	        return (1,) + shape
	    raise ValueError(f'1-D option "{oned_as}" is strange')


	class MatVarReader:
	    """Interface that variable readers are expected to provide.

	    Every method of this base class is a placeholder that does nothing
	    and returns ``None``.
	    """

	    def __init__(self, file_reader):
	        """Accept the file reader; the base class does not store it."""

	    def read_header(self):
	        """Return the header of a variable (``None`` in the base class)."""
	        return None

	    def array_from_header(self, header):
	        """Read the array described by `header` (``None`` in the base class)."""
	        return None


	class MatFileReader:
	    """ Base object for reading mat files

	    To make this class functional, you will need to override the
	    following methods:

	    matrix_getter_factory   - gives object to fetch next matrix from stream
	    guess_byte_order        - guesses file byte order from file

	    The constructor stores its options as attributes of the same name
	    (``mat_stream``, ``byte_order``, ``mat_dtype``, ``squeeze_me``,
	    ``chars_as_strings``, ``struct_as_record``,
	    ``verify_compressed_data_integrity``, ``simplify_cells``) and
	    initializes ``dtypes`` to an empty dict.
	    """

	    @docfiller
	    def __init__(self, mat_stream,
	                 byte_order=None,
	                 mat_dtype=False,
	                 squeeze_me=False,
	                 chars_as_strings=True,
	                 matlab_compatible=False,
	                 struct_as_record=True,
	                 verify_compressed_data_integrity=True,
	                 simplify_cells=False):
	        '''
	        Initializer for mat file reader

	        mat_stream : file-like
	            object with file API, open for reading
	        %(load_args)s
	        '''
	        # Initialize stream
	        self.mat_stream = mat_stream
	        self.dtypes = {}
	        # A falsy byte_order means "work it out"; guess_byte_order may be
	        # overridden by subclasses and runs after mat_stream is set.
	        if not byte_order:
	            byte_order = self.guess_byte_order()
	        else:
	            byte_order = boc.to_numpy_code(byte_order)
	        self.byte_order = byte_order
	        self.struct_as_record = struct_as_record
	        if matlab_compatible:
	            # Overrides mat_dtype, squeeze_me and chars_as_strings.
	            self.set_matlab_compatible()
	        else:
	            self.squeeze_me = squeeze_me
	            self.chars_as_strings = chars_as_strings
	            self.mat_dtype = mat_dtype
	        self.verify_compressed_data_integrity = verify_compressed_data_integrity
	        self.simplify_cells = simplify_cells
	        if simplify_cells:
	            # simplify_cells wins over the squeeze_me / struct_as_record
	            # arguments and over matlab_compatible's squeeze_me=False.
	            self.squeeze_me = True
	            self.struct_as_record = False

	    def set_matlab_compatible(self):
	        ''' Sets options to return arrays as MATLAB loads them '''
	        self.mat_dtype = True
	        self.squeeze_me = False
	        self.chars_as_strings = False

	    def guess_byte_order(self):
	        ''' As we do not know what file type we have, assume native '''
	        return boc.native_code

	    def end_of_stream(self):
	        ''' Return True if no more bytes can be read from ``mat_stream``

	        Probes by reading a single byte. If a byte was read, the stream is
	        moved back by one so its position is unchanged. If nothing was
	        read, the position is left alone: stepping back there would
	        rewind onto the last byte (or before the start of an empty
	        stream).
	        '''
	        probe = self.mat_stream.read(1)
	        if not probe:
	            return True
	        # Un-read the probe byte.
	        self.mat_stream.seek(self.mat_stream.tell() - 1)
	        return False


	def arr_dtype_number(arr, num):
	    """Return the dtype of `arr` with its trailing size replaced by `num`.

	    The first two characters of ``arr.dtype.str`` are kept and ``str(num)``
	    is appended, e.g. a ``'<U5'`` array with ``num=1`` gives ``'<U1'``.
	    """
	    prefix = arr.dtype.str[:2]
	    return np.dtype(prefix + str(num))


	def arr_to_chars(arr):
	    """Convert a string array into an array of single characters.

	    The result has the shape of `arr` (``(1,)`` for a 0-d input) with one
	    extra trailing axis whose length is the string length taken from
	    ``arr.dtype.str``. Empty characters are replaced by a space; when
	    there are none, the returned array is built directly on `arr` as its
	    buffer rather than on a copy.
	    """
	    shape = list(arr.shape) or [1]
	    shape.append(int(arr.dtype.str[2:]))
	    chars = np.ndarray(shape=shape,
	                       dtype=arr_dtype_number(arr, 1),
	                       buffer=arr)
	    blank = chars == np.array('', dtype=chars.dtype)
	    if not np.any(blank):
	        return chars
	    # Copy before writing so the caller's array is left untouched.
	    chars = chars.copy()
	    chars[blank] = ' '
	    return chars