Sam Chaudry

Upload folder using huggingface_hub

7885a28 verified about 1 month ago

16.4 kB

	"""Indexing mixin for sparse array/matrix classes.
	"""
	import numpy as np
	from ._sputils import isintlike
	from ._base import sparray, issparse

	# Types treated as scalar integer indices by the isinstance() dispatch below:
	# the builtin int and any NumPy integer scalar.
	INT_TYPES = (int, np.integer)


	def _broadcast_arrays(a, b):
	"""
	Same as np.broadcast_arrays(a, b) but old writeability rules.

	NumPy >= 1.17.0 transitions broadcast_arrays to return
	read-only arrays. Set writeability explicitly to avoid warnings.
	Retain the old writeability rules, as our Cython code assumes
	the old behavior.
	"""
	x, y = np.broadcast_arrays(a, b)
	x.flags.writeable = a.flags.writeable
	y.flags.writeable = b.flags.writeable
	return x, y


	class IndexMixin:
	    """
	    This class provides common dispatching and validation logic for indexing.

	    ``__getitem__`` and ``__setitem__`` normalize the key with
	    ``_validate_indices`` and then dispatch on the kind of each index
	    (int, slice or array) to the ``_get_*`` / ``_set_*`` hooks. The hooks
	    defined here raise ``NotImplementedError``, except
	    ``_set_arrayXarray_sparse`` which falls back to densifying the value.
	    """

	    def __getitem__(self, key):
	        """Return the element(s) of `self` selected by `key`.

	        The key is validated first, which yields the normalized index tuple
	        and the requested result shape. The lookup is dispatched to the
	        matching ``_get_*`` hook and the result is reshaped to the requested
	        shape where needed (e.g. when ``None`` appears in the key).

	        Raises
	        ------
	        IndexError
	            If the index would produce more than 2 dimensions or the row
	            and column index arrays do not end up with equal shapes.
	        """
	        index, new_shape = self._validate_indices(key)

	        # 1D array
	        if len(index) == 1:
	            idx = index[0]
	            # unwrap a 0-d array so it is dispatched as an integer
	            if isinstance(idx, np.ndarray) and idx.shape == ():
	                idx = idx.item()
	            if isinstance(idx, INT_TYPES):
	                res = self._get_int(idx)
	            elif isinstance(idx, slice):
	                res = self._get_slice(idx)
	            else:  # assume array idx
	                res = self._get_array(idx)

	            # package the result and return
	            if not isinstance(self, sparray):
	                return res
	            # handle np.newaxis in idx when result would otherwise be a scalar
	            if res.shape == () and new_shape != ():
	                if len(new_shape) == 1:
	                    return self.__class__([res], shape=new_shape, dtype=self.dtype)
	                if len(new_shape) == 2:
	                    return self.__class__([[res]], shape=new_shape, dtype=self.dtype)
	            return res.reshape(new_shape)

	        # 2D array
	        row, col = index

	        # Dispatch to specialized methods.
	        if isinstance(row, INT_TYPES):
	            if isinstance(col, INT_TYPES):
	                res = self._get_intXint(row, col)
	            elif isinstance(col, slice):
	                res = self._get_intXslice(row, col)
	            elif col.ndim in (1, 2):
	                # 1-D and 2-D column arrays share the same hook
	                res = self._get_intXarray(row, col)
	            else:
	                raise IndexError('index results in >2 dimensions')
	        elif isinstance(row, slice):
	            if isinstance(col, INT_TYPES):
	                res = self._get_sliceXint(row, col)
	            elif isinstance(col, slice):
	                if row == slice(None) and row == col:
	                    # A[:, :] selects everything, so a plain copy suffices
	                    res = self.copy()
	                else:
	                    res = self._get_sliceXslice(row, col)
	            elif col.ndim == 1:
	                res = self._get_sliceXarray(row, col)
	            else:
	                raise IndexError('index results in >2 dimensions')
	        else:
	            if isinstance(col, INT_TYPES):
	                res = self._get_arrayXint(row, col)
	            elif isinstance(col, slice):
	                res = self._get_arrayXslice(row, col)
	            # arrayXarray preprocess
	            elif (row.ndim == 2 and row.shape[1] == 1
	                  and (col.ndim == 1 or col.shape[0] == 1)):
	                # outer indexing
	                res = self._get_columnXarray(row[:, 0], col.ravel())
	            else:
	                # inner indexing
	                row, col = _broadcast_arrays(row, col)
	                if row.shape != col.shape:
	                    raise IndexError('number of row and column indices differ')
	                if row.size == 0:
	                    res = self.__class__(np.atleast_2d(row).shape, dtype=self.dtype)
	                else:
	                    res = self._get_arrayXarray(row, col)

	        # handle spmatrix (must be 2d, dont let 1d new_shape start reshape)
	        if not isinstance(self, sparray):
	            if new_shape == () or (len(new_shape) == 1 and res.ndim != 0):
	                # res handles cases not inflated by None
	                return res
	            if len(new_shape) == 1:
	                # shape inflated to 1D by None in index. Make 2D
	                new_shape = (1,) + new_shape
	            # reshape if needed (when None changes shape, e.g. A[1,:,None])
	            return res if new_shape == res.shape else res.reshape(new_shape)

	        # package the result and return
	        if res.shape != new_shape:
	            # handle formats that support indexing but not 1D (lil for now)
	            if self.format == "lil" and len(new_shape) != 2:
	                if res.shape == ():
	                    return self._coo_container([res], shape=new_shape)
	                return res.tocoo().reshape(new_shape)
	            return res.reshape(new_shape)
	        return res

	    def __setitem__(self, key, x):
	        """Assign `x` to the element(s) of `self` selected by `key`.

	        Slices are expanded to integer ranges, row and column indices are
	        broadcast against each other, and `x` is broadcast to the shape of
	        the indices before being handed to the matching ``_set_*`` hook.
	        Assignments that select no elements return without calling a hook.

	        Raises
	        ------
	        ValueError
	            If more than one value is assigned to a single item, or a
	            sparse `x` has a shape that cannot be matched to the indices.
	        IndexError
	            If row and column indices do not end up with equal shapes.
	        """
	        index, _ = self._validate_indices(key)

	        # 1D array
	        if len(index) == 1:
	            idx = index[0]

	            if issparse(x):
	                x = x.toarray()
	            else:
	                x = np.asarray(x, dtype=self.dtype)

	            if isinstance(idx, INT_TYPES):
	                if x.size != 1:
	                    raise ValueError('Trying to assign a sequence to an item')
	                self._set_int(idx, x.flat[0])
	                return

	            if isinstance(idx, slice):
	                # check for simple case of slice that gives 1 item
	                # Note: Python `range` does not use lots of memory
	                idx_range = range(*idx.indices(self.shape[0]))
	                if len(idx_range) == 1 and x.size == 1:
	                    self._set_int(idx_range[0], x.flat[0])
	                    return
	                # same start/stop/step as idx_range, materialized as an array
	                idx = np.arange(idx_range.start, idx_range.stop, idx_range.step)
	                idx_shape = idx.shape
	            else:
	                idx_shape = idx.squeeze().shape
	            # broadcast scalar to full 1d
	            if x.squeeze().shape != idx_shape:
	                x = np.broadcast_to(x, idx.shape)
	            if x.size != 0:
	                self._set_array(idx, x)
	            return

	        # 2D array
	        row, col = index

	        if isinstance(row, INT_TYPES) and isinstance(col, INT_TYPES):
	            x = np.asarray(x, dtype=self.dtype)
	            if x.size != 1:
	                raise ValueError('Trying to assign a sequence to an item')
	            self._set_intXint(row, col, x.flat[0])
	            return

	        # Expand slices: rows become a column vector, columns a row vector,
	        # so that broadcasting them yields the full selected grid.
	        if isinstance(row, slice):
	            row = np.arange(*row.indices(self.shape[0]))[:, None]
	        else:
	            row = np.atleast_1d(row)

	        if isinstance(col, slice):
	            col = np.arange(*col.indices(self.shape[1]))[None, :]
	            if row.ndim == 1:
	                row = row[:, None]
	        else:
	            col = np.atleast_1d(col)

	        i, j = _broadcast_arrays(row, col)
	        if i.shape != j.shape:
	            raise IndexError('number of row and column indices differ')

	        if issparse(x):
	            if 0 in x.shape:
	                return
	            if i.ndim == 1:
	                # Inner indexing, so treat them like row vectors.
	                i = i[None]
	                j = j[None]
	            x = x.tocoo(copy=False).reshape(x._shape_as_2d, copy=True)
	            broadcast_row = x.shape[0] == 1 and i.shape[0] != 1
	            broadcast_col = x.shape[1] == 1 and i.shape[1] != 1
	            if not ((broadcast_row or x.shape[0] == i.shape[0]) and
	                    (broadcast_col or x.shape[1] == i.shape[1])):
	                raise ValueError('shape mismatch in assignment')
	            x.sum_duplicates()
	            self._set_arrayXarray_sparse(i, j, x)
	        else:
	            # Make x and i into the same shape
	            x = np.asarray(x, dtype=self.dtype)
	            if x.squeeze().shape != i.squeeze().shape:
	                x = np.broadcast_to(x, i.shape)
	            if x.size == 0:
	                return
	            x = x.reshape(i.shape)
	            self._set_arrayXarray(i, j, x)

	    def _validate_indices(self, key):
	        """Returns two tuples: (index tuple, requested shape tuple)

	        The first pass over `key` locates a single Ellipsis, keeps ``None``
	        entries, converts array-likes to arrays and counts how many axes
	        are consumed so that missing axes can be filled with full slices.
	        The second pass bounds-checks and normalizes integer indices,
	        replaces boolean arrays by their ``nonzero()`` index arrays, and
	        builds the requested result shape.

	        Raises
	        ------
	        IndexError
	            For more than one Ellipsis, sparse (non-boolean) indices,
	            out-of-range integers, mismatched boolean index shapes, too
	            many indices, index arrays of differing shapes, or a result
	            with more than 2 dimensions.
	        """
	        # single ellipsis
	        if key is Ellipsis:
	            return (slice(None),) * self.ndim, self.shape

	        if not isinstance(key, tuple):
	            key = [key]

	        ellps_pos = None
	        index_1st = []
	        prelim_ndim = 0
	        for i, idx in enumerate(key):
	            if idx is Ellipsis:
	                if ellps_pos is not None:
	                    raise IndexError('an index can only have a single ellipsis')
	                ellps_pos = i
	            elif idx is None:
	                index_1st.append(idx)
	            elif isinstance(idx, slice) or isintlike(idx):
	                index_1st.append(idx)
	                prelim_ndim += 1
	            elif (ix := _compatible_boolean_index(idx, self.ndim)) is not None:
	                index_1st.append(ix)
	                # a boolean mask consumes one axis per mask dimension
	                prelim_ndim += ix.ndim
	            elif issparse(idx):
	                # TODO: make sparse matrix indexing work for sparray
	                raise IndexError(
	                    'Indexing with sparse matrices is not supported '
	                    'except boolean indexing where matrix and index '
	                    'are equal shapes.')
	            else:  # dense array
	                index_1st.append(np.asarray(idx))
	                prelim_ndim += 1
	        # full slices for every axis the key did not address
	        ellip_slices = (self.ndim - prelim_ndim) * [slice(None)]
	        if ellip_slices:
	            if ellps_pos is None:
	                index_1st.extend(ellip_slices)
	            else:
	                index_1st = index_1st[:ellps_pos] + ellip_slices + index_1st[ellps_pos:]

	        # second pass (have processed ellipsis and preprocessed arrays)
	        idx_shape = []
	        index_ndim = 0
	        index = []
	        array_indices = []
	        for i, idx in enumerate(index_1st):
	            if idx is None:
	                idx_shape.append(1)
	            elif isinstance(idx, slice):
	                index.append(idx)
	                Ms = self._shape[index_ndim]
	                len_slice = len(range(*idx.indices(Ms)))
	                idx_shape.append(len_slice)
	                index_ndim += 1
	            elif isintlike(idx):
	                N = self._shape[index_ndim]
	                if not (-N <= idx < N):
	                    raise IndexError(f'index ({idx}) out of range')
	                idx = int(idx + N if idx < 0 else idx)
	                index.append(idx)
	                index_ndim += 1
	            # bool array (checked in first pass)
	            elif idx.dtype.kind == 'b':
	                ix = idx
	                tmp_ndim = index_ndim + ix.ndim
	                mid_shape = self._shape[index_ndim:tmp_ndim]
	                if ix.shape != mid_shape:
	                    # report the actual mask shape first, then the expected one
	                    raise IndexError(
	                        f"bool index {i} has shape {ix.shape} instead of {mid_shape}"
	                    )
	                index.extend(ix.nonzero())
	                array_indices.extend(range(index_ndim, tmp_ndim))
	                index_ndim = tmp_ndim
	            else:  # dense array
	                N = self._shape[index_ndim]
	                idx = self._asindices(idx, N)
	                index.append(idx)
	                array_indices.append(index_ndim)
	                index_ndim += 1
	        if index_ndim > self.ndim:
	            raise IndexError(
	                f'invalid index ndim. Array is {self.ndim}D. Index needs {index_ndim}D'
	            )
	        if len(array_indices) > 1:
	            idx_arrays = _broadcast_arrays(*(index[i] for i in array_indices))
	            if any(idx_arrays[0].shape != ix.shape for ix in idx_arrays[1:]):
	                shapes = " ".join(str(ix.shape) for ix in idx_arrays)
	                msg = (f'shape mismatch: indexing arrays could not be broadcast '
	                       f'together with shapes {shapes}')
	                raise IndexError(msg)
	            # TODO: handle this for nD (adjacent arrays stay, separated move to start)
	            idx_shape = list(idx_arrays[0].shape) + idx_shape
	        elif len(array_indices) == 1:
	            arr_index = array_indices[0]
	            arr_shape = list(index[arr_index].shape)
	            idx_shape = idx_shape[:arr_index] + arr_shape + idx_shape[arr_index:]
	        if (ndim := len(idx_shape)) > 2:
	            raise IndexError(f'Only 1D or 2D arrays allowed. Index makes {ndim}D')
	        return tuple(index), tuple(idx_shape)

	    def _asindices(self, idx, length):
	        """Convert `idx` to a valid index for an axis with a given length.

	        Negative entries are shifted by `length`. The shift is applied to a
	        copy when the converted array is `idx` itself or does not own its
	        data, so the caller's array is left unchanged.

	        Subclasses that need special validation can override this method.

	        Raises
	        ------
	        IndexError
	            If `idx` cannot be converted to an array, is not 1-D or 2-D,
	            or contains a value outside ``[-length, length)``.
	        """
	        try:
	            x = np.asarray(idx)
	        except (ValueError, TypeError, MemoryError) as e:
	            raise IndexError('invalid index') from e

	        if x.ndim not in (1, 2):
	            raise IndexError('Index dimension must be 1 or 2')

	        # nothing to bounds-check (and max()/min() fail on empty arrays)
	        if x.size == 0:
	            return x

	        # Check bounds
	        max_indx = x.max()
	        if max_indx >= length:
	            raise IndexError('index (%d) out of range' % max_indx)

	        min_indx = x.min()
	        if min_indx < 0:
	            if min_indx < -length:
	                raise IndexError('index (%d) out of range' % min_indx)
	            if x is idx or not x.flags.owndata:
	                x = x.copy()
	            x[x < 0] += length
	        return x

	    def _getrow(self, i):
	        """Return a copy of row i of the matrix, as a (1 x n) row vector.

	        Negative `i` counts from the last row. Raises IndexError when `i`
	        is out of range.
	        """
	        M, _ = self.shape
	        i = int(i)
	        if i < -M or i >= M:
	            raise IndexError('index (%d) out of range' % i)
	        if i < 0:
	            i += M
	        return self._get_intXslice(i, slice(None))

	    def _getcol(self, i):
	        """Return a copy of column i of the matrix, as a (m x 1) column vector.

	        Negative `i` counts from the last column. Raises IndexError when
	        `i` is out of range.
	        """
	        _, N = self.shape
	        i = int(i)
	        if i < -N or i >= N:
	            raise IndexError('index (%d) out of range' % i)
	        if i < 0:
	            i += N
	        return self._get_sliceXint(slice(None), i)

	    # ------------------------------------------------------------------
	    # Hooks called by __getitem__ / __setitem__. The versions defined
	    # here only raise NotImplementedError.
	    # ------------------------------------------------------------------

	    def _get_int(self, idx):
	        raise NotImplementedError()

	    def _get_slice(self, idx):
	        raise NotImplementedError()

	    def _get_array(self, idx):
	        raise NotImplementedError()

	    def _get_intXint(self, row, col):
	        raise NotImplementedError()

	    def _get_intXarray(self, row, col):
	        raise NotImplementedError()

	    def _get_intXslice(self, row, col):
	        raise NotImplementedError()

	    def _get_sliceXint(self, row, col):
	        raise NotImplementedError()

	    def _get_sliceXslice(self, row, col):
	        raise NotImplementedError()

	    def _get_sliceXarray(self, row, col):
	        raise NotImplementedError()

	    def _get_arrayXint(self, row, col):
	        raise NotImplementedError()

	    def _get_arrayXslice(self, row, col):
	        raise NotImplementedError()

	    def _get_columnXarray(self, row, col):
	        raise NotImplementedError()

	    def _get_arrayXarray(self, row, col):
	        raise NotImplementedError()

	    def _set_int(self, idx, x):
	        raise NotImplementedError()

	    def _set_array(self, idx, x):
	        raise NotImplementedError()

	    def _set_intXint(self, row, col, x):
	        raise NotImplementedError()

	    def _set_arrayXarray(self, row, col, x):
	        raise NotImplementedError()

	    def _set_arrayXarray_sparse(self, row, col, x):
	        """Assign sparse `x` at (row, col) by converting it to a dense array."""
	        # Fall back to densifying x
	        x = np.asarray(x.toarray(), dtype=self.dtype)
	        x, _ = _broadcast_arrays(x, row)
	        self._set_arrayXarray(row, col, x)


	def _compatible_boolean_index(idx, desired_ndim):
	"""Check for boolean array or array-like. peek before asarray for array-like"""
	# use attribute ndim to indicate a compatible array and check dtype
	# if not, look at 1st element as quick rejection of bool, else slower asanyarray
	if not hasattr(idx, 'ndim'):
	# is first element boolean?
	try:
	ix = next(iter(idx), None)
	for _ in range(desired_ndim):
	if isinstance(ix, bool):
	break
	ix = next(iter(ix), None)
	else:
	return None
	except TypeError:
	return None
	# since first is boolean, construct array and check all elements
	idx = np.asanyarray(idx)

	if idx.dtype.kind == 'b':
	return idx
	return None