Sam Chaudry
Upload folder using huggingface_hub
7885a28 verified
raw
history blame
2.97 kB
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
import numpy as np
from sklearn.utils._array_api import get_namespace
def _attach_unique(y):
    """Return a view of ``y`` whose dtype metadata carries ``np.unique(y)``.

    Anything that is not a NumPy array is handed back untouched. The input
    array itself is never modified: the unique values live only on the dtype
    of the returned view.
    """
    if not isinstance(y, np.ndarray):
        return y

    try:
        # ``metadata`` is None when nothing has been attached, in which case
        # the membership test raises TypeError.
        already_attached = "unique" in y.dtype.metadata
    except (AttributeError, TypeError):
        already_attached = False

    if already_attached:
        # Nested calls reuse the values computed by the outermost call.
        return y

    tagged_dtype = np.dtype(y.dtype, metadata={"unique": np.unique(y)})
    return y.view(dtype=tagged_dtype)
def attach_unique(*ys, return_tuple=False):
    """Attach the unique values of each array in ``ys`` and return the results.

    Every NumPy array is replaced by a view whose dtype metadata holds its
    unique values; the original arrays are left unchanged.

    IMPORTANT: The output of this function should NEVER be returned in
    functions. This is to avoid this pattern:

    .. code:: python

        y = np.array([1, 2, 3])
        y = attach_unique(y)
        y[1] = -1
        # now np.unique(y) will be different from cached_unique(y)

    Parameters
    ----------
    *ys : sequence of array-like
        Input data arrays.

    return_tuple : bool, default=False
        If True, always return a tuple even if there is only one array.

    Returns
    -------
    ys : tuple of array-like or array-like
        Input data with unique values attached.
    """
    tagged = tuple(map(_attach_unique, ys))
    if return_tuple or len(tagged) != 1:
        return tagged
    # A single input is unwrapped unless the caller asked for a tuple.
    return tagged[0]
def _cached_unique(y, xp=None):
    """Return the unique values of ``y``, preferring a cached copy.

    When ``y.dtype.metadata`` contains a ``"unique"`` entry, that entry is
    returned as is. Otherwise the values are computed with the array
    namespace of ``y``.

    Nothing is stored on ``y`` by this function; use `attach_unique` to
    attach the unique values to an array.
    """
    try:
        cached = y.dtype.metadata
    except AttributeError:
        # in case y is not a numpy array
        cached = None

    if cached is not None and "unique" in cached:
        return cached["unique"]

    namespace, _ = get_namespace(y, xp=xp)
    return namespace.unique_values(y)
def cached_unique(*ys, xp=None):
    """Return the unique values of each array in ``ys``.

    Values cached in ``dtype.metadata`` are used when present. This function
    does NOT cache anything on its inputs, i.e. it doesn't change them; call
    `attach_unique` to attach the unique values to an array.

    Parameters
    ----------
    *ys : sequence of array-like
        Input data arrays.

    xp : module, default=None
        Precomputed array namespace module. When passed, typically from a caller
        that has already performed inspection of its own inputs, skips array
        namespace inspection.

    Returns
    -------
    res : tuple of array-like or array-like
        Unique values of ys.
    """
    uniques = tuple([_cached_unique(y, xp=xp) for y in ys])
    # A single input yields the bare result instead of a 1-tuple.
    return uniques[0] if len(uniques) == 1 else uniques