File size: 87,892 Bytes

7885a28

#
# Author: Damian Eads
# Date: April 17, 2008
#
# Copyright (C) 2008 Damian Eads
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above
#    copyright notice, this list of conditions and the following
#    disclaimer in the documentation and/or other materials provided
#    with the distribution.
#
# 3. The name of the author may not be used to endorse or promote
#    products derived from this software without specific prior
#    written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
# GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys
import os.path

from functools import wraps, partial
import weakref

import numpy as np
import warnings
from numpy.linalg import norm
from numpy.testing import (verbose, assert_,
                           assert_array_equal, assert_equal,
                           assert_almost_equal, assert_allclose,
                           break_cycles, IS_PYPY)
import pytest

import scipy.spatial.distance

from scipy.spatial.distance import (
    squareform, pdist, cdist, num_obs_y, num_obs_dm, is_valid_dm, is_valid_y,
    _validate_vector, _METRICS_NAMES)

# these were missing: chebyshev cityblock
# jensenshannon  and seuclidean are referenced by string name.
from scipy.spatial.distance import (braycurtis, canberra, chebyshev, cityblock,
                                    correlation, cosine, dice, euclidean,
                                    hamming, jaccard, jensenshannon,
                                    kulczynski1, mahalanobis,
                                    minkowski, rogerstanimoto,
                                    russellrao, seuclidean, sokalmichener,  # noqa: F401
                                    sokalsneath, sqeuclidean, yule)
from scipy._lib._util import np_long, np_ulong


@pytest.fixture(params=_METRICS_NAMES, scope="session")
def metric(request):
    """
    Fixture for all metrics in scipy.spatial.distance
    """
    return request.param


_filenames = [
              "cdist-X1.txt",
              "cdist-X2.txt",
              "iris.txt",
              "pdist-boolean-inp.txt",
              "pdist-chebyshev-ml-iris.txt",
              "pdist-chebyshev-ml.txt",
              "pdist-cityblock-ml-iris.txt",
              "pdist-cityblock-ml.txt",
              "pdist-correlation-ml-iris.txt",
              "pdist-correlation-ml.txt",
              "pdist-cosine-ml-iris.txt",
              "pdist-cosine-ml.txt",
              "pdist-double-inp.txt",
              "pdist-euclidean-ml-iris.txt",
              "pdist-euclidean-ml.txt",
              "pdist-hamming-ml.txt",
              "pdist-jaccard-ml.txt",
              "pdist-jensenshannon-ml-iris.txt",
              "pdist-jensenshannon-ml.txt",
              "pdist-minkowski-3.2-ml-iris.txt",
              "pdist-minkowski-3.2-ml.txt",
              "pdist-minkowski-5.8-ml-iris.txt",
              "pdist-seuclidean-ml-iris.txt",
              "pdist-seuclidean-ml.txt",
              "pdist-spearman-ml.txt",
              "random-bool-data.txt",
              "random-double-data.txt",
              "random-int-data.txt",
              "random-uint-data.txt",
              ]

_tdist = np.array([[0, 662, 877, 255, 412, 996],
                      [662, 0, 295, 468, 268, 400],
                      [877, 295, 0, 754, 564, 138],
                      [255, 468, 754, 0, 219, 869],
                      [412, 268, 564, 219, 0, 669],
                      [996, 400, 138, 869, 669, 0]], dtype='double')

_ytdist = squareform(_tdist)

# A hashmap of expected output arrays for the tests. These arrays
# come from a list of text files, which are read prior to testing.
# Each test loads inputs and outputs from this dictionary.
eo = {}


def load_testing_files():
    for fn in _filenames:
        name = fn.replace(".txt", "").replace("-ml", "")
        fqfn = os.path.join(os.path.dirname(__file__), 'data', fn)
        fp = open(fqfn)
        eo[name] = np.loadtxt(fp)
        fp.close()
    eo['pdist-boolean-inp'] = np.bool_(eo['pdist-boolean-inp'])
    eo['random-bool-data'] = np.bool_(eo['random-bool-data'])
    eo['random-float32-data'] = np.float32(eo['random-double-data'])
    eo['random-int-data'] = np_long(eo['random-int-data'])
    eo['random-uint-data'] = np_ulong(eo['random-uint-data'])


load_testing_files()


def _is_32bit():
    return np.intp(0).itemsize < 8


def _chk_asarrays(arrays, axis=None):
    arrays = [np.asanyarray(a) for a in arrays]
    if axis is None:
        # np < 1.10 ravel removes subclass from arrays
        arrays = [np.ravel(a) if a.ndim != 1 else a
                  for a in arrays]
        axis = 0
    arrays = tuple(np.atleast_1d(a) for a in arrays)
    if axis < 0:
        if not all(a.ndim == arrays[0].ndim for a in arrays):
            raise ValueError("array ndim must be the same for neg axis")
        axis = range(arrays[0].ndim)[axis]
    return arrays + (axis,)


def _chk_weights(arrays, weights=None, axis=None,
                 force_weights=False, simplify_weights=True,
                 pos_only=False, neg_check=False,
                 nan_screen=False, mask_screen=False,
                 ddof=None):
    chked = _chk_asarrays(arrays, axis=axis)
    arrays, axis = chked[:-1], chked[-1]

    simplify_weights = simplify_weights and not force_weights
    if not force_weights and mask_screen:
        force_weights = any(np.ma.getmask(a) is not np.ma.nomask for a in arrays)

    if nan_screen:
        has_nans = [np.isnan(np.sum(a)) for a in arrays]
        if any(has_nans):
            mask_screen = True
            force_weights = True
            arrays = tuple(np.ma.masked_invalid(a) if has_nan else a
                           for a, has_nan in zip(arrays, has_nans))

    if weights is not None:
        weights = np.asanyarray(weights)
    elif force_weights:
        weights = np.ones(arrays[0].shape[axis])
    else:
        return arrays + (weights, axis)

    if ddof:
        weights = _freq_weights(weights)

    if mask_screen:
        weights = _weight_masked(arrays, weights, axis)

    if not all(weights.shape == (a.shape[axis],) for a in arrays):
        raise ValueError("weights shape must match arrays along axis")
    if neg_check and (weights < 0).any():
        raise ValueError("weights cannot be negative")

    if pos_only:
        pos_weights = np.nonzero(weights > 0)[0]
        if pos_weights.size < weights.size:
            arrays = tuple(np.take(a, pos_weights, axis=axis) for a in arrays)
            weights = weights[pos_weights]
    if simplify_weights and (weights == 1).all():
        weights = None
    return arrays + (weights, axis)


def _freq_weights(weights):
    if weights is None:
        return weights
    int_weights = weights.astype(int)
    if (weights != int_weights).any():
        raise ValueError(f"frequency (integer count-type) weights required {weights}")
    return int_weights


def _weight_masked(arrays, weights, axis):
    if axis is None:
        axis = 0
    weights = np.asanyarray(weights)
    for a in arrays:
        axis_mask = np.ma.getmask(a)
        if axis_mask is np.ma.nomask:
            continue
        if a.ndim > 1:
            not_axes = tuple(i for i in range(a.ndim) if i != axis)
            axis_mask = axis_mask.any(axis=not_axes)
        weights *= 1 - axis_mask.astype(int)
    return weights


def _rand_split(arrays, weights, axis, split_per, seed=None):
    # Coerce `arrays` to float64 if integer, to avoid nan-to-integer issues
    arrays = [arr.astype(np.float64) if np.issubdtype(arr.dtype, np.integer)
              else arr for arr in arrays]

    # inverse operation for stats.collapse_weights
    weights = np.array(weights, dtype=np.float64)  # modified inplace; need a copy
    seeded_rand = np.random.RandomState(seed)

    def mytake(a, ix, axis):
        record = np.asanyarray(np.take(a, ix, axis=axis))
        return record.reshape([a.shape[i] if i != axis else 1
                               for i in range(a.ndim)])

    n_obs = arrays[0].shape[axis]
    assert all(a.shape[axis] == n_obs for a in arrays), \
           "data must be aligned on sample axis"
    for i in range(int(split_per) * n_obs):
        split_ix = seeded_rand.randint(n_obs + i)
        prev_w = weights[split_ix]
        q = seeded_rand.rand()
        weights[split_ix] = q * prev_w
        weights = np.append(weights, (1. - q) * prev_w)
        arrays = [np.append(a, mytake(a, split_ix, axis=axis),
                            axis=axis) for a in arrays]
    return arrays, weights


assert_allclose_forgiving = partial(assert_allclose, atol=1e-5)


def _rough_check(a, b, compare_assert=assert_allclose_forgiving,
                  key=lambda x: x, w=None):
    check_a = key(a)
    check_b = key(b)
    try:
        if np.array(check_a != check_b).any():  # try strict equality for string types
            compare_assert(check_a, check_b)
    except AttributeError:  # masked array
        compare_assert(check_a, check_b)
    except (TypeError, ValueError):  # nested data structure
        for a_i, b_i in zip(check_a, check_b):
            _rough_check(a_i, b_i, compare_assert=compare_assert)

# diff from test_stats:
#  n_args=2, weight_arg='w', default_axis=None
#  ma_safe = False, nan_safe = False
def _weight_checked(fn, n_args=2, default_axis=None, key=lambda x: x, weight_arg='w',
                    squeeze=True, silent=False,
                    ones_test=True, const_test=True, dup_test=True,
                    split_test=True, dud_test=True, ma_safe=False, ma_very_safe=False,
                    nan_safe=False, split_per=1.0, seed=0,
                    compare_assert=assert_allclose_forgiving):
    """runs fn on its arguments 2 or 3 ways, checks that the results are the same,
       then returns the same thing it would have returned before"""
    @wraps(fn)
    def wrapped(*args, **kwargs):
        result = fn(*args, **kwargs)

        arrays = args[:n_args]
        rest = args[n_args:]
        weights = kwargs.get(weight_arg, None)
        axis = kwargs.get('axis', default_axis)

        chked = _chk_weights(arrays, weights=weights, axis=axis,
                             force_weights=True, mask_screen=True)
        arrays, weights, axis = chked[:-2], chked[-2], chked[-1]
        if squeeze:
            arrays = [np.atleast_1d(a.squeeze()) for a in arrays]

        try:
            # WEIGHTS CHECK 1: EQUAL WEIGHTED OBSERVATIONS
            args = tuple(arrays) + rest
            if ones_test:
                kwargs[weight_arg] = weights
                _rough_check(result, fn(*args, **kwargs), key=key)
            if const_test:
                kwargs[weight_arg] = weights * 101.0
                _rough_check(result, fn(*args, **kwargs), key=key)
                kwargs[weight_arg] = weights * 0.101
                try:
                    _rough_check(result, fn(*args, **kwargs), key=key)
                except Exception as e:
                    raise type(e)((e, arrays, weights)) from e

            # WEIGHTS CHECK 2: ADDL 0-WEIGHTED OBS
            if dud_test:
                # add randomly resampled rows, weighted at 0
                dud_arrays, dud_weights = _rand_split(arrays, weights, axis,
                                                      split_per=split_per, seed=seed)
                dud_weights[:weights.size] = weights # not exactly 1 because of masked arrays  # noqa: E501
                dud_weights[weights.size:] = 0
                dud_args = tuple(dud_arrays) + rest
                kwargs[weight_arg] = dud_weights
                _rough_check(result, fn(*dud_args, **kwargs), key=key)
                # increase the value of those 0-weighted rows
                for a in dud_arrays:
                    indexer = [slice(None)] * a.ndim
                    indexer[axis] = slice(weights.size, None)
                    indexer = tuple(indexer)
                    a[indexer] = a[indexer] * 101
                dud_args = tuple(dud_arrays) + rest
                _rough_check(result, fn(*dud_args, **kwargs), key=key)
                # set those 0-weighted rows to NaNs
                for a in dud_arrays:
                    indexer = [slice(None)] * a.ndim
                    indexer[axis] = slice(weights.size, None)
                    indexer = tuple(indexer)
                    a[indexer] = a[indexer] * np.nan
                if kwargs.get("nan_policy", None) == "omit" and nan_safe:
                    dud_args = tuple(dud_arrays) + rest
                    _rough_check(result, fn(*dud_args, **kwargs), key=key)
                # mask out those nan values
                if ma_safe:
                    dud_arrays = [np.ma.masked_invalid(a) for a in dud_arrays]
                    dud_args = tuple(dud_arrays) + rest
                    _rough_check(result, fn(*dud_args, **kwargs), key=key)
                    if ma_very_safe:
                        kwargs[weight_arg] = None
                        _rough_check(result, fn(*dud_args, **kwargs), key=key)
                del dud_arrays, dud_args, dud_weights

            # WEIGHTS CHECK 3: DUPLICATE DATA (DUMB SPLITTING)
            if dup_test:
                dup_arrays = [np.append(a, a, axis=axis) for a in arrays]
                dup_weights = np.append(weights, weights) / 2.0
                dup_args = tuple(dup_arrays) + rest
                kwargs[weight_arg] = dup_weights
                _rough_check(result, fn(*dup_args, **kwargs), key=key)
                del dup_args, dup_arrays, dup_weights

            # WEIGHT CHECK 3: RANDOM SPLITTING
            if split_test and split_per > 0:
                split = _rand_split(arrays, weights, axis,
                                    split_per=split_per, seed=seed)
                split_arrays, split_weights = split
                split_args = tuple(split_arrays) + rest
                kwargs[weight_arg] = split_weights
                _rough_check(result, fn(*split_args, **kwargs), key=key)
        except NotImplementedError as e:
            # when some combination of arguments makes weighting impossible,
            #  this is the desired response
            if not silent:
                warnings.warn(f"{fn.__name__} NotImplemented weights: {e}",
                              stacklevel=3)
        return result
    return wrapped


class DummyContextManager:
    def __enter__(self):
        pass
    def __exit__(self, *args):
        pass


def maybe_deprecated(metric: str):
    if metric in ('kulczynski1', 'sokalmichener'):
        return pytest.deprecated_call()
    else:
        return DummyContextManager()


wcdist = _weight_checked(cdist, default_axis=1, squeeze=False)
wcdist_no_const = _weight_checked(cdist, default_axis=1,
                                  squeeze=False, const_test=False)
wpdist = _weight_checked(pdist, default_axis=1, squeeze=False, n_args=1)
wpdist_no_const = _weight_checked(pdist, default_axis=1, squeeze=False,
                                  const_test=False, n_args=1)
wrogerstanimoto = _weight_checked(rogerstanimoto)
wmatching = whamming = _weight_checked(hamming, dud_test=False)
wyule = _weight_checked(yule)
wdice = _weight_checked(dice)
wcityblock = _weight_checked(cityblock)
wchebyshev = _weight_checked(chebyshev)
wcosine = _weight_checked(cosine)
wcorrelation = _weight_checked(correlation)
wkulczynski1 = _weight_checked(kulczynski1)
wjaccard = _weight_checked(jaccard)
weuclidean = _weight_checked(euclidean, const_test=False)
wsqeuclidean = _weight_checked(sqeuclidean, const_test=False)
wbraycurtis = _weight_checked(braycurtis)
wcanberra = _weight_checked(canberra, const_test=False)
wsokalsneath = _weight_checked(sokalsneath)
wsokalmichener = _weight_checked(sokalmichener)
wrussellrao = _weight_checked(russellrao)


class TestCdist:

    def setup_method(self):
        self.rnd_eo_names = ['random-float32-data', 'random-int-data',
                             'random-uint-data', 'random-double-data',
                             'random-bool-data']
        self.valid_upcasts = {'bool': [np_ulong, np_long, np.float32, np.float64],
                              'uint': [np_long, np.float32, np.float64],
                              'int': [np.float32, np.float64],
                              'float32': [np.float64]}

    @pytest.mark.thread_unsafe
    def test_cdist_extra_args(self, metric):
        # Tests that args and kwargs are correctly handled

        X1 = [[1., 2., 3.], [1.2, 2.3, 3.4], [2.2, 2.3, 4.4]]
        X2 = [[7., 5., 8.], [7.5, 5.8, 8.4], [5.5, 5.8, 4.4]]
        kwargs = {"N0tV4l1D_p4raM": 3.14, "w": np.arange(3)}
        args = [3.14] * 200

        with pytest.raises(TypeError):
            with maybe_deprecated(metric):
                cdist(X1, X2, metric=metric, **kwargs)
        with pytest.raises(TypeError):
            with maybe_deprecated(metric):
                cdist(X1, X2, metric=eval(metric), **kwargs)
        with pytest.raises(TypeError):
            with maybe_deprecated(metric):
                cdist(X1, X2, metric="test_" + metric, **kwargs)
        with pytest.raises(TypeError):
            cdist(X1, X2, metric=metric, *args)
        with pytest.raises(TypeError):
            cdist(X1, X2, metric=eval(metric), *args)
        with pytest.raises(TypeError):
            cdist(X1, X2, metric="test_" + metric, *args)

    def test_cdist_extra_args_custom(self):
        # Tests that args and kwargs are correctly handled
        # also for custom metric
        def _my_metric(x, y, arg, kwarg=1, kwarg2=2):
            return arg + kwarg + kwarg2

        X1 = [[1., 2., 3.], [1.2, 2.3, 3.4], [2.2, 2.3, 4.4]]
        X2 = [[7., 5., 8.], [7.5, 5.8, 8.4], [5.5, 5.8, 4.4]]
        kwargs = {"N0tV4l1D_p4raM": 3.14, "w": np.arange(3)}
        args = [3.14] * 200

        with pytest.raises(TypeError):
            cdist(X1, X2, _my_metric)
        with pytest.raises(TypeError):
            cdist(X1, X2, _my_metric, *args)
        with pytest.raises(TypeError):
            cdist(X1, X2, _my_metric, **kwargs)
        with pytest.raises(TypeError):
            cdist(X1, X2, _my_metric, kwarg=2.2, kwarg2=3.3)
        with pytest.raises(TypeError):
            cdist(X1, X2, _my_metric, 1, 2, kwarg=2.2)
        with pytest.raises(TypeError):
            cdist(X1, X2, _my_metric, 1, 2, kwarg=2.2)
        with pytest.raises(TypeError):
            cdist(X1, X2, _my_metric, 1.1, 2.2, 3.3)
        with pytest.raises(TypeError):
            cdist(X1, X2, _my_metric, 1.1, 2.2)
        with pytest.raises(TypeError):
            cdist(X1, X2, _my_metric, 1.1)
        with pytest.raises(TypeError):
            cdist(X1, X2, _my_metric, 1.1, kwarg=2.2, kwarg2=3.3)

        # this should work
        assert_allclose(cdist(X1, X2, metric=_my_metric,
                              arg=1.1, kwarg2=3.3), 5.4)

    def test_cdist_euclidean_random_unicode(self):
        eps = 1e-15
        X1 = eo['cdist-X1']
        X2 = eo['cdist-X2']
        Y1 = wcdist_no_const(X1, X2, 'euclidean')
        Y2 = wcdist_no_const(X1, X2, 'test_euclidean')
        assert_allclose(Y1, Y2, rtol=eps, verbose=verbose > 2)

    @pytest.mark.parametrize("p", [0.1, 0.25, 1.0, 1.23,
                                   2.0, 3.8, 4.6, np.inf])
    def test_cdist_minkowski_random(self, p):
        eps = 1e-13
        X1 = eo['cdist-X1']
        X2 = eo['cdist-X2']
        Y1 = wcdist_no_const(X1, X2, 'minkowski', p=p)
        Y2 = wcdist_no_const(X1, X2, 'test_minkowski', p=p)
        assert_allclose(Y1, Y2, atol=0, rtol=eps, verbose=verbose > 2)

    def test_cdist_cosine_random(self):
        eps = 1e-14
        X1 = eo['cdist-X1']
        X2 = eo['cdist-X2']
        Y1 = wcdist(X1, X2, 'cosine')

        # Naive implementation
        def norms(X):
            return np.linalg.norm(X, axis=1).reshape(-1, 1)

        Y2 = 1 - np.dot((X1 / norms(X1)), (X2 / norms(X2)).T)

        assert_allclose(Y1, Y2, rtol=eps, verbose=verbose > 2)

    def test_cdist_mahalanobis(self):
        # 1-dimensional observations
        x1 = np.array([[2], [3]])
        x2 = np.array([[2], [5]])
        dist = cdist(x1, x2, metric='mahalanobis')
        assert_allclose(dist, [[0.0, np.sqrt(4.5)], [np.sqrt(0.5), np.sqrt(2)]])

        # 2-dimensional observations
        x1 = np.array([[0, 0], [-1, 0]])
        x2 = np.array([[0, 2], [1, 0], [0, -2]])
        dist = cdist(x1, x2, metric='mahalanobis')
        rt2 = np.sqrt(2)
        assert_allclose(dist, [[rt2, rt2, rt2], [2, 2 * rt2, 2]])

        # Too few observations
        with pytest.raises(ValueError):
            cdist([[0, 1]], [[2, 3]], metric='mahalanobis')

    def test_cdist_custom_notdouble(self):
        class myclass:
            pass

        def _my_metric(x, y):
            if not isinstance(x[0], myclass) or not isinstance(y[0], myclass):
                raise ValueError("Type has been changed")
            return 1.123
        data = np.array([[myclass()]], dtype=object)
        cdist_y = cdist(data, data, metric=_my_metric)
        right_y = 1.123
        assert_equal(cdist_y, right_y, verbose=verbose > 2)

    def _check_calling_conventions(self, X1, X2, metric, eps=1e-07, **kwargs):
        # helper function for test_cdist_calling_conventions
        try:
            y1 = cdist(X1, X2, metric=metric, **kwargs)
            y2 = cdist(X1, X2, metric=eval(metric), **kwargs)
            y3 = cdist(X1, X2, metric="test_" + metric, **kwargs)
        except Exception as e:
            e_cls = e.__class__
            if verbose > 2:
                print(e_cls.__name__)
                print(e)
            with pytest.raises(e_cls):
                cdist(X1, X2, metric=metric, **kwargs)
            with pytest.raises(e_cls):
                cdist(X1, X2, metric=eval(metric), **kwargs)
            with pytest.raises(e_cls):
                cdist(X1, X2, metric="test_" + metric, **kwargs)
        else:
            assert_allclose(y1, y2, rtol=eps, verbose=verbose > 2)
            assert_allclose(y1, y3, rtol=eps, verbose=verbose > 2)

    def test_cdist_calling_conventions(self, metric):
        # Ensures that specifying the metric with a str or scipy function
        # gives the same behaviour (i.e. same result or same exception).
        # NOTE: The correctness should be checked within each metric tests.
        for eo_name in self.rnd_eo_names:
            # subsampling input data to speed-up tests
            # NOTE: num samples needs to be > than dimensions for mahalanobis
            X1 = eo[eo_name][::5, ::-2]
            X2 = eo[eo_name][1::5, ::2]
            if verbose > 2:
                print("testing: ", metric, " with: ", eo_name)
            if metric in {'dice', 'yule',
                          'rogerstanimoto',
                          'russellrao', 'sokalmichener',
                          'sokalsneath',
                          'kulczynski1'} and 'bool' not in eo_name:
                # python version permits non-bools e.g. for fuzzy logic
                continue
            self._check_calling_conventions(X1, X2, metric)

            # Testing built-in metrics with extra args
            if metric == "seuclidean":
                X12 = np.vstack([X1, X2]).astype(np.float64)
                V = np.var(X12, axis=0, ddof=1)
                self._check_calling_conventions(X1, X2, metric, V=V)
            elif metric == "mahalanobis":
                X12 = np.vstack([X1, X2]).astype(np.float64)
                V = np.atleast_2d(np.cov(X12.T))
                VI = np.array(np.linalg.inv(V).T)
                self._check_calling_conventions(X1, X2, metric, VI=VI)

    def test_cdist_dtype_equivalence(self, metric):
        # Tests that the result is not affected by type up-casting
        eps = 1e-07
        tests = [(eo['random-bool-data'], self.valid_upcasts['bool']),
                 (eo['random-uint-data'], self.valid_upcasts['uint']),
                 (eo['random-int-data'], self.valid_upcasts['int']),
                 (eo['random-float32-data'], self.valid_upcasts['float32'])]
        for test in tests:
            X1 = test[0][::5, ::-2]
            X2 = test[0][1::5, ::2]
            try:
                y1 = cdist(X1, X2, metric=metric)
            except Exception as e:
                e_cls = e.__class__
                if verbose > 2:
                    print(e_cls.__name__)
                    print(e)
                for new_type in test[1]:
                    X1new = new_type(X1)
                    X2new = new_type(X2)
                    with pytest.raises(e_cls):
                        cdist(X1new, X2new, metric=metric)
            else:
                for new_type in test[1]:
                    y2 = cdist(new_type(X1), new_type(X2), metric=metric)
                    assert_allclose(y1, y2, rtol=eps, verbose=verbose > 2)

    @pytest.mark.thread_unsafe
    def test_cdist_out(self, metric):
        # Test that out parameter works properly
        eps = 1e-15
        X1 = eo['cdist-X1']
        X2 = eo['cdist-X2']
        out_r, out_c = X1.shape[0], X2.shape[0]

        kwargs = dict()
        if metric == 'minkowski':
            kwargs['p'] = 1.23
        out1 = np.empty((out_r, out_c), dtype=np.float64)
        with maybe_deprecated(metric):
            Y1 = cdist(X1, X2, metric, **kwargs)
        with maybe_deprecated(metric):
            Y2 = cdist(X1, X2, metric, out=out1, **kwargs)

        # test that output is numerically equivalent
        assert_allclose(Y1, Y2, rtol=eps, verbose=verbose > 2)

        # test that Y_test1 and out1 are the same object
        assert_(Y2 is out1)

        # test for incorrect shape
        out2 = np.empty((out_r-1, out_c+1), dtype=np.float64)
        with pytest.raises(ValueError):
            with maybe_deprecated(metric):
                cdist(X1, X2, metric, out=out2, **kwargs)

        # test for C-contiguous order
        out3 = np.empty(
            (2 * out_r, 2 * out_c), dtype=np.float64)[::2, ::2]
        out4 = np.empty((out_r, out_c), dtype=np.float64, order='F')
        with pytest.raises(ValueError):
            with maybe_deprecated(metric):
                cdist(X1, X2, metric, out=out3, **kwargs)
        with pytest.raises(ValueError):
            with maybe_deprecated(metric):
                cdist(X1, X2, metric, out=out4, **kwargs)

        # test for incorrect dtype
        out5 = np.empty((out_r, out_c), dtype=np.int64)
        with pytest.raises(ValueError):
            with maybe_deprecated(metric):
                cdist(X1, X2, metric, out=out5, **kwargs)

    @pytest.mark.thread_unsafe
    def test_striding(self, metric):
        # test that striding is handled correct with calls to
        # _copy_array_if_base_present
        eps = 1e-15
        X1 = eo['cdist-X1'][::2, ::2]
        X2 = eo['cdist-X2'][::2, ::2]
        X1_copy = X1.copy()
        X2_copy = X2.copy()

        # confirm equivalence
        assert_equal(X1, X1_copy)
        assert_equal(X2, X2_copy)
        # confirm contiguity
        assert_(not X1.flags.c_contiguous)
        assert_(not X2.flags.c_contiguous)
        assert_(X1_copy.flags.c_contiguous)
        assert_(X2_copy.flags.c_contiguous)

        kwargs = dict()
        if metric == 'minkowski':
            kwargs['p'] = 1.23
        with maybe_deprecated(metric):
            Y1 = cdist(X1, X2, metric, **kwargs)
        with maybe_deprecated(metric):
            Y2 = cdist(X1_copy, X2_copy, metric, **kwargs)
        # test that output is numerically equivalent
        assert_allclose(Y1, Y2, rtol=eps, verbose=verbose > 2)

    @pytest.mark.thread_unsafe
    def test_cdist_refcount(self, metric):
        x1 = np.random.rand(10, 10)
        x2 = np.random.rand(10, 10)

        kwargs = dict()
        if metric == 'minkowski':
            kwargs['p'] = 1.23

        with maybe_deprecated(metric):
            out = cdist(x1, x2, metric=metric, **kwargs)

        # Check reference counts aren't messed up. If we only hold weak
        # references, the arrays should be deallocated.
        weak_refs = [weakref.ref(v) for v in (x1, x2, out)]
        del x1, x2, out

        if IS_PYPY:
            break_cycles()
        assert all(weak_ref() is None for weak_ref in weak_refs)


class TestPdist:

    def setup_method(self):
        self.rnd_eo_names = ['random-float32-data', 'random-int-data',
                             'random-uint-data', 'random-double-data',
                             'random-bool-data']
        self.valid_upcasts = {'bool': [np_ulong, np_long, np.float32, np.float64],
                              'uint': [np_long, np.float32, np.float64],
                              'int': [np.float32, np.float64],
                              'float32': [np.float64]}

    @pytest.mark.thread_unsafe
    def test_pdist_extra_args(self, metric):
        # Tests that args and kwargs are correctly handled
        X1 = [[1., 2.], [1.2, 2.3], [2.2, 2.3]]
        kwargs = {"N0tV4l1D_p4raM": 3.14, "w": np.arange(2)}
        args = [3.14] * 200

        with pytest.raises(TypeError):
            with maybe_deprecated(metric):
                pdist(X1, metric=metric, **kwargs)
        with pytest.raises(TypeError):
            with maybe_deprecated(metric):
                pdist(X1, metric=eval(metric), **kwargs)
        with pytest.raises(TypeError):
            with maybe_deprecated(metric):
                pdist(X1, metric="test_" + metric, **kwargs)
        with pytest.raises(TypeError):
            pdist(X1, metric=metric, *args)
        with pytest.raises(TypeError):
            pdist(X1, metric=eval(metric), *args)
        with pytest.raises(TypeError):
            pdist(X1, metric="test_" + metric, *args)

    def test_pdist_extra_args_custom(self):
        # Tests that args and kwargs are correctly handled
        # also for custom metric
        def _my_metric(x, y, arg, kwarg=1, kwarg2=2):
            return arg + kwarg + kwarg2

        X1 = [[1., 2.], [1.2, 2.3], [2.2, 2.3]]
        kwargs = {"N0tV4l1D_p4raM": 3.14, "w": np.arange(2)}
        args = [3.14] * 200

        with pytest.raises(TypeError):
            pdist(X1, _my_metric)
        with pytest.raises(TypeError):
            pdist(X1, _my_metric, *args)
        with pytest.raises(TypeError):
            pdist(X1, _my_metric, **kwargs)
        with pytest.raises(TypeError):
            pdist(X1, _my_metric, kwarg=2.2, kwarg2=3.3)
        with pytest.raises(TypeError):
            pdist(X1, _my_metric, 1, 2, kwarg=2.2)
        with pytest.raises(TypeError):
            pdist(X1, _my_metric, 1, 2, kwarg=2.2)
        with pytest.raises(TypeError):
            pdist(X1, _my_metric, 1.1, 2.2, 3.3)
        with pytest.raises(TypeError):
            pdist(X1, _my_metric, 1.1, 2.2)
        with pytest.raises(TypeError):
            pdist(X1, _my_metric, 1.1)
        with pytest.raises(TypeError):
            pdist(X1, _my_metric, 1.1, kwarg=2.2, kwarg2=3.3)

        # these should work
        assert_allclose(pdist(X1, metric=_my_metric,
                              arg=1.1, kwarg2=3.3), 5.4)

    def test_pdist_euclidean_random(self):
        eps = 1e-07
        X = eo['pdist-double-inp']
        Y_right = eo['pdist-euclidean']
        Y_test1 = wpdist_no_const(X, 'euclidean')
        assert_allclose(Y_test1, Y_right, rtol=eps)

    def test_pdist_euclidean_random_u(self):
        eps = 1e-07
        X = eo['pdist-double-inp']
        Y_right = eo['pdist-euclidean']
        Y_test1 = wpdist_no_const(X, 'euclidean')
        assert_allclose(Y_test1, Y_right, rtol=eps)

    def test_pdist_euclidean_random_float32(self):
        eps = 1e-07
        X = np.float32(eo['pdist-double-inp'])
        Y_right = eo['pdist-euclidean']
        Y_test1 = wpdist_no_const(X, 'euclidean')
        assert_allclose(Y_test1, Y_right, rtol=eps)

    def test_pdist_euclidean_random_nonC(self):
        eps = 1e-07
        X = eo['pdist-double-inp']
        Y_right = eo['pdist-euclidean']
        Y_test2 = wpdist_no_const(X, 'test_euclidean')
        assert_allclose(Y_test2, Y_right, rtol=eps)

    @pytest.mark.slow
    def test_pdist_euclidean_iris_double(self):
        eps = 1e-7
        X = eo['iris']
        Y_right = eo['pdist-euclidean-iris']
        Y_test1 = wpdist_no_const(X, 'euclidean')
        assert_allclose(Y_test1, Y_right, rtol=eps)

    @pytest.mark.slow
    def test_pdist_euclidean_iris_float32(self):
        eps = 1e-5
        X = np.float32(eo['iris'])
        Y_right = eo['pdist-euclidean-iris']
        Y_test1 = wpdist_no_const(X, 'euclidean')
        assert_allclose(Y_test1, Y_right, rtol=eps, verbose=verbose > 2)

    @pytest.mark.slow
    def test_pdist_euclidean_iris_nonC(self):
        # Test pdist(X, 'test_euclidean') [the non-C implementation] on the
        # Iris data set.
        eps = 1e-7
        X = eo['iris']
        Y_right = eo['pdist-euclidean-iris']
        Y_test2 = wpdist_no_const(X, 'test_euclidean')
        assert_allclose(Y_test2, Y_right, rtol=eps)

    def test_pdist_seuclidean_random(self):
        eps = 1e-7
        X = eo['pdist-double-inp']
        Y_right = eo['pdist-seuclidean']
        Y_test1 = pdist(X, 'seuclidean')
        assert_allclose(Y_test1, Y_right, rtol=eps)

    def test_pdist_seuclidean_random_float32(self):
        eps = 1e-7
        X = np.float32(eo['pdist-double-inp'])
        Y_right = eo['pdist-seuclidean']
        Y_test1 = pdist(X, 'seuclidean')
        assert_allclose(Y_test1, Y_right, rtol=eps)

        # Check no error is raise when V has float32 dtype (#11171).
        V = np.var(X, axis=0, ddof=1)
        Y_test2 = pdist(X, 'seuclidean', V=V)
        assert_allclose(Y_test2, Y_right, rtol=eps)

    def test_pdist_seuclidean_random_nonC(self):
        # Test pdist(X, 'test_sqeuclidean') [the non-C implementation]
        eps = 1e-07
        X = eo['pdist-double-inp']
        Y_right = eo['pdist-seuclidean']
        Y_test2 = pdist(X, 'test_seuclidean')
        assert_allclose(Y_test2, Y_right, rtol=eps)

    def test_pdist_seuclidean_iris(self):
        eps = 1e-7
        X = eo['iris']
        Y_right = eo['pdist-seuclidean-iris']
        Y_test1 = pdist(X, 'seuclidean')
        assert_allclose(Y_test1, Y_right, rtol=eps)

    def test_pdist_seuclidean_iris_float32(self):
        # Tests pdist(X, 'seuclidean') on the Iris data set (float32).
        eps = 1e-5
        X = np.float32(eo['iris'])
        Y_right = eo['pdist-seuclidean-iris']
        Y_test1 = pdist(X, 'seuclidean')
        assert_allclose(Y_test1, Y_right, rtol=eps)

    def test_pdist_seuclidean_iris_nonC(self):
        # Test pdist(X, 'test_seuclidean') [the non-C implementation] on the
        # Iris data set.
        eps = 1e-7
        X = eo['iris']
        Y_right = eo['pdist-seuclidean-iris']
        Y_test2 = pdist(X, 'test_seuclidean')
        assert_allclose(Y_test2, Y_right, rtol=eps)

    def test_pdist_cosine_random(self):
        eps = 1e-7
        X = eo['pdist-double-inp']
        Y_right = eo['pdist-cosine']
        Y_test1 = wpdist(X, 'cosine')
        assert_allclose(Y_test1, Y_right, rtol=eps)

    def test_pdist_cosine_random_float32(self):
        eps = 1e-7
        X = np.float32(eo['pdist-double-inp'])
        Y_right = eo['pdist-cosine']
        Y_test1 = wpdist(X, 'cosine')
        assert_allclose(Y_test1, Y_right, rtol=eps)

    def test_pdist_cosine_random_nonC(self):
        # Test pdist(X, 'test_cosine') [the non-C implementation]
        eps = 1e-7
        X = eo['pdist-double-inp']
        Y_right = eo['pdist-cosine']
        Y_test2 = wpdist(X, 'test_cosine')
        assert_allclose(Y_test2, Y_right, rtol=eps)

    @pytest.mark.slow
    def test_pdist_cosine_iris(self):
        eps = 1e-05
        X = eo['iris']
        Y_right = eo['pdist-cosine-iris']
        Y_test1 = wpdist(X, 'cosine')
        assert_allclose(Y_test1, Y_right, atol=eps)

    @pytest.mark.slow
    def test_pdist_cosine_iris_float32(self):
        eps = 1e-05
        X = np.float32(eo['iris'])
        Y_right = eo['pdist-cosine-iris']
        Y_test1 = wpdist(X, 'cosine')
        assert_allclose(Y_test1, Y_right, atol=eps, verbose=verbose > 2)

    @pytest.mark.slow
    def test_pdist_cosine_iris_nonC(self):
        eps = 1e-05
        X = eo['iris']
        Y_right = eo['pdist-cosine-iris']
        Y_test2 = wpdist(X, 'test_cosine')
        assert_allclose(Y_test2, Y_right, atol=eps)

    def test_pdist_cosine_bounds(self):
        # Test adapted from @joernhees's example at gh-5208: case where
        # cosine distance used to be negative. XXX: very sensitive to the
        # specific norm computation.
        x = np.abs(np.random.RandomState(1337).rand(91))
        X = np.vstack([x, x])
        assert_(wpdist(X, 'cosine')[0] >= 0,
                msg='cosine distance should be non-negative')

    def test_pdist_cityblock_random(self):
        eps = 1e-7
        X = eo['pdist-double-inp']
        Y_right = eo['pdist-cityblock']
        Y_test1 = wpdist_no_const(X, 'cityblock')
        assert_allclose(Y_test1, Y_right, rtol=eps)

    def test_pdist_cityblock_random_float32(self):
        eps = 1e-7
        X = np.float32(eo['pdist-double-inp'])
        Y_right = eo['pdist-cityblock']
        Y_test1 = wpdist_no_const(X, 'cityblock')
        assert_allclose(Y_test1, Y_right, rtol=eps)

    def test_pdist_cityblock_random_nonC(self):
        eps = 1e-7
        X = eo['pdist-double-inp']
        Y_right = eo['pdist-cityblock']
        Y_test2 = wpdist_no_const(X, 'test_cityblock')
        assert_allclose(Y_test2, Y_right, rtol=eps)

    @pytest.mark.slow
    def test_pdist_cityblock_iris(self):
        eps = 1e-14
        X = eo['iris']
        Y_right = eo['pdist-cityblock-iris']
        Y_test1 = wpdist_no_const(X, 'cityblock')
        assert_allclose(Y_test1, Y_right, rtol=eps)

    @pytest.mark.slow
    def test_pdist_cityblock_iris_float32(self):
        eps = 1e-5
        X = np.float32(eo['iris'])
        Y_right = eo['pdist-cityblock-iris']
        Y_test1 = wpdist_no_const(X, 'cityblock')
        assert_allclose(Y_test1, Y_right, rtol=eps, verbose=verbose > 2)

    @pytest.mark.slow
    def test_pdist_cityblock_iris_nonC(self):
        # Test pdist(X, 'test_cityblock') [the non-C implementation] on the
        # Iris data set.
        eps = 1e-14
        X = eo['iris']
        Y_right = eo['pdist-cityblock-iris']
        Y_test2 = wpdist_no_const(X, 'test_cityblock')
        assert_allclose(Y_test2, Y_right, rtol=eps)

    def test_pdist_correlation_random(self):
        eps = 1e-7
        X = eo['pdist-double-inp']
        Y_right = eo['pdist-correlation']
        Y_test1 = wpdist(X, 'correlation')
        assert_allclose(Y_test1, Y_right, rtol=eps)

    def test_pdist_correlation_random_float32(self):
        eps = 1e-7
        X = np.float32(eo['pdist-double-inp'])
        Y_right = eo['pdist-correlation']
        Y_test1 = wpdist(X, 'correlation')
        assert_allclose(Y_test1, Y_right, rtol=eps)

    def test_pdist_correlation_random_nonC(self):
        eps = 1e-7
        X = eo['pdist-double-inp']
        Y_right = eo['pdist-correlation']
        Y_test2 = wpdist(X, 'test_correlation')
        assert_allclose(Y_test2, Y_right, rtol=eps)

    @pytest.mark.slow
    def test_pdist_correlation_iris(self):
        eps = 1e-7
        X = eo['iris']
        Y_right = eo['pdist-correlation-iris']
        Y_test1 = wpdist(X, 'correlation')
        assert_allclose(Y_test1, Y_right, rtol=eps)

    @pytest.mark.slow
    def test_pdist_correlation_iris_float32(self):
        eps = 1e-7
        X = eo['iris']
        Y_right = np.float32(eo['pdist-correlation-iris'])
        Y_test1 = wpdist(X, 'correlation')
        assert_allclose(Y_test1, Y_right, rtol=eps, verbose=verbose > 2)

    @pytest.mark.slow
    def test_pdist_correlation_iris_nonC(self):
        if sys.maxsize > 2**32:
            eps = 1e-7
        else:
            pytest.skip("see gh-16456")
        X = eo['iris']
        Y_right = eo['pdist-correlation-iris']
        Y_test2 = wpdist(X, 'test_correlation')
        assert_allclose(Y_test2, Y_right, rtol=eps)

    @pytest.mark.parametrize("p", [0.1, 0.25, 1.0, 2.0, 3.2, np.inf])
    def test_pdist_minkowski_random_p(self, p):
        eps = 1e-13
        X = eo['pdist-double-inp']
        Y1 = wpdist_no_const(X, 'minkowski', p=p)
        Y2 = wpdist_no_const(X, 'test_minkowski', p=p)
        assert_allclose(Y1, Y2, atol=0, rtol=eps)

    def test_pdist_minkowski_random(self):
        eps = 1e-7
        X = eo['pdist-double-inp']
        Y_right = eo['pdist-minkowski-3.2']
        Y_test1 = wpdist_no_const(X, 'minkowski', p=3.2)
        assert_allclose(Y_test1, Y_right, rtol=eps)

    def test_pdist_minkowski_random_float32(self):
        eps = 1e-7
        X = np.float32(eo['pdist-double-inp'])
        Y_right = eo['pdist-minkowski-3.2']
        Y_test1 = wpdist_no_const(X, 'minkowski', p=3.2)
        assert_allclose(Y_test1, Y_right, rtol=eps)

    def test_pdist_minkowski_random_nonC(self):
        eps = 1e-7
        X = eo['pdist-double-inp']
        Y_right = eo['pdist-minkowski-3.2']
        Y_test2 = wpdist_no_const(X, 'test_minkowski', p=3.2)
        assert_allclose(Y_test2, Y_right, rtol=eps)

    @pytest.mark.slow
    def test_pdist_minkowski_3_2_iris(self):
        eps = 1e-7
        X = eo['iris']
        Y_right = eo['pdist-minkowski-3.2-iris']
        Y_test1 = wpdist_no_const(X, 'minkowski', p=3.2)
        assert_allclose(Y_test1, Y_right, rtol=eps)

    @pytest.mark.slow
    def test_pdist_minkowski_3_2_iris_float32(self):
        eps = 1e-5
        X = np.float32(eo['iris'])
        Y_right = eo['pdist-minkowski-3.2-iris']
        Y_test1 = wpdist_no_const(X, 'minkowski', p=3.2)
        assert_allclose(Y_test1, Y_right, rtol=eps)

    @pytest.mark.slow
    def test_pdist_minkowski_3_2_iris_nonC(self):
        eps = 1e-7
        X = eo['iris']
        Y_right = eo['pdist-minkowski-3.2-iris']
        Y_test2 = wpdist_no_const(X, 'test_minkowski', p=3.2)
        assert_allclose(Y_test2, Y_right, rtol=eps)

    @pytest.mark.slow
    def test_pdist_minkowski_5_8_iris(self):
        eps = 1e-7
        X = eo['iris']
        Y_right = eo['pdist-minkowski-5.8-iris']
        Y_test1 = wpdist_no_const(X, 'minkowski', p=5.8)
        assert_allclose(Y_test1, Y_right, rtol=eps)

    @pytest.mark.slow
    def test_pdist_minkowski_5_8_iris_float32(self):
        eps = 1e-5
        X = np.float32(eo['iris'])
        Y_right = eo['pdist-minkowski-5.8-iris']
        Y_test1 = wpdist_no_const(X, 'minkowski', p=5.8)
        assert_allclose(Y_test1, Y_right, rtol=eps, verbose=verbose > 2)

    @pytest.mark.slow
    def test_pdist_minkowski_5_8_iris_nonC(self):
        eps = 1e-7
        X = eo['iris']
        Y_right = eo['pdist-minkowski-5.8-iris']
        Y_test2 = wpdist_no_const(X, 'test_minkowski', p=5.8)
        assert_allclose(Y_test2, Y_right, rtol=eps)

    def test_pdist_mahalanobis(self):
        # 1-dimensional observations
        x = np.array([2.0, 2.0, 3.0, 5.0]).reshape(-1, 1)
        dist = pdist(x, metric='mahalanobis')
        assert_allclose(dist, [0.0, np.sqrt(0.5), np.sqrt(4.5),
                               np.sqrt(0.5), np.sqrt(4.5), np.sqrt(2.0)])

        # 2-dimensional observations
        x = np.array([[0, 0], [-1, 0], [0, 2], [1, 0], [0, -2]])
        dist = pdist(x, metric='mahalanobis')
        rt2 = np.sqrt(2)
        assert_allclose(dist, [rt2, rt2, rt2, rt2, 2, 2 * rt2, 2, 2, 2 * rt2, 2])

        # Too few observations
        with pytest.raises(ValueError):
            wpdist([[0, 1], [2, 3]], metric='mahalanobis')

    def test_pdist_hamming_random(self):
        eps = 1e-15
        X = eo['pdist-boolean-inp']
        Y_right = eo['pdist-hamming']
        Y_test1 = wpdist(X, 'hamming')
        assert_allclose(Y_test1, Y_right, rtol=eps)

    def test_pdist_hamming_random_float32(self):
        eps = 1e-15
        X = np.float32(eo['pdist-boolean-inp'])
        Y_right = eo['pdist-hamming']
        Y_test1 = wpdist(X, 'hamming')
        assert_allclose(Y_test1, Y_right, rtol=eps)

    def test_pdist_hamming_random_nonC(self):
        eps = 1e-15
        X = eo['pdist-boolean-inp']
        Y_right = eo['pdist-hamming']
        Y_test2 = wpdist(X, 'test_hamming')
        assert_allclose(Y_test2, Y_right, rtol=eps)

    def test_pdist_dhamming_random(self):
        eps = 1e-15
        X = np.float64(eo['pdist-boolean-inp'])
        Y_right = eo['pdist-hamming']
        Y_test1 = wpdist(X, 'hamming')
        assert_allclose(Y_test1, Y_right, rtol=eps)

    def test_pdist_dhamming_random_float32(self):
        eps = 1e-15
        X = np.float32(eo['pdist-boolean-inp'])
        Y_right = eo['pdist-hamming']
        Y_test1 = wpdist(X, 'hamming')
        assert_allclose(Y_test1, Y_right, rtol=eps)

    def test_pdist_dhamming_random_nonC(self):
        eps = 1e-15
        X = np.float64(eo['pdist-boolean-inp'])
        Y_right = eo['pdist-hamming']
        Y_test2 = wpdist(X, 'test_hamming')
        assert_allclose(Y_test2, Y_right, rtol=eps)

    def test_pdist_jensenshannon_random(self):
        eps = 1e-11
        X = eo['pdist-double-inp']
        Y_right = eo['pdist-jensenshannon']
        Y_test1 = pdist(X, 'jensenshannon')
        assert_allclose(Y_test1, Y_right, rtol=eps)

    def test_pdist_jensenshannon_random_float32(self):
        eps = 1e-8
        X = np.float32(eo['pdist-double-inp'])
        Y_right = eo['pdist-jensenshannon']
        Y_test1 = pdist(X, 'jensenshannon')
        assert_allclose(Y_test1, Y_right, rtol=eps, verbose=verbose > 2)

    def test_pdist_jensenshannon_random_nonC(self):
        eps = 1e-11
        X = eo['pdist-double-inp']
        Y_right = eo['pdist-jensenshannon']
        Y_test2 = pdist(X, 'test_jensenshannon')
        assert_allclose(Y_test2, Y_right, rtol=eps)

    def test_pdist_jensenshannon_iris(self):
        if _is_32bit():
            # Test failing on 32-bit Linux on Azure otherwise, see gh-12810
            eps = 2.5e-10
        else:
            eps = 1e-12

        X = eo['iris']
        Y_right = eo['pdist-jensenshannon-iris']
        Y_test1 = pdist(X, 'jensenshannon')
        assert_allclose(Y_test1, Y_right, atol=eps)

    def test_pdist_jensenshannon_iris_float32(self):
        eps = 1e-06
        X = np.float32(eo['iris'])
        Y_right = eo['pdist-jensenshannon-iris']
        Y_test1 = pdist(X, 'jensenshannon')
        assert_allclose(Y_test1, Y_right, atol=eps, verbose=verbose > 2)

    def test_pdist_jensenshannon_iris_nonC(self):
        eps = 5e-5
        X = eo['iris']
        Y_right = eo['pdist-jensenshannon-iris']
        Y_test2 = pdist(X, 'test_jensenshannon')
        assert_allclose(Y_test2, Y_right, rtol=eps)

    def test_pdist_matching_mtica1(self):
        # Test matching(*,*) with mtica example #1 (nums).
        m = wmatching(np.array([1, 0, 1, 1, 0]),
                      np.array([1, 1, 0, 1, 1]))
        m2 = wmatching(np.array([1, 0, 1, 1, 0], dtype=bool),
                       np.array([1, 1, 0, 1, 1], dtype=bool))
        assert_allclose(m, 0.6, rtol=0, atol=1e-10)
        assert_allclose(m2, 0.6, rtol=0, atol=1e-10)

    def test_pdist_matching_mtica2(self):
        # Test matching(*,*) with mtica example #2.
        m = wmatching(np.array([1, 0, 1]),
                     np.array([1, 1, 0]))
        m2 = wmatching(np.array([1, 0, 1], dtype=bool),
                      np.array([1, 1, 0], dtype=bool))
        assert_allclose(m, 2 / 3, rtol=0, atol=1e-10)
        assert_allclose(m2, 2 / 3, rtol=0, atol=1e-10)

    def test_pdist_yule_mtica1(self):
        m = wyule(np.array([1, 0, 1, 1, 0]),
                  np.array([1, 1, 0, 1, 1]))
        m2 = wyule(np.array([1, 0, 1, 1, 0], dtype=bool),
                   np.array([1, 1, 0, 1, 1], dtype=bool))
        if verbose > 2:
            print(m)
        assert_allclose(m, 2, rtol=0, atol=1e-10)
        assert_allclose(m2, 2, rtol=0, atol=1e-10)

    def test_pdist_yule_mtica2(self):
        m = wyule(np.array([1, 0, 1]),
                  np.array([1, 1, 0]))
        m2 = wyule(np.array([1, 0, 1], dtype=bool),
                   np.array([1, 1, 0], dtype=bool))
        if verbose > 2:
            print(m)
        assert_allclose(m, 2, rtol=0, atol=1e-10)
        assert_allclose(m2, 2, rtol=0, atol=1e-10)

    def test_pdist_dice_mtica1(self):
        m = wdice(np.array([1, 0, 1, 1, 0]),
                  np.array([1, 1, 0, 1, 1]))
        m2 = wdice(np.array([1, 0, 1, 1, 0], dtype=bool),
                   np.array([1, 1, 0, 1, 1], dtype=bool))
        if verbose > 2:
            print(m)
        assert_allclose(m, 3 / 7, rtol=0, atol=1e-10)
        assert_allclose(m2, 3 / 7, rtol=0, atol=1e-10)

    def test_pdist_dice_mtica2(self):
        m = wdice(np.array([1, 0, 1]),
                  np.array([1, 1, 0]))
        m2 = wdice(np.array([1, 0, 1], dtype=bool),
                   np.array([1, 1, 0], dtype=bool))
        if verbose > 2:
            print(m)
        assert_allclose(m, 0.5, rtol=0, atol=1e-10)
        assert_allclose(m2, 0.5, rtol=0, atol=1e-10)

    def test_pdist_sokalsneath_mtica1(self):
        m = sokalsneath(np.array([1, 0, 1, 1, 0]),
                        np.array([1, 1, 0, 1, 1]))
        m2 = sokalsneath(np.array([1, 0, 1, 1, 0], dtype=bool),
                         np.array([1, 1, 0, 1, 1], dtype=bool))
        if verbose > 2:
            print(m)
        assert_allclose(m, 3 / 4, rtol=0, atol=1e-10)
        assert_allclose(m2, 3 / 4, rtol=0, atol=1e-10)

    def test_pdist_sokalsneath_mtica2(self):
        m = wsokalsneath(np.array([1, 0, 1]),
                         np.array([1, 1, 0]))
        m2 = wsokalsneath(np.array([1, 0, 1], dtype=bool),
                          np.array([1, 1, 0], dtype=bool))
        if verbose > 2:
            print(m)
        assert_allclose(m, 4 / 5, rtol=0, atol=1e-10)
        assert_allclose(m2, 4 / 5, rtol=0, atol=1e-10)

    def test_pdist_rogerstanimoto_mtica1(self):
        m = wrogerstanimoto(np.array([1, 0, 1, 1, 0]),
                            np.array([1, 1, 0, 1, 1]))
        m2 = wrogerstanimoto(np.array([1, 0, 1, 1, 0], dtype=bool),
                             np.array([1, 1, 0, 1, 1], dtype=bool))
        if verbose > 2:
            print(m)
        assert_allclose(m, 3 / 4, rtol=0, atol=1e-10)
        assert_allclose(m2, 3 / 4, rtol=0, atol=1e-10)

    def test_pdist_rogerstanimoto_mtica2(self):
        m = wrogerstanimoto(np.array([1, 0, 1]),
                            np.array([1, 1, 0]))
        m2 = wrogerstanimoto(np.array([1, 0, 1], dtype=bool),
                             np.array([1, 1, 0], dtype=bool))
        if verbose > 2:
            print(m)
        assert_allclose(m, 4 / 5, rtol=0, atol=1e-10)
        assert_allclose(m2, 4 / 5, rtol=0, atol=1e-10)

    def test_pdist_russellrao_mtica1(self):
        m = wrussellrao(np.array([1, 0, 1, 1, 0]),
                        np.array([1, 1, 0, 1, 1]))
        m2 = wrussellrao(np.array([1, 0, 1, 1, 0], dtype=bool),
                         np.array([1, 1, 0, 1, 1], dtype=bool))
        if verbose > 2:
            print(m)
        assert_allclose(m, 3 / 5, rtol=0, atol=1e-10)
        assert_allclose(m2, 3 / 5, rtol=0, atol=1e-10)

    def test_pdist_russellrao_mtica2(self):
        m = wrussellrao(np.array([1, 0, 1]),
                        np.array([1, 1, 0]))
        m2 = wrussellrao(np.array([1, 0, 1], dtype=bool),
                         np.array([1, 1, 0], dtype=bool))
        if verbose > 2:
            print(m)
        assert_allclose(m, 2 / 3, rtol=0, atol=1e-10)
        assert_allclose(m2, 2 / 3, rtol=0, atol=1e-10)

    @pytest.mark.slow
    def test_pdist_canberra_match(self):
        D = eo['iris']
        if verbose > 2:
            print(D.shape, D.dtype)
        eps = 1e-15
        y1 = wpdist_no_const(D, "canberra")
        y2 = wpdist_no_const(D, "test_canberra")
        assert_allclose(y1, y2, rtol=eps, verbose=verbose > 2)

    def test_pdist_canberra_ticket_711(self):
        # Test pdist(X, 'canberra') to see if Canberra gives the right result
        # as reported on gh-1238.
        eps = 1e-8
        pdist_y = wpdist_no_const(([3.3], [3.4]), "canberra")
        right_y = 0.01492537
        assert_allclose(pdist_y, right_y, atol=eps, verbose=verbose > 2)

    def test_pdist_custom_notdouble(self):
        # tests that when using a custom metric the data type is not altered
        class myclass:
            pass

        def _my_metric(x, y):
            if not isinstance(x[0], myclass) or not isinstance(y[0], myclass):
                raise ValueError("Type has been changed")
            return 1.123
        data = np.array([[myclass()], [myclass()]], dtype=object)
        pdist_y = pdist(data, metric=_my_metric)
        right_y = 1.123
        assert_equal(pdist_y, right_y, verbose=verbose > 2)

    def _check_calling_conventions(self, X, metric, eps=1e-07, **kwargs):
        # helper function for test_pdist_calling_conventions
        try:
            y1 = pdist(X, metric=metric, **kwargs)
            y2 = pdist(X, metric=eval(metric), **kwargs)
            y3 = pdist(X, metric="test_" + metric, **kwargs)
        except Exception as e:
            e_cls = e.__class__
            if verbose > 2:
                print(e_cls.__name__)
                print(e)
            with pytest.raises(e_cls):
                pdist(X, metric=metric, **kwargs)
            with pytest.raises(e_cls):
                pdist(X, metric=eval(metric), **kwargs)
            with pytest.raises(e_cls):
                pdist(X, metric="test_" + metric, **kwargs)
        else:
            assert_allclose(y1, y2, rtol=eps, verbose=verbose > 2)
            assert_allclose(y1, y3, rtol=eps, verbose=verbose > 2)

    def test_pdist_calling_conventions(self, metric):
        # Ensures that specifying the metric with a str or scipy function
        # gives the same behaviour (i.e. same result or same exception).
        # NOTE: The correctness should be checked within each metric tests.
        # NOTE: Extra args should be checked with a dedicated test
        for eo_name in self.rnd_eo_names:
            # subsampling input data to speed-up tests
            # NOTE: num samples needs to be > than dimensions for mahalanobis
            X = eo[eo_name][::5, ::2]
            if verbose > 2:
                print("testing: ", metric, " with: ", eo_name)
            if metric in {'dice', 'yule', 'matching',
                          'rogerstanimoto', 'russellrao', 'sokalmichener',
                          'sokalsneath',
                          'kulczynski1'} and 'bool' not in eo_name:
                # python version permits non-bools e.g. for fuzzy logic
                continue
            self._check_calling_conventions(X, metric)

            # Testing built-in metrics with extra args
            if metric == "seuclidean":
                V = np.var(X.astype(np.float64), axis=0, ddof=1)
                self._check_calling_conventions(X, metric, V=V)
            elif metric == "mahalanobis":
                V = np.atleast_2d(np.cov(X.astype(np.float64).T))
                VI = np.array(np.linalg.inv(V).T)
                self._check_calling_conventions(X, metric, VI=VI)

    def test_pdist_dtype_equivalence(self, metric):
        # Tests that the result is not affected by type up-casting
        eps = 1e-07
        tests = [(eo['random-bool-data'], self.valid_upcasts['bool']),
                 (eo['random-uint-data'], self.valid_upcasts['uint']),
                 (eo['random-int-data'], self.valid_upcasts['int']),
                 (eo['random-float32-data'], self.valid_upcasts['float32'])]
        for test in tests:
            X1 = test[0][::5, ::2]
            try:
                y1 = pdist(X1, metric=metric)
            except Exception as e:
                e_cls = e.__class__
                if verbose > 2:
                    print(e_cls.__name__)
                    print(e)
                for new_type in test[1]:
                    X2 = new_type(X1)
                    with pytest.raises(e_cls):
                        pdist(X2, metric=metric)
            else:
                for new_type in test[1]:
                    y2 = pdist(new_type(X1), metric=metric)
                    assert_allclose(y1, y2, rtol=eps, verbose=verbose > 2)

    @pytest.mark.thread_unsafe
    def test_pdist_out(self, metric):
        # Test that out parameter works properly
        eps = 1e-15
        X = eo['random-float32-data'][::5, ::2]
        out_size = int((X.shape[0] * (X.shape[0] - 1)) / 2)

        kwargs = dict()
        if metric == 'minkowski':
            kwargs['p'] = 1.23
        out1 = np.empty(out_size, dtype=np.float64)
        with maybe_deprecated(metric):
            Y_right = pdist(X, metric, **kwargs)
        with maybe_deprecated(metric):
            Y_test1 = pdist(X, metric, out=out1, **kwargs)

        # test that output is numerically equivalent
        assert_allclose(Y_test1, Y_right, rtol=eps)

        # test that Y_test1 and out1 are the same object
        assert_(Y_test1 is out1)

        # test for incorrect shape
        out2 = np.empty(out_size + 3, dtype=np.float64)
        with pytest.raises(ValueError):
            with maybe_deprecated(metric):
                pdist(X, metric, out=out2, **kwargs)

        # test for (C-)contiguous output
        out3 = np.empty(2 * out_size, dtype=np.float64)[::2]
        with pytest.raises(ValueError):
            with maybe_deprecated(metric):
                pdist(X, metric, out=out3, **kwargs)

        # test for incorrect dtype
        out5 = np.empty(out_size, dtype=np.int64)
        with pytest.raises(ValueError):
            with maybe_deprecated(metric):
                pdist(X, metric, out=out5, **kwargs)

    @pytest.mark.thread_unsafe
    def test_striding(self, metric):
        # test that striding is handled correct with calls to
        # _copy_array_if_base_present
        eps = 1e-15
        X = eo['random-float32-data'][::5, ::2]
        X_copy = X.copy()

        # confirm contiguity
        assert_(not X.flags.c_contiguous)
        assert_(X_copy.flags.c_contiguous)

        kwargs = dict()
        if metric == 'minkowski':
            kwargs['p'] = 1.23
        with maybe_deprecated(metric):
            Y1 = pdist(X, metric, **kwargs)
        with maybe_deprecated(metric):
            Y2 = pdist(X_copy, metric, **kwargs)
        # test that output is numerically equivalent
        assert_allclose(Y1, Y2, rtol=eps, verbose=verbose > 2)

class TestSomeDistanceFunctions:

    def setup_method(self):
        # 1D arrays
        x = np.array([1.0, 2.0, 3.0])
        y = np.array([1.0, 1.0, 5.0])

        self.cases = [(x, y)]

    def test_minkowski(self):
        for x, y in self.cases:
            dist1 = minkowski(x, y, p=1)
            assert_almost_equal(dist1, 3.0)
            dist1p5 = minkowski(x, y, p=1.5)
            assert_almost_equal(dist1p5, (1.0 + 2.0**1.5)**(2. / 3))
            dist2 = minkowski(x, y, p=2)
            assert_almost_equal(dist2, 5.0 ** 0.5)
            dist0p25 = minkowski(x, y, p=0.25)
            assert_almost_equal(dist0p25, (1.0 + 2.0 ** 0.25) ** 4)

        # Check that casting input to minimum scalar type doesn't affect result
        # (issue #10262). This could be extended to more test inputs with
        # np.min_scalar_type(np.max(input_matrix)).
        a = np.array([352, 916])
        b = np.array([350, 660])
        assert_equal(minkowski(a, b),
                     minkowski(a.astype('uint16'), b.astype('uint16')))

    def test_euclidean(self):
        for x, y in self.cases:
            dist = weuclidean(x, y)
            assert_almost_equal(dist, np.sqrt(5))

    def test_sqeuclidean(self):
        for x, y in self.cases:
            dist = wsqeuclidean(x, y)
            assert_almost_equal(dist, 5.0)

    def test_cosine(self):
        for x, y in self.cases:
            dist = wcosine(x, y)
            assert_almost_equal(dist, 1.0 - 18.0 / (np.sqrt(14) * np.sqrt(27)))

    def test_cosine_output_dtype(self):
        # Regression test for gh-19541
        assert isinstance(wcorrelation([1, 1], [1, 1], centered=False), float)
        assert isinstance(wcosine([1, 1], [1, 1]), float)

    def test_correlation(self):
        xm = np.array([-1.0, 0, 1.0])
        ym = np.array([-4.0 / 3, -4.0 / 3, 5.0 - 7.0 / 3])
        for x, y in self.cases:
            dist = wcorrelation(x, y)
            assert_almost_equal(dist, 1.0 - np.dot(xm, ym) / (norm(xm) * norm(ym)))

    def test_correlation_positive(self):
        # Regression test for gh-12320 (negative return value due to rounding
        x = np.array([0., 0., 0., 0., 0., 0., -2., 0., 0., 0., -2., -2., -2.,
                      0., -2., 0., -2., 0., 0., -1., -2., 0., 1., 0., 0., -2.,
                      0., 0., -2., 0., -2., -2., -2., -2., -2., -2., 0.])
        y = np.array([1., 1., 1., 1., 1., 1., -1., 1., 1., 1., -1., -1., -1.,
                      1., -1., 1., -1., 1., 1., 0., -1., 1., 2., 1., 1., -1.,
                      1., 1., -1., 1., -1., -1., -1., -1., -1., -1., 1.])
        dist = correlation(x, y)
        assert 0 <= dist <= 10 * np.finfo(np.float64).eps

    @pytest.mark.thread_unsafe
    @pytest.mark.filterwarnings('ignore:Casting complex')
    @pytest.mark.parametrize("func", [correlation, cosine])
    def test_corr_dep_complex(self, func):
        x = [1+0j, 2+0j]
        y = [3+0j, 4+0j]
        with pytest.deprecated_call(match="Complex `u` and `v` are deprecated"):
            func(x, y)

    def test_mahalanobis(self):
        x = np.array([1.0, 2.0, 3.0])
        y = np.array([1.0, 1.0, 5.0])
        vi = np.array([[2.0, 1.0, 0.0], [1.0, 2.0, 1.0], [0.0, 1.0, 2.0]])
        for x, y in self.cases:
            dist = mahalanobis(x, y, vi)
            assert_almost_equal(dist, np.sqrt(6.0))


class TestSquareForm:
    checked_dtypes = [np.float64, np.float32, np.int32, np.int8, bool]

    def test_squareform_matrix(self):
        for dtype in self.checked_dtypes:
            self.check_squareform_matrix(dtype)

    def test_squareform_vector(self):
        for dtype in self.checked_dtypes:
            self.check_squareform_vector(dtype)

    def check_squareform_matrix(self, dtype):
        A = np.zeros((0, 0), dtype=dtype)
        rA = squareform(A)
        assert_equal(rA.shape, (0,))
        assert_equal(rA.dtype, dtype)

        A = np.zeros((1, 1), dtype=dtype)
        rA = squareform(A)
        assert_equal(rA.shape, (0,))
        assert_equal(rA.dtype, dtype)

        A = np.array([[0, 4.2], [4.2, 0]], dtype=dtype)
        rA = squareform(A)
        assert_equal(rA.shape, (1,))
        assert_equal(rA.dtype, dtype)
        assert_array_equal(rA, np.array([4.2], dtype=dtype))

    def check_squareform_vector(self, dtype):
        v = np.zeros((0,), dtype=dtype)
        rv = squareform(v)
        assert_equal(rv.shape, (1, 1))
        assert_equal(rv.dtype, dtype)
        assert_array_equal(rv, [[0]])

        v = np.array([8.3], dtype=dtype)
        rv = squareform(v)
        assert_equal(rv.shape, (2, 2))
        assert_equal(rv.dtype, dtype)
        assert_array_equal(rv, np.array([[0, 8.3], [8.3, 0]], dtype=dtype))

    def test_squareform_multi_matrix(self):
        for n in range(2, 5):
            self.check_squareform_multi_matrix(n)

    def check_squareform_multi_matrix(self, n):
        X = np.random.rand(n, 4)
        Y = wpdist_no_const(X)
        assert_equal(len(Y.shape), 1)
        A = squareform(Y)
        Yr = squareform(A)
        s = A.shape
        k = 0
        if verbose >= 3:
            print(A.shape, Y.shape, Yr.shape)
        assert_equal(len(s), 2)
        assert_equal(len(Yr.shape), 1)
        assert_equal(s[0], s[1])
        for i in range(0, s[0]):
            for j in range(i + 1, s[1]):
                if i != j:
                    assert_equal(A[i, j], Y[k])
                    k += 1
                else:
                    assert_equal(A[i, j], 0)


class TestNumObsY:

    def test_num_obs_y_multi_matrix(self):
        for n in range(2, 10):
            X = np.random.rand(n, 4)
            Y = wpdist_no_const(X)
            assert_equal(num_obs_y(Y), n)

    def test_num_obs_y_1(self):
        # Tests num_obs_y(y) on a condensed distance matrix over 1
        # observations. Expecting exception.
        with pytest.raises(ValueError):
            self.check_y(1)

    def test_num_obs_y_2(self):
        # Tests num_obs_y(y) on a condensed distance matrix over 2
        # observations.
        assert_(self.check_y(2))

    def test_num_obs_y_3(self):
        assert_(self.check_y(3))

    def test_num_obs_y_4(self):
        assert_(self.check_y(4))

    def test_num_obs_y_5_10(self):
        for i in range(5, 16):
            self.minit(i)

    def test_num_obs_y_2_100(self):
        # Tests num_obs_y(y) on 100 improper condensed distance matrices.
        # Expecting exception.
        a = set()
        for n in range(2, 16):
            a.add(n * (n - 1) / 2)
        for i in range(5, 105):
            if i not in a:
                with pytest.raises(ValueError):
                    self.bad_y(i)

    def minit(self, n):
        assert_(self.check_y(n))

    def bad_y(self, n):
        y = np.random.rand(n)
        return num_obs_y(y)

    def check_y(self, n):
        return num_obs_y(self.make_y(n)) == n

    def make_y(self, n):
        return np.random.rand((n * (n - 1)) // 2)


class TestNumObsDM:

    def test_num_obs_dm_multi_matrix(self):
        for n in range(1, 10):
            X = np.random.rand(n, 4)
            Y = wpdist_no_const(X)
            A = squareform(Y)
            if verbose >= 3:
                print(A.shape, Y.shape)
            assert_equal(num_obs_dm(A), n)

    def test_num_obs_dm_0(self):
        # Tests num_obs_dm(D) on a 0x0 distance matrix. Expecting exception.
        assert_(self.check_D(0))

    def test_num_obs_dm_1(self):
        # Tests num_obs_dm(D) on a 1x1 distance matrix.
        assert_(self.check_D(1))

    def test_num_obs_dm_2(self):
        assert_(self.check_D(2))

    def test_num_obs_dm_3(self):
        assert_(self.check_D(2))

    def test_num_obs_dm_4(self):
        assert_(self.check_D(4))

    def check_D(self, n):
        return num_obs_dm(self.make_D(n)) == n

    def make_D(self, n):
        return np.random.rand(n, n)


def is_valid_dm_throw(D):
    return is_valid_dm(D, throw=True)


class TestIsValidDM:

    def test_is_valid_dm_improper_shape_1D_E(self):
        D = np.zeros((5,), dtype=np.float64)
        with pytest.raises(ValueError):
            is_valid_dm_throw(D)

    def test_is_valid_dm_improper_shape_1D_F(self):
        D = np.zeros((5,), dtype=np.float64)
        assert_equal(is_valid_dm(D), False)

    def test_is_valid_dm_improper_shape_3D_E(self):
        D = np.zeros((3, 3, 3), dtype=np.float64)
        with pytest.raises(ValueError):
            is_valid_dm_throw(D)

    def test_is_valid_dm_improper_shape_3D_F(self):
        D = np.zeros((3, 3, 3), dtype=np.float64)
        assert_equal(is_valid_dm(D), False)

    def test_is_valid_dm_nonzero_diagonal_E(self):
        y = np.random.rand(10)
        D = squareform(y)
        for i in range(0, 5):
            D[i, i] = 2.0
        with pytest.raises(ValueError):
            is_valid_dm_throw(D)

    def test_is_valid_dm_nonzero_diagonal_F(self):
        y = np.random.rand(10)
        D = squareform(y)
        for i in range(0, 5):
            D[i, i] = 2.0
        assert_equal(is_valid_dm(D), False)

    def test_is_valid_dm_asymmetric_E(self):
        y = np.random.rand(10)
        D = squareform(y)
        D[1, 3] = D[3, 1] + 1
        with pytest.raises(ValueError):
            is_valid_dm_throw(D)

    def test_is_valid_dm_asymmetric_F(self):
        y = np.random.rand(10)
        D = squareform(y)
        D[1, 3] = D[3, 1] + 1
        assert_equal(is_valid_dm(D), False)

    def test_is_valid_dm_correct_1_by_1(self):
        D = np.zeros((1, 1), dtype=np.float64)
        assert_equal(is_valid_dm(D), True)

    def test_is_valid_dm_correct_2_by_2(self):
        y = np.random.rand(1)
        D = squareform(y)
        assert_equal(is_valid_dm(D), True)

    def test_is_valid_dm_correct_3_by_3(self):
        y = np.random.rand(3)
        D = squareform(y)
        assert_equal(is_valid_dm(D), True)

    def test_is_valid_dm_correct_4_by_4(self):
        y = np.random.rand(6)
        D = squareform(y)
        assert_equal(is_valid_dm(D), True)

    def test_is_valid_dm_correct_5_by_5(self):
        y = np.random.rand(10)
        D = squareform(y)
        assert_equal(is_valid_dm(D), True)


def is_valid_y_throw(y):
    return is_valid_y(y, throw=True)


class TestIsValidY:
    # If test case name ends on "_E" then an exception is expected for the
    # given input, if it ends in "_F" then False is expected for the is_valid_y
    # check.  Otherwise the input is expected to be valid.

    def test_is_valid_y_improper_shape_2D_E(self):
        y = np.zeros((3, 3,), dtype=np.float64)
        with pytest.raises(ValueError):
            is_valid_y_throw(y)

    def test_is_valid_y_improper_shape_2D_F(self):
        y = np.zeros((3, 3,), dtype=np.float64)
        assert_equal(is_valid_y(y), False)

    def test_is_valid_y_improper_shape_3D_E(self):
        y = np.zeros((3, 3, 3), dtype=np.float64)
        with pytest.raises(ValueError):
            is_valid_y_throw(y)

    def test_is_valid_y_improper_shape_3D_F(self):
        y = np.zeros((3, 3, 3), dtype=np.float64)
        assert_equal(is_valid_y(y), False)

    def test_is_valid_y_correct_2_by_2(self):
        y = self.correct_n_by_n(2)
        assert_equal(is_valid_y(y), True)

    def test_is_valid_y_correct_3_by_3(self):
        y = self.correct_n_by_n(3)
        assert_equal(is_valid_y(y), True)

    def test_is_valid_y_correct_4_by_4(self):
        y = self.correct_n_by_n(4)
        assert_equal(is_valid_y(y), True)

    def test_is_valid_y_correct_5_by_5(self):
        y = self.correct_n_by_n(5)
        assert_equal(is_valid_y(y), True)

    def test_is_valid_y_2_100(self):
        a = set()
        for n in range(2, 16):
            a.add(n * (n - 1) / 2)
        for i in range(5, 105):
            if i not in a:
                with pytest.raises(ValueError):
                    self.bad_y(i)

    def bad_y(self, n):
        y = np.random.rand(n)
        return is_valid_y(y, throw=True)

    def correct_n_by_n(self, n):
        y = np.random.rand((n * (n - 1)) // 2)
        return y


@pytest.mark.parametrize("p", [-10.0, -0.5, 0.0])
def test_bad_p(p):
    # Raise ValueError if p <=0.
    with pytest.raises(ValueError):
        minkowski([1, 2], [3, 4], p)
    with pytest.raises(ValueError):
        minkowski([1, 2], [3, 4], p, [1, 1])


def test_sokalsneath_all_false():
    # Regression test for ticket #876
    with pytest.raises(ValueError):
        sokalsneath([False, False, False], [False, False, False])


def test_canberra():
    # Regression test for ticket #1430.
    assert_equal(wcanberra([1, 2, 3], [2, 4, 6]), 1)
    assert_equal(wcanberra([1, 1, 0, 0], [1, 0, 1, 0]), 2)


def test_braycurtis():
    # Regression test for ticket #1430.
    assert_almost_equal(wbraycurtis([1, 2, 3], [2, 4, 6]), 1. / 3, decimal=15)
    assert_almost_equal(wbraycurtis([1, 1, 0, 0], [1, 0, 1, 0]), 0.5, decimal=15)


def test_euclideans():
    # Regression test for ticket #1328.
    x1 = np.array([1, 1, 1])
    x2 = np.array([0, 0, 0])

    # Basic test of the calculation.
    assert_almost_equal(wsqeuclidean(x1, x2), 3.0, decimal=14)
    assert_almost_equal(weuclidean(x1, x2), np.sqrt(3), decimal=14)

    # Check flattening for (1, N) or (N, 1) inputs
    with pytest.raises(ValueError, match="Input vector should be 1-D"):
        weuclidean(x1[np.newaxis, :], x2[np.newaxis, :]), np.sqrt(3)
    with pytest.raises(ValueError, match="Input vector should be 1-D"):
        wsqeuclidean(x1[np.newaxis, :], x2[np.newaxis, :])
    with pytest.raises(ValueError, match="Input vector should be 1-D"):
        wsqeuclidean(x1[:, np.newaxis], x2[:, np.newaxis])

    # Distance metrics only defined for vectors (= 1-D)
    x = np.arange(4).reshape(2, 2)
    with pytest.raises(ValueError):
        weuclidean(x, x)
    with pytest.raises(ValueError):
        wsqeuclidean(x, x)

    # Another check, with random data.
    rs = np.random.RandomState(1234567890)
    x = rs.rand(10)
    y = rs.rand(10)
    d1 = weuclidean(x, y)
    d2 = wsqeuclidean(x, y)
    assert_almost_equal(d1**2, d2, decimal=14)


def test_hamming_unequal_length():
    # Regression test for gh-4290.
    x = [0, 0, 1]
    y = [1, 0, 1, 0]
    # Used to give an AttributeError from ndarray.mean called on bool
    with pytest.raises(ValueError):
        whamming(x, y)


def test_hamming_unequal_length_with_w():
    u = [0, 0, 1]
    v = [0, 0, 1]
    w = [1, 0, 1, 0]
    msg = "'w' should have the same length as 'u' and 'v'."
    with pytest.raises(ValueError, match=msg):
        whamming(u, v, w)


def test_hamming_string_array():
    # https://github.com/scikit-learn/scikit-learn/issues/4014
    a = np.array(['eggs', 'spam', 'spam', 'eggs', 'spam', 'spam', 'spam',
                  'spam', 'spam', 'spam', 'spam', 'eggs', 'eggs', 'spam',
                  'eggs', 'eggs', 'eggs', 'eggs', 'eggs', 'spam'],
                  dtype='|S4')
    b = np.array(['eggs', 'spam', 'spam', 'eggs', 'eggs', 'spam', 'spam',
                  'spam', 'spam', 'eggs', 'spam', 'eggs', 'spam', 'eggs',
                  'spam', 'spam', 'eggs', 'spam', 'spam', 'eggs'],
                  dtype='|S4')
    desired = 0.45
    assert_allclose(whamming(a, b), desired)


def test_minkowski_w():
    # Regression test for gh-8142.
    arr_in = np.array([[83.33333333, 100., 83.33333333, 100., 36.,
                        60., 90., 150., 24., 48.],
                       [83.33333333, 100., 83.33333333, 100., 36.,
                        60., 90., 150., 24., 48.]])
    p0 = pdist(arr_in, metric='minkowski', p=1, w=None)
    c0 = cdist(arr_in, arr_in, metric='minkowski', p=1, w=None)
    p1 = pdist(arr_in, metric='minkowski', p=1)
    c1 = cdist(arr_in, arr_in, metric='minkowski', p=1)

    assert_allclose(p0, p1, rtol=1e-15)
    assert_allclose(c0, c1, rtol=1e-15)


def test_sqeuclidean_dtypes():
    # Assert that sqeuclidean returns the right types of values.
    # Integer types should be converted to floating for stability.
    # Floating point types should be the same as the input.
    x = [1, 2, 3]
    y = [4, 5, 6]

    for dtype in [np.int8, np.int16, np.int32, np.int64]:
        d = wsqeuclidean(np.asarray(x, dtype=dtype), np.asarray(y, dtype=dtype))
        assert_(np.issubdtype(d.dtype, np.floating))

    for dtype in [np.uint8, np.uint16, np.uint32, np.uint64]:
        umax = np.iinfo(dtype).max
        d1 = wsqeuclidean([0], np.asarray([umax], dtype=dtype))
        d2 = wsqeuclidean(np.asarray([umax], dtype=dtype), [0])

        assert_equal(d1, d2)
        assert_equal(d1, np.float64(umax)**2)

    dtypes = [np.float32, np.float64, np.complex64, np.complex128]
    for dtype in ['float16', 'float128']:
        # These aren't present in older numpy versions; float128 may also not
        # be present on all platforms.
        if hasattr(np, dtype):
            dtypes.append(getattr(np, dtype))

    for dtype in dtypes:
        d = wsqeuclidean(np.asarray(x, dtype=dtype), np.asarray(y, dtype=dtype))
        assert_equal(d.dtype, dtype)


@pytest.mark.thread_unsafe
def test_sokalmichener():
    # Test that sokalmichener has the same result for bool and int inputs.
    p = [True, True, False]
    q = [True, False, True]
    x = [int(b) for b in p]
    y = [int(b) for b in q]
    with pytest.deprecated_call():
        dist1 = sokalmichener(p, q)
    with pytest.deprecated_call():
        dist2 = sokalmichener(x, y)
    # These should be exactly the same.
    assert_equal(dist1, dist2)


@pytest.mark.thread_unsafe
def test_sokalmichener_with_weight():
    # from: | 1 |   | 0 |
    # to:   | 1 |   | 1 |
    # weight|   | 1 |   | 0.2
    ntf = 0 * 1 + 0 * 0.2
    nft = 0 * 1 + 1 * 0.2
    ntt = 1 * 1 + 0 * 0.2
    nff = 0 * 1 + 0 * 0.2
    expected = 2 * (nft + ntf) / (ntt + nff + 2 * (nft + ntf))
    assert_almost_equal(expected, 0.2857143)
    with pytest.deprecated_call():
        actual = sokalmichener([1, 0], [1, 1], w=[1, 0.2])
    assert_almost_equal(expected, actual)

    a1 = [False, False, True, True, True, False, False, True, True, True, True,
          True, True, False, True, False, False, False, True, True]
    a2 = [True, True, True, False, False, True, True, True, False, True,
          True, True, True, True, False, False, False, True, True, True]

    for w in [0.05, 0.1, 1.0, 20.0]:
        with pytest.deprecated_call():
            assert_almost_equal(sokalmichener(a2, a1, [w]), 0.6666666666666666)


@pytest.mark.thread_unsafe
def test_modifies_input(metric):
    # test whether cdist or pdist modifies input arrays
    X1 = np.asarray([[1., 2., 3.],
                     [1.2, 2.3, 3.4],
                     [2.2, 2.3, 4.4],
                     [22.2, 23.3, 44.4]])
    X1_copy = X1.copy()
    with maybe_deprecated(metric):
        cdist(X1, X1, metric)
    with maybe_deprecated(metric):
        pdist(X1, metric)
    assert_array_equal(X1, X1_copy)


@pytest.mark.thread_unsafe
def test_Xdist_deprecated_args(metric):
    # testing both cdist and pdist deprecated warnings
    X1 = np.asarray([[1., 2., 3.],
                     [1.2, 2.3, 3.4],
                     [2.2, 2.3, 4.4],
                     [22.2, 23.3, 44.4]])

    with pytest.raises(TypeError):
        cdist(X1, X1, metric, 2.)

    with pytest.raises(TypeError):
        pdist(X1, metric, 2.)

    for arg in ["p", "V", "VI"]:
        kwargs = {arg: "foo"}

        if ((arg == "V" and metric == "seuclidean")
                or (arg == "VI" and metric == "mahalanobis")
                or (arg == "p" and metric == "minkowski")):
            continue

        with pytest.raises(TypeError):
            with maybe_deprecated(metric):
                cdist(X1, X1, metric, **kwargs)

        with pytest.raises(TypeError):
            with maybe_deprecated(metric):
                pdist(X1, metric, **kwargs)


@pytest.mark.thread_unsafe
def test_Xdist_non_negative_weights(metric):
    X = eo['random-float32-data'][::5, ::2]
    w = np.ones(X.shape[1])
    w[::5] = -w[::5]

    if metric in ['seuclidean', 'mahalanobis', 'jensenshannon']:
        pytest.skip("not applicable")

    for m in [metric, eval(metric), "test_" + metric]:
        with pytest.raises(ValueError):
            with maybe_deprecated(metric):
                pdist(X, m, w=w)
        with pytest.raises(ValueError):
            with maybe_deprecated(metric):
                cdist(X, X, m, w=w)


def test__validate_vector():
    x = [1, 2, 3]
    y = _validate_vector(x)
    assert_array_equal(y, x)

    y = _validate_vector(x, dtype=np.float64)
    assert_array_equal(y, x)
    assert_equal(y.dtype, np.float64)

    x = [1]
    y = _validate_vector(x)
    assert_equal(y.ndim, 1)
    assert_equal(y, x)

    x = 1
    with pytest.raises(ValueError, match="Input vector should be 1-D"):
        _validate_vector(x)

    x = np.arange(5).reshape(1, -1, 1)
    with pytest.raises(ValueError, match="Input vector should be 1-D"):
        _validate_vector(x)

    x = [[1, 2], [3, 4]]
    with pytest.raises(ValueError, match="Input vector should be 1-D"):
        _validate_vector(x)

def test_yule_all_same():
    # Test yule avoids a divide by zero when exactly equal
    x = np.ones((2, 6), dtype=bool)
    d = wyule(x[0], x[0])
    assert d == 0.0

    d = pdist(x, 'yule')
    assert_equal(d, [0.0])

    d = cdist(x[:1], x[:1], 'yule')
    assert_equal(d, [[0.0]])


def test_jensenshannon():
    assert_almost_equal(jensenshannon([1.0, 0.0, 0.0], [0.0, 1.0, 0.0], 2.0),
                        1.0)
    assert_almost_equal(jensenshannon([1.0, 0.0], [0.5, 0.5]),
                        0.46450140402245893)
    assert_almost_equal(jensenshannon([1.0, 0.0, 0.0], [1.0, 0.0, 0.0]), 0.0)

    assert_almost_equal(jensenshannon([[1.0, 2.0]], [[0.5, 1.5]], axis=0),
                        [0.0, 0.0])
    assert_almost_equal(jensenshannon([[1.0, 2.0]], [[0.5, 1.5]], axis=1),
                        [0.0649045])
    assert_almost_equal(jensenshannon([[1.0, 2.0]], [[0.5, 1.5]], axis=0,
                                      keepdims=True), [[0.0, 0.0]])
    assert_almost_equal(jensenshannon([[1.0, 2.0]], [[0.5, 1.5]], axis=1,
                                      keepdims=True), [[0.0649045]])

    a = np.array([[1, 2, 3, 4],
                  [5, 6, 7, 8],
                  [9, 10, 11, 12]])
    b = np.array([[13, 14, 15, 16],
                  [17, 18, 19, 20],
                  [21, 22, 23, 24]])

    assert_almost_equal(jensenshannon(a, b, axis=0),
                        [0.1954288, 0.1447697, 0.1138377, 0.0927636])
    assert_almost_equal(jensenshannon(a, b, axis=1),
                        [0.1402339, 0.0399106, 0.0201815])


def test_gh_17703():
    arr_1 = np.array([1, 0, 0])
    arr_2 = np.array([2, 0, 0])
    expected = dice(arr_1, arr_2)
    actual = pdist([arr_1, arr_2], metric='dice')
    assert_allclose(actual, expected)
    actual = cdist(np.atleast_2d(arr_1),
                   np.atleast_2d(arr_2), metric='dice')
    assert_allclose(actual, expected)


@pytest.mark.thread_unsafe
def test_immutable_input(metric):
    if metric in ("jensenshannon", "mahalanobis", "seuclidean"):
        pytest.skip("not applicable")
    x = np.arange(10, dtype=np.float64)
    x.setflags(write=False)
    with maybe_deprecated(metric):
        getattr(scipy.spatial.distance, metric)(x, x, w=x)


class TestJaccard:

    def test_pdist_jaccard_random(self):
        eps = 1e-8
        X = eo['pdist-boolean-inp']
        Y_right = eo['pdist-jaccard']
        Y_test1 = wpdist(X, 'jaccard')
        assert_allclose(Y_test1, Y_right, rtol=eps)

    def test_pdist_jaccard_random_float32(self):
        eps = 1e-8
        X = np.float32(eo['pdist-boolean-inp'])
        Y_right = eo['pdist-jaccard']
        Y_test1 = wpdist(X, 'jaccard')
        assert_allclose(Y_test1, Y_right, rtol=eps)

    def test_pdist_jaccard_random_nonC(self):
        eps = 1e-8
        X = eo['pdist-boolean-inp']
        Y_right = eo['pdist-jaccard']
        Y_test2 = wpdist(X, 'test_jaccard')
        assert_allclose(Y_test2, Y_right, rtol=eps)

    def test_pdist_djaccard_random(self):
        eps = 1e-8
        X = np.float64(eo['pdist-boolean-inp'])
        Y_right = eo['pdist-jaccard']
        Y_test1 = wpdist(X, 'jaccard')
        assert_allclose(Y_test1, Y_right, rtol=eps)

    def test_pdist_djaccard_random_float32(self):
        eps = 1e-8
        X = np.float32(eo['pdist-boolean-inp'])
        Y_right = eo['pdist-jaccard']
        Y_test1 = wpdist(X, 'jaccard')
        assert_allclose(Y_test1, Y_right, rtol=eps)

    def test_pdist_djaccard_allzeros(self):
        eps = 1e-15
        Y = pdist(np.zeros((5, 3)), 'jaccard')
        assert_allclose(np.zeros(10), Y, rtol=eps)

    def test_pdist_djaccard_random_nonC(self):
        eps = 1e-8
        X = np.float64(eo['pdist-boolean-inp'])
        Y_right = eo['pdist-jaccard']
        Y_test2 = wpdist(X, 'test_jaccard')
        assert_allclose(Y_test2, Y_right, rtol=eps)

    def test_pdist_djaccard_allzeros_nonC(self):
        eps = 1e-15
        Y = pdist(np.zeros((5, 3)), 'test_jaccard')
        assert_allclose(np.zeros(10), Y, rtol=eps)

    def test_pdist_jaccard_mtica1(self):
        m = wjaccard(np.array([1, 0, 1, 1, 0]),
                     np.array([1, 1, 0, 1, 1]))
        m2 = wjaccard(np.array([1, 0, 1, 1, 0], dtype=bool),
                      np.array([1, 1, 0, 1, 1], dtype=bool))
        assert_allclose(m, 0.6, rtol=0, atol=1e-10)
        assert_allclose(m2, 0.6, rtol=0, atol=1e-10)

    def test_pdist_jaccard_mtica2(self):
        m = wjaccard(np.array([1, 0, 1]),
                     np.array([1, 1, 0]))
        m2 = wjaccard(np.array([1, 0, 1], dtype=bool),
                      np.array([1, 1, 0], dtype=bool))
        assert_allclose(m, 2 / 3, rtol=0, atol=1e-10)
        assert_allclose(m2, 2 / 3, rtol=0, atol=1e-10)

    def test_non_01_input(self):
        # Non-0/1 numeric input should be cast to bool before computation.
        # See gh-21176.
        x = np.array([-10, 2.5, 0])  # [True, True, False]
        y = np.array([ 2,   -5, 2])  # [True, True, True]
        eps = np.finfo(float).eps
        assert_allclose(jaccard(x, y), 1/3, rtol=eps)
        assert_allclose(cdist([x], [y], 'jaccard'), [[1/3]])
        assert_allclose(pdist([x, y], 'jaccard'), [1/3])


class TestChebyshev:

    def test_pdist_chebyshev_random(self):
        eps = 1e-8
        X = eo['pdist-double-inp']
        Y_right = eo['pdist-chebyshev']
        Y_test1 = pdist(X, 'chebyshev')
        assert_allclose(Y_test1, Y_right, rtol=eps)

    def test_pdist_chebyshev_random_float32(self):
        eps = 1e-7
        X = np.float32(eo['pdist-double-inp'])
        Y_right = eo['pdist-chebyshev']
        Y_test1 = pdist(X, 'chebyshev')
        assert_allclose(Y_test1, Y_right, rtol=eps, verbose=verbose > 2)

    def test_pdist_chebyshev_random_nonC(self):
        eps = 1e-8
        X = eo['pdist-double-inp']
        Y_right = eo['pdist-chebyshev']
        Y_test2 = pdist(X, 'test_chebyshev')
        assert_allclose(Y_test2, Y_right, rtol=eps)

    def test_pdist_chebyshev_iris(self):
        eps = 1e-14
        X = eo['iris']
        Y_right = eo['pdist-chebyshev-iris']
        Y_test1 = pdist(X, 'chebyshev')
        assert_allclose(Y_test1, Y_right, rtol=eps)

    def test_pdist_chebyshev_iris_float32(self):
        eps = 1e-5
        X = np.float32(eo['iris'])
        Y_right = eo['pdist-chebyshev-iris']
        Y_test1 = pdist(X, 'chebyshev')
        assert_allclose(Y_test1, Y_right, rtol=eps, verbose=verbose > 2)

    def test_pdist_chebyshev_iris_nonC(self):
        eps = 1e-14
        X = eo['iris']
        Y_right = eo['pdist-chebyshev-iris']
        Y_test2 = pdist(X, 'test_chebyshev')
        assert_allclose(Y_test2, Y_right, rtol=eps)

    def test_weighted(self):
        # Basic test for weighted Chebyshev.  Only components with non-zero
        # weight participate in the 'max'.
        x = [1, 2, 3]
        y = [6, 5, 4]
        w = [0, 1, 5]
        assert_equal(chebyshev(x, y, w), 3)
        assert_equal(pdist([x, y], 'chebyshev', w=w), [3])
        assert_equal(cdist([x], [y], 'chebyshev', w=w), [[3]])

    def test_zero_weight(self):
        # If the weight is identically zero, the distance should be zero.
        x = [1, 2, 3]
        y = [6, 5, 4]
        w = [0, 0, 0]
        assert_equal(chebyshev(x, y, w), 0)
        assert_equal(pdist([x, y], 'chebyshev', w=w), [0])
        assert_equal(cdist([x], [y], 'chebyshev', w=w), [[0]])