|
import numpy as np |
|
|
|
from scipy import stats |
|
from ._stats_py import _get_pvalue, _rankdata, _SimpleNormal |
|
from . import _morestats |
|
from ._axis_nan_policy import _broadcast_arrays |
|
from ._hypotests import _get_wilcoxon_distr |
|
from scipy._lib._util import _lazywhere, _get_nan |
|
|
|
|
|
class WilcoxonDistribution:
    """Discrete null distribution of the signed-rank sum for sample size(s) `n`.

    One table of probabilities is stored per distinct sample size; the table
    comes from `_get_wilcoxon_distr` and is indexed by the (integer) value of
    the statistic.
    NOTE(review): the tables are presumably probability mass functions that
    sum to one -- confirm against `_get_wilcoxon_distr`.

    Parameters
    ----------
    n : array_like of int
        Sample size(s). Cast to an integer array and kept as ``self.n``.
    """

    def __init__(self, n):
        sizes = np.asarray(n).astype(int, copy=False)
        self.n = sizes
        # Build each table once, keyed by sample size, so repeated sizes in
        # `n` share a single table.
        tables = {}
        for size in np.unique(sizes):
            tables[size] = _get_wilcoxon_distr(size)
        self._dists = tables

    def _cdf1(self, k, n):
        """Sum table entries ``0..k`` (inclusive) for a single sample size."""
        table = self._dists[n]
        upto = k + 1
        return table[:upto].sum()

    def _cdf(self, k, n):
        """Element-wise `_cdf1` over broadcastable `k` and `n`."""
        elementwise = np.vectorize(self._cdf1, otypes=[float])
        return elementwise(k, n)

    def _sf1(self, k, n):
        """Sum table entries from index ``k`` (inclusive) to the end."""
        table = self._dists[n]
        return table[k:].sum()

    def _sf(self, k, n):
        """Element-wise `_sf1` over broadcastable `k` and `n`."""
        elementwise = np.vectorize(self._sf1, otypes=[float])
        return elementwise(k, n)

    def mean(self):
        """Return ``n * (n + 1) / 4`` for the stored sample size(s)."""
        n = self.n
        return n * (n + 1) / 4

    def _prep(self, k):
        """Cast `k` to an integer array; also return the mean and a buffer.

        Returns
        -------
        k : ndarray of int
        mn : the value of ``self.mean()``
        out : ndarray of float64
            Uninitialized array shaped like `k` (not used by `cdf` or `sf`).
        """
        k_int = np.asarray(k).astype(int, copy=False)
        buffer = np.empty(k_int.shape, dtype=np.float64)
        return k_int, self.mean(), buffer

    def cdf(self, k):
        """Evaluate the lower tail (statistic ``<= k``).

        At or below the mean the table is summed from the left; above the
        mean the result is one minus the sum from ``k + 1`` to the end.
        """
        k, mn, _ = self._prep(k)

        def one_minus_upper_tail(k, n):
            return 1 - self._sf(k+1, n)

        return _lazywhere(k <= mn, (k, self.n), self._cdf,
                          f2=one_minus_upper_tail)[()]

    def sf(self, k):
        """Evaluate the upper tail (statistic ``>= k``).

        At or below the mean the table is summed from index ``k`` to the end;
        above the mean the result is one minus the sum up to ``k - 1``.
        """
        k, mn, _ = self._prep(k)

        def one_minus_lower_tail(k, n):
            return 1 - self._cdf(k-1, n)

        return _lazywhere(k <= mn, (k, self.n), self._sf,
                          f2=one_minus_lower_tail)[()]
|
|
|
|
|
def _wilcoxon_iv(x, y, zero_method, correction, alternative, method, axis): |
|
|
|
axis = np.asarray(axis)[()] |
|
message = "`axis` must be an integer." |
|
if not np.issubdtype(axis.dtype, np.integer) or axis.ndim != 0: |
|
raise ValueError(message) |
|
|
|
message = '`axis` must be compatible with the shape(s) of `x` (and `y`)' |
|
try: |
|
if y is None: |
|
x = np.asarray(x) |
|
d = x |
|
else: |
|
x, y = _broadcast_arrays((x, y), axis=axis) |
|
d = x - y |
|
d = np.moveaxis(d, axis, -1) |
|
except np.AxisError as e: |
|
raise ValueError(message) from e |
|
|
|
message = "`x` and `y` must have the same length along `axis`." |
|
if y is not None and x.shape[axis] != y.shape[axis]: |
|
raise ValueError(message) |
|
|
|
message = "`x` (and `y`, if provided) must be an array of real numbers." |
|
if np.issubdtype(d.dtype, np.integer): |
|
d = d.astype(np.float64) |
|
if not np.issubdtype(d.dtype, np.floating): |
|
raise ValueError(message) |
|
|
|
zero_method = str(zero_method).lower() |
|
zero_methods = {"wilcox", "pratt", "zsplit"} |
|
message = f"`zero_method` must be one of {zero_methods}." |
|
if zero_method not in zero_methods: |
|
raise ValueError(message) |
|
|
|
corrections = {True, False} |
|
message = f"`correction` must be one of {corrections}." |
|
if correction not in corrections: |
|
raise ValueError(message) |
|
|
|
alternative = str(alternative).lower() |
|
alternatives = {"two-sided", "less", "greater"} |
|
message = f"`alternative` must be one of {alternatives}." |
|
if alternative not in alternatives: |
|
raise ValueError(message) |
|
|
|
if not isinstance(method, stats.PermutationMethod): |
|
methods = {"auto", "asymptotic", "exact"} |
|
message = (f"`method` must be one of {methods} or " |
|
"an instance of `stats.PermutationMethod`.") |
|
if method not in methods: |
|
raise ValueError(message) |
|
output_z = True if method == 'asymptotic' else False |
|
|
|
|
|
|
|
|
|
n_zero = np.sum(d == 0) |
|
if method == "auto" and d.shape[-1] > 50: |
|
method = "asymptotic" |
|
|
|
return d, zero_method, correction, alternative, method, axis, output_z, n_zero |
|
|
|
|
|
def _wilcoxon_statistic(d, method, zero_method='wilcox'): |
|
|
|
i_zeros = (d == 0) |
|
|
|
if zero_method == 'wilcox': |
|
|
|
|
|
|
|
if not d.flags['WRITEABLE']: |
|
d = d.copy() |
|
d[i_zeros] = np.nan |
|
|
|
i_nan = np.isnan(d) |
|
n_nan = np.sum(i_nan, axis=-1) |
|
count = d.shape[-1] - n_nan |
|
|
|
r, t = _rankdata(abs(d), 'average', return_ties=True) |
|
|
|
r_plus = np.sum((d > 0) * r, axis=-1) |
|
r_minus = np.sum((d < 0) * r, axis=-1) |
|
|
|
has_ties = (t == 0).any() |
|
|
|
if zero_method == "zsplit": |
|
|
|
|
|
|
|
r_zero_2 = np.sum(i_zeros * r, axis=-1) / 2 |
|
r_plus += r_zero_2 |
|
r_minus += r_zero_2 |
|
|
|
mn = count * (count + 1.) * 0.25 |
|
se = count * (count + 1.) * (2. * count + 1.) |
|
|
|
if zero_method == "pratt": |
|
|
|
|
|
|
|
n_zero = i_zeros.sum(axis=-1) |
|
mn -= n_zero * (n_zero + 1.) * 0.25 |
|
se -= n_zero * (n_zero + 1.) * (2. * n_zero + 1.) |
|
|
|
|
|
|
|
t[i_zeros.any(axis=-1), 0] = 0 |
|
|
|
tie_correct = (t**3 - t).sum(axis=-1) |
|
se -= tie_correct/2 |
|
se = np.sqrt(se / 24) |
|
|
|
|
|
|
|
|
|
|
|
|
|
if method in ["asymptotic", "auto"]: |
|
z = (r_plus - mn) / se |
|
else: |
|
z = np.nan |
|
|
|
return r_plus, r_minus, se, z, count, has_ties |
|
|
|
|
|
def _correction_sign(z, alternative): |
|
if alternative == 'greater': |
|
return 1 |
|
elif alternative == 'less': |
|
return -1 |
|
else: |
|
return np.sign(z) |
|
|
|
|
|
def _wilcoxon_nd(x, y=None, zero_method='wilcox', correction=True,
                 alternative='two-sided', method='auto', axis=0):
    """Run the signed-rank test along `axis` and package the result.

    Arguments are validated by `_wilcoxon_iv`; the rank sums come from
    `_wilcoxon_statistic`. The p-value is then obtained from one of three
    sources depending on `method`: the normal approximation ("asymptotic"),
    `WilcoxonDistribution` ("exact"), or `stats.permutation_test` (a
    `stats.PermutationMethod` instance).

    Returns
    -------
    res : _morestats.WilcoxonResult
        Carries `statistic` and `pvalue`. A `zstatistic` attribute is added
        when the caller explicitly requested ``method='asymptotic'`` (or, for
        empty input, whenever the resolved method is "asymptotic").
    """
    (d, zero_method, correction, alternative,
     method, axis, output_z, n_zero) = _wilcoxon_iv(
        x, y, zero_method, correction, alternative, method, axis)

    # Empty input: nothing to rank, so every reported quantity is NaN.
    if d.size == 0:
        nan = _get_nan(d)
        empty_res = _morestats.WilcoxonResult(statistic=nan, pvalue=nan)
        if method == 'asymptotic':
            empty_res.zstatistic = nan
        return empty_res

    r_plus, r_minus, se, z, count, has_ties = _wilcoxon_statistic(
        d, method, zero_method)

    # Tie information is only available now, so "auto" is resolved here.
    if method == "auto":
        if not has_ties and not n_zero > 0:
            method = "exact"
        elif d.shape[-1] > 13:
            method = "asymptotic"
        else:
            # NOTE(review): the cut-off of 13 presumably keeps the number of
            # sign-flip outcomes within the default resampling budget of
            # `PermutationMethod` -- confirm.
            method = stats.PermutationMethod()

    two_sided = alternative == 'two-sided'

    if method == 'asymptotic':
        if correction:
            # Continuity correction: shift z by half a unit of the statistic.
            z -= _correction_sign(z, alternative) * 0.5 / se
        p = _get_pvalue(z, _SimpleNormal(), alternative, xp=np)
    elif method == 'exact':
        null_dist = WilcoxonDistribution(count)
        # The statistic may be non-integral; it is rounded up before the
        # lower tail and down before the upper tail is evaluated, which can
        # only enlarge either tail.
        if alternative == 'less':
            p = null_dist.cdf(np.ceil(r_plus))
        elif alternative == 'greater':
            p = null_dist.sf(np.floor(r_plus))
        else:
            upper_tail = null_dist.sf(np.floor(r_plus))
            lower_tail = null_dist.cdf(np.ceil(r_plus))
            p = np.clip(2 * np.minimum(upper_tail, lower_tail), 0, 1)
    else:
        # `method` is a `stats.PermutationMethod` instance at this point.
        def signed_rank_sum(sample):
            return _wilcoxon_statistic(sample, method, zero_method)[0]

        p = stats.permutation_test(
            (d,), signed_rank_sum,
            permutation_type='samples', **method._asdict(),
            alternative=alternative, axis=-1).pvalue

    # Two-sided tests report the smaller rank sum and, for the asymptotic
    # method, a non-positive z; one-sided tests report `r_plus` and z as is.
    if two_sided:
        statistic = np.minimum(r_plus, r_minus)
        if method == 'asymptotic':
            z = -np.abs(z)
    else:
        statistic = r_plus

    res = _morestats.WilcoxonResult(statistic=statistic, pvalue=p[()])
    if output_z:
        res.zstatistic = z[()]
    return res
|
|