|
import numpy as np |
|
from numpy.testing import (assert_equal, assert_array_equal, |
|
assert_array_almost_equal, assert_approx_equal, |
|
assert_allclose) |
|
import pytest |
|
from pytest import raises as assert_raises |
|
from scipy import stats |
|
from scipy.special import xlogy |
|
from scipy.stats.contingency import (margins, expected_freq, |
|
chi2_contingency, association) |
|
|
|
|
|
def test_margins():
    """Check `margins` on 1-D, 2-D and 3-D arrays against fixed sums.

    Each case pairs an input array with the sequence of arrays `margins`
    must return for it; the cases below expect one array per input axis.
    """
    cases = [
        # 1-D input with a single element.
        (np.array([1]),
         [np.array([1])]),
        # 1 x 1 table.
        (np.array([[1]]),
         [np.array([[1]]),
          np.array([[1]])]),
        # 2 x 6 table holding 0..11.
        (np.arange(12).reshape(2, 6),
         [np.array([[15], [51]]),
          np.array([[6, 8, 10, 12, 14, 16]])]),
        # 2 x 3 x 4 table holding 0..23.
        (np.arange(24).reshape(2, 3, 4),
         [np.array([[[66]], [[210]]]),
          np.array([[[60], [92], [124]]]),
          np.array([[[60, 66, 72, 78]]])]),
    ]

    for table, wanted_margins in cases:
        computed = margins(table)
        # Check the count explicitly: zip() below would silently stop at the
        # shorter sequence and hide a missing or extra margin.
        assert_equal(len(computed), len(wanted_margins))
        for got, wanted in zip(computed, wanted_margins):
            assert_array_equal(got, wanted)
|
|
|
|
|
def test_expected_freq():
    """Check `expected_freq` on a 1-D list, a 3-D table and a 2-D table."""
    # A one-element list must yield the single value 1.
    assert_array_equal(expected_freq([1]), np.array([1.0]))

    # 3 x 2 x 2 table for which every expected frequency must be exactly 1.
    cube = np.array([[[2, 0], [0, 2]], [[0, 2], [2, 0]], [[1, 1], [1, 1]]])
    assert_array_equal(expected_freq(cube), np.ones_like(cube))

    # 2 x 3 table; this case is compared approximately rather than exactly.
    table = np.array([[10, 10, 20], [20, 20, 20]])
    wanted = np.array([[12., 12., 16.], [18., 18., 24.]])
    assert_array_almost_equal(expected_freq(table), wanted)
|
|
|
|
|
class TestChi2Contingency: |
|
def test_chi2_contingency_trivial(self): |
|
|
|
|
|
|
|
obs = np.array([[1, 2], [1, 2]]) |
|
chi2, p, dof, expected = chi2_contingency(obs, correction=False) |
|
assert_equal(chi2, 0.0) |
|
assert_equal(p, 1.0) |
|
assert_equal(dof, 1) |
|
assert_array_equal(obs, expected) |
|
|
|
|
|
obs = np.array([1, 2, 3]) |
|
chi2, p, dof, expected = chi2_contingency(obs, correction=False) |
|
assert_equal(chi2, 0.0) |
|
assert_equal(p, 1.0) |
|
assert_equal(dof, 0) |
|
assert_array_equal(obs, expected) |
|
|
|
def test_chi2_contingency_R(self): |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
obs = np.array( |
|
[[[12, 34, 23], |
|
[35, 31, 11], |
|
[12, 32, 9], |
|
[12, 12, 14]], |
|
[[4, 47, 11], |
|
[34, 10, 18], |
|
[18, 13, 19], |
|
[9, 33, 25]]]) |
|
chi2, p, dof, expected = chi2_contingency(obs) |
|
assert_approx_equal(chi2, 102.17, significant=5) |
|
assert_approx_equal(p, 3.514e-14, significant=4) |
|
assert_equal(dof, 17) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
obs = np.array( |
|
[[[[12, 17], |
|
[11, 16]], |
|
[[11, 12], |
|
[15, 16]]], |
|
[[[23, 15], |
|
[30, 22]], |
|
[[14, 17], |
|
[15, 16]]]]) |
|
chi2, p, dof, expected = chi2_contingency(obs) |
|
assert_approx_equal(chi2, 8.758, significant=4) |
|
assert_approx_equal(p, 0.6442, significant=4) |
|
assert_equal(dof, 11) |
|
|
|
def test_chi2_contingency_g(self): |
|
c = np.array([[15, 60], [15, 90]]) |
|
g, p, dof, e = chi2_contingency(c, lambda_='log-likelihood', |
|
correction=False) |
|
assert_allclose(g, 2*xlogy(c, c/e).sum()) |
|
|
|
g, p, dof, e = chi2_contingency(c, lambda_='log-likelihood', |
|
correction=True) |
|
c_corr = c + np.array([[-0.5, 0.5], [0.5, -0.5]]) |
|
assert_allclose(g, 2*xlogy(c_corr, c_corr/e).sum()) |
|
|
|
c = np.array([[10, 12, 10], [12, 10, 10]]) |
|
g, p, dof, e = chi2_contingency(c, lambda_='log-likelihood') |
|
assert_allclose(g, 2*xlogy(c, c/e).sum()) |
|
|
|
def test_chi2_contingency_bad_args(self): |
|
|
|
|
|
|
|
obs = np.array([[-1, 10], [1, 2]]) |
|
assert_raises(ValueError, chi2_contingency, obs) |
|
|
|
|
|
|
|
obs = np.array([[0, 1], [0, 1]]) |
|
assert_raises(ValueError, chi2_contingency, obs) |
|
|
|
|
|
obs = np.empty((0, 8)) |
|
assert_raises(ValueError, chi2_contingency, obs) |
|
|
|
def test_chi2_contingency_yates_gh13875(self): |
|
|
|
|
|
observed = np.array([[1573, 3], [4, 0]]) |
|
p = chi2_contingency(observed)[1] |
|
assert_allclose(p, 1, rtol=1e-12) |
|
|
|
@pytest.mark.parametrize("correction", [False, True]) |
|
def test_result(self, correction): |
|
obs = np.array([[1, 2], [1, 2]]) |
|
res = chi2_contingency(obs, correction=correction) |
|
assert_equal((res.statistic, res.pvalue, res.dof, res.expected_freq), res) |
|
|
|
@pytest.mark.slow |
|
def test_exact_permutation(self): |
|
table = np.arange(4).reshape(2, 2) |
|
ref_statistic = chi2_contingency(table, correction=False).statistic |
|
ref_pvalue = stats.fisher_exact(table).pvalue |
|
method = stats.PermutationMethod(n_resamples=50000) |
|
res = chi2_contingency(table, correction=False, method=method) |
|
assert_equal(res.statistic, ref_statistic) |
|
assert_allclose(res.pvalue, ref_pvalue, rtol=1e-15) |
|
|
|
@pytest.mark.slow |
|
@pytest.mark.parametrize('method', (stats.PermutationMethod, |
|
stats.MonteCarloMethod)) |
|
def test_resampling_randomized(self, method): |
|
rng = np.random.default_rng(2592340925) |
|
|
|
rows = [300, 1000, 800] |
|
cols = [200, 400, 800, 700] |
|
table = stats.random_table(rows, cols, seed=rng).rvs() |
|
res = chi2_contingency(table, correction=False, method=method(rng=rng)) |
|
ref = chi2_contingency(table, correction=False) |
|
assert_equal(res.statistic, ref.statistic) |
|
assert_allclose(res.pvalue, ref.pvalue, atol=5e-3) |
|
assert_equal(res.dof, np.nan) |
|
assert_equal(res.expected_freq, ref.expected_freq) |
|
|
|
def test_resampling_invalid_args(self): |
|
table = np.arange(8).reshape(2, 2, 2) |
|
|
|
method = stats.PermutationMethod() |
|
message = "Use of `method` is only compatible with two-way tables." |
|
with pytest.raises(ValueError, match=message): |
|
chi2_contingency(table, correction=False, method=method) |
|
|
|
table = np.arange(4).reshape(2, 2) |
|
|
|
method = stats.PermutationMethod() |
|
message = "`correction=True` is not compatible with..." |
|
with pytest.raises(ValueError, match=message): |
|
chi2_contingency(table, method=method) |
|
|
|
method = stats.MonteCarloMethod() |
|
message = "`lambda_=2` is not compatible with..." |
|
with pytest.raises(ValueError, match=message): |
|
chi2_contingency(table, correction=False, lambda_=2, method=method) |
|
|
|
method = 'herring' |
|
message = "`method='herring'` not recognized; if provided, `method`..." |
|
with pytest.raises(ValueError, match=message): |
|
chi2_contingency(table, correction=False, method=method) |
|
|
|
method = stats.MonteCarloMethod(rvs=stats.norm.rvs) |
|
message = "If the `method` argument of `chi2_contingency` is..." |
|
with pytest.raises(ValueError, match=message): |
|
chi2_contingency(table, correction=False, method=method) |
|
|
|
|
|
def test_bad_association_args():
    """`association` must raise `ValueError` for each invalid call below."""
    rejected_calls = [
        # Method name "X".
        ([[1, 2], [3, 4]], "X"),
        # 3-D input.
        ([[[1, 2]], [[3, 4]]], "cramer"),
        # Table containing a negative entry.
        ([[-1, 10], [1, 2]], 'cramer'),
        # Object array containing a non-numeric string entry.
        (np.array([[1, 2], ["dd", 4]], dtype=object), 'cramer'),
    ]
    for observed, method in rejected_calls:
        with pytest.raises(ValueError):
            association(observed, method)
|
|
|
|
|
@pytest.mark.parametrize('stat, expected',
                         [('cramer', 0.09222412010290792),
                          ('tschuprow', 0.0775509319944633),
                          ('pearson', 0.12932925727138758)])
def test_assoc(stat, expected):
    """Check `association` on a fixed 3 x 5 table for each `method` name.

    Parameters
    ----------
    stat : str
        Value passed as the `method` argument of `association`.
    expected : float
        Reference value the result is compared to with `assert_allclose`.
    """
    table = np.array([
        [12, 13, 14, 15, 16],
        [17, 16, 18, 19, 11],
        [9, 15, 14, 12, 11],
    ])
    computed = association(observed=table, method=stat)
    assert_allclose(computed, expected)
|
|