|
import numpy as np |
|
from numpy.testing import assert_allclose |
|
from pytest import approx |
|
|
|
from sklearn.utils.stats import _weighted_percentile |
|
|
|
|
|
def test_weighted_percentile():
    """Check the 50th weighted percentile ignores a zero-weight outlier.

    The sample holds fifty 0s, a single 1, fifty 2s and one extreme value
    (100000) whose weight is zero; the expected result is 1.
    """
    # Layout: indices 0-49 -> 0, index 50 -> 1, indices 51-101 -> 2.
    values = np.concatenate((np.zeros(50), [1.0], np.full(51, 2.0)))
    # Overwrite the final entry with the outlier.
    values[-1] = 100000

    # Unit weights everywhere except the outlier, which gets weight zero.
    weights = np.concatenate((np.ones(101), [0.0]))

    result = _weighted_percentile(values, weights, 50)
    assert approx(result) == 1
|
|
|
|
|
def test_weighted_percentile_equal():
    """Check the 50th weighted percentile of an all-zero sample is exactly 0.

    The last of the 102 samples carries zero weight; all others have weight 1.
    """
    values = np.zeros(102, dtype=np.float64)
    weights = np.concatenate((np.ones(101), [0.0]))

    result = _weighted_percentile(values, weights, 50)
    assert result == 0
|
|
|
|
|
def test_weighted_percentile_zero_weight():
    """Check the 50th weighted percentile when every weight is zero.

    All 102 samples equal 1.0 and all weights are 0.0; the expected result
    is (approximately) 1.0.
    """
    values = np.full(102, 1.0, dtype=np.float64)
    weights = np.zeros(102, dtype=np.float64)

    result = _weighted_percentile(values, weights, 50)
    assert approx(result) == 1.0
|
|
|
|
|
def test_weighted_percentile_zero_weight_zero_percentile():
    """Check percentiles 0, 50 and 100 when the edge samples have zero weight.

    Only the values 2, 3 and 4 carry non-zero weight, so the expected
    results are 2, 3 and 4 respectively.
    """
    values = np.array([0, 1, 2, 3, 4, 5])
    weights = np.array([0, 0, 1, 1, 1, 0])

    # (requested percentile, expected value), checked in ascending order.
    cases = ((0, 2), (50, 3), (100, 4))
    for percentile, expected in cases:
        result = _weighted_percentile(values, weights, percentile)
        assert approx(result) == expected
|
|
|
|
|
def test_weighted_median_equal_weights():
    """Check the weighted percentile matches ``np.median`` for unit weights.

    ``_weighted_percentile`` is called without an explicit percentile, so its
    default is used (presumably the median; the default is not visible here).
    """
    rng = np.random.RandomState(0)

    # Eleven integers drawn from [0, 10).
    sample = rng.randint(10, size=11)
    unit_weights = np.ones(sample.shape)

    expected = np.median(sample)
    observed = _weighted_percentile(sample, unit_weights)
    assert expected == approx(observed)
|
|
|
|
|
def test_weighted_median_integer_weights():
    """Check integer weights behave like repeating each sample.

    The median of the sample with every value repeated ``weight`` times is
    compared against ``_weighted_percentile`` called with its default
    percentile on the original sample and weights.
    """
    rng = np.random.RandomState(0)
    sample = rng.randint(20, size=10)
    counts = rng.choice(5, size=10)

    # Expand the sample so each value appears as many times as its weight.
    expanded = np.repeat(sample, counts)
    expected = np.median(expanded)

    observed = _weighted_percentile(sample, counts)
    assert expected == approx(observed)
|
|
|
|
|
def test_weighted_percentile_2d():
    """Check 2D input gives the same result as processing each column alone.

    Two scenarios are covered: a 1D weight vector shared by both columns,
    and a 2D weight array with one weight column per data column.
    """
    rng = np.random.RandomState(0)
    # The order of the draws below determines the generated values.
    col_a = rng.randint(10, size=10)
    weights_a = rng.choice(5, size=10)
    col_b = rng.randint(20, size=10)

    # Shape (10, 2): one column per generated sample.
    data = np.array([col_a, col_b]).T

    # Scenario 1: 2D data with a single shared 1D weight vector.
    combined = _weighted_percentile(data, weights_a)
    per_column = [_weighted_percentile(column, weights_a) for column in data.T]
    assert_allclose(combined, per_column)

    # Scenario 2: 2D data with a matching 2D weight array.
    weights_b = rng.choice(5, size=10)
    weights_2d = np.array([weights_a, weights_b]).T

    combined = _weighted_percentile(data, weights_2d)
    per_column = [
        _weighted_percentile(column, column_weights)
        for column, column_weights in zip(data.T, weights_2d.T)
    ]
    assert_allclose(combined, per_column)
|
|