File size: 3,460 Bytes
7885a28 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
from cython cimport floating
from cython.parallel cimport prange
from libc.math cimport fabs
from ..utils._typedefs cimport intp_t
from ..utils._openmp_helpers import _openmp_effective_n_threads
def _chi2_kernel_fast(floating[:, :] X,
                      floating[:, :] Y,
                      floating[:, :] result):
    """Fill ``result`` with negated chi-squared sums for all row pairs.

    For each row ``i`` of ``X`` and each row ``j`` of ``Y`` the value

        result[i, j] = -sum_k (X[i, k] - Y[j, k])**2 / (X[i, k] + Y[j, k])

    is stored, where every feature ``k`` whose sum ``X[i, k] + Y[j, k]`` is
    exactly zero is left out of the sum (this avoids a division by zero).

    Parameters
    ----------
    X : 2D memoryview of a floating type
        Left-hand rows. ``X.shape[1]`` is used as the number of features.
    Y : 2D memoryview of the same floating type
        Right-hand rows, read at columns ``0 .. X.shape[1] - 1``.
    result : 2D memoryview of the same floating type
        Output buffer, written in place at every ``[i, j]`` with
        ``i < X.shape[0]`` and ``j < Y.shape[0]``.

    Returns
    -------
    None
        The outcome is only available through ``result``.
    """
    cdef intp_t n_rows_X = X.shape[0]
    cdef intp_t n_rows_Y = Y.shape[0]
    cdef intp_t n_cols = X.shape[1]
    cdef intp_t row_x, row_y, col
    # The per-pair sum is accumulated in double precision regardless of the
    # input dtype and only narrowed when stored into ``result``.
    cdef double acc, diff, total

    with nogil:
        for row_x in range(n_rows_X):
            for row_y in range(n_rows_Y):
                acc = 0.0
                for col in range(n_cols):
                    total = X[row_x, col] + Y[row_y, col]
                    if total == 0:
                        # Zero denominator: this feature contributes nothing.
                        continue
                    diff = X[row_x, col] - Y[row_y, col]
                    acc += diff * diff / total
                result[row_x, row_y] = -acc
def _sparse_manhattan(
    const floating[::1] X_data,
    const int[:] X_indices,
    const int[:] X_indptr,
    const floating[::1] Y_data,
    const int[:] Y_indices,
    const int[:] Y_indptr,
    double[:, ::1] D,
):
    """Pairwise L1 distances for CSR matrices.

    Every entry ``D[px, py]`` is overwritten with the sum of absolute
    differences between row ``px`` of X and row ``py`` of Y, where a column
    that is stored in only one of the two rows contributes the absolute value
    of its stored entry. Nothing is returned; the result is written into
    ``D`` in place.

    Parameters
    ----------
    X_data, X_indices, X_indptr :
        The ``data``, ``indices`` and ``indptr`` arrays of the CSR matrix X.
    Y_data, Y_indices, Y_indptr :
        The ``data``, ``indices`` and ``indptr`` arrays of the CSR matrix Y.
    D : C-contiguous 2D array of doubles
        Output buffer. ``D.shape[0]`` rows of X and ``D.shape[1]`` rows of Y
        are processed, so it is expected to have shape
        ``(X.shape[0], Y.shape[0])``.

    Notes
    -----
    The row merge below advances whichever cursor points at the smaller
    column index, so it only pairs up matching columns correctly when the
    column indices inside each row are sorted in increasing order.
    NOTE(review): presumably callers guarantee sorted indices -- confirm.

    Usage:
    >>> D = np.zeros((X.shape[0], Y.shape[0]))
    >>> _sparse_manhattan(X.data, X.indices, X.indptr,
    ...                   Y.data, Y.indices, Y.indptr,
    ...                   D)
    """
    # px / py: current row of X / Y; i / j: cursors into the data/indices
    # arrays of X / Y; ix / iy: column indices found at those cursors.
    cdef intp_t px, py, i, j, ix, iy
    # Running distance for the current (px, py) pair.
    cdef double d = 0.0

    # Number of X rows and Y rows to process, taken from the output buffer.
    cdef int m = D.shape[0]
    cdef int n = D.shape[1]

    # One-past-the-last positions of the current X row and Y row.
    cdef int X_indptr_end = 0
    cdef int Y_indptr_end = 0

    cdef int num_threads = _openmp_effective_n_threads()

    # We scan the matrices row by row.
    # Given row px in X and row py in Y, we find the positions (i and j
    # respectively), in .indices where the indices for the two rows start.
    # If the indices (ix and iy) are the same, the corresponding data values
    # are processed and the cursors i and j are advanced.
    # If not, the lowest index is considered. Its associated data value is
    # processed and its cursor is advanced.
    # We proceed like this until one of the cursors hits the end for its row.
    # Then we process all remaining data values in the other row.

    # Below the avoidance of inplace operators is intentional.
    # When prange is used, the inplace operator has a special meaning, i.e. it
    # signals a "reduction". Writing ``d = d + ...`` and ``i = i + 1`` keeps
    # these variables as plain per-iteration temporaries instead.

    # The rows of X are distributed over threads; each iteration px writes
    # only to row px of D.
    for px in prange(m, nogil=True, num_threads=num_threads):
        X_indptr_end = X_indptr[px + 1]
        for py in range(n):
            Y_indptr_end = Y_indptr[py + 1]
            i = X_indptr[px]
            j = Y_indptr[py]
            d = 0.0
            # Merge step: walk both rows until one of them is exhausted.
            while i < X_indptr_end and j < Y_indptr_end:
                ix = X_indices[i]
                iy = Y_indices[j]

                if ix == iy:
                    # Column stored in both rows.
                    d = d + fabs(X_data[i] - Y_data[j])
                    i = i + 1
                    j = j + 1
                elif ix < iy:
                    # Column stored only in the X row.
                    d = d + fabs(X_data[i])
                    i = i + 1
                else:
                    # Column stored only in the Y row.
                    d = d + fabs(Y_data[j])
                    j = j + 1

            # Tail step: at most one row still has unprocessed entries, and
            # none of them can have a counterpart in the other row.
            if i == X_indptr_end:
                while j < Y_indptr_end:
                    d = d + fabs(Y_data[j])
                    j = j + 1
            else:
                while i < X_indptr_end:
                    d = d + fabs(X_data[i])
                    i = i + 1

            D[px, py] = d
|