|
"""Graph utilities and algorithms.""" |
|
|
|
|
|
|
|
|
|
import numpy as np |
|
from scipy import sparse |
|
|
|
from ..metrics.pairwise import pairwise_distances |
|
from ._param_validation import Integral, Interval, validate_params |
|
|
|
|
|
|
|
|
|
|
|
@validate_params(
    {
        "graph": ["array-like", "sparse matrix"],
        "source": [Interval(Integral, 0, None, closed="left")],
        "cutoff": [Interval(Integral, 0, None, closed="left"), None],
    },
    prefer_skip_nested_validation=True,
)
def single_source_shortest_path_length(graph, source, *, cutoff=None):
    """Compute shortest path lengths from a source node to every reachable node.

    The graph is explored breadth-first, one level at a time, so the length
    of a path is its number of edges; the stored edge values are not used as
    weights.

    Parameters
    ----------
    graph : {array-like, sparse matrix} of shape (n_nodes, n_nodes)
        Adjacency matrix of the graph. Sparse matrix of format LIL is
        preferred, since any other input is converted to LIL first.

    source : int
        Node from which the search starts.

    cutoff : int, default=None
        Maximum depth of the search: only paths of length <= cutoff are
        returned. If None, the search runs until no new node is reachable.

    Returns
    -------
    paths : dict
        Mapping from each reachable node to the length of the shortest path
        leading to it from `source`, i.e. `{end: path_length}`. The source
        itself is included with length 0.

    Examples
    --------
    >>> from sklearn.utils.graph import single_source_shortest_path_length
    >>> import numpy as np
    >>> graph = np.array([[ 0, 1, 0, 0],
    ...                   [ 1, 0, 1, 0],
    ...                   [ 0, 1, 0, 0],
    ...                   [ 0, 0, 0, 0]])
    >>> single_source_shortest_path_length(graph, 0)
    {0: 0, 1: 1, 2: 2}
    >>> graph = np.ones((6, 6))
    >>> sorted(single_source_shortest_path_length(graph, 2).items())
    [(0, 1), (1, 1), (2, 0), (3, 1), (4, 1), (5, 1)]
    """
    # LIL exposes, for every row, the list of column indices holding a stored
    # entry, which is exactly the neighbor list needed by the traversal.
    adjacency = graph.tolil() if sparse.issparse(graph) else sparse.lil_matrix(graph)

    distances = {}
    depth = 0
    frontier = [source]
    while frontier:
        upcoming = set()
        for node in frontier:
            if node in distances:
                # Already reached through a path that is at least as short.
                continue
            distances[node] = depth
            upcoming.update(adjacency.rows[node])
        if cutoff is not None and depth >= cutoff:
            break
        frontier = upcoming
        depth += 1
    return distances
|
|
|
|
|
def _fix_connected_components(
    X,
    graph,
    n_connected_components,
    component_labels,
    mode="distance",
    metric="euclidean",
    **kwargs,
):
    """Add connections to sparse graph to connect unconnected components.

    For each pair of unconnected components, compute all pairwise distances
    from one component to the other, and add a connection on the closest pair
    of samples. This is a hacky way to get a graph with a single connected
    component, which is necessary for example to compute a shortest path
    between all pairs of samples in the graph.

    Parameters
    ----------
    X : array of shape (n_samples, n_features) or (n_samples, n_samples)
        Features to compute the pairwise distances. If `metric =
        "precomputed"`, X is the matrix of pairwise distances.

    graph : sparse matrix of shape (n_samples, n_samples)
        Graph of connection between samples. It is modified in place.

    n_connected_components : int
        Number of connected components, as computed by
        `scipy.sparse.csgraph.connected_components`.

    component_labels : array of shape (n_samples)
        Labels of connected components, as computed by
        `scipy.sparse.csgraph.connected_components`.

    mode : {'connectivity', 'distance'}, default='distance'
        Type of graph matrix: 'connectivity' corresponds to the connectivity
        matrix with ones and zeros, and 'distance' corresponds to the distances
        between neighbors according to the given metric.

    metric : str
        Metric used in `sklearn.metrics.pairwise.pairwise_distances`.

    kwargs : kwargs
        Keyword arguments passed to
        `sklearn.metrics.pairwise.pairwise_distances`.

    Returns
    -------
    graph : sparse matrix of shape (n_samples, n_samples)
        Graph of connection between samples, with a single connected component.
        This is the same object as the `graph` argument.

    Raises
    ------
    RuntimeError
        If `metric="precomputed"` and `X` is a sparse matrix.

    ValueError
        If `mode` is neither 'connectivity' nor 'distance'.
    """
    if metric == "precomputed" and sparse.issparse(X):
        raise RuntimeError(
            "_fix_connected_components with metric='precomputed' requires the "
            "full distance matrix in X, and does not work with a sparse "
            "neighbors graph."
        )

    # Validate `mode` up front so that an invalid value fails before any
    # (potentially expensive) distance computation is performed.
    if mode not in ("connectivity", "distance"):
        raise ValueError(
            "Unknown mode=%r, should be one of ['connectivity', 'distance']."
            % (mode,)
        )

    precomputed = metric == "precomputed"

    # Sample indices of every component, computed once instead of once per
    # pair of components.
    component_indices = [
        np.flatnonzero(component_labels == label)
        for label in range(n_connected_components)
    ]

    # Component 0 has no lower-numbered partner, so pairs start at i == 1.
    for i in range(1, n_connected_components):
        idx_i = component_indices[i]
        # The feature rows are only needed when distances must be computed.
        Xi = None if precomputed else X[idx_i]
        for j in range(i):
            idx_j = component_indices[j]

            if precomputed:
                D = X[np.ix_(idx_i, idx_j)]
            else:
                D = pairwise_distances(Xi, X[idx_j], metric=metric, **kwargs)

            # Position, within the (component i, component j) block, of the
            # closest pair of samples.
            ii, jj = np.unravel_index(D.argmin(axis=None), D.shape)
            row, col = idx_i[ii], idx_j[jj]
            weight = 1 if mode == "connectivity" else D[ii, jj]
            # Insert the edge in both directions to keep the graph symmetric.
            graph[row, col] = weight
            graph[col, row] = weight

    return graph
|
|