|
10 | 10 | # Jake Vanderplas <[email protected]> |
11 | 11 | # License: BSD 3 clause |
12 | 12 |
|
| 13 | +import numpy as np |
13 | 14 | from scipy import sparse |
14 | 15 |
|
15 | | -from .graph_shortest_path import graph_shortest_path # noqa |
| 16 | +from .deprecation import deprecated |
| 17 | +from ..metrics.pairwise import pairwise_distances |
16 | 18 |
|
17 | 19 |
|
18 | 20 | ############################################################################### |
@@ -67,3 +69,130 @@ def single_source_shortest_path_length(graph, source, *, cutoff=None): |
67 | 69 | break |
68 | 70 | level += 1 |
69 | 71 | return seen # return all path lengths as dictionary |
| 72 | + |
| 73 | + |
@deprecated(
    "`graph_shortest_path` is deprecated in 1.0 (renaming of 0.25) and will "
    "be removed in 1.2. Use `scipy.sparse.csgraph.shortest_path` instead."
)
def graph_shortest_path(dist_matrix, directed=True, method="auto"):
    """Compute shortest-path distances between all pairs of graph vertices.

    This is a deprecated thin wrapper: the arguments are forwarded unchanged
    to `scipy.sparse.csgraph.shortest_path` and its result is returned as-is.

    Parameters
    ----------
    dist_matrix : array-like or sparse matrix of shape (N, N)
        Positive edge distances. ``dist_matrix[i, j]`` is the distance from
        vertex i to vertex j when the two are connected, and 0 when there is
        no edge between them.

    directed : bool, default=True
        If True, edges are followed only from a vertex to its neighbors
        (directed graph). If False, edges may be followed in both
        directions (undirected graph).

    method : {'auto', 'FW', 'D'}, default='auto'
        Algorithm selector, passed straight through to SciPy:

        - 'auto' : let the implementation pick a method for the problem.
        - 'FW' : Floyd-Warshall algorithm, O[N^3].
        - 'D' : Dijkstra's algorithm, O[(k+log(N))N^2].

    Returns
    -------
    G : ndarray of float, shape (N, N)
        ``G[i, j]`` is the length of the shortest path from vertex i to
        vertex j along the graph.

    Notes
    -----
    Caveats carried over from this function's earlier documentation (verify
    against the SciPy documentation before relying on them): with
    ``directed=False``, ``method='D'`` may give incorrect results when
    ``dist_matrix[i, j]`` and ``dist_matrix[j, i]`` are both nonzero but
    unequal. Graphs with negative distances have not been tested; negative
    distances can create infinite cycles that need specialized algorithms.
    """
    csgraph = sparse.csgraph
    return csgraph.shortest_path(dist_matrix, directed=directed, method=method)
| 119 | + |
| 120 | + |
def _fix_connected_components(
    X,
    graph,
    n_connected_components,
    component_labels,
    mode="distance",
    metric="euclidean",
    **kwargs,
):
    """Add connections to sparse graph to connect unconnected components.

    For each pair of components, compute all pairwise distances from the
    samples of one component to the samples of the other, and add a symmetric
    connection between the closest pair of samples. This is a hacky way to
    get a graph with a single connected component, which is necessary for
    example to compute a shortest path between all pairs of samples.

    Parameters
    ----------
    X : array of shape (n_samples, n_features) or (n_samples, n_samples)
        Features used to compute the pairwise distances. If
        `metric="precomputed"`, X is the matrix of pairwise distances and is
        indexed directly instead of being passed to `pairwise_distances`.

    graph : sparse matrix of shape (n_samples, n_samples)
        Graph of connection between samples. It is modified in place.

    n_connected_components : int
        Number of connected components, as computed by
        `scipy.sparse.csgraph.connected_components`.

    component_labels : array of shape (n_samples)
        Labels of connected components, as computed by
        `scipy.sparse.csgraph.connected_components`.

    mode : {'connectivity', 'distance'}, default='distance'
        Type of graph matrix: 'connectivity' writes ones for the added
        connections, and 'distance' writes the distance between the two
        connected samples according to the given metric.

    metric : str
        Metric used in `sklearn.metrics.pairwise.pairwise_distances`, or
        "precomputed".

    kwargs : kwargs
        Keyword arguments passed to
        `sklearn.metrics.pairwise.pairwise_distances`.

    Returns
    -------
    graph : sparse matrix of shape (n_samples, n_samples)
        The same `graph` object that was passed in, with the extra
        connections added.

    Raises
    ------
    ValueError
        If `mode` is neither 'connectivity' nor 'distance'.
    """
    # Fail fast: reject a bad mode before any distance matrix is computed.
    if mode not in ("connectivity", "distance"):
        raise ValueError(
            "Unknown mode=%r, should be one of ['connectivity', 'distance']."
            % mode
        )

    precomputed = metric == "precomputed"

    # The sample indices of a component are reused for every pair it takes
    # part in, so compute them once instead of once per (i, j) pair.
    component_indices = [
        np.flatnonzero(component_labels == label)
        for label in range(n_connected_components)
    ]

    for i in range(1, n_connected_components):
        idx_i = component_indices[i]
        # Row slices of X are only needed when distances must be computed.
        Xi = None if precomputed else X[idx_i]
        for j in range(i):
            idx_j = component_indices[j]

            if precomputed:
                D = X[np.ix_(idx_i, idx_j)]
            else:
                D = pairwise_distances(Xi, X[idx_j], metric=metric, **kwargs)

            # Position of the closest (sample in i, sample in j) pair.
            ii, jj = np.unravel_index(D.argmin(axis=None), D.shape)
            weight = 1 if mode == "connectivity" else D[ii, jj]

            # Keep the graph symmetric.
            graph[idx_i[ii], idx_j[jj]] = weight
            graph[idx_j[jj], idx_i[ii]] = weight

    return graph
0 commit comments