Spaces:
Running
Running
Add n_neighbors, min_dist, and metric options for UMAP.
Browse files
lynxkite-graph-analytics/src/lynxkite_graph_analytics/ml_ops.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1 |
"""Operations for machine learning."""
|
2 |
|
|
|
|
|
3 |
import numpy as np
|
4 |
from . import core
|
5 |
from lynxkite.core import workspace
|
@@ -153,6 +155,24 @@ VIRIDIS = [
|
|
153 |
]
|
154 |
|
155 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
156 |
@op("View vectors", view="visualization")
|
157 |
def view_vectors(
|
158 |
bundle: core.Bundle,
|
@@ -160,15 +180,24 @@ def view_vectors(
|
|
160 |
table_name: str = "nodes",
|
161 |
vector_column: str = "",
|
162 |
label_column: str = "",
|
|
|
|
|
|
|
163 |
):
|
164 |
vec = np.stack(bundle.dfs[table_name][vector_column].to_numpy())
|
165 |
-
|
166 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
167 |
data = [[*p.tolist(), "", c.item()] for p, c in zip(proj, color)]
|
168 |
if label_column:
|
169 |
for i, row in enumerate(bundle.dfs[table_name][label_column]):
|
170 |
data[i][2] = row
|
171 |
-
size =
|
172 |
v = {
|
173 |
"title": {
|
174 |
"text": f"UMAP projection of {vector_column}",
|
|
|
1 |
"""Operations for machine learning."""
|
2 |
|
3 |
+
import enum
|
4 |
+
import functools
|
5 |
import numpy as np
|
6 |
from . import core
|
7 |
from lynxkite.core import workspace
|
|
|
155 |
]
|
156 |
|
157 |
|
158 |
+
class UMAPMetric(enum.Enum):
    """Distance metrics selectable for the UMAP projection.

    Each member's value is the metric name as a string; it is the string
    handed to the UMAP implementation as its ``metric`` argument. Member
    names and values are intentionally identical, and the declaration
    order is the iteration order of the enum.
    """

    # NOTE(review): l1/cityblock/taxicab/manhattan look like synonyms for the
    # same distance, as do euclidean/l2 and chebyshev/linf -- confirm against
    # the UMAP backend's documentation. They are kept as distinct members
    # because their values differ.
    l1 = "l1"
    cityblock = "cityblock"
    taxicab = "taxicab"
    manhattan = "manhattan"
    euclidean = "euclidean"
    l2 = "l2"
    sqeuclidean = "sqeuclidean"
    canberra = "canberra"
    minkowski = "minkowski"
    chebyshev = "chebyshev"
    linf = "linf"
    cosine = "cosine"
    correlation = "correlation"
    hellinger = "hellinger"
    hamming = "hamming"
|
174 |
+
|
175 |
+
|
176 |
@op("View vectors", view="visualization")
|
177 |
def view_vectors(
|
178 |
bundle: core.Bundle,
|
|
|
180 |
table_name: str = "nodes",
|
181 |
vector_column: str = "",
|
182 |
label_column: str = "",
|
183 |
+
n_neighbors: int = 15,
|
184 |
+
min_dist: float = 0.1,
|
185 |
+
metric: UMAPMetric = UMAPMetric.euclidean,
|
186 |
):
|
187 |
vec = np.stack(bundle.dfs[table_name][vector_column].to_numpy())
|
188 |
+
umap = functools.partial(
|
189 |
+
cuml.manifold.umap.UMAP,
|
190 |
+
n_neighbors=n_neighbors,
|
191 |
+
min_dist=min_dist,
|
192 |
+
metric=metric.value,
|
193 |
+
)
|
194 |
+
proj = umap(n_components=2).fit_transform(vec)
|
195 |
+
color = umap(n_components=1).fit_transform(vec)
|
196 |
data = [[*p.tolist(), "", c.item()] for p, c in zip(proj, color)]
|
197 |
if label_column:
|
198 |
for i, row in enumerate(bundle.dfs[table_name][label_column]):
|
199 |
data[i][2] = row
|
200 |
+
size = 100 / len(data) ** 0.4
|
201 |
v = {
|
202 |
"title": {
|
203 |
"text": f"UMAP projection of {vector_column}",
|