darabos committed on
Commit
16d9f6b
·
1 Parent(s): f369afc

Add n_neighbors, min_dist, and metric options for UMAP.

Browse files
lynxkite-graph-analytics/src/lynxkite_graph_analytics/ml_ops.py CHANGED
@@ -1,5 +1,7 @@
1
  """Operations for machine learning."""
2
 
 
 
3
  import numpy as np
4
  from . import core
5
  from lynxkite.core import workspace
@@ -153,6 +155,24 @@ VIRIDIS = [
153
  ]
154
 
155
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
  @op("View vectors", view="visualization")
157
  def view_vectors(
158
  bundle: core.Bundle,
@@ -160,15 +180,24 @@ def view_vectors(
160
  table_name: str = "nodes",
161
  vector_column: str = "",
162
  label_column: str = "",
 
 
 
163
  ):
164
  vec = np.stack(bundle.dfs[table_name][vector_column].to_numpy())
165
- proj = cuml.manifold.umap.UMAP(n_components=2).fit_transform(vec)
166
- color = cuml.manifold.umap.UMAP(n_components=1).fit_transform(vec)
 
 
 
 
 
 
167
  data = [[*p.tolist(), "", c.item()] for p, c in zip(proj, color)]
168
  if label_column:
169
  for i, row in enumerate(bundle.dfs[table_name][label_column]):
170
  data[i][2] = row
171
- size = 50 / len(data) ** 0.5
172
  v = {
173
  "title": {
174
  "text": f"UMAP projection of {vector_column}",
 
1
  """Operations for machine learning."""
2
 
3
+ import enum
4
+ import functools
5
  import numpy as np
6
  from . import core
7
  from lynxkite.core import workspace
 
155
  ]
156
 
157
 
158
class UMAPMetric(enum.Enum):
    """Distance metric names selectable for the UMAP projection.

    Each member's value is the plain string handed to the UMAP constructor
    as its ``metric`` argument (``view_vectors`` passes ``metric.value``).
    Member names are deliberately identical to their values so that the
    option shown to the user is exactly the string the backend receives.

    NOTE(review): several entries look like alternative spellings of the
    same distance (e.g. l1 / cityblock / taxicab / manhattan, or
    chebyshev / linf) -- confirm against the UMAP backend's documentation.
    Because every value is a different string, they are all distinct enum
    members rather than aliases of one another.
    """

    l1 = "l1"
    cityblock = "cityblock"
    taxicab = "taxicab"
    manhattan = "manhattan"
    euclidean = "euclidean"
    l2 = "l2"
    sqeuclidean = "sqeuclidean"
    canberra = "canberra"
    minkowski = "minkowski"
    chebyshev = "chebyshev"
    linf = "linf"
    cosine = "cosine"
    correlation = "correlation"
    hellinger = "hellinger"
    hamming = "hamming"
174
+
175
+
176
  @op("View vectors", view="visualization")
177
  def view_vectors(
178
  bundle: core.Bundle,
 
180
  table_name: str = "nodes",
181
  vector_column: str = "",
182
  label_column: str = "",
183
+ n_neighbors: int = 15,
184
+ min_dist: float = 0.1,
185
+ metric: UMAPMetric = UMAPMetric.euclidean,
186
  ):
187
  vec = np.stack(bundle.dfs[table_name][vector_column].to_numpy())
188
+ umap = functools.partial(
189
+ cuml.manifold.umap.UMAP,
190
+ n_neighbors=n_neighbors,
191
+ min_dist=min_dist,
192
+ metric=metric.value,
193
+ )
194
+ proj = umap(n_components=2).fit_transform(vec)
195
+ color = umap(n_components=1).fit_transform(vec)
196
  data = [[*p.tolist(), "", c.item()] for p, c in zip(proj, color)]
197
  if label_column:
198
  for i, row in enumerate(bundle.dfs[table_name][label_column]):
199
  data[i][2] = row
200
+ size = 100 / len(data) ** 0.4
201
  v = {
202
  "title": {
203
  "text": f"UMAP projection of {vector_column}",