darabos committed on
Commit
0cdd509
·
1 Parent(s): 17a0053

Test for repeats.

Browse files
examples/Model definition CHANGED
@@ -24,7 +24,7 @@
24
  {
25
  "id": "Input: tensor 1 Linear 2",
26
  "source": "Input: tensor 1",
27
- "sourceHandle": "x",
28
  "target": "Linear 2",
29
  "targetHandle": "x"
30
  },
@@ -45,7 +45,7 @@
45
  {
46
  "id": "Input: tensor 3 MSE loss 2",
47
  "source": "Input: tensor 3",
48
- "sourceHandle": "x",
49
  "target": "MSE loss 2",
50
  "targetHandle": "y"
51
  }
@@ -302,8 +302,8 @@
302
  "inputs": {},
303
  "name": "Input: tensor",
304
  "outputs": {
305
- "x": {
306
- "name": "x",
307
  "position": "top",
308
  "type": {
309
  "type": "tensor"
@@ -352,8 +352,8 @@
352
  "inputs": {},
353
  "name": "Input: tensor",
354
  "outputs": {
355
- "x": {
356
- "name": "x",
357
  "position": "top",
358
  "type": {
359
  "type": "tensor"
 
24
  {
25
  "id": "Input: tensor 1 Linear 2",
26
  "source": "Input: tensor 1",
27
+ "sourceHandle": "output",
28
  "target": "Linear 2",
29
  "targetHandle": "x"
30
  },
 
45
  {
46
  "id": "Input: tensor 3 MSE loss 2",
47
  "source": "Input: tensor 3",
48
+ "sourceHandle": "output",
49
  "target": "MSE loss 2",
50
  "targetHandle": "y"
51
  }
 
302
  "inputs": {},
303
  "name": "Input: tensor",
304
  "outputs": {
305
+ "output": {
306
+ "name": "output",
307
  "position": "top",
308
  "type": {
309
  "type": "tensor"
 
352
  "inputs": {},
353
  "name": "Input: tensor",
354
  "outputs": {
355
+ "output": {
356
+ "name": "output",
357
  "position": "top",
358
  "type": {
359
  "type": "tensor"
lynxkite-app/web/src/workspace/nodes/NodeParameter.tsx CHANGED
@@ -1,5 +1,5 @@
1
- // @ts-ignore
2
  import { useRef } from "react";
 
3
  import ArrowsHorizontal from "~icons/tabler/arrows-horizontal.jsx";
4
 
5
  const BOOLEAN = "<class 'bool'>";
 
 
1
  import { useRef } from "react";
2
+ // @ts-ignore
3
  import ArrowsHorizontal from "~icons/tabler/arrows-horizontal.jsx";
4
 
5
  const BOOLEAN = "<class 'bool'>";
lynxkite-graph-analytics/src/lynxkite_graph_analytics/pytorch_model_ops.py CHANGED
@@ -42,7 +42,7 @@ def reg(name, inputs=[], outputs=None, params=[]):
42
  )
43
 
44
 
45
- reg("Input: tensor", outputs=["x"], params=[P.basic("name")])
46
  reg("Input: graph edges", outputs=["edges"])
47
  reg("Input: sequential", outputs=["y"])
48
 
@@ -274,20 +274,22 @@ class ModelBuilder:
274
  self.catalog = ops.CATALOGS[ENV]
275
  optimizers = []
276
  self.nodes: dict[str, workspace.WorkspaceNode] = {}
 
277
  for node in ws.nodes:
278
  self.nodes[node.id] = node
279
  if node.data.title == "Optimizer":
280
  optimizers.append(node.id)
 
 
 
 
281
  assert optimizers, "No optimizer found."
282
  assert len(optimizers) == 1, f"More than one optimizer found: {optimizers}"
283
  [self.optimizer] = optimizers
284
- self.dependencies = {n.id: [] for n in ws.nodes}
285
- self.in_edges: dict[str, dict[str, list[(str, str)]]] = {}
286
- self.out_edges: dict[str, dict[str, list[(str, str)]]] = {}
287
- repeats = []
288
  for e in ws.edges:
289
- if self.nodes[e.target].data.title == "Repeat":
290
- repeats.append(e.target)
291
  self.dependencies[e.target].append(e.source)
292
  self.in_edges.setdefault(e.target, {}).setdefault(e.targetHandle, []).append(
293
  (e.source, e.sourceHandle)
@@ -298,6 +300,8 @@ class ModelBuilder:
298
  # Split repeat boxes into start and end, and insert them into the flow.
299
  # TODO: Think about recursive repeats.
300
  for repeat in repeats:
 
 
301
  start_id = f"START {repeat}"
302
  end_id = f"END {repeat}"
303
  # repeat -> first <- real_input
@@ -334,7 +338,7 @@ class ModelBuilder:
334
  k if k != (last, lasth) else (end_id, "output")
335
  for k in self.in_edges[real_output][real_outputh]
336
  ]
337
- self.inv_dependencies = {n: [] for n in self.dependencies}
338
  for k, v in self.dependencies.items():
339
  for i in v:
340
  self.inv_dependencies[i].append(k)
@@ -342,6 +346,19 @@ class ModelBuilder:
342
  for k, i in inputs.items():
343
  self.sizes[k] = i.shape[-1]
344
  self.layers = []
 
 
 
 
 
 
 
 
 
 
 
 
 
345
 
346
  def all_upstream(self, node: str) -> set[str]:
347
  """Returns all nodes upstream of a node."""
@@ -361,12 +378,7 @@ class ModelBuilder:
361
 
362
  def run_node(self, node_id: str) -> None:
363
  """Adds the layer(s) produced by this node to self.layers."""
364
- if node_id.startswith("START "):
365
- node = self.nodes[node_id.removeprefix("START ")]
366
- elif node_id.startswith("END "):
367
- node = self.nodes[node_id.removeprefix("END ")]
368
- else:
369
- node = self.nodes[node_id]
370
  t = node.data.title
371
  op = self.catalog[t]
372
  p = op.convert_params(node.data.params)
@@ -385,6 +397,7 @@ class ModelBuilder:
385
  f"edges leave repeated section '{repeat_id}':\n{affected_nodes - repeated_nodes}"
386
  )
387
  repeated_layers = [e for e in self.layers if e._origin_id in repeated_nodes]
 
388
  for i in range(p["times"] - 1):
389
  # Copy repeat section's output to repeat section's input.
390
  self.layers.append(
 
42
  )
43
 
44
 
45
+ reg("Input: tensor", outputs=["output"], params=[P.basic("name")])
46
  reg("Input: graph edges", outputs=["edges"])
47
  reg("Input: sequential", outputs=["y"])
48
 
 
274
  self.catalog = ops.CATALOGS[ENV]
275
  optimizers = []
276
  self.nodes: dict[str, workspace.WorkspaceNode] = {}
277
+ repeats: list[str] = []
278
  for node in ws.nodes:
279
  self.nodes[node.id] = node
280
  if node.data.title == "Optimizer":
281
  optimizers.append(node.id)
282
+ elif node.data.title == "Repeat":
283
+ repeats.append(node.id)
284
+ self.nodes[f"START {node.id}"] = node
285
+ self.nodes[f"END {node.id}"] = node
286
  assert optimizers, "No optimizer found."
287
  assert len(optimizers) == 1, f"More than one optimizer found: {optimizers}"
288
  [self.optimizer] = optimizers
289
+ self.dependencies = {n: [] for n in self.nodes}
290
+ self.in_edges: dict[str, dict[str, list[(str, str)]]] = {n: {} for n in self.nodes}
291
+ self.out_edges: dict[str, dict[str, list[(str, str)]]] = {n: {} for n in self.nodes}
 
292
  for e in ws.edges:
 
 
293
  self.dependencies[e.target].append(e.source)
294
  self.in_edges.setdefault(e.target, {}).setdefault(e.targetHandle, []).append(
295
  (e.source, e.sourceHandle)
 
300
  # Split repeat boxes into start and end, and insert them into the flow.
301
  # TODO: Think about recursive repeats.
302
  for repeat in repeats:
303
+ if not self.out_edges[repeat] or not self.in_edges[repeat]:
304
+ continue
305
  start_id = f"START {repeat}"
306
  end_id = f"END {repeat}"
307
  # repeat -> first <- real_input
 
338
  k if k != (last, lasth) else (end_id, "output")
339
  for k in self.in_edges[real_output][real_outputh]
340
  ]
341
+ self.inv_dependencies = {n: [] for n in self.nodes}
342
  for k, v in self.dependencies.items():
343
  for i in v:
344
  self.inv_dependencies[i].append(k)
 
346
  for k, i in inputs.items():
347
  self.sizes[k] = i.shape[-1]
348
  self.layers = []
349
+ # Clean up disconnected nodes.
350
+ disconnected = set()
351
+ for node_id in self.nodes:
352
+ op = self.catalog[self.nodes[node_id].data.title]
353
+ if len(self.in_edges[node_id]) != len(op.inputs):
354
+ disconnected.add(node_id)
355
+ disconnected |= self.all_upstream(node_id)
356
+ for node_id in disconnected:
357
+ del self.dependencies[node_id]
358
+ del self.in_edges[node_id]
359
+ del self.out_edges[node_id]
360
+ del self.inv_dependencies[node_id]
361
+ del self.nodes[node_id]
362
 
363
  def all_upstream(self, node: str) -> set[str]:
364
  """Returns all nodes upstream of a node."""
 
378
 
379
  def run_node(self, node_id: str) -> None:
380
  """Adds the layer(s) produced by this node to self.layers."""
381
+ node = self.nodes[node_id]
 
 
 
 
 
382
  t = node.data.title
383
  op = self.catalog[t]
384
  p = op.convert_params(node.data.params)
 
397
  f"edges leave repeated section '{repeat_id}':\n{affected_nodes - repeated_nodes}"
398
  )
399
  repeated_layers = [e for e in self.layers if e._origin_id in repeated_nodes]
400
+ assert p["times"] >= 1, f"Cannot repeat {repeat_id} {p['times']} times."
401
  for i in range(p["times"] - 1):
402
  # Copy repeat section's output to repeat section's input.
403
  self.layers.append(
lynxkite-graph-analytics/tests/test_pytorch_model_ops.py CHANGED
@@ -7,11 +7,13 @@ import pytest
7
  def make_ws(env, nodes: dict[str, dict], edges: list[tuple[str, str, str, str]]):
8
  ws = workspace.Workspace(env=env)
9
  for id, data in nodes.items():
 
 
10
  ws.nodes.append(
11
  workspace.WorkspaceNode(
12
  id=id,
13
  type="basic",
14
- data=workspace.WorkspaceNodeData(title=data["title"], params=data),
15
  position=workspace.Position(
16
  x=data.get("x", 0),
17
  y=data.get("y", 0),
@@ -31,35 +33,86 @@ def make_ws(env, nodes: dict[str, dict], edges: list[tuple[str, str, str, str]])
31
  return ws
32
 
33
 
 
 
 
 
 
 
 
 
 
 
 
34
  async def test_build_model():
35
  ws = make_ws(
36
  pytorch_model_ops.ENV,
37
  {
38
- "emb": {"title": "Input: embedding"},
39
  "lin": {"title": "Linear", "output_dim": "same"},
40
- "act": {"title": "Activation", "type": "Leaky ReLU"},
41
- "label": {"title": "Input: label"},
42
  "loss": {"title": "MSE loss"},
43
  "optim": {"title": "Optimizer", "type": "SGD", "lr": 0.1},
44
  },
45
  [
46
- ("emb:x", "lin:x"),
47
- ("lin:x", "act:x"),
48
- ("act:x", "loss:x"),
49
- ("label:y", "loss:y"),
50
- ("loss:loss", "optim:loss"),
51
  ],
52
  )
53
  x = torch.rand(100, 4)
54
  y = x + 1
55
- m = pytorch_model_ops.build_model(ws, {"emb_x": x, "label_y": y})
56
  for i in range(1000):
57
- loss = m.train({"emb_x": x, "label_y": y})
58
  assert loss < 0.1
59
- o = m.inference({"emb_x": x[:1]})
60
- error = torch.nn.functional.mse_loss(o["act_x"], x[:1] + 1)
61
  assert error < 0.1
62
 
63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  if __name__ == "__main__":
65
  pytest.main()
 
7
  def make_ws(env, nodes: dict[str, dict], edges: list[tuple[str, str, str, str]]):
8
  ws = workspace.Workspace(env=env)
9
  for id, data in nodes.items():
10
+ title = data["title"]
11
+ del data["title"]
12
  ws.nodes.append(
13
  workspace.WorkspaceNode(
14
  id=id,
15
  type="basic",
16
+ data=workspace.WorkspaceNodeData(title=title, params=data),
17
  position=workspace.Position(
18
  x=data.get("x", 0),
19
  y=data.get("y", 0),
 
33
  return ws
34
 
35
 
36
+ def summarize_layers(m: pytorch_model_ops.ModelConfig) -> str:
37
+ return "".join(str(e)[0] for e in m.model)
38
+
39
+
40
+ def summarize_connections(m: pytorch_model_ops.ModelConfig) -> str:
41
+ return " ".join(
42
+ "".join(n[0] for n in c.param_names) + "->" + "".join(n[0] for n in c.return_names)
43
+ for c in m.model._children
44
+ )
45
+
46
+
47
  async def test_build_model():
48
  ws = make_ws(
49
  pytorch_model_ops.ENV,
50
  {
51
+ "emb": {"title": "Input: tensor"},
52
  "lin": {"title": "Linear", "output_dim": "same"},
53
+ "act": {"title": "Activation", "type": "Leaky_ReLU"},
54
+ "label": {"title": "Input: tensor"},
55
  "loss": {"title": "MSE loss"},
56
  "optim": {"title": "Optimizer", "type": "SGD", "lr": 0.1},
57
  },
58
  [
59
+ ("emb:output", "lin:x"),
60
+ ("lin:output", "act:x"),
61
+ ("act:output", "loss:x"),
62
+ ("label:output", "loss:y"),
63
+ ("loss:output", "optim:loss"),
64
  ],
65
  )
66
  x = torch.rand(100, 4)
67
  y = x + 1
68
+ m = pytorch_model_ops.build_model(ws, {"emb_output": x, "label_output": y})
69
  for i in range(1000):
70
+ loss = m.train({"emb_output": x, "label_output": y})
71
  assert loss < 0.1
72
+ o = m.inference({"emb_output": x[:1]})
73
+ error = torch.nn.functional.mse_loss(o["act_output"], x[:1] + 1)
74
  assert error < 0.1
75
 
76
 
77
+ async def test_build_model_with_repeat():
78
+ def repeated_ws(times):
79
+ return make_ws(
80
+ pytorch_model_ops.ENV,
81
+ {
82
+ "emb": {"title": "Input: tensor"},
83
+ "lin": {"title": "Linear", "output_dim": "same"},
84
+ "act": {"title": "Activation", "type": "Leaky_ReLU"},
85
+ "label": {"title": "Input: tensor"},
86
+ "loss": {"title": "MSE loss"},
87
+ "optim": {"title": "Optimizer", "type": "SGD", "lr": 0.1},
88
+ "repeat": {"title": "Repeat", "times": times, "same_weights": False},
89
+ },
90
+ [
91
+ ("emb:output", "lin:x"),
92
+ ("lin:output", "act:x"),
93
+ ("act:output", "loss:x"),
94
+ ("label:output", "loss:y"),
95
+ ("loss:output", "optim:loss"),
96
+ ("repeat:output", "lin:x"),
97
+ ("act:output", "repeat:input"),
98
+ ],
99
+ )
100
+
101
+ # 1 repetition
102
+ m = pytorch_model_ops.build_model(repeated_ws(1), {})
103
+ assert summarize_layers(m) == "IL<II"
104
+ assert summarize_connections(m) == "e->S S->l l->a a->E E->E"
105
+
106
+ # 2 repetitions
107
+ m = pytorch_model_ops.build_model(repeated_ws(2), {})
108
+ assert summarize_layers(m) == "IL<IL<II"
109
+ assert summarize_connections(m) == "e->S S->l l->a a->S S->l l->a a->E E->E"
110
+
111
+ # 3 repetitions
112
+ m = pytorch_model_ops.build_model(repeated_ws(3), {})
113
+ assert summarize_layers(m) == "IL<IL<IL<II"
114
+ assert summarize_connections(m) == "e->S S->l l->a a->S S->l l->a a->S S->l l->a a->E E->E"
115
+
116
+
117
  if __name__ == "__main__":
118
  pytest.main()