Spaces:

lynx-analytics
/

lynxkite

Running

App Files Community

darabos committed on Apr 4

Commit

0cdd509

1 Parent(s): 17a0053

Test for repeats.

Browse files

Files changed (4) hide show

examples/Model definition +6 -6
lynxkite-app/web/src/workspace/nodes/NodeParameter.tsx +1 -1
lynxkite-graph-analytics/src/lynxkite_graph_analytics/pytorch_model_ops.py +27 -14
lynxkite-graph-analytics/tests/test_pytorch_model_ops.py +66 -13

examples/Model definition CHANGED Viewed

@@ -24,7 +24,7 @@
     {
       "id": "Input: tensor 1 Linear 2",
       "source": "Input: tensor 1",
-      "sourceHandle": "x",
       "target": "Linear 2",
       "targetHandle": "x"
     },
@@ -45,7 +45,7 @@
     {
       "id": "Input: tensor 3 MSE loss 2",
       "source": "Input: tensor 3",
-      "sourceHandle": "x",
       "target": "MSE loss 2",
       "targetHandle": "y"
     }
@@ -302,8 +302,8 @@
           "inputs": {},
           "name": "Input: tensor",
           "outputs": {
-            "x": {
-              "name": "x",
               "position": "top",
               "type": {
                 "type": "tensor"
@@ -352,8 +352,8 @@
           "inputs": {},
           "name": "Input: tensor",
           "outputs": {
-            "x": {
-              "name": "x",
               "position": "top",
               "type": {
                 "type": "tensor"

     {
       "id": "Input: tensor 1 Linear 2",
       "source": "Input: tensor 1",
+      "sourceHandle": "output",
       "target": "Linear 2",
       "targetHandle": "x"
     },
     {
       "id": "Input: tensor 3 MSE loss 2",
       "source": "Input: tensor 3",
+      "sourceHandle": "output",
       "target": "MSE loss 2",
       "targetHandle": "y"
     }
           "inputs": {},
           "name": "Input: tensor",
           "outputs": {
+            "output": {
+              "name": "output",
               "position": "top",
               "type": {
                 "type": "tensor"
           "inputs": {},
           "name": "Input: tensor",
           "outputs": {
+            "output": {
+              "name": "output",
               "position": "top",
               "type": {
                 "type": "tensor"

lynxkite-app/web/src/workspace/nodes/NodeParameter.tsx CHANGED Viewed

@@ -1,5 +1,5 @@
-// @ts-ignore
 import { useRef } from "react";
 import ArrowsHorizontal from "~icons/tabler/arrows-horizontal.jsx";
 const BOOLEAN = "<class 'bool'>";

 import { useRef } from "react";
+// @ts-ignore
 import ArrowsHorizontal from "~icons/tabler/arrows-horizontal.jsx";
 const BOOLEAN = "<class 'bool'>";

lynxkite-graph-analytics/src/lynxkite_graph_analytics/pytorch_model_ops.py CHANGED Viewed

@@ -42,7 +42,7 @@ def reg(name, inputs=[], outputs=None, params=[]):
     )
-reg("Input: tensor", outputs=["x"], params=[P.basic("name")])
 reg("Input: graph edges", outputs=["edges"])
 reg("Input: sequential", outputs=["y"])
@@ -274,20 +274,22 @@ class ModelBuilder:
         self.catalog = ops.CATALOGS[ENV]
         optimizers = []
         self.nodes: dict[str, workspace.WorkspaceNode] = {}
         for node in ws.nodes:
             self.nodes[node.id] = node
             if node.data.title == "Optimizer":
                 optimizers.append(node.id)
         assert optimizers, "No optimizer found."
         assert len(optimizers) == 1, f"More than one optimizer found: {optimizers}"
         [self.optimizer] = optimizers
-        self.dependencies = {n.id: [] for n in ws.nodes}
-        self.in_edges: dict[str, dict[str, list[(str, str)]]] = {}
-        self.out_edges: dict[str, dict[str, list[(str, str)]]] = {}
-        repeats = []
         for e in ws.edges:
-            if self.nodes[e.target].data.title == "Repeat":
-                repeats.append(e.target)
             self.dependencies[e.target].append(e.source)
             self.in_edges.setdefault(e.target, {}).setdefault(e.targetHandle, []).append(
                 (e.source, e.sourceHandle)
@@ -298,6 +300,8 @@ class ModelBuilder:
         # Split repeat boxes into start and end, and insert them into the flow.
         # TODO: Think about recursive repeats.
         for repeat in repeats:
             start_id = f"START {repeat}"
             end_id = f"END {repeat}"
             # repeat -> first <- real_input
@@ -334,7 +338,7 @@ class ModelBuilder:
                 k if k != (last, lasth) else (end_id, "output")
                 for k in self.in_edges[real_output][real_outputh]
             ]
-        self.inv_dependencies = {n: [] for n in self.dependencies}
         for k, v in self.dependencies.items():
             for i in v:
                 self.inv_dependencies[i].append(k)
@@ -342,6 +346,19 @@ class ModelBuilder:
         for k, i in inputs.items():
             self.sizes[k] = i.shape[-1]
         self.layers = []
     def all_upstream(self, node: str) -> set[str]:
         """Returns all nodes upstream of a node."""
@@ -361,12 +378,7 @@ class ModelBuilder:
     def run_node(self, node_id: str) -> None:
         """Adds the layer(s) produced by this node to self.layers."""
-        if node_id.startswith("START "):
-            node = self.nodes[node_id.removeprefix("START ")]
-        elif node_id.startswith("END "):
-            node = self.nodes[node_id.removeprefix("END ")]
-        else:
-            node = self.nodes[node_id]
         t = node.data.title
         op = self.catalog[t]
         p = op.convert_params(node.data.params)
@@ -385,6 +397,7 @@ class ModelBuilder:
                         f"edges leave repeated section '{repeat_id}':\n{affected_nodes - repeated_nodes}"
                     )
                     repeated_layers = [e for e in self.layers if e._origin_id in repeated_nodes]
                     for i in range(p["times"] - 1):
                         # Copy repeat section's output to repeat section's input.
                         self.layers.append(

     )
+reg("Input: tensor", outputs=["output"], params=[P.basic("name")])
 reg("Input: graph edges", outputs=["edges"])
 reg("Input: sequential", outputs=["y"])
         self.catalog = ops.CATALOGS[ENV]
         optimizers = []
         self.nodes: dict[str, workspace.WorkspaceNode] = {}
+        repeats: list[str] = []
         for node in ws.nodes:
             self.nodes[node.id] = node
             if node.data.title == "Optimizer":
                 optimizers.append(node.id)
+            elif node.data.title == "Repeat":
+                repeats.append(node.id)
+                self.nodes[f"START {node.id}"] = node
+                self.nodes[f"END {node.id}"] = node
         assert optimizers, "No optimizer found."
         assert len(optimizers) == 1, f"More than one optimizer found: {optimizers}"
         [self.optimizer] = optimizers
+        self.dependencies = {n: [] for n in self.nodes}
+        self.in_edges: dict[str, dict[str, list[(str, str)]]] = {n: {} for n in self.nodes}
+        self.out_edges: dict[str, dict[str, list[(str, str)]]] = {n: {} for n in self.nodes}
         for e in ws.edges:
             self.dependencies[e.target].append(e.source)
             self.in_edges.setdefault(e.target, {}).setdefault(e.targetHandle, []).append(
                 (e.source, e.sourceHandle)
         # Split repeat boxes into start and end, and insert them into the flow.
         # TODO: Think about recursive repeats.
         for repeat in repeats:
+            if not self.out_edges[repeat] or not self.in_edges[repeat]:
+                continue
             start_id = f"START {repeat}"
             end_id = f"END {repeat}"
             # repeat -> first <- real_input
                 k if k != (last, lasth) else (end_id, "output")
                 for k in self.in_edges[real_output][real_outputh]
             ]
+        self.inv_dependencies = {n: [] for n in self.nodes}
         for k, v in self.dependencies.items():
             for i in v:
                 self.inv_dependencies[i].append(k)
         for k, i in inputs.items():
             self.sizes[k] = i.shape[-1]
         self.layers = []
+        # Clean up disconnected nodes.
+        disconnected = set()
+        for node_id in self.nodes:
+            op = self.catalog[self.nodes[node_id].data.title]
+            if len(self.in_edges[node_id]) != len(op.inputs):
+                disconnected.add(node_id)
+                disconnected |= self.all_upstream(node_id)
+        for node_id in disconnected:
+            del self.dependencies[node_id]
+            del self.in_edges[node_id]
+            del self.out_edges[node_id]
+            del self.inv_dependencies[node_id]
+            del self.nodes[node_id]
     def all_upstream(self, node: str) -> set[str]:
         """Returns all nodes upstream of a node."""
     def run_node(self, node_id: str) -> None:
         """Adds the layer(s) produced by this node to self.layers."""
+        node = self.nodes[node_id]
         t = node.data.title
         op = self.catalog[t]
         p = op.convert_params(node.data.params)
                         f"edges leave repeated section '{repeat_id}':\n{affected_nodes - repeated_nodes}"
                     )
                     repeated_layers = [e for e in self.layers if e._origin_id in repeated_nodes]
+                    assert p["times"] >= 1, f"Cannot repeat {repeat_id} {p['times']} times."
                     for i in range(p["times"] - 1):
                         # Copy repeat section's output to repeat section's input.
                         self.layers.append(

lynxkite-graph-analytics/tests/test_pytorch_model_ops.py CHANGED Viewed

@@ -7,11 +7,13 @@ import pytest
 def make_ws(env, nodes: dict[str, dict], edges: list[tuple[str, str, str, str]]):
     ws = workspace.Workspace(env=env)
     for id, data in nodes.items():
         ws.nodes.append(
             workspace.WorkspaceNode(
                 id=id,
                 type="basic",
-                data=workspace.WorkspaceNodeData(title=data["title"], params=data),
                 position=workspace.Position(
                     x=data.get("x", 0),
                     y=data.get("y", 0),
@@ -31,35 +33,86 @@ def make_ws(env, nodes: dict[str, dict], edges: list[tuple[str, str, str, str]])
     return ws
 async def test_build_model():
     ws = make_ws(
         pytorch_model_ops.ENV,
         {
-            "emb": {"title": "Input: embedding"},
             "lin": {"title": "Linear", "output_dim": "same"},
-            "act": {"title": "Activation", "type": "Leaky ReLU"},
-            "label": {"title": "Input: label"},
             "loss": {"title": "MSE loss"},
             "optim": {"title": "Optimizer", "type": "SGD", "lr": 0.1},
         },
         [
-            ("emb:x", "lin:x"),
-            ("lin:x", "act:x"),
-            ("act:x", "loss:x"),
-            ("label:y", "loss:y"),
-            ("loss:loss", "optim:loss"),
         ],
     )
     x = torch.rand(100, 4)
     y = x + 1
-    m = pytorch_model_ops.build_model(ws, {"emb_x": x, "label_y": y})
     for i in range(1000):
-        loss = m.train({"emb_x": x, "label_y": y})
     assert loss < 0.1
-    o = m.inference({"emb_x": x[:1]})
-    error = torch.nn.functional.mse_loss(o["act_x"], x[:1] + 1)
     assert error < 0.1
 if __name__ == "__main__":
     pytest.main()

 def make_ws(env, nodes: dict[str, dict], edges: list[tuple[str, str, str, str]]):
     ws = workspace.Workspace(env=env)
     for id, data in nodes.items():
+        title = data["title"]
+        del data["title"]
         ws.nodes.append(
             workspace.WorkspaceNode(
                 id=id,
                 type="basic",
+                data=workspace.WorkspaceNodeData(title=title, params=data),
                 position=workspace.Position(
                     x=data.get("x", 0),
                     y=data.get("y", 0),
     return ws
+def summarize_layers(m: pytorch_model_ops.ModelConfig) -> str:
+    return "".join(str(e)[0] for e in m.model)
+def summarize_connections(m: pytorch_model_ops.ModelConfig) -> str:
+    return " ".join(
+        "".join(n[0] for n in c.param_names) + "->" + "".join(n[0] for n in c.return_names)
+        for c in m.model._children
+    )
 async def test_build_model():
     ws = make_ws(
         pytorch_model_ops.ENV,
         {
+            "emb": {"title": "Input: tensor"},
             "lin": {"title": "Linear", "output_dim": "same"},
+            "act": {"title": "Activation", "type": "Leaky_ReLU"},
+            "label": {"title": "Input: tensor"},
             "loss": {"title": "MSE loss"},
             "optim": {"title": "Optimizer", "type": "SGD", "lr": 0.1},
         },
         [
+            ("emb:output", "lin:x"),
+            ("lin:output", "act:x"),
+            ("act:output", "loss:x"),
+            ("label:output", "loss:y"),
+            ("loss:output", "optim:loss"),
         ],
     )
     x = torch.rand(100, 4)
     y = x + 1
+    m = pytorch_model_ops.build_model(ws, {"emb_output": x, "label_output": y})
     for i in range(1000):
+        loss = m.train({"emb_output": x, "label_output": y})
     assert loss < 0.1
+    o = m.inference({"emb_output": x[:1]})
+    error = torch.nn.functional.mse_loss(o["act_output"], x[:1] + 1)
     assert error < 0.1
+async def test_build_model_with_repeat():
+    def repeated_ws(times):
+        return make_ws(
+            pytorch_model_ops.ENV,
+            {
+                "emb": {"title": "Input: tensor"},
+                "lin": {"title": "Linear", "output_dim": "same"},
+                "act": {"title": "Activation", "type": "Leaky_ReLU"},
+                "label": {"title": "Input: tensor"},
+                "loss": {"title": "MSE loss"},
+                "optim": {"title": "Optimizer", "type": "SGD", "lr": 0.1},
+                "repeat": {"title": "Repeat", "times": times, "same_weights": False},
+            },
+            [
+                ("emb:output", "lin:x"),
+                ("lin:output", "act:x"),
+                ("act:output", "loss:x"),
+                ("label:output", "loss:y"),
+                ("loss:output", "optim:loss"),
+                ("repeat:output", "lin:x"),
+                ("act:output", "repeat:input"),
+            ],
+        )
+    # 1 repetition
+    m = pytorch_model_ops.build_model(repeated_ws(1), {})
+    assert summarize_layers(m) == "IL<II"
+    assert summarize_connections(m) == "e->S S->l l->a a->E E->E"
+    # 2 repetitions
+    m = pytorch_model_ops.build_model(repeated_ws(2), {})
+    assert summarize_layers(m) == "IL<IL<II"
+    assert summarize_connections(m) == "e->S S->l l->a a->S S->l l->a a->E E->E"
+    # 3 repetitions
+    m = pytorch_model_ops.build_model(repeated_ws(3), {})
+    assert summarize_layers(m) == "IL<IL<IL<II"
+    assert summarize_connections(m) == "e->S S->l l->a a->S S->l l->a a->S S->l l->a a->E E->E"
 if __name__ == "__main__":
     pytest.main()