darabos commited on
Commit
3cc3a0a
·
1 Parent(s): e8a4151

Model creation and training for basic layers.

Browse files
lynxkite-graph-analytics/src/lynxkite_graph_analytics/pytorch_model_ops.py CHANGED
@@ -1,9 +1,11 @@
1
  """Boxes for defining PyTorch models."""
2
 
 
3
  from lynxkite.core import ops, workspace
4
  from lynxkite.core.ops import Parameter as P
5
  import torch
6
  import torch_geometric as pyg
 
7
 
8
  ENV = "PyTorch model"
9
 
@@ -70,7 +72,7 @@ reg(
70
  reg(
71
  "Activation",
72
  inputs=["x"],
73
- params=[P.options("type", ["ReLU", "LeakyReLU", "Tanh", "Mish"])],
74
  )
75
  reg("Concatenate", inputs=["a", "b"], outputs=["x"])
76
  reg("Add", inputs=["a", "b"], outputs=["x"])
@@ -105,7 +107,10 @@ ops.register_passive_op(
105
  "Repeat",
106
  inputs=[ops.Input(name="input", position="top", type="tensor")],
107
  outputs=[ops.Output(name="output", position="bottom", type="tensor")],
108
- params=[ops.Parameter.basic("times", 1, int)],
 
 
 
109
  )
110
 
111
  ops.register_passive_op(
@@ -117,24 +122,133 @@ ops.register_passive_op(
117
  )
118
 
119
 
120
- def build_model(ws: workspace.Workspace, inputs: dict):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  """Builds the model described in the workspace."""
122
  optimizers = []
 
123
  for node in ws.nodes:
124
- if node.op.name == "Optimizer":
125
- optimizers.append(node)
 
126
  assert optimizers, "No optimizer found."
127
  assert len(optimizers) == 1, f"More than one optimizer found: {optimizers}"
128
  [optimizer] = optimizers
129
- inputs = {n.id: [] for n in ws.nodes}
 
 
130
  for e in ws.edges:
131
- inputs[e.target].append(e.source)
132
- layers = []
133
- # TODO: Create layers based on the workspace.
 
134
  sizes = {}
135
- for k, v in inputs.items():
136
- sizes[k] = v.size
137
- layers.append((pyg.nn.Linear(sizes["x"], 1024), "x -> x"))
138
- layers.append((torch.nn.LayerNorm(1024), "x -> x"))
139
- m = pyg.nn.Sequential("x, edge_index", layers)
140
- return m
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """Boxes for defining PyTorch models."""
2
 
3
+ import graphlib
4
  from lynxkite.core import ops, workspace
5
  from lynxkite.core.ops import Parameter as P
6
  import torch
7
  import torch_geometric as pyg
8
+ from dataclasses import dataclass
9
 
10
  ENV = "PyTorch model"
11
 
 
72
  reg(
73
  "Activation",
74
  inputs=["x"],
75
+ params=[P.options("type", ["ReLU", "Leaky ReLU", "Tanh", "Mish"])],
76
  )
77
  reg("Concatenate", inputs=["a", "b"], outputs=["x"])
78
  reg("Add", inputs=["a", "b"], outputs=["x"])
 
107
  "Repeat",
108
  inputs=[ops.Input(name="input", position="top", type="tensor")],
109
  outputs=[ops.Output(name="output", position="bottom", type="tensor")],
110
+ params=[
111
+ ops.Parameter.basic("times", 1, int),
112
+ ops.Parameter.basic("same_weights", True, bool),
113
+ ],
114
  )
115
 
116
  ops.register_passive_op(
 
122
  )
123
 
124
 
125
+ def _to_id(s: str) -> str:
126
+ """Replaces all non-alphanumeric characters with underscores."""
127
+ return "".join(c if c.isalnum() else "_" for c in s)
128
+
129
+
130
+ @dataclass
131
+ class ModelConfig:
132
+ model: torch.nn.Module
133
+ model_inputs: list[str]
134
+ model_outputs: list[str]
135
+ loss_inputs: list[str]
136
+ loss: torch.nn.Module
137
+ optimizer: torch.optim.Optimizer
138
+
139
+ def _forward(self, inputs: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
140
+ model_inputs = [inputs[i] for i in self.model_inputs]
141
+ output = self.model(*model_inputs)
142
+ if not isinstance(output, tuple):
143
+ output = (output,)
144
+ values = {k: v for k, v in zip(self.model_outputs, output)}
145
+ return values
146
+
147
+ def inference(self, inputs: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
148
+ # TODO: Do multiple batches.
149
+ self.model.eval()
150
+ return self._forward(inputs)
151
+
152
+ def train(self, inputs: dict[str, torch.Tensor]) -> float:
153
+ """Train the model for one epoch. Returns the loss."""
154
+ # TODO: Do multiple batches.
155
+ self.model.train()
156
+ self.optimizer.zero_grad()
157
+ values = self._forward(inputs)
158
+ values.update(inputs)
159
+ loss_inputs = [values[i] for i in self.loss_inputs]
160
+ loss = self.loss(*loss_inputs)
161
+ loss.backward()
162
+ self.optimizer.step()
163
+ return loss.item()
164
+
165
+
166
+ def build_model(
167
+ ws: workspace.Workspace, inputs: dict[str, torch.Tensor]
168
+ ) -> ModelConfig:
169
  """Builds the model described in the workspace."""
170
  optimizers = []
171
+ nodes = {}
172
  for node in ws.nodes:
173
+ nodes[node.id] = node
174
+ if node.data.title == "Optimizer":
175
+ optimizers.append(node.id)
176
  assert optimizers, "No optimizer found."
177
  assert len(optimizers) == 1, f"More than one optimizer found: {optimizers}"
178
  [optimizer] = optimizers
179
+ dependencies = {n.id: [] for n in ws.nodes}
180
+ edges = {}
181
+ # TODO: Dissolve repeat boxes here.
182
  for e in ws.edges:
183
+ dependencies[e.target].append(e.source)
184
+ edges.setdefault((e.target, e.targetHandle), []).append(
185
+ (e.source, e.sourceHandle)
186
+ )
187
  sizes = {}
188
+ for k, i in inputs.items():
189
+ sizes[k] = i.shape[-1]
190
+ ts = graphlib.TopologicalSorter(dependencies)
191
+ layers = []
192
+ loss_layers = []
193
+ in_loss = set()
194
+ cfg = {}
195
+ loss_inputs = set()
196
+ used_inputs = set()
197
+ for node_id in ts.static_order():
198
+ node = nodes[node_id]
199
+ t = node.data.title
200
+ p = node.data.params
201
+ for b in dependencies[node_id]:
202
+ if b in in_loss:
203
+ in_loss.add(node_id)
204
+ ls = loss_layers if node_id in in_loss else layers
205
+ nid = _to_id(node_id)
206
+ match t:
207
+ case "Linear":
208
+ [(ib, ih)] = edges[node_id, "x"]
209
+ i = _to_id(ib) + "_" + ih
210
+ used_inputs.add(i)
211
+ isize = sizes[i]
212
+ osize = isize if p["output_dim"] == "same" else int(p["output_dim"])
213
+ ls.append((torch.nn.Linear(isize, osize), f"{i} -> {nid}_x"))
214
+ sizes[f"{nid}_x"] = osize
215
+ case "Activation":
216
+ [(ib, ih)] = edges[node_id, "x"]
217
+ i = _to_id(ib) + "_" + ih
218
+ used_inputs.add(i)
219
+ f = getattr(torch.nn.functional, p["type"].lower().replace(" ", "_"))
220
+ ls.append((f, f"{i} -> {nid}_x"))
221
+ sizes[f"{nid}_x"] = sizes[i]
222
+ case "MSE loss":
223
+ [(xb, xh)] = edges[node_id, "x"]
224
+ xi = _to_id(xb) + "_" + xh
225
+ [(yb, yh)] = edges[node_id, "y"]
226
+ yi = _to_id(yb) + "_" + yh
227
+ loss_inputs.add(xi)
228
+ loss_inputs.add(yi)
229
+ in_loss.add(node_id)
230
+ loss_layers.append(
231
+ (torch.nn.functional.mse_loss, f"{xi}, {yi} -> {nid}_loss")
232
+ )
233
+ cfg["model_inputs"] = used_inputs & inputs.keys()
234
+ cfg["model_outputs"] = loss_inputs - inputs.keys()
235
+ cfg["loss_inputs"] = loss_inputs
236
+ # Make sure the trained output is output from the last model layer.
237
+ outputs = ", ".join(cfg["model_outputs"])
238
+ layers.append((torch.nn.Identity(), f"{outputs} -> {outputs}"))
239
+ # Create model.
240
+ cfg["model"] = pyg.nn.Sequential(", ".join(used_inputs & inputs.keys()), layers)
241
+ # Make sure the loss is output from the last loss layer.
242
+ [(lossb, lossh)] = edges[optimizer, "loss"]
243
+ lossi = _to_id(lossb) + "_" + lossh
244
+ loss_layers.append((torch.nn.Identity(), f"{lossi} -> loss"))
245
+ # Create loss function.
246
+ cfg["loss"] = pyg.nn.Sequential(", ".join(loss_inputs), loss_layers)
247
+ assert not list(cfg["loss"].parameters()), (
248
+ f"loss should have no parameters: {list(cfg['loss'].parameters())}"
249
+ )
250
+ # Create optimizer.
251
+ p = nodes[optimizer].data.params
252
+ o = getattr(torch.optim, p["type"])
253
+ cfg["optimizer"] = o(cfg["model"].parameters(), lr=p["lr"])
254
+ return ModelConfig(**cfg)
lynxkite-graph-analytics/tests/test_pytorch_model_ops.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from lynxkite.core import workspace
2
+ from lynxkite_graph_analytics import pytorch_model_ops
3
+ import torch
4
+ import pytest
5
+
6
+
7
+ def make_ws(env, nodes: dict[str, dict], edges: list[tuple[str, str, str, str]]):
8
+ ws = workspace.Workspace(env=env)
9
+ for id, data in nodes.items():
10
+ ws.nodes.append(
11
+ workspace.WorkspaceNode(
12
+ id=id,
13
+ type="basic",
14
+ data=workspace.WorkspaceNodeData(title=data["title"], params=data),
15
+ position=workspace.Position(
16
+ x=data.get("x", 0),
17
+ y=data.get("y", 0),
18
+ ),
19
+ )
20
+ )
21
+ ws.edges = [
22
+ workspace.WorkspaceEdge(
23
+ id=f"{source}->{target}",
24
+ source=source.split(":")[0],
25
+ target=target.split(":")[0],
26
+ sourceHandle=source.split(":")[1],
27
+ targetHandle=target.split(":")[1],
28
+ )
29
+ for source, target in edges
30
+ ]
31
+ return ws
32
+
33
+
34
+ async def test_build_model():
35
+ ws = make_ws(
36
+ pytorch_model_ops.ENV,
37
+ {
38
+ "emb": {"title": "Input: embedding"},
39
+ "lin": {"title": "Linear", "output_dim": "same"},
40
+ "act": {"title": "Activation", "type": "Leaky ReLU"},
41
+ "label": {"title": "Input: label"},
42
+ "loss": {"title": "MSE loss"},
43
+ "optim": {"title": "Optimizer", "type": "SGD", "lr": 0.1},
44
+ },
45
+ [
46
+ ("emb:x", "lin:x"),
47
+ ("lin:x", "act:x"),
48
+ ("act:x", "loss:x"),
49
+ ("label:y", "loss:y"),
50
+ ("loss:loss", "optim:loss"),
51
+ ],
52
+ )
53
+ x = torch.rand(100, 4)
54
+ y = x + 1
55
+ m = pytorch_model_ops.build_model(ws, {"emb_x": x, "label_y": y})
56
+ for i in range(1000):
57
+ loss = m.train({"emb_x": x, "label_y": y})
58
+ assert loss < 0.1
59
+ o = m.inference({"emb_x": x[:1]})
60
+ error = torch.nn.functional.mse_loss(o["act_x"], x[:1] + 1)
61
+ assert error < 0.1
62
+
63
+
64
+ if __name__ == "__main__":
65
+ pytest.main()