Spaces:
Running
Running
Model creation and training for basic layers.
Browse files
lynxkite-graph-analytics/src/lynxkite_graph_analytics/pytorch_model_ops.py
CHANGED
@@ -1,9 +1,11 @@
|
|
1 |
"""Boxes for defining PyTorch models."""
|
2 |
|
|
|
3 |
from lynxkite.core import ops, workspace
|
4 |
from lynxkite.core.ops import Parameter as P
|
5 |
import torch
|
6 |
import torch_geometric as pyg
|
|
|
7 |
|
8 |
ENV = "PyTorch model"
|
9 |
|
@@ -70,7 +72,7 @@ reg(
|
|
70 |
reg(
|
71 |
"Activation",
|
72 |
inputs=["x"],
|
73 |
-
params=[P.options("type", ["ReLU", "
|
74 |
)
|
75 |
reg("Concatenate", inputs=["a", "b"], outputs=["x"])
|
76 |
reg("Add", inputs=["a", "b"], outputs=["x"])
|
@@ -105,7 +107,10 @@ ops.register_passive_op(
|
|
105 |
"Repeat",
|
106 |
inputs=[ops.Input(name="input", position="top", type="tensor")],
|
107 |
outputs=[ops.Output(name="output", position="bottom", type="tensor")],
|
108 |
-
params=[
|
|
|
|
|
|
|
109 |
)
|
110 |
|
111 |
ops.register_passive_op(
|
@@ -117,24 +122,133 @@ ops.register_passive_op(
|
|
117 |
)
|
118 |
|
119 |
|
120 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
"""Builds the model described in the workspace."""
|
122 |
optimizers = []
|
|
|
123 |
for node in ws.nodes:
|
124 |
-
|
125 |
-
|
|
|
126 |
assert optimizers, "No optimizer found."
|
127 |
assert len(optimizers) == 1, f"More than one optimizer found: {optimizers}"
|
128 |
[optimizer] = optimizers
|
129 |
-
|
|
|
|
|
130 |
for e in ws.edges:
|
131 |
-
|
132 |
-
|
133 |
-
|
|
|
134 |
sizes = {}
|
135 |
-
for k,
|
136 |
-
sizes[k] =
|
137 |
-
|
138 |
-
layers
|
139 |
-
|
140 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
"""Boxes for defining PyTorch models."""
|
2 |
|
3 |
+
import graphlib
|
4 |
from lynxkite.core import ops, workspace
|
5 |
from lynxkite.core.ops import Parameter as P
|
6 |
import torch
|
7 |
import torch_geometric as pyg
|
8 |
+
from dataclasses import dataclass
|
9 |
|
10 |
ENV = "PyTorch model"
|
11 |
|
|
|
72 |
reg(
|
73 |
"Activation",
|
74 |
inputs=["x"],
|
75 |
+
params=[P.options("type", ["ReLU", "Leaky ReLU", "Tanh", "Mish"])],
|
76 |
)
|
77 |
reg("Concatenate", inputs=["a", "b"], outputs=["x"])
|
78 |
reg("Add", inputs=["a", "b"], outputs=["x"])
|
|
|
107 |
"Repeat",
|
108 |
inputs=[ops.Input(name="input", position="top", type="tensor")],
|
109 |
outputs=[ops.Output(name="output", position="bottom", type="tensor")],
|
110 |
+
params=[
|
111 |
+
ops.Parameter.basic("times", 1, int),
|
112 |
+
ops.Parameter.basic("same_weights", True, bool),
|
113 |
+
],
|
114 |
)
|
115 |
|
116 |
ops.register_passive_op(
|
|
|
122 |
)
|
123 |
|
124 |
|
125 |
+
def _to_id(s: str) -> str:
|
126 |
+
"""Replaces all non-alphanumeric characters with underscores."""
|
127 |
+
return "".join(c if c.isalnum() else "_" for c in s)
|
128 |
+
|
129 |
+
|
130 |
+
@dataclass
|
131 |
+
class ModelConfig:
|
132 |
+
model: torch.nn.Module
|
133 |
+
model_inputs: list[str]
|
134 |
+
model_outputs: list[str]
|
135 |
+
loss_inputs: list[str]
|
136 |
+
loss: torch.nn.Module
|
137 |
+
optimizer: torch.optim.Optimizer
|
138 |
+
|
139 |
+
def _forward(self, inputs: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
|
140 |
+
model_inputs = [inputs[i] for i in self.model_inputs]
|
141 |
+
output = self.model(*model_inputs)
|
142 |
+
if not isinstance(output, tuple):
|
143 |
+
output = (output,)
|
144 |
+
values = {k: v for k, v in zip(self.model_outputs, output)}
|
145 |
+
return values
|
146 |
+
|
147 |
+
def inference(self, inputs: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
|
148 |
+
# TODO: Do multiple batches.
|
149 |
+
self.model.eval()
|
150 |
+
return self._forward(inputs)
|
151 |
+
|
152 |
+
def train(self, inputs: dict[str, torch.Tensor]) -> float:
|
153 |
+
"""Train the model for one epoch. Returns the loss."""
|
154 |
+
# TODO: Do multiple batches.
|
155 |
+
self.model.train()
|
156 |
+
self.optimizer.zero_grad()
|
157 |
+
values = self._forward(inputs)
|
158 |
+
values.update(inputs)
|
159 |
+
loss_inputs = [values[i] for i in self.loss_inputs]
|
160 |
+
loss = self.loss(*loss_inputs)
|
161 |
+
loss.backward()
|
162 |
+
self.optimizer.step()
|
163 |
+
return loss.item()
|
164 |
+
|
165 |
+
|
166 |
+
def build_model(
|
167 |
+
ws: workspace.Workspace, inputs: dict[str, torch.Tensor]
|
168 |
+
) -> ModelConfig:
|
169 |
"""Builds the model described in the workspace."""
|
170 |
optimizers = []
|
171 |
+
nodes = {}
|
172 |
for node in ws.nodes:
|
173 |
+
nodes[node.id] = node
|
174 |
+
if node.data.title == "Optimizer":
|
175 |
+
optimizers.append(node.id)
|
176 |
assert optimizers, "No optimizer found."
|
177 |
assert len(optimizers) == 1, f"More than one optimizer found: {optimizers}"
|
178 |
[optimizer] = optimizers
|
179 |
+
dependencies = {n.id: [] for n in ws.nodes}
|
180 |
+
edges = {}
|
181 |
+
# TODO: Dissolve repeat boxes here.
|
182 |
for e in ws.edges:
|
183 |
+
dependencies[e.target].append(e.source)
|
184 |
+
edges.setdefault((e.target, e.targetHandle), []).append(
|
185 |
+
(e.source, e.sourceHandle)
|
186 |
+
)
|
187 |
sizes = {}
|
188 |
+
for k, i in inputs.items():
|
189 |
+
sizes[k] = i.shape[-1]
|
190 |
+
ts = graphlib.TopologicalSorter(dependencies)
|
191 |
+
layers = []
|
192 |
+
loss_layers = []
|
193 |
+
in_loss = set()
|
194 |
+
cfg = {}
|
195 |
+
loss_inputs = set()
|
196 |
+
used_inputs = set()
|
197 |
+
for node_id in ts.static_order():
|
198 |
+
node = nodes[node_id]
|
199 |
+
t = node.data.title
|
200 |
+
p = node.data.params
|
201 |
+
for b in dependencies[node_id]:
|
202 |
+
if b in in_loss:
|
203 |
+
in_loss.add(node_id)
|
204 |
+
ls = loss_layers if node_id in in_loss else layers
|
205 |
+
nid = _to_id(node_id)
|
206 |
+
match t:
|
207 |
+
case "Linear":
|
208 |
+
[(ib, ih)] = edges[node_id, "x"]
|
209 |
+
i = _to_id(ib) + "_" + ih
|
210 |
+
used_inputs.add(i)
|
211 |
+
isize = sizes[i]
|
212 |
+
osize = isize if p["output_dim"] == "same" else int(p["output_dim"])
|
213 |
+
ls.append((torch.nn.Linear(isize, osize), f"{i} -> {nid}_x"))
|
214 |
+
sizes[f"{nid}_x"] = osize
|
215 |
+
case "Activation":
|
216 |
+
[(ib, ih)] = edges[node_id, "x"]
|
217 |
+
i = _to_id(ib) + "_" + ih
|
218 |
+
used_inputs.add(i)
|
219 |
+
f = getattr(torch.nn.functional, p["type"].lower().replace(" ", "_"))
|
220 |
+
ls.append((f, f"{i} -> {nid}_x"))
|
221 |
+
sizes[f"{nid}_x"] = sizes[i]
|
222 |
+
case "MSE loss":
|
223 |
+
[(xb, xh)] = edges[node_id, "x"]
|
224 |
+
xi = _to_id(xb) + "_" + xh
|
225 |
+
[(yb, yh)] = edges[node_id, "y"]
|
226 |
+
yi = _to_id(yb) + "_" + yh
|
227 |
+
loss_inputs.add(xi)
|
228 |
+
loss_inputs.add(yi)
|
229 |
+
in_loss.add(node_id)
|
230 |
+
loss_layers.append(
|
231 |
+
(torch.nn.functional.mse_loss, f"{xi}, {yi} -> {nid}_loss")
|
232 |
+
)
|
233 |
+
cfg["model_inputs"] = used_inputs & inputs.keys()
|
234 |
+
cfg["model_outputs"] = loss_inputs - inputs.keys()
|
235 |
+
cfg["loss_inputs"] = loss_inputs
|
236 |
+
# Make sure the trained output is output from the last model layer.
|
237 |
+
outputs = ", ".join(cfg["model_outputs"])
|
238 |
+
layers.append((torch.nn.Identity(), f"{outputs} -> {outputs}"))
|
239 |
+
# Create model.
|
240 |
+
cfg["model"] = pyg.nn.Sequential(", ".join(used_inputs & inputs.keys()), layers)
|
241 |
+
# Make sure the loss is output from the last loss layer.
|
242 |
+
[(lossb, lossh)] = edges[optimizer, "loss"]
|
243 |
+
lossi = _to_id(lossb) + "_" + lossh
|
244 |
+
loss_layers.append((torch.nn.Identity(), f"{lossi} -> loss"))
|
245 |
+
# Create loss function.
|
246 |
+
cfg["loss"] = pyg.nn.Sequential(", ".join(loss_inputs), loss_layers)
|
247 |
+
assert not list(cfg["loss"].parameters()), (
|
248 |
+
f"loss should have no parameters: {list(cfg['loss'].parameters())}"
|
249 |
+
)
|
250 |
+
# Create optimizer.
|
251 |
+
p = nodes[optimizer].data.params
|
252 |
+
o = getattr(torch.optim, p["type"])
|
253 |
+
cfg["optimizer"] = o(cfg["model"].parameters(), lr=p["lr"])
|
254 |
+
return ModelConfig(**cfg)
|
lynxkite-graph-analytics/tests/test_pytorch_model_ops.py
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from lynxkite.core import workspace
|
2 |
+
from lynxkite_graph_analytics import pytorch_model_ops
|
3 |
+
import torch
|
4 |
+
import pytest
|
5 |
+
|
6 |
+
|
7 |
+
def make_ws(env, nodes: dict[str, dict], edges: list[tuple[str, str, str, str]]):
|
8 |
+
ws = workspace.Workspace(env=env)
|
9 |
+
for id, data in nodes.items():
|
10 |
+
ws.nodes.append(
|
11 |
+
workspace.WorkspaceNode(
|
12 |
+
id=id,
|
13 |
+
type="basic",
|
14 |
+
data=workspace.WorkspaceNodeData(title=data["title"], params=data),
|
15 |
+
position=workspace.Position(
|
16 |
+
x=data.get("x", 0),
|
17 |
+
y=data.get("y", 0),
|
18 |
+
),
|
19 |
+
)
|
20 |
+
)
|
21 |
+
ws.edges = [
|
22 |
+
workspace.WorkspaceEdge(
|
23 |
+
id=f"{source}->{target}",
|
24 |
+
source=source.split(":")[0],
|
25 |
+
target=target.split(":")[0],
|
26 |
+
sourceHandle=source.split(":")[1],
|
27 |
+
targetHandle=target.split(":")[1],
|
28 |
+
)
|
29 |
+
for source, target in edges
|
30 |
+
]
|
31 |
+
return ws
|
32 |
+
|
33 |
+
|
34 |
+
async def test_build_model():
|
35 |
+
ws = make_ws(
|
36 |
+
pytorch_model_ops.ENV,
|
37 |
+
{
|
38 |
+
"emb": {"title": "Input: embedding"},
|
39 |
+
"lin": {"title": "Linear", "output_dim": "same"},
|
40 |
+
"act": {"title": "Activation", "type": "Leaky ReLU"},
|
41 |
+
"label": {"title": "Input: label"},
|
42 |
+
"loss": {"title": "MSE loss"},
|
43 |
+
"optim": {"title": "Optimizer", "type": "SGD", "lr": 0.1},
|
44 |
+
},
|
45 |
+
[
|
46 |
+
("emb:x", "lin:x"),
|
47 |
+
("lin:x", "act:x"),
|
48 |
+
("act:x", "loss:x"),
|
49 |
+
("label:y", "loss:y"),
|
50 |
+
("loss:loss", "optim:loss"),
|
51 |
+
],
|
52 |
+
)
|
53 |
+
x = torch.rand(100, 4)
|
54 |
+
y = x + 1
|
55 |
+
m = pytorch_model_ops.build_model(ws, {"emb_x": x, "label_y": y})
|
56 |
+
for i in range(1000):
|
57 |
+
loss = m.train({"emb_x": x, "label_y": y})
|
58 |
+
assert loss < 0.1
|
59 |
+
o = m.inference({"emb_x": x[:1]})
|
60 |
+
error = torch.nn.functional.mse_loss(o["act_x"], x[:1] + 1)
|
61 |
+
assert error < 0.1
|
62 |
+
|
63 |
+
|
64 |
+
if __name__ == "__main__":
|
65 |
+
pytest.main()
|