abhik1368 darabos commited on
Commit
9d40ea2
·
unverified ·
1 Parent(s): 11146c1

Update Cheminformatics use cases (#155)

Browse files

* Update Cheminformatics use cases

Add _cheminfo_tools.py with a Lipinski filter, "View mol image", and "View mol filter" (filtering by SMARTS or SMILES, with substructure highlighting).

* Delete binary files. Create "Cheminformatics" folder for examples.

* MACCSkeys was undefined. rows was unused.

* Give ops human-readable names.

* Example workspace without images.

* Collapse parameters on "image" type boxes.

* Allow slow visualization boxes too.

* Simpler gallery drawing.

* Output for visualizations is now left empty.

---------

Co-authored-by: Daniel Darabos <[email protected]>

examples/Cheminformatics/Example workspace.lynxkite.json ADDED
@@ -0,0 +1,986 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "edges": [
3
+ {
4
+ "id": "Import CSV 1 Draw molecules 1",
5
+ "source": "Import CSV 1",
6
+ "sourceHandle": "output",
7
+ "target": "Draw molecules 1",
8
+ "targetHandle": "df"
9
+ },
10
+ {
11
+ "id": "Draw molecules 1 View tables 1",
12
+ "source": "Draw molecules 1",
13
+ "sourceHandle": "output",
14
+ "target": "View tables 1",
15
+ "targetHandle": "bundle"
16
+ },
17
+ {
18
+ "id": "Import file 1 View mol filter 1",
19
+ "source": "Import file 1",
20
+ "sourceHandle": "output",
21
+ "target": "View mol filter 1",
22
+ "targetHandle": "bundle"
23
+ },
24
+ {
25
+ "id": "Draw molecules 2 View tables 2",
26
+ "source": "Draw molecules 2",
27
+ "sourceHandle": "output",
28
+ "target": "View tables 2",
29
+ "targetHandle": "bundle"
30
+ },
31
+ {
32
+ "id": "Import file 1 Train QSAR model 1",
33
+ "source": "Import file 1",
34
+ "sourceHandle": "output",
35
+ "target": "Train QSAR model 1",
36
+ "targetHandle": "bundle"
37
+ },
38
+ {
39
+ "id": "Train QSAR model 1 View tables 3",
40
+ "source": "Train QSAR model 1",
41
+ "sourceHandle": "output",
42
+ "target": "View tables 3",
43
+ "targetHandle": "bundle"
44
+ },
45
+ {
46
+ "id": "Import file 1 Lipinski filter 1",
47
+ "source": "Import file 1",
48
+ "sourceHandle": "output",
49
+ "target": "Lipinski filter 1",
50
+ "targetHandle": "bundle"
51
+ },
52
+ {
53
+ "id": "Lipinski filter 1 Draw molecules 2",
54
+ "source": "Lipinski filter 1",
55
+ "sourceHandle": "output",
56
+ "target": "Draw molecules 2",
57
+ "targetHandle": "df"
58
+ },
59
+ {
60
+ "id": "Import file 1 View mol image 1",
61
+ "source": "Import file 1",
62
+ "sourceHandle": "output",
63
+ "target": "View mol image 1",
64
+ "targetHandle": "bundle"
65
+ }
66
+ ],
67
+ "env": "LynxKite Graph Analytics",
68
+ "nodes": [
69
+ {
70
+ "data": {
71
+ "__execution_delay": 0.0,
72
+ "collapsed": false,
73
+ "display": null,
74
+ "error": null,
75
+ "input_metadata": [],
76
+ "meta": {
77
+ "color": "orange",
78
+ "inputs": [],
79
+ "name": "Import CSV",
80
+ "outputs": [
81
+ {
82
+ "name": "output",
83
+ "position": "right",
84
+ "type": {
85
+ "type": "None"
86
+ }
87
+ }
88
+ ],
89
+ "params": [
90
+ {
91
+ "default": null,
92
+ "name": "filename",
93
+ "type": {
94
+ "type": "<class 'str'>"
95
+ }
96
+ },
97
+ {
98
+ "default": "<from file>",
99
+ "name": "columns",
100
+ "type": {
101
+ "type": "<class 'str'>"
102
+ }
103
+ },
104
+ {
105
+ "default": "<auto>",
106
+ "name": "separator",
107
+ "type": {
108
+ "type": "<class 'str'>"
109
+ }
110
+ }
111
+ ],
112
+ "type": "basic"
113
+ },
114
+ "params": {
115
+ "columns": "<from file>",
116
+ "filename": "uploads/CHEMBL313_sel.csv",
117
+ "separator": "<auto>"
118
+ },
119
+ "status": "done",
120
+ "title": "Import CSV"
121
+ },
122
+ "dragHandle": ".bg-primary",
123
+ "height": 206.0,
124
+ "id": "Import CSV 1",
125
+ "position": {
126
+ "x": -1645.1133255451734,
127
+ "y": -601.1570975227908
128
+ },
129
+ "type": "basic",
130
+ "width": 314.0
131
+ },
132
+ {
133
+ "data": {
134
+ "__execution_delay": null,
135
+ "collapsed": false,
136
+ "error": null,
137
+ "input_metadata": [
138
+ {
139
+ "dataframes": {
140
+ "df": {
141
+ "columns": [
142
+ "Name",
143
+ "SMILES",
144
+ "image",
145
+ "pIC50"
146
+ ]
147
+ }
148
+ },
149
+ "other": {},
150
+ "relations": []
151
+ }
152
+ ],
153
+ "meta": {
154
+ "color": "orange",
155
+ "inputs": [
156
+ {
157
+ "name": "bundle",
158
+ "position": "left",
159
+ "type": {
160
+ "type": "<class 'lynxkite_graph_analytics.core.Bundle'>"
161
+ }
162
+ }
163
+ ],
164
+ "name": "View tables",
165
+ "outputs": [],
166
+ "params": [
167
+ {
168
+ "default": 100,
169
+ "name": "limit",
170
+ "type": {
171
+ "type": "<class 'int'>"
172
+ }
173
+ }
174
+ ],
175
+ "type": "table_view"
176
+ },
177
+ "params": {},
178
+ "status": "done",
179
+ "title": "View tables"
180
+ },
181
+ "dragHandle": ".bg-primary",
182
+ "height": 296.0,
183
+ "id": "View tables 1",
184
+ "position": {
185
+ "x": -870.3210345250492,
186
+ "y": -671.6118451917321
187
+ },
188
+ "type": "table_view",
189
+ "width": 682.0
190
+ },
191
+ {
192
+ "data": {
193
+ "__execution_delay": 0.0,
194
+ "collapsed": null,
195
+ "display": null,
196
+ "error": null,
197
+ "input_metadata": [],
198
+ "meta": {
199
+ "color": "orange",
200
+ "inputs": [],
201
+ "name": "Import file",
202
+ "outputs": [
203
+ {
204
+ "name": "output",
205
+ "position": "right",
206
+ "type": {
207
+ "type": "None"
208
+ }
209
+ }
210
+ ],
211
+ "params": [
212
+ {
213
+ "default": null,
214
+ "name": "file_path",
215
+ "type": {
216
+ "type": "<class 'str'>"
217
+ }
218
+ },
219
+ {
220
+ "default": null,
221
+ "name": "table_name",
222
+ "type": {
223
+ "type": "<class 'str'>"
224
+ }
225
+ },
226
+ {
227
+ "default": null,
228
+ "name": "file_format",
229
+ "type": {
230
+ "enum": [
231
+ "csv",
232
+ "parquet",
233
+ "json",
234
+ "excel"
235
+ ]
236
+ }
237
+ },
238
+ {
239
+ "default": "csv",
240
+ "groups": {
241
+ "csv": [
242
+ {
243
+ "default": "<from file>",
244
+ "name": "columns",
245
+ "type": {
246
+ "type": "<class 'str'>"
247
+ }
248
+ },
249
+ {
250
+ "default": "<auto>",
251
+ "name": "separator",
252
+ "type": {
253
+ "type": "<class 'str'>"
254
+ }
255
+ }
256
+ ],
257
+ "excel": [
258
+ {
259
+ "default": "Sheet1",
260
+ "name": "sheet_name",
261
+ "type": {
262
+ "type": "<class 'str'>"
263
+ }
264
+ }
265
+ ],
266
+ "json": [],
267
+ "parquet": []
268
+ },
269
+ "name": "file_format_group",
270
+ "selector": {
271
+ "default": "csv",
272
+ "name": "file_format",
273
+ "type": {
274
+ "enum": [
275
+ "csv",
276
+ "parquet",
277
+ "json",
278
+ "excel"
279
+ ]
280
+ }
281
+ },
282
+ "type": "group"
283
+ }
284
+ ],
285
+ "type": "basic"
286
+ },
287
+ "params": {
288
+ "columns": "<from file>",
289
+ "file_format": "csv",
290
+ "file_format_group": "csv",
291
+ "file_path": "uploads/CHEMBL313_sel.csv",
292
+ "table_name": "data"
293
+ },
294
+ "status": "done",
295
+ "title": "Import file"
296
+ },
297
+ "dragHandle": ".bg-primary",
298
+ "height": 331.0,
299
+ "id": "Import file 1",
300
+ "position": {
301
+ "x": -1690.536661950572,
302
+ "y": -192.47882875357882
303
+ },
304
+ "type": "basic",
305
+ "width": 326.0
306
+ },
307
+ {
308
+ "data": {
309
+ "__execution_delay": 0.0,
310
+ "collapsed": null,
311
+ "display": null,
312
+ "error": null,
313
+ "input_metadata": [
314
+ {}
315
+ ],
316
+ "meta": {
317
+ "color": "orange",
318
+ "inputs": [
319
+ {
320
+ "name": "df",
321
+ "position": "left",
322
+ "type": {
323
+ "type": "<class 'pandas.core.frame.DataFrame'>"
324
+ }
325
+ }
326
+ ],
327
+ "name": "Draw molecules",
328
+ "outputs": [
329
+ {
330
+ "name": "output",
331
+ "position": "right",
332
+ "type": {
333
+ "type": "None"
334
+ }
335
+ }
336
+ ],
337
+ "params": [
338
+ {
339
+ "default": null,
340
+ "name": "smiles_column",
341
+ "type": {
342
+ "type": "<class 'str'>"
343
+ }
344
+ },
345
+ {
346
+ "default": "image",
347
+ "name": "image_column",
348
+ "type": {
349
+ "type": "<class 'str'>"
350
+ }
351
+ }
352
+ ],
353
+ "type": "basic"
354
+ },
355
+ "params": {
356
+ "image_column": "image",
357
+ "smiles_column": "SMILES"
358
+ },
359
+ "status": "done",
360
+ "title": "Draw molecules"
361
+ },
362
+ "dragHandle": ".bg-primary",
363
+ "height": 225.0,
364
+ "id": "Draw molecules 1",
365
+ "position": {
366
+ "x": -1168.2624512141447,
367
+ "y": -616.8610881973451
368
+ },
369
+ "type": "basic",
370
+ "width": 234.0
371
+ },
372
+ {
373
+ "data": {
374
+ "__execution_delay": 0.0,
375
+ "collapsed": null,
376
+ "error": null,
377
+ "input_metadata": [
378
+ {
379
+ "dataframes": {
380
+ "data": {
381
+ "columns": [
382
+ "Name",
383
+ "SMILES",
384
+ "mol",
385
+ "pIC50"
386
+ ]
387
+ }
388
+ },
389
+ "other": {},
390
+ "relations": []
391
+ }
392
+ ],
393
+ "meta": {
394
+ "color": "orange",
395
+ "inputs": [
396
+ {
397
+ "name": "bundle",
398
+ "position": "left",
399
+ "type": {
400
+ "type": "<class 'inspect._empty'>"
401
+ }
402
+ }
403
+ ],
404
+ "name": "View mol filter",
405
+ "outputs": [],
406
+ "params": [
407
+ {
408
+ "default": null,
409
+ "name": "table_name",
410
+ "type": {
411
+ "type": "<class 'str'>"
412
+ }
413
+ },
414
+ {
415
+ "default": null,
416
+ "name": "SMILES_Column",
417
+ "type": {
418
+ "type": "<class 'str'>"
419
+ }
420
+ },
421
+ {
422
+ "default": null,
423
+ "name": "mols_per_row",
424
+ "type": {
425
+ "type": "<class 'int'>"
426
+ }
427
+ },
428
+ {
429
+ "default": null,
430
+ "name": "filter_smarts",
431
+ "type": {
432
+ "type": "<class 'str'>"
433
+ }
434
+ },
435
+ {
436
+ "default": null,
437
+ "name": "filter_smiles",
438
+ "type": {
439
+ "type": "<class 'str'>"
440
+ }
441
+ },
442
+ {
443
+ "default": true,
444
+ "name": "highlight",
445
+ "type": {
446
+ "type": "<class 'bool'>"
447
+ }
448
+ }
449
+ ],
450
+ "type": "image"
451
+ },
452
+ "params": {
453
+ "SMILES_Column": "SMILES",
454
+ "filter_smarts": "",
455
+ "filter_smiles": "CN1C2CC(CC1CC2)c1ccccc1",
456
+ "highlight": true,
457
+ "mols_per_row": "4",
458
+ "table_name": "data"
459
+ },
460
+ "status": "done",
461
+ "title": "View mol filter"
462
+ },
463
+ "dragHandle": ".bg-primary",
464
+ "height": 229.0,
465
+ "id": "View mol filter 1",
466
+ "position": {
467
+ "x": -796.5179679488858,
468
+ "y": -20.974048336481403
469
+ },
470
+ "type": "image",
471
+ "width": 416.0
472
+ },
473
+ {
474
+ "data": {
475
+ "__execution_delay": 0.0,
476
+ "collapsed": null,
477
+ "display": null,
478
+ "error": null,
479
+ "input_metadata": [
480
+ {}
481
+ ],
482
+ "meta": {
483
+ "color": "orange",
484
+ "inputs": [
485
+ {
486
+ "name": "df",
487
+ "position": "left",
488
+ "type": {
489
+ "type": "<class 'pandas.core.frame.DataFrame'>"
490
+ }
491
+ }
492
+ ],
493
+ "name": "Draw molecules",
494
+ "outputs": [
495
+ {
496
+ "name": "output",
497
+ "position": "right",
498
+ "type": {
499
+ "type": "None"
500
+ }
501
+ }
502
+ ],
503
+ "params": [
504
+ {
505
+ "default": null,
506
+ "name": "smiles_column",
507
+ "type": {
508
+ "type": "<class 'str'>"
509
+ }
510
+ },
511
+ {
512
+ "default": "image",
513
+ "name": "image_column",
514
+ "type": {
515
+ "type": "<class 'str'>"
516
+ }
517
+ }
518
+ ],
519
+ "type": "basic"
520
+ },
521
+ "params": {
522
+ "image_column": "image",
523
+ "smiles_column": "SMILES"
524
+ },
525
+ "status": "done",
526
+ "title": "Draw molecules"
527
+ },
528
+ "dragHandle": ".bg-primary",
529
+ "height": 184.0,
530
+ "id": "Draw molecules 2",
531
+ "position": {
532
+ "x": -280.6043043496203,
533
+ "y": 151.45994649399827
534
+ },
535
+ "type": "basic",
536
+ "width": 234.0
537
+ },
538
+ {
539
+ "data": {
540
+ "error": null,
541
+ "input_metadata": [
542
+ {
543
+ "dataframes": {
544
+ "df": {
545
+ "columns": [
546
+ "HBA",
547
+ "HBD",
548
+ "MW",
549
+ "Name",
550
+ "SMILES",
551
+ "image",
552
+ "logP",
553
+ "pIC50",
554
+ "pass_lipinski"
555
+ ]
556
+ }
557
+ },
558
+ "other": {},
559
+ "relations": []
560
+ }
561
+ ],
562
+ "meta": {
563
+ "color": "orange",
564
+ "inputs": [
565
+ {
566
+ "name": "bundle",
567
+ "position": "left",
568
+ "type": {
569
+ "type": "<class 'lynxkite_graph_analytics.core.Bundle'>"
570
+ }
571
+ }
572
+ ],
573
+ "name": "View tables",
574
+ "outputs": [],
575
+ "params": [
576
+ {
577
+ "default": 100,
578
+ "name": "limit",
579
+ "type": {
580
+ "type": "<class 'int'>"
581
+ }
582
+ }
583
+ ],
584
+ "type": "table_view"
585
+ },
586
+ "params": {
587
+ "limit": 100.0
588
+ },
589
+ "status": "done",
590
+ "title": "View tables"
591
+ },
592
+ "dragHandle": ".bg-primary",
593
+ "height": 429.0,
594
+ "id": "View tables 2",
595
+ "position": {
596
+ "x": 50.96521878343134,
597
+ "y": 102.82896600710055
598
+ },
599
+ "type": "table_view",
600
+ "width": 821.0
601
+ },
602
+ {
603
+ "data": {
604
+ "__execution_delay": 0.0,
605
+ "collapsed": null,
606
+ "display": null,
607
+ "error": null,
608
+ "input_metadata": [
609
+ {
610
+ "dataframes": {
611
+ "data": {
612
+ "columns": [
613
+ "Name",
614
+ "SMILES",
615
+ "pIC50"
616
+ ]
617
+ }
618
+ },
619
+ "other": {},
620
+ "relations": []
621
+ }
622
+ ],
623
+ "meta": {
624
+ "color": "orange",
625
+ "inputs": [
626
+ {
627
+ "name": "bundle",
628
+ "position": "left",
629
+ "type": {
630
+ "type": "<class 'inspect._empty'>"
631
+ }
632
+ }
633
+ ],
634
+ "name": "Train QSAR model",
635
+ "outputs": [
636
+ {
637
+ "name": "output",
638
+ "position": "right",
639
+ "type": {
640
+ "type": "None"
641
+ }
642
+ }
643
+ ],
644
+ "params": [
645
+ {
646
+ "default": null,
647
+ "name": "table_name",
648
+ "type": {
649
+ "type": "<class 'str'>"
650
+ }
651
+ },
652
+ {
653
+ "default": null,
654
+ "name": "smiles_col",
655
+ "type": {
656
+ "type": "<class 'str'>"
657
+ }
658
+ },
659
+ {
660
+ "default": null,
661
+ "name": "target_col",
662
+ "type": {
663
+ "type": "<class 'str'>"
664
+ }
665
+ },
666
+ {
667
+ "default": null,
668
+ "name": "fp_type",
669
+ "type": {
670
+ "type": "<class 'str'>"
671
+ }
672
+ },
673
+ {
674
+ "default": 2,
675
+ "name": "radius",
676
+ "type": {
677
+ "type": "<class 'int'>"
678
+ }
679
+ },
680
+ {
681
+ "default": 2048,
682
+ "name": "n_bits",
683
+ "type": {
684
+ "type": "<class 'int'>"
685
+ }
686
+ },
687
+ {
688
+ "default": 0.2,
689
+ "name": "test_size",
690
+ "type": {
691
+ "type": "<class 'float'>"
692
+ }
693
+ },
694
+ {
695
+ "default": 42,
696
+ "name": "random_state",
697
+ "type": {
698
+ "type": "<class 'int'>"
699
+ }
700
+ },
701
+ {
702
+ "default": "Models",
703
+ "name": "out_dir",
704
+ "type": {
705
+ "type": "<class 'str'>"
706
+ }
707
+ }
708
+ ],
709
+ "type": "basic"
710
+ },
711
+ "params": {
712
+ "fp_type": "ecfp",
713
+ "n_bits": 2048.0,
714
+ "out_dir": "Models",
715
+ "radius": 2.0,
716
+ "random_state": 42.0,
717
+ "smiles_col": "SMILES",
718
+ "table_name": "data",
719
+ "target_col": "pIC50",
720
+ "test_size": 0.2
721
+ },
722
+ "status": "done",
723
+ "title": "Train QSAR model"
724
+ },
725
+ "dragHandle": ".bg-primary",
726
+ "height": 288.0,
727
+ "id": "Train QSAR model 1",
728
+ "position": {
729
+ "x": -1159.342067985799,
730
+ "y": -309.43299496669476
731
+ },
732
+ "type": "basic",
733
+ "width": 329.0
734
+ },
735
+ {
736
+ "data": {
737
+ "display": {
738
+ "dataframes": {
739
+ "df": {
740
+ "columns": [
741
+ "split",
742
+ "R2",
743
+ "MAE",
744
+ "RMSE"
745
+ ],
746
+ "data": [
747
+ [
748
+ "train",
749
+ 0.9417380311136551,
750
+ 0.24223013565891363,
751
+ 0.3009354647304392
752
+ ],
753
+ [
754
+ "test",
755
+ 0.27586659127848734,
756
+ 0.8443154545454546,
757
+ 1.098891477150744
758
+ ]
759
+ ]
760
+ }
761
+ },
762
+ "other": {},
763
+ "relations": []
764
+ },
765
+ "error": null,
766
+ "input_metadata": [
767
+ {
768
+ "dataframes": {
769
+ "df": {
770
+ "columns": [
771
+ "MAE",
772
+ "R2",
773
+ "RMSE",
774
+ "split"
775
+ ]
776
+ }
777
+ },
778
+ "other": {},
779
+ "relations": []
780
+ }
781
+ ],
782
+ "meta": {
783
+ "color": "orange",
784
+ "inputs": [
785
+ {
786
+ "name": "bundle",
787
+ "position": "left",
788
+ "type": {
789
+ "type": "<class 'lynxkite_graph_analytics.core.Bundle'>"
790
+ }
791
+ }
792
+ ],
793
+ "name": "View tables",
794
+ "outputs": [],
795
+ "params": [
796
+ {
797
+ "default": 100,
798
+ "name": "limit",
799
+ "type": {
800
+ "type": "<class 'int'>"
801
+ }
802
+ }
803
+ ],
804
+ "type": "table_view"
805
+ },
806
+ "params": {
807
+ "limit": 100.0
808
+ },
809
+ "status": "done",
810
+ "title": "View tables"
811
+ },
812
+ "dragHandle": ".bg-primary",
813
+ "height": 291.0,
814
+ "id": "View tables 3",
815
+ "position": {
816
+ "x": -757.0926975107354,
817
+ "y": -321.469271323077
818
+ },
819
+ "type": "table_view",
820
+ "width": 496.0
821
+ },
822
+ {
823
+ "data": {
824
+ "__execution_delay": 0.0,
825
+ "collapsed": null,
826
+ "display": null,
827
+ "error": null,
828
+ "input_metadata": [
829
+ {
830
+ "dataframes": {
831
+ "data": {
832
+ "columns": [
833
+ "Name",
834
+ "SMILES",
835
+ "mol",
836
+ "pIC50"
837
+ ]
838
+ }
839
+ },
840
+ "other": {},
841
+ "relations": []
842
+ }
843
+ ],
844
+ "meta": {
845
+ "color": "orange",
846
+ "inputs": [
847
+ {
848
+ "name": "bundle",
849
+ "position": "left",
850
+ "type": {
851
+ "type": "<class 'inspect._empty'>"
852
+ }
853
+ }
854
+ ],
855
+ "name": "Lipinski filter",
856
+ "outputs": [
857
+ {
858
+ "name": "output",
859
+ "position": "right",
860
+ "type": {
861
+ "type": "None"
862
+ }
863
+ }
864
+ ],
865
+ "params": [
866
+ {
867
+ "default": null,
868
+ "name": "table_name",
869
+ "type": {
870
+ "type": "<class 'str'>"
871
+ }
872
+ },
873
+ {
874
+ "default": null,
875
+ "name": "column_name",
876
+ "type": {
877
+ "type": "<class 'str'>"
878
+ }
879
+ },
880
+ {
881
+ "default": true,
882
+ "name": "strict_lipinski",
883
+ "type": {
884
+ "type": "<class 'bool'>"
885
+ }
886
+ }
887
+ ],
888
+ "type": "basic"
889
+ },
890
+ "params": {
891
+ "column_name": "SMILES",
892
+ "strict_lipinski": true,
893
+ "table_name": "data"
894
+ },
895
+ "status": "done",
896
+ "title": "Lipinski filter"
897
+ },
898
+ "dragHandle": ".bg-primary",
899
+ "height": 299.0,
900
+ "id": "Lipinski filter 1",
901
+ "position": {
902
+ "x": -720.0507400376052,
903
+ "y": 276.1650594718383
904
+ },
905
+ "type": "basic",
906
+ "width": 402.0
907
+ },
908
+ {
909
+ "data": {
910
+ "__execution_delay": 0.0,
911
+ "collapsed": null,
912
+ "error": null,
913
+ "input_metadata": [
914
+ {
915
+ "dataframes": {
916
+ "data": {
917
+ "columns": [
918
+ "Name",
919
+ "SMILES",
920
+ "mol",
921
+ "pIC50"
922
+ ]
923
+ }
924
+ },
925
+ "other": {},
926
+ "relations": []
927
+ }
928
+ ],
929
+ "meta": {
930
+ "color": "orange",
931
+ "inputs": [
932
+ {
933
+ "name": "bundle",
934
+ "position": "left",
935
+ "type": {
936
+ "type": "<class 'inspect._empty'>"
937
+ }
938
+ }
939
+ ],
940
+ "name": "View mol image",
941
+ "outputs": [],
942
+ "params": [
943
+ {
944
+ "default": null,
945
+ "name": "table_name",
946
+ "type": {
947
+ "type": "<class 'str'>"
948
+ }
949
+ },
950
+ {
951
+ "default": null,
952
+ "name": "smiles_column",
953
+ "type": {
954
+ "type": "<class 'str'>"
955
+ }
956
+ },
957
+ {
958
+ "default": null,
959
+ "name": "mols_per_row",
960
+ "type": {
961
+ "type": "<class 'int'>"
962
+ }
963
+ }
964
+ ],
965
+ "type": "image"
966
+ },
967
+ "params": {
968
+ "mols_per_row": "4",
969
+ "smiles_column": "SMILES",
970
+ "table_name": "data"
971
+ },
972
+ "status": "done",
973
+ "title": "View mol image"
974
+ },
975
+ "dragHandle": ".bg-primary",
976
+ "height": 309.0,
977
+ "id": "View mol image 1",
978
+ "position": {
979
+ "x": -1378.339896172849,
980
+ "y": 280.2327514185724
981
+ },
982
+ "type": "image",
983
+ "width": 363.0
984
+ }
985
+ ]
986
+ }
examples/Cheminformatics/cheminfo_tools.py ADDED
@@ -0,0 +1,305 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pickle
3
+ from lynxkite.core.ops import op
4
+ from matplotlib import pyplot as plt
5
+ import pandas as pd
6
+ from rdkit.Chem.Draw import rdMolDraw2D
7
+ from PIL import Image
8
+ from rdkit import Chem
9
+ from rdkit.Chem import Descriptors
10
+ from rdkit.Chem import Crippen, Lipinski
11
+ from rdkit import DataStructs
12
+ import math
13
+ import io
14
+ from rdkit.Chem import AllChem
15
+ from sklearn.ensemble import RandomForestRegressor
16
+ from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
17
+ from sklearn.model_selection import train_test_split
18
+ import numpy as np
19
+
20
+
21
@op("LynxKite Graph Analytics", "View mol filter", view="matplotlib", slow=True)
def mol_filter(
    bundle,
    *,
    table_name: str,
    SMILES_Column: str,
    mols_per_row: int,
    filter_smarts: str = None,
    filter_smiles: str = None,
    highlight: bool = True,
):
    """
    Draws a grid of molecules, with optional substructure filtering and highlighting.

    Rows whose SMILES cannot be parsed are skipped. Every drawn molecule is captioned
    with its name, pIC50, molecular weight, logP and H-bond donor/acceptor counts, so
    the table must also provide a "Name" column and a numeric "pIC50" column.

    Parameters:
    - bundle: data bundle containing a DataFrame in bundle.dfs[table_name]
    - table_name: name of the table in bundle.dfs
    - SMILES_Column: column containing SMILES strings
    - mols_per_row: number of molecules per row in the grid
    - filter_smarts: SMARTS pattern to filter and highlight
    - filter_smiles: SMILES substructure to filter and highlight (used only if
      filter_smarts is empty)
    - highlight: whether to highlight matching substructures

    Raises:
    - ValueError: if the filter pattern cannot be parsed, or if no molecule
      passes the filter.
    """
    # get DataFrame and keep only the rows with a parseable SMILES
    df = bundle.dfs[table_name].copy()
    df["mol"] = df[SMILES_Column].apply(Chem.MolFromSmiles)
    df = df[df["mol"].notnull()].reset_index(drop=True)

    # compile substructure query if provided; RDKit returns None for a pattern it
    # cannot parse, which must not be mistaken for "no filter requested"
    query = None
    if filter_smarts:
        query = Chem.MolFromSmarts(filter_smarts)
        if query is None:
            raise ValueError(f"Invalid SMARTS pattern: '{filter_smarts}'")
    elif filter_smiles:
        query = Chem.MolFromSmiles(filter_smiles)
        if query is None:
            raise ValueError(f"Invalid SMILES substructure: '{filter_smiles}'")

    # filter by substructure before computing descriptors, so no work is spent
    # on molecules that will not be drawn
    if query is not None:
        matches = df["mol"].apply(lambda m: m.HasSubstructMatch(query)).astype(bool)
        df = df[matches].reset_index(drop=True)
    if df.empty:
        raise ValueError("No molecules passed the filter.")

    # compute properties shown in the legends
    df["MW"] = df["mol"].apply(Descriptors.MolWt)
    df["logP"] = df["mol"].apply(Crippen.MolLogP)
    df["HBD"] = df["mol"].apply(Lipinski.NumHDonors)
    df["HBA"] = df["mol"].apply(Lipinski.NumHAcceptors)

    entries = []
    for _, row in df.iterrows():
        mol = row["mol"]

        # find atom and bond matches
        atom_ids, bond_ids = [], []
        if highlight and query is not None:
            atom_ids = list(mol.GetSubstructMatch(query))
            matched_atoms = set(atom_ids)
            # a bond is highlighted only when both of its ends are matched atoms
            bond_ids = [
                bond.GetIdx()
                for bond in mol.GetBonds()
                if bond.GetBeginAtomIdx() in matched_atoms
                and bond.GetEndAtomIdx() in matched_atoms
            ]

        legend = (
            f"{row['Name']} pIC50={row['pIC50']:.2f}\n"
            f"MW={row['MW']:.1f}, logP={row['logP']:.2f}\n"
            f"HBD={row['HBD']}, HBA={row['HBA']}"
        )
        entries.append((mol, legend, atom_ids, bond_ids))

    # draw each filtered molecule into its own PNG and collect them as PIL images
    images = []
    for mol, legend, atom_ids, bond_ids in entries:
        drawer = rdMolDraw2D.MolDraw2DCairo(400, 350)
        opts = drawer.drawOptions()
        opts.legendFontSize = 200
        drawer.DrawMolecule(mol, legend=legend, highlightAtoms=atom_ids, highlightBonds=bond_ids)
        drawer.FinishDrawing()

        sub_png = drawer.GetDrawingText()
        images.append(Image.open(io.BytesIO(sub_png)))

    plot_gallery(images, num_cols=mols_per_row)
104
+
105
+
106
@op("LynxKite Graph Analytics", "Lipinski filter")
def lipinski_filter(bundle, *, table_name: str, column_name: str, strict_lipinski: bool = True):
    """
    Annotates a table of molecules with Lipinski rule-of-five properties.

    Rows whose SMILES cannot be parsed are dropped. The returned DataFrame gains the
    columns "MW", "logP", "HBD", "HBA" and a boolean "pass_lipinski", which is true
    when MW <= 500, logP <= 5, HBD <= 5 and HBA <= 10.

    Parameters:
    - bundle: data bundle containing a DataFrame in bundle.dfs[table_name]
    - table_name: name of the table in bundle.dfs
    - column_name: column containing SMILES strings
    - strict_lipinski: if true, rows that fail the rule are removed (and reported
      on standard output); otherwise they are kept and only flagged

    Returns:
    - a new DataFrame; the table inside the bundle is left untouched
    """
    # work on a copy of the table so the input bundle is not modified
    df = bundle.dfs[table_name].copy()
    df["mol"] = df[column_name].apply(Chem.MolFromSmiles)
    df = df[df["mol"].notnull()].reset_index(drop=True)

    # compute properties
    df["MW"] = df["mol"].apply(Descriptors.MolWt)
    df["logP"] = df["mol"].apply(Crippen.MolLogP)
    df["HBD"] = df["mol"].apply(Lipinski.NumHDonors)
    df["HBA"] = df["mol"].apply(Lipinski.NumHAcceptors)

    # compute a boolean pass/fail for Lipinski
    df["pass_lipinski"] = (
        (df["MW"] <= 500) & (df["logP"] <= 5) & (df["HBD"] <= 5) & (df["HBA"] <= 10)
    )
    # the parsed molecule objects were only needed for the descriptors
    df = df.drop("mol", axis=1)

    # if strict_lipinski, drop those that fail
    if strict_lipinski:
        failed = df.loc[~df["pass_lipinski"], column_name].tolist()
        df = df[df["pass_lipinski"]].reset_index(drop=True)
        if failed:
            print(f"Dropped {len(failed)} molecules that failed Lipinski: {failed}")

    return df
134
+
135
+
136
@op("LynxKite Graph Analytics", "View mol image", view="matplotlib", slow=True)
def mol_image(bundle, *, table_name: str, smiles_column: str, mols_per_row: int):
    """
    Draws every parseable molecule of a table as a captioned image grid.

    The caption of each molecule lists its name, pIC50, molecular weight, logP and
    H-bond donor/acceptor counts, so the table must provide "Name" and "pIC50"
    columns next to the SMILES column.

    Parameters:
    - bundle: data bundle containing a DataFrame in bundle.dfs[table_name]
    - table_name: name of the table in bundle.dfs
    - smiles_column: column containing SMILES strings
    - mols_per_row: number of molecules per row in the grid

    Raises:
    - ValueError: if no SMILES in the table could be parsed.
    """

    def render(molecule, caption):
        # One molecule -> one 400x350 PNG, decoded into a PIL image.
        canvas = rdMolDraw2D.MolDraw2DCairo(400, 350)
        canvas.drawOptions().legendFontSize = 200
        canvas.DrawMolecule(molecule, legend=caption)
        canvas.FinishDrawing()
        return Image.open(io.BytesIO(canvas.GetDrawingText()))

    table = bundle.dfs[table_name].copy()
    table["mol"] = table[smiles_column].apply(Chem.MolFromSmiles)
    table = table[table["mol"].notnull()].reset_index(drop=True)

    # Descriptors that appear in the captions.
    descriptors = (
        ("MW", Descriptors.MolWt),
        ("logP", Crippen.MolLogP),
        ("HBD", Lipinski.NumHDonors),
        ("HBA", Lipinski.NumHAcceptors),
    )
    for column, compute in descriptors:
        table[column] = table["mol"].apply(compute)

    captions = [
        f"{record['Name']} pIC50={record['pIC50']:.2f}\n"
        f"MW={record['MW']:.1f}, logP={record['logP']:.2f}\n"
        f"HBD={record['HBD']}, HBA={record['HBA']}"
        for _, record in table.iterrows()
    ]

    molecules = table["mol"].tolist()
    if not molecules:
        raise ValueError("No valid molecules to draw.")

    pictures = [render(molecule, caption) for molecule, caption in zip(molecules, captions)]
    plot_gallery(pictures, num_cols=mols_per_row)
172
+
173
+
174
def plot_gallery(images, num_cols):
    """
    Lays out images on a grid of matplotlib subplots in the current pyplot figure.

    Images fill the grid row by row; cells left over in the last row stay blank.
    Axis ticks are removed from every cell. Nothing is returned: the figure is
    created through pyplot and left as the current figure.

    Parameters:
    - images: sequence of images accepted by Axes.imshow
    - num_cols: number of images per row, at least 1

    Raises:
    - ValueError: if num_cols is smaller than 1.
    """
    if num_cols < 1:
        raise ValueError("num_cols must be at least 1.")
    num_rows = math.ceil(len(images) / num_cols)
    # squeeze=False keeps `axes` a 2-D array even for a 1x1 grid; by default
    # plt.subplots returns a bare Axes object there, which has no flatten().
    _, axes = plt.subplots(
        num_rows, num_cols, figsize=(num_cols * 4, num_rows * 3.5), squeeze=False
    )
    for i, ax in enumerate(axes.flatten()):
        if i < len(images):
            ax.imshow(images[i])
        ax.set_xticks([])
        ax.set_yticks([])
    plt.tight_layout()
184
+
185
+
186
@op("LynxKite Graph Analytics", "Train QSAR model")
def build_qsar_model(
    bundle,
    *,
    table_name: str,
    smiles_col: str,
    target_col: str,
    fp_type: str,
    radius: int = 2,
    n_bits: int = 2048,
    test_size: float = 0.2,
    random_state: int = 42,
    out_dir: str = "Models",
):
    """
    Train and save a RandomForest QSAR model using one fingerprint type.

    Rows whose SMILES cannot be parsed are dropped before training. The fitted
    model is pickled to `<out_dir>/qsar_model_<fp_type>.pkl`; it is not returned.

    Parameters
    ----------
    bundle : any
        An object with a dict-like attribute `.dfs` mapping table names to DataFrames.
    table_name : str
        Key into bundle.dfs to get the DataFrame.
    smiles_col : str
        Name of the column containing SMILES strings.
    target_col : str
        Name of the column containing the numeric response.
    fp_type : str
        Fingerprint to compute: "ecfp", "rdkit", "torsion", "atompair", or "maccs".
    radius : int
        Radius for the Morgan (ECFP) fingerprint.
    n_bits : int
        Bit-vector length for all fp types except MACCS (167).
    test_size : float
        Fraction of data held out for testing.
    random_state : int
        Random seed for reproducibility.
    out_dir : str
        Directory in which to save `qsar_model_<fp_type>.pkl`.

    Returns
    -------
    metrics_df : pandas.DataFrame
        One row per split ("train", "test") with the columns "split", "R2",
        "MAE" and "RMSE".

    Raises
    ------
    KeyError
        If `table_name` is not in bundle.dfs.
    ValueError
        If `fp_type` is not supported or no SMILES could be parsed.
    """
    # Imported here so the submodule is guaranteed to be loaded; `Chem.MACCSkeys`
    # only resolves as an attribute if something else has already imported it.
    from rdkit.Chem import MACCSkeys

    # 0) pick the fingerprint function once, and fail fast on an unknown type.
    #    Each entry is (function producing a bit vector, length of that vector).
    fingerprinters = {
        "ecfp": (
            lambda mol: AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=n_bits),
            n_bits,
        ),
        "rdkit": (lambda mol: Chem.RDKFingerprint(mol, fpSize=n_bits), n_bits),
        "torsion": (
            lambda mol: AllChem.GetHashedTopologicalTorsionFingerprintAsBitVect(
                mol, nBits=n_bits
            ),
            n_bits,
        ),
        "atompair": (
            lambda mol: AllChem.GetHashedAtomPairFingerprintAsBitVect(mol, nBits=n_bits),
            n_bits,
        ),
        "maccs": (MACCSkeys.GenMACCSKeys, 167),  # MACCS keys have a fixed length
    }
    if fp_type not in fingerprinters:
        raise ValueError(f"Unsupported fingerprint type: '{fp_type}'")
    make_fingerprint, fp_length = fingerprinters[fp_type]

    # 1) load and sanitize data
    df = bundle.dfs.get(table_name)
    if df is None:
        raise KeyError(f"Table '{table_name}' not found in bundle.dfs")
    df = df.copy()
    df["mol"] = df[smiles_col].apply(Chem.MolFromSmiles)
    df = df[df["mol"].notnull()].reset_index(drop=True)
    if df.empty:
        raise ValueError(f"No valid molecules in '{smiles_col}'")

    # 2) create a fixed train/test split
    indices = np.arange(len(df))
    train_idx, test_idx = train_test_split(indices, test_size=test_size, random_state=random_state)

    # 3) featurize: one int8 row per molecule
    fps = []
    for mol in df["mol"]:
        arr = np.zeros((fp_length,), dtype=np.int8)
        DataStructs.ConvertToNumpyArray(make_fingerprint(mol), arr)
        fps.append(arr)

    X = np.vstack(fps)
    y = df[target_col].values

    # 4) split features/labels
    X_train, y_train = X[train_idx], y[train_idx]
    X_test, y_test = X[test_idx], y[test_idx]

    # 5) train RandomForest
    model = RandomForestRegressor(random_state=random_state)
    model.fit(X_train, y_train)

    # 6) compute performance metrics
    def _metrics(y_true, y_pred):
        mse = mean_squared_error(y_true, y_pred)
        return {
            "R2": r2_score(y_true, y_pred),
            "MAE": mean_absolute_error(y_true, y_pred),
            "RMSE": np.sqrt(mse),
        }

    train_m = _metrics(y_train, model.predict(X_train))
    test_m = _metrics(y_test, model.predict(X_test))
    metrics_df = pd.DataFrame([{"split": "train", **train_m}, {"split": "test", **test_m}])

    # 7) save the model
    os.makedirs(out_dir, exist_ok=True)
    model_file = os.path.join(out_dir, f"qsar_model_{fp_type}.pkl")
    with open(model_file, "wb") as fout:
        pickle.dump(model, fout)

    print(f"Trained & saved QSAR model for '{fp_type}' → {model_file}")
    return metrics_df
examples/Image table.lynxkite.json CHANGED
@@ -27,7 +27,7 @@
27
  {
28
  "data": {
29
  "__execution_delay": null,
30
- "collapsed": true,
31
  "display": null,
32
  "error": null,
33
  "input_metadata": [],
@@ -55,8 +55,8 @@
55
  "height": 200.0,
56
  "id": "Example image table 1",
57
  "position": {
58
- "x": 213.60043945845376,
59
- "y": 306.98088700969373
60
  },
61
  "type": "basic",
62
  "width": 280.0
@@ -138,8 +138,8 @@
138
  "height": 440.0,
139
  "id": "View tables 1",
140
  "position": {
141
- "x": 626.2831607914023,
142
- "y": 109.55448939208392
143
  },
144
  "type": "table_view",
145
  "width": 376.0
@@ -198,14 +198,14 @@
198
  "title": "Import CSV"
199
  },
200
  "dragHandle": ".bg-primary",
201
- "height": 313.0,
202
  "id": "Import CSV 1",
203
  "position": {
204
  "x": 13.802068621055497,
205
  "y": -269.65065144888104
206
  },
207
  "type": "basic",
208
- "width": 216.0
209
  },
210
  {
211
  "data": {
@@ -282,15 +282,15 @@
282
  "params": {
283
  "limit": 100.0
284
  },
285
- "status": "done",
286
  "title": "View tables"
287
  },
288
  "dragHandle": ".bg-primary",
289
  "height": 418.0,
290
  "id": "View tables 2",
291
  "position": {
292
- "x": 548.7706661684929,
293
- "y": -316.9626796191875
294
  },
295
  "type": "table_view",
296
  "width": 1116.0
@@ -300,7 +300,7 @@
300
  "__execution_delay": 0.0,
301
  "collapsed": null,
302
  "display": null,
303
- "error": null,
304
  "input_metadata": [
305
  {}
306
  ],
@@ -354,8 +354,8 @@
354
  "height": 296.0,
355
  "id": "Draw molecules 1",
356
  "position": {
357
- "x": 289.42386787591244,
358
- "y": -258.0823121715324
359
  },
360
  "type": "basic",
361
  "width": 212.0
 
27
  {
28
  "data": {
29
  "__execution_delay": null,
30
+ "collapsed": false,
31
  "display": null,
32
  "error": null,
33
  "input_metadata": [],
 
55
  "height": 200.0,
56
  "id": "Example image table 1",
57
  "position": {
58
+ "x": 356.1935187064265,
59
+ "y": 224.8472733628614
60
  },
61
  "type": "basic",
62
  "width": 280.0
 
138
  "height": 440.0,
139
  "id": "View tables 1",
140
  "position": {
141
+ "x": 757.4687936995374,
142
+ "y": 116.39895719598661
143
  },
144
  "type": "table_view",
145
  "width": 376.0
 
198
  "title": "Import CSV"
199
  },
200
  "dragHandle": ".bg-primary",
201
+ "height": 339.0,
202
  "id": "Import CSV 1",
203
  "position": {
204
  "x": 13.802068621055497,
205
  "y": -269.65065144888104
206
  },
207
  "type": "basic",
208
+ "width": 296.0
209
  },
210
  {
211
  "data": {
 
282
  "params": {
283
  "limit": 100.0
284
  },
285
+ "status": "planned",
286
  "title": "View tables"
287
  },
288
  "dragHandle": ".bg-primary",
289
  "height": 418.0,
290
  "id": "View tables 2",
291
  "position": {
292
+ "x": 815.4121289519509,
293
+ "y": -330.8232285057863
294
  },
295
  "type": "table_view",
296
  "width": 1116.0
 
300
  "__execution_delay": 0.0,
301
  "collapsed": null,
302
  "display": null,
303
+ "error": "module 'rdkit.Chem' has no attribute 'Draw'",
304
  "input_metadata": [
305
  {}
306
  ],
 
354
  "height": 296.0,
355
  "id": "Draw molecules 1",
356
  "position": {
357
+ "x": 351.1956913898301,
358
+ "y": -235.00831568554486
359
  },
360
  "type": "basic",
361
  "width": 212.0
examples/draw_molecules.py CHANGED
@@ -15,6 +15,8 @@ def smiles_to_data(smiles):
15
  import rdkit
16
 
17
  m = rdkit.Chem.MolFromSmiles(smiles)
 
 
18
  img = rdkit.Chem.Draw.MolToImage(m)
19
  data = pil_to_data(img)
20
  return data
 
15
  import rdkit
16
 
17
  m = rdkit.Chem.MolFromSmiles(smiles)
18
+ if m is None:
19
+ return None
20
  img = rdkit.Chem.Draw.MolToImage(m)
21
  data = pil_to_data(img)
22
  return data
examples/uploads/CHEMBL313_sel.csv ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ SMILES,Name,pIC50
2
+ Cc1ccc(C2CC3CCC(C2C(=O)OC(C)C)N3C)cc1,CHEMBL321806,5.99
3
+ Cc1ccc(C2CC3CCC(C2C(=O)Oc2ccccc2)N3C)cc1,CHEMBL340912,5.81
4
+ CN1C2CCC1C(C(=O)Oc1ccccc1)C(c1ccc(I)cc1)C2,CHEMBL340761,7.29
5
+ CC(C)OC(=O)C1C(c2ccc(I)cc2)CC2CCC1N2C,CHEMBL127546,7.9
6
+ COC(=O)C1C(c2ccc(Br)cc2)CC2CCC1N2C,CHEMBL97887,8.31
7
+ CC(C)OC(=O)C1C(c2ccc(Cl)cc2)CC2CCC1N2C,CHEMBL127040,6.89
8
+ COC(=O)C1C2CCC(CC1c1ccc(F)cc1)N2,CHEMBL80515,8.14
9
+ COC(=O)C1C(c2cccc(I)c2)CC2CCC1N2C,CHEMBL67387,8.01
10
+ CCc1cc(C2C(c3ccc(C)cc3)CC3CCC2N3C)on1,CHEMBL317904,6.24
11
+ Cc1ccc(C2CC3CCC(C2c2cc(C(C)C)no2)N3C)cc1,CHEMBL322400,5.41
12
+ Cc1ccc(C2CC3CCC(C2c2cc(C)no2)N3C)cc1,CHEMBL103228,6.46
13
+ CC(C)c1cc(C2C(c3ccc(Cl)cc3)CC3CCC2N3C)on1,CHEMBL321780,6.68
14
+ Cc1ccc(C2CC3CCC(C2c2ccno2)N3C)cc1,CHEMBL317905,7.42
15
+ CN1C2CCC1C(c1ccno1)C(c1ccc(Cl)cc1)C2,CHEMBL103523,8.09
16
+ Cc1cc(C2C(c3ccc(Cl)cc3)CC3CCC2N3C)on1,CHEMBL316528,7.28
17
+ CCc1cc(C2C(c3ccc(Cl)cc3)CC3CCC2N3C)on1,CHEMBL103227,6.55
18
+ CN1C2CCC1C(c1cc(C(C)(C)C)no1)C(c1ccc(Cl)cc1)C2,CHEMBL100652,5.47
19
+ COC(=O)C1C(c2ccc(C)cc2)CC2CCC1N2CCF,CHEMBL105089,6.43
20
+ CN1C2CCC1C(C(=O)OCCF)C(c1ccc(I)cc1)C2,CHEMBL318961,8.59
21
+ O=C(OCCF)C1C(c2ccc(I)cc2)CC2CCC1N2CCCF,CHEMBL317444,7.88
22
+ Cc1ccc(C2CC3CCC(C2C(=O)OCCCF)N3C)cc1,CHEMBL430504,6.49
23
+ CN1C2CCC1C(C(=O)OCCCF)C(c1ccc(I)cc1)C2,CHEMBL105693,8.78
24
+ COC(=O)C1C(c2ccc(Br)cc2)CC2CCC1N2CCCF,CHEMBL433159,7.44
25
+ COC(=O)C1C2CCC(CC1c1ccc(I)cc1)N2,CHEMBL14613,10.215
26
+ COC(=O)C1C(c2ccc(C)cc2)CC2CCC1N2CCCF,CHEMBL319052,5.88
27
+ COC(=O)[C@@H]1C2CCC(C[C@@H]1c1ccc(C)cc1)N2C/C=C/I,CHEMBL2113648,6.3
28
+ C=Cc1ccc(C2CC3CCC(N3)C2C(=O)CC)cc1,CHEMBL85492,9.49
29
+ CCC(=O)C1C(c2ccc(C(C)C)cc2)CC2CCC1N2C,CHEMBL278122,7.443
30
+ C=C(C)c1ccc(C2CC3CCC(N3)C2C(=O)CC)cc1,CHEMBL85877,9.96
31
+ CCC(=O)C1C2CCC(CC1c1ccc(C)cc1)N2,CHEMBL87983,7.72
32
+ CCC(=O)C1C(c2ccc(C(CC)CC)cc2)CC2CCC1N2C,CHEMBL314919,6.27
33
+ C=C(C)c1ccc(C2CC3CCC(C2C(=O)CC)N3C)cc1,CHEMBL82807,9.09
34
+ C=Cc1ccc(C2CC3CCC(C2C(=O)CC)N3C)cc1,CHEMBL314361,8.49
35
+ CCC(=O)C1C2CCC(CC1c1ccc(C(CC)CC)cc1)N2,CHEMBL87678,6.82
36
+ CCC(=O)C1C(c2ccc(C3CCCCC3)cc2)CC2CCC1N2C,CHEMBL87739,7.01
37
+ CCC(=O)C1C2CCC(CC1c1ccc(C(C)C)cc1)N2,CHEMBL85256,8.28
38
+ O[C@H]1CCCC[C@@H]1N1C2CCC1CC(c1ccccc1)C2,CHEMBL338411,5.47
39
+ Cc1ccc(C2CC3CCC(C2c2cc(C(C)(C)C)no2)N3C)cc1,CHEMBL317909,4.59
40
+ COC(=O)C1C(c2cccc(-c3ccco3)c2)CC2CCC1N2C,CHEMBL303494,7.38
41
+ COC(=O)C1C2CCC(CC1c1cccc(I)c1)N2,CHEMBL66068,8.87
42
+ COC(=O)C1C(c2cccc(-c3ccccc3)c2)CC2CCC1N2C,CHEMBL294733,7.45
43
+ COC(=O)C1C(c2cccc(-c3ccsc3)c2)CC2CCC1N2C,CHEMBL303232,7.08
44
+ CC(=O)C1C(c2ccc(C)cc2)CC2CCC1N2C,CHEMBL23141,6.91
45
+ CC(=O)C1C(c2ccc(F)cc2)CC2CCC1N2C,CHEMBL22665,6.07
46
+ CCC(=O)C1C(c2ccc(-c3ccccc3)cc2)CC2CCC1N2C,CHEMBL22518,8.37
47
+ CCC(=O)C1C(c2ccccc2)CC2CCC1N2C,CHEMBL23875,6.0
48
+ CCC(=O)C1C(c2ccc(C(C)(C)C)cc2)CC2CCC1N2C,CHEMBL22377,5.75
49
+ CC(=O)C1C(c2ccccc2)CC2CCC1N2C,CHEMBL416007,5.87
50
+ CCc1ccc(C2CC3CCC(C2C(C)=O)N3C)cc1,CHEMBL23974,7.11
51
+ CCC(=O)C1C(c2ccc(F)cc2)CC2CCC1N2C,CHEMBL23655,6.2
52
+ COC(=O)C1C(c2ccc(I)cc2)CC2CCC1N2CCF,CHEMBL358006,9.85
53
+ COC(=O)C1C(c2ccc(I)cc2)CC2CCC1N2CCCI,CHEMBL153200,8.05
54
+ COC(=O)C1C(c2ccc(I)cc2)CC2CCC1N2CC1CC1,CHEMBL150084,8.89
55
+ COC(=O)C1C(c2ccc(I)cc2)CC2CCC1N2CC(F)F,CHEMBL356166,8.02
56
+ COC(=O)C1C(c2ccc(I)cc2)CC2CCC1N2CCCO,CHEMBL357300,8.6
57
+ COC(=O)C1C(c2ccc(I)cc2)CC2CCC1N2CCCCl,CHEMBL345553,9.49
58
+ CC(C)OC(=O)C1C(c2ccc(I)cc2)CC2CCC1N2CCF,CHEMBL346872,7.66
59
+ COC(=O)C1C(c2ccc(I)cc2)CC2CCC1N2CC(OC)OC,CHEMBL153361,8.77
60
+ COC(=O)CN1C2CCC1C(C(=O)OC)C(c1ccc(I)cc1)C2,CHEMBL149801,9.09
61
+ CC(C)OC(=O)C1C(c2ccc(I)cc2)CC2CCC1N2CCCF,CHEMBL345760,7.31
62
+ COC(=O)[C@@H]1C2CCC(C[C@@H]1c1ccc(I)cc1)N2CC(=O)N(C)C,CHEMBL2112890,8.19
63
+ COC(=O)C1C(c2ccc(I)cc2)CC2CCC1N2CCCBr,CHEMBL356652,9.46
64
+ C[C@H](CF)OC(=O)C1C(c2ccc(Cl)cc2)CC2CCC1N2C,CHEMBL2112915,6.88
65
+ C[C@@H](CF)OC(=O)C1C(c2ccc(Cl)cc2)CC2CCC1N2C,CHEMBL2112916,6.86
66
+ Fc1ccc(C2C3CCC(C[C@H]2c2ccc(F)cc2)N3)cc1,CHEMBL325330,7.39
67
+ CCC(=O)C1C2CCC(CC1c1cccc(I)c1)N2,CHEMBL171565,7.8
68
+ CCC(=O)C1C2CCC(CC1c1ccc(/C=C/I)cc1)N2,CHEMBL352913,9.21
69
+ Cc1ccc(C2CC3CCC(C2C(=O)CC/C=C/I)N3C)cc1,CHEMBL169320,7.29
70
+ CCC(=O)C1C(c2cccc(I)c2)CC2CCC1N2C,CHEMBL168305,7.71
71
+ CCC(=O)C1C2CCC(CC1c1ccc(I)c(Cl)c1)N2,CHEMBL169117,8.97
72
+ CCC(=O)C1C2CCC(CC1c1ccc(/C=C(\C)I)cc1)N2,CHEMBL423275,9.24
73
+ CCC(=O)C1C(c2ccc(I)cc2)CC2CCC1N2C,CHEMBL352698,8.73
74
+ CCC(=O)C1C2CCC(CC1c1ccc(I)c(F)c1)N2,CHEMBL355734,8.14
75
+ CCC(=O)C1C2CCC(CC1c1ccc(/C=C(/C)I)cc1)N2,CHEMBL355259,9.89
76
+ CCC(=O)C1C2CCC(CC1c1ccc(I)cc1)N2,CHEMBL354725,8.61
77
+ COC(=O)C1C(c2ccc(-c3cscc3Br)cc2)CC2CCC1N2C,CHEMBL433560,8.4
78
+ COC(=O)C1C(c2ccc(-c3ccc(I)s3)cc2)CC2CCC1N2C,CHEMBL178773,8.34
79
+ COC(=O)C1C(c2ccc(-c3ccc(N)s3)cc2)CC2CCC1N2C,CHEMBL181613,7.19
80
+ COC(=O)C1C(c2ccc(-c3ccsc3)cc2)CC2CCC1N2C,CHEMBL435287,10.77
81
+ COC(=O)C1C2CCC(CC1c1ccc(-c3cccs3)cc1)N2,CHEMBL181557,9.96
82
+ COC(=O)C1C(c2ccc(-c3cccs3)cc2)CC2CCC1N2C,CHEMBL181609,9.82
83
+ COC(=O)C1C2CCC(CC1c1ccc(-c3ccsc3)cc1)N2,CHEMBL179498,9.64
84
+ COC(=O)C1C(c2ccc(-c3ccc(Br)s3)cc2)CC2CCC1N2C,CHEMBL180918,9.42
85
+ COC(=O)C1C(c2ccc(-c3ccc(Cl)s3)cc2)CC2CCC1N2C,CHEMBL369098,9.19
86
+ COC(=O)C1C2CCC(CC1c1ccc(C)c(F)c1)N2,CHEMBL365738,7.62
87
+ COC(=O)C1C2CCC(CC1c1ccc(F)c(C)c1)N2,CHEMBL195738,7.77
88
+ COC(=O)C1C2CCC(CC1c1cccc(F)c1)N2,CHEMBL192924,7.57
89
+ COC(=O)C1C2CCC(CC1c1ccc(F)c(F)c1)N2,CHEMBL366159,7.26
90
+ COC(=O)C1C2CCC(CC1c1cc(F)cc(F)c1)N2,CHEMBL371607,7.86
91
+ CN1C2CCC1C(C(=O)OCCCF)C(c1ccc(Br)cc1)C2,CHEMBL365649,8.54
92
+ O=C(OCCCF)C1C2CCC(CC1c1ccc(Br)cc1)N2,CHEMBL184807,9.52
93
+ CN1C2CCC1C(C(=O)OCCF)C(c1ccc(Br)cc1)C2,CHEMBL185608,8.29
94
+ O=C(OCCF)C1C2CCC(CC1c1ccc(Br)cc1)N2,CHEMBL186119,9.62
95
+ O=C(OCCF)C1C2CCC(CC1c1ccc(I)cc1)N2,CHEMBL186306,9.74
96
+ O=C(OCCF)C1C(c2ccc(Br)cc2)CC2CCC1N2CCCF,CHEMBL184123,7.74
97
+ CN1C2CCC1C(C(=O)NCCF)C(c1ccc(Br)cc1)C2,CHEMBL365413,7.41
98
+ CN1C2CCC1C(C(=O)NCCF)C(c1ccc(I)cc1)C2,CHEMBL183259,7.51
99
+ COC(=O)C1C2CCC(CC1c1ccc(Br)cc1)N2,CHEMBL365623,9.02
100
+ COC(=O)C1C2CCC(CC1c1ccc(-c3ccco3)cc1)N2,CHEMBL200044,9.82
101
+ COC(=O)C1C(c2ccc(-c3ccoc3)cc2)CC2CCC1N2C,CHEMBL200698,9.46
102
+ COC(=O)C1C(c2ccc(-c3ccco3)cc2)CC2CCC1N2C,CHEMBL199704,8.95
103
+ COC(=O)C1C(c2ccc(-c3nccs3)cc2)CC2CCC1N2C,CHEMBL382943,8.78
104
+ COC(=O)C1C(c2ccc(-c3cccnc3)cc2)CC2CCC1N2C,CHEMBL199634,8.45
105
+ COC(=O)C1C2CCC(CC1c1ccc(-c3nccs3)cc1)N2,CHEMBL381418,8.29
106
+ COC(=O)C1C(c2ccc(-c3cnccn3)cc2)CC2CCC1N2C,CHEMBL383572,7.95
107
+ COC(=O)C1C(c2ccc(-c3cncnc3)cc2)CC2CCC1N2C,CHEMBL372121,7.48
108
+ COC(=O)C1C(c2ccc(-c3ccccn3)cc2)CC2CCC1N2C,CHEMBL199407,6.79
109
+ COC(=O)[C@@H]1C2CCC(C[C@@H]1c1ccc(Br)cc1)N2C,CHEMBL218082,8.39
lynxkite-app/web/src/workspace/nodes/NodeWithImage.tsx CHANGED
@@ -3,7 +3,7 @@ import { NodeWithParams } from "./NodeWithParams";
3
 
4
  const NodeWithImage = (props: any) => {
5
  return (
6
- <NodeWithParams {...props}>
7
  {props.data.display && <img src={props.data.display} alt="Node Display" />}
8
  </NodeWithParams>
9
  );
 
3
 
4
  const NodeWithImage = (props: any) => {
5
  return (
6
+ <NodeWithParams collapsed {...props}>
7
  {props.data.display && <img src={props.data.display} alt="Node Display" />}
8
  </NodeWithParams>
9
  );
lynxkite-core/src/lynxkite/core/ops.py CHANGED
@@ -129,7 +129,7 @@ class Result:
129
  `input_metadata` is a list of JSON objects describing each input.
130
  """
131
 
132
- output: typing.Any = None
133
  display: ReadOnlyJSON | None = None
134
  error: str | None = None
135
  input_metadata: ReadOnlyJSON | None = None
@@ -187,7 +187,6 @@ class Op(BaseConfig):
187
  res = self.func(*inputs, **params)
188
  if not isinstance(res, Result):
189
  # Automatically wrap the result in a Result object, if it isn't already.
190
- res = Result(output=res)
191
  if self.type in [
192
  "visualization",
193
  "table_view",
@@ -195,9 +194,10 @@ class Op(BaseConfig):
195
  "image",
196
  "molecule",
197
  ]:
198
- # If the operation is some kind of visualization, we use the output as the
199
- # value to display by default.
200
- res.display = res.output
 
201
  return res
202
 
203
  def get_input(self, name: str):
@@ -237,6 +237,10 @@ def op(
237
 
238
  def decorator(func):
239
  sig = inspect.signature(func)
 
 
 
 
240
  if slow:
241
  func = mem.cache(func)
242
  func = _global_slow(func)
@@ -256,10 +260,6 @@ def op(
256
  _outputs = [Output(name=name, type=None) for name in outputs]
257
  else:
258
  _outputs = [Output(name="output", type=None)] if view == "basic" else []
259
- _view = view
260
- if view == "matplotlib":
261
- _view = "image"
262
- func = matplotlib_to_image(func)
263
  op = Op(
264
  func=func,
265
  name=name,
 
129
  `input_metadata` is a list of JSON objects describing each input.
130
  """
131
 
132
+ output: typing.Any | None = None
133
  display: ReadOnlyJSON | None = None
134
  error: str | None = None
135
  input_metadata: ReadOnlyJSON | None = None
 
187
  res = self.func(*inputs, **params)
188
  if not isinstance(res, Result):
189
  # Automatically wrap the result in a Result object, if it isn't already.
 
190
  if self.type in [
191
  "visualization",
192
  "table_view",
 
194
  "image",
195
  "molecule",
196
  ]:
197
+ # If the operation is a visualization, we use the returned value for display.
198
+ res = Result(display=res)
199
+ else:
200
+ res = Result(output=res)
201
  return res
202
 
203
  def get_input(self, name: str):
 
237
 
238
  def decorator(func):
239
  sig = inspect.signature(func)
240
+ _view = view
241
+ if view == "matplotlib":
242
+ _view = "image"
243
+ func = matplotlib_to_image(func)
244
  if slow:
245
  func = mem.cache(func)
246
  func = _global_slow(func)
 
260
  _outputs = [Output(name=name, type=None) for name in outputs]
261
  else:
262
  _outputs = [Output(name="output", type=None)] if view == "basic" else []
 
 
 
 
263
  op = Op(
264
  func=func,
265
  name=name,
lynxkite-core/src/lynxkite/core/workspace.py CHANGED
@@ -65,10 +65,14 @@ class WorkspaceNode(BaseConfig):
65
  self.data.status = NodeStatus.done
66
  if hasattr(self, "_crdt"):
67
  with self._crdt.doc.transaction():
68
- self._crdt["data"]["display"] = self.data.display
69
- self._crdt["data"]["input_metadata"] = self.data.input_metadata
70
- self._crdt["data"]["error"] = self.data.error
71
- self._crdt["data"]["status"] = NodeStatus.done
 
 
 
 
72
 
73
  def publish_error(self, error: Exception | str | None):
74
  """Can be called with None to clear the error state."""
@@ -176,7 +180,6 @@ class Workspace(BaseConfig):
176
  # If the node is connected to a CRDT, update that too.
177
  if hasattr(node, "_crdt"):
178
  node._crdt["data"]["meta"] = op.model_dump()
179
- print("set metadata to", op)
180
  if node.type != op.type:
181
  node.type = op.type
182
  if hasattr(node, "_crdt"):
 
65
  self.data.status = NodeStatus.done
66
  if hasattr(self, "_crdt"):
67
  with self._crdt.doc.transaction():
68
+ try:
69
+ self._crdt["data"]["status"] = NodeStatus.done
70
+ self._crdt["data"]["display"] = self.data.display
71
+ self._crdt["data"]["input_metadata"] = self.data.input_metadata
72
+ self._crdt["data"]["error"] = self.data.error
73
+ except Exception as e:
74
+ self._crdt["data"]["error"] = str(e)
75
+ raise e
76
 
77
  def publish_error(self, error: Exception | str | None):
78
  """Can be called with None to clear the error state."""
 
180
  # If the node is connected to a CRDT, update that too.
181
  if hasattr(node, "_crdt"):
182
  node._crdt["data"]["meta"] = op.model_dump()
 
183
  if node.type != op.type:
184
  node.type = op.type
185
  if hasattr(node, "_crdt"):
lynxkite-core/tests/test_ops.py CHANGED
@@ -104,4 +104,4 @@ def test_visualization_operations_display_is_populated_automatically():
104
 
105
  result = ops.CATALOGS["test"]["display_op"]()
106
  assert isinstance(result, ops.Result)
107
- assert result.output == result.display == {"display_value": 1}
 
104
 
105
  result = ops.CATALOGS["test"]["display_op"]()
106
  assert isinstance(result, ops.Result)
107
+ assert result.display == {"display_value": 1}
lynxkite-graph-analytics/src/lynxkite_graph_analytics/core.py CHANGED
@@ -222,6 +222,7 @@ async def _execute_node(node, ws, catalog, outputs):
222
  try:
223
  result = op(*inputs, **params)
224
  result.output = await await_if_needed(result.output)
 
225
  except Exception as e:
226
  if not os.environ.get("LYNXKITE_SUPPRESS_OP_ERRORS"):
227
  traceback.print_exc()
 
222
  try:
223
  result = op(*inputs, **params)
224
  result.output = await await_if_needed(result.output)
225
+ result.display = await await_if_needed(result.display)
226
  except Exception as e:
227
  if not os.environ.get("LYNXKITE_SUPPRESS_OP_ERRORS"):
228
  traceback.print_exc()