darabos committed on
Commit
63ea7d2
·
1 Parent(s): 814aca7

Some placeholders for NIMs.

Browse files
examples/Generative drug screening ADDED
@@ -0,0 +1,873 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "edges": [
3
+ {
4
+ "id": "Import file 2 Query GenMol 1",
5
+ "source": "Import file 2",
6
+ "sourceHandle": "output",
7
+ "target": "Query GenMol 1",
8
+ "targetHandle": "bundle"
9
+ },
10
+ {
11
+ "id": "Import file 1 MSA-search 1",
12
+ "source": "Import file 1",
13
+ "sourceHandle": "output",
14
+ "target": "MSA-search 1",
15
+ "targetHandle": "bundle"
16
+ },
17
+ {
18
+ "id": "Query GenMol 1 Query DiffDock 1",
19
+ "source": "Query GenMol 1",
20
+ "sourceHandle": "output",
21
+ "target": "Query DiffDock 1",
22
+ "targetHandle": "ligands"
23
+ },
24
+ {
25
+ "id": "Query DiffDock 1 View molecules 1",
26
+ "source": "Query DiffDock 1",
27
+ "sourceHandle": "output",
28
+ "target": "View molecules 1",
29
+ "targetHandle": "bundle"
30
+ },
31
+ {
32
+ "id": "MSA-search 1 Query OpenFold2 1",
33
+ "source": "MSA-search 1",
34
+ "sourceHandle": "output",
35
+ "target": "Query OpenFold2 1",
36
+ "targetHandle": "bundle"
37
+ },
38
+ {
39
+ "id": "Query OpenFold2 1 View molecules 3",
40
+ "source": "Query OpenFold2 1",
41
+ "sourceHandle": "output",
42
+ "target": "View molecules 3",
43
+ "targetHandle": "bundle"
44
+ },
45
+ {
46
+ "id": "Query OpenFold2 1 Query DiffDock 1",
47
+ "source": "Query OpenFold2 1",
48
+ "sourceHandle": "output",
49
+ "target": "Query DiffDock 1",
50
+ "targetHandle": "proteins"
51
+ }
52
+ ],
53
+ "env": "LynxKite Graph Analytics",
54
+ "nodes": [
55
+ {
56
+ "data": {
57
+ "__execution_delay": 0.0,
58
+ "collapsed": null,
59
+ "display": null,
60
+ "error": null,
61
+ "meta": {
62
+ "inputs": {},
63
+ "name": "Import file",
64
+ "outputs": {
65
+ "output": {
66
+ "name": "output",
67
+ "position": "right",
68
+ "type": {
69
+ "type": "None"
70
+ }
71
+ }
72
+ },
73
+ "params": {
74
+ "file_format": {
75
+ "default": "csv",
76
+ "groups": {
77
+ "csv": [
78
+ {
79
+ "default": "<from file>",
80
+ "name": "columns",
81
+ "type": {
82
+ "type": "<class 'str'>"
83
+ }
84
+ },
85
+ {
86
+ "default": "<auto>",
87
+ "name": "separator",
88
+ "type": {
89
+ "type": "<class 'str'>"
90
+ }
91
+ }
92
+ ],
93
+ "excel": [
94
+ {
95
+ "default": "Sheet1",
96
+ "name": "sheet_name",
97
+ "type": {
98
+ "type": "<class 'str'>"
99
+ }
100
+ }
101
+ ],
102
+ "json": [],
103
+ "parquet": []
104
+ },
105
+ "name": "file_format",
106
+ "selector": {
107
+ "default": "csv",
108
+ "name": "file_format",
109
+ "type": {
110
+ "enum": [
111
+ "csv",
112
+ "parquet",
113
+ "json",
114
+ "excel"
115
+ ]
116
+ }
117
+ },
118
+ "type": "group"
119
+ },
120
+ "file_path": {
121
+ "default": null,
122
+ "name": "file_path",
123
+ "type": {
124
+ "type": "<class 'str'>"
125
+ }
126
+ },
127
+ "table_name": {
128
+ "default": null,
129
+ "name": "table_name",
130
+ "type": {
131
+ "type": "<class 'str'>"
132
+ }
133
+ }
134
+ },
135
+ "type": "basic"
136
+ },
137
+ "params": {
138
+ "columns": "<from file>",
139
+ "file_format": "csv",
140
+ "file_path": "uploads/protein.csv",
141
+ "separator": "<auto>",
142
+ "table_name": ""
143
+ },
144
+ "status": "done",
145
+ "title": "Import file"
146
+ },
147
+ "dragHandle": ".bg-primary",
148
+ "height": 487.0,
149
+ "id": "Import file 1",
150
+ "position": {
151
+ "x": -755.0582906538923,
152
+ "y": 543.770372030674
153
+ },
154
+ "type": "basic",
155
+ "width": 439.0
156
+ },
157
+ {
158
+ "data": {
159
+ "__execution_delay": 0.0,
160
+ "collapsed": null,
161
+ "display": null,
162
+ "error": null,
163
+ "meta": {
164
+ "inputs": {},
165
+ "name": "Import file",
166
+ "outputs": {
167
+ "output": {
168
+ "name": "output",
169
+ "position": "right",
170
+ "type": {
171
+ "type": "None"
172
+ }
173
+ }
174
+ },
175
+ "params": {
176
+ "file_format": {
177
+ "default": "csv",
178
+ "groups": {
179
+ "csv": [
180
+ {
181
+ "default": "<from file>",
182
+ "name": "columns",
183
+ "type": {
184
+ "type": "<class 'str'>"
185
+ }
186
+ },
187
+ {
188
+ "default": "<auto>",
189
+ "name": "separator",
190
+ "type": {
191
+ "type": "<class 'str'>"
192
+ }
193
+ }
194
+ ],
195
+ "excel": [
196
+ {
197
+ "default": "Sheet1",
198
+ "name": "sheet_name",
199
+ "type": {
200
+ "type": "<class 'str'>"
201
+ }
202
+ }
203
+ ],
204
+ "json": [],
205
+ "parquet": []
206
+ },
207
+ "name": "file_format",
208
+ "selector": {
209
+ "default": "csv",
210
+ "name": "file_format",
211
+ "type": {
212
+ "enum": [
213
+ "csv",
214
+ "parquet",
215
+ "json",
216
+ "excel"
217
+ ]
218
+ }
219
+ },
220
+ "type": "group"
221
+ },
222
+ "file_path": {
223
+ "default": null,
224
+ "name": "file_path",
225
+ "type": {
226
+ "type": "<class 'str'>"
227
+ }
228
+ },
229
+ "table_name": {
230
+ "default": null,
231
+ "name": "table_name",
232
+ "type": {
233
+ "type": "<class 'str'>"
234
+ }
235
+ }
236
+ },
237
+ "type": "basic"
238
+ },
239
+ "params": {
240
+ "columns": "<from file>",
241
+ "file_format": "csv",
242
+ "file_path": "uploads/molecules.csv",
243
+ "separator": "<auto>",
244
+ "table_name": null
245
+ },
246
+ "status": "done",
247
+ "title": "Import file"
248
+ },
249
+ "dragHandle": ".bg-primary",
250
+ "height": 436.0,
251
+ "id": "Import file 2",
252
+ "position": {
253
+ "x": 62.887657256500006,
254
+ "y": 1380.6697994924546
255
+ },
256
+ "type": "basic",
257
+ "width": 311.0
258
+ },
259
+ {
260
+ "data": {
261
+ "display": null,
262
+ "error": null,
263
+ "meta": {
264
+ "inputs": {
265
+ "bundle": {
266
+ "name": "bundle",
267
+ "position": "left",
268
+ "type": {
269
+ "type": "<class 'lynxkite_graph_analytics.core.Bundle'>"
270
+ }
271
+ }
272
+ },
273
+ "name": "Query GenMol",
274
+ "outputs": {
275
+ "output": {
276
+ "name": "output",
277
+ "position": "right",
278
+ "type": {
279
+ "type": "None"
280
+ }
281
+ }
282
+ },
283
+ "params": {
284
+ "molecule_column": {
285
+ "default": null,
286
+ "name": "molecule_column",
287
+ "type": {
288
+ "type": "<class 'str'>"
289
+ }
290
+ },
291
+ "molecule_table": {
292
+ "default": null,
293
+ "name": "molecule_table",
294
+ "type": {
295
+ "type": "<class 'str'>"
296
+ }
297
+ },
298
+ "noise": {
299
+ "default": 0.2,
300
+ "name": "noise",
301
+ "type": {
302
+ "type": "<class 'float'>"
303
+ }
304
+ },
305
+ "num_molecules": {
306
+ "default": 5.0,
307
+ "name": "num_molecules",
308
+ "type": {
309
+ "type": "<class 'int'>"
310
+ }
311
+ },
312
+ "scoring": {
313
+ "default": "QED",
314
+ "name": "scoring",
315
+ "type": {
316
+ "type": "<class 'str'>"
317
+ }
318
+ },
319
+ "step_size": {
320
+ "default": 4.0,
321
+ "name": "step_size",
322
+ "type": {
323
+ "type": "<class 'int'>"
324
+ }
325
+ },
326
+ "temperature": {
327
+ "default": 1.0,
328
+ "name": "temperature",
329
+ "type": {
330
+ "type": "<class 'float'>"
331
+ }
332
+ }
333
+ },
334
+ "position": {
335
+ "x": 594.0,
336
+ "y": 633.0
337
+ },
338
+ "type": "basic"
339
+ },
340
+ "params": {
341
+ "molecule_column": null,
342
+ "molecule_table": null,
343
+ "noise": 0.2,
344
+ "num_molecules": 5.0,
345
+ "scoring": "QED",
346
+ "step_size": 4.0,
347
+ "temperature": 1.0
348
+ },
349
+ "status": "done",
350
+ "title": "Query GenMol"
351
+ },
352
+ "dragHandle": ".bg-primary",
353
+ "height": 601.0,
354
+ "id": "Query GenMol 1",
355
+ "position": {
356
+ "x": 663.3333333333335,
357
+ "y": 1283.3333333333335
358
+ },
359
+ "type": "basic",
360
+ "width": 358.0
361
+ },
362
+ {
363
+ "data": {
364
+ "collapsed": null,
365
+ "display": null,
366
+ "error": null,
367
+ "meta": {
368
+ "inputs": {
369
+ "bundle": {
370
+ "name": "bundle",
371
+ "position": "left",
372
+ "type": {
373
+ "type": "<class 'lynxkite_graph_analytics.core.Bundle'>"
374
+ }
375
+ }
376
+ },
377
+ "name": "MSA-search",
378
+ "outputs": {
379
+ "output": {
380
+ "name": "output",
381
+ "position": "right",
382
+ "type": {
383
+ "type": "None"
384
+ }
385
+ }
386
+ },
387
+ "params": {
388
+ "databases": {
389
+ "default": "[\"Uniref30_2302\", \"colabfold_envdb_202108\", \"PDB70_220313\"]",
390
+ "name": "databases",
391
+ "type": {
392
+ "type": "<class 'str'>"
393
+ }
394
+ },
395
+ "e_value": {
396
+ "default": 0.0001,
397
+ "name": "e_value",
398
+ "type": {
399
+ "type": "<class 'float'>"
400
+ }
401
+ },
402
+ "iterations": {
403
+ "default": 1.0,
404
+ "name": "iterations",
405
+ "type": {
406
+ "type": "<class 'int'>"
407
+ }
408
+ },
409
+ "output_alignment_formats": {
410
+ "default": [
411
+ "fasta",
412
+ "a3m"
413
+ ],
414
+ "name": "output_alignment_formats",
415
+ "type": {
416
+ "type": "list[lynxkite_bio.nims.AlignmentFormats]"
417
+ }
418
+ },
419
+ "protein_column": {
420
+ "default": null,
421
+ "name": "protein_column",
422
+ "type": {
423
+ "type": "<class 'str'>"
424
+ }
425
+ },
426
+ "protein_table": {
427
+ "default": null,
428
+ "name": "protein_table",
429
+ "type": {
430
+ "type": "<class 'str'>"
431
+ }
432
+ },
433
+ "search_type": {
434
+ "default": "ALPHAFOLD2",
435
+ "name": "search_type",
436
+ "type": {
437
+ "enum": [
438
+ "ALPHAFOLD2",
439
+ "ESM2"
440
+ ]
441
+ }
442
+ }
443
+ },
444
+ "position": {
445
+ "x": 576.0,
446
+ "y": 228.0
447
+ },
448
+ "type": "basic"
449
+ },
450
+ "params": {
451
+ "databases": "[\"Uniref30_2302\", \"colabfold_envdb_202108\", \"PDB70_220313\"]",
452
+ "e_value": 0.0001,
453
+ "iterations": 1.0,
454
+ "output_alignment_formats": [
455
+ "fasta",
456
+ "a3m"
457
+ ],
458
+ "protein_column": null,
459
+ "protein_table": null,
460
+ "search_type": "ALPHAFOLD2"
461
+ },
462
+ "status": "done",
463
+ "title": "MSA-search"
464
+ },
465
+ "dragHandle": ".bg-primary",
466
+ "height": 550.0,
467
+ "id": "MSA-search 1",
468
+ "position": {
469
+ "x": -45.0,
470
+ "y": 570.0
471
+ },
472
+ "type": "basic",
473
+ "width": 531.0
474
+ },
475
+ {
476
+ "data": {
477
+ "display": null,
478
+ "error": null,
479
+ "meta": {
480
+ "inputs": {
481
+ "ligands": {
482
+ "name": "ligands",
483
+ "position": "left",
484
+ "type": {
485
+ "type": "<class 'lynxkite_graph_analytics.core.Bundle'>"
486
+ }
487
+ },
488
+ "proteins": {
489
+ "name": "proteins",
490
+ "position": "left",
491
+ "type": {
492
+ "type": "<class 'lynxkite_graph_analytics.core.Bundle'>"
493
+ }
494
+ }
495
+ },
496
+ "name": "Query DiffDock",
497
+ "outputs": {
498
+ "output": {
499
+ "name": "output",
500
+ "position": "right",
501
+ "type": {
502
+ "type": "None"
503
+ }
504
+ }
505
+ },
506
+ "params": {
507
+ "ligand_column": {
508
+ "default": null,
509
+ "name": "ligand_column",
510
+ "type": {
511
+ "type": "<class 'str'>"
512
+ }
513
+ },
514
+ "ligand_table": {
515
+ "default": null,
516
+ "name": "ligand_table",
517
+ "type": {
518
+ "type": "<class 'str'>"
519
+ }
520
+ },
521
+ "num_poses": {
522
+ "default": 10.0,
523
+ "name": "num_poses",
524
+ "type": {
525
+ "type": "<class 'int'>"
526
+ }
527
+ },
528
+ "num_steps": {
529
+ "default": 18.0,
530
+ "name": "num_steps",
531
+ "type": {
532
+ "type": "<class 'int'>"
533
+ }
534
+ },
535
+ "protein_column": {
536
+ "default": null,
537
+ "name": "protein_column",
538
+ "type": {
539
+ "type": "<class 'str'>"
540
+ }
541
+ },
542
+ "protein_table": {
543
+ "default": null,
544
+ "name": "protein_table",
545
+ "type": {
546
+ "type": "<class 'str'>"
547
+ }
548
+ },
549
+ "time_divisions": {
550
+ "default": 20.0,
551
+ "name": "time_divisions",
552
+ "type": {
553
+ "type": "<class 'int'>"
554
+ }
555
+ }
556
+ },
557
+ "position": {
558
+ "x": 852.0,
559
+ "y": 432.0
560
+ },
561
+ "type": "basic"
562
+ },
563
+ "params": {
564
+ "ligand_column": null,
565
+ "ligand_table": null,
566
+ "num_poses": 10.0,
567
+ "num_steps": 18.0,
568
+ "protein_column": null,
569
+ "protein_table": null,
570
+ "time_divisions": 20.0
571
+ },
572
+ "status": "done",
573
+ "title": "Query DiffDock"
574
+ },
575
+ "dragHandle": ".bg-primary",
576
+ "height": 635.0,
577
+ "id": "Query DiffDock 1",
578
+ "position": {
579
+ "x": 1543.010053920781,
580
+ "y": 1167.386382170133
581
+ },
582
+ "type": "basic",
583
+ "width": 408.0
584
+ },
585
+ {
586
+ "data": {
587
+ "display": {
588
+ "series": [
589
+ {
590
+ "data": [
591
+ {
592
+ "name": "Hydrogen",
593
+ "value": 2
594
+ },
595
+ {
596
+ "name": "Sulfur",
597
+ "value": 1
598
+ },
599
+ {
600
+ "name": "Oxygen",
601
+ "value": 4
602
+ }
603
+ ],
604
+ "itemStyle": {
605
+ "borderColor": "#fff",
606
+ "borderRadius": 10,
607
+ "borderWidth": 2
608
+ },
609
+ "radius": [
610
+ "40%",
611
+ "70%"
612
+ ],
613
+ "type": "pie"
614
+ }
615
+ ]
616
+ },
617
+ "error": null,
618
+ "meta": {
619
+ "inputs": {
620
+ "bundle": {
621
+ "name": "bundle",
622
+ "position": "left",
623
+ "type": {
624
+ "type": "<class 'lynxkite_graph_analytics.core.Bundle'>"
625
+ }
626
+ }
627
+ },
628
+ "name": "View molecules",
629
+ "outputs": {},
630
+ "params": {
631
+ "color": {
632
+ "default": "spectrum",
633
+ "name": "color",
634
+ "type": {
635
+ "type": "<class 'str'>"
636
+ }
637
+ },
638
+ "molecule_column": {
639
+ "default": null,
640
+ "name": "molecule_column",
641
+ "type": {
642
+ "type": "<class 'str'>"
643
+ }
644
+ },
645
+ "molecule_table": {
646
+ "default": null,
647
+ "name": "molecule_table",
648
+ "type": {
649
+ "type": "<class 'str'>"
650
+ }
651
+ }
652
+ },
653
+ "position": {
654
+ "x": 1009.0,
655
+ "y": 124.0
656
+ },
657
+ "type": "visualization"
658
+ },
659
+ "params": {
660
+ "color": "spectrum",
661
+ "molecule_column": null,
662
+ "molecule_table": null
663
+ },
664
+ "status": "done",
665
+ "title": "View molecules"
666
+ },
667
+ "dragHandle": ".bg-primary",
668
+ "height": 200.0,
669
+ "id": "View molecules 3",
670
+ "position": {
671
+ "x": 1545.0,
672
+ "y": 585.0
673
+ },
674
+ "type": "visualization",
675
+ "width": 200.0
676
+ },
677
+ {
678
+ "data": {
679
+ "display": {
680
+ "series": [
681
+ {
682
+ "data": [
683
+ {
684
+ "name": "Hydrogen",
685
+ "value": 2
686
+ },
687
+ {
688
+ "name": "Sulfur",
689
+ "value": 1
690
+ },
691
+ {
692
+ "name": "Oxygen",
693
+ "value": 4
694
+ }
695
+ ],
696
+ "itemStyle": {
697
+ "borderColor": "#fff",
698
+ "borderRadius": 10,
699
+ "borderWidth": 2
700
+ },
701
+ "radius": [
702
+ "40%",
703
+ "70%"
704
+ ],
705
+ "type": "pie"
706
+ }
707
+ ]
708
+ },
709
+ "error": null,
710
+ "meta": {
711
+ "inputs": {
712
+ "bundle": {
713
+ "name": "bundle",
714
+ "position": "left",
715
+ "type": {
716
+ "type": "<class 'lynxkite_graph_analytics.core.Bundle'>"
717
+ }
718
+ }
719
+ },
720
+ "name": "View molecules",
721
+ "outputs": {},
722
+ "params": {
723
+ "color": {
724
+ "default": "spectrum",
725
+ "name": "color",
726
+ "type": {
727
+ "type": "<class 'str'>"
728
+ }
729
+ },
730
+ "molecule_column": {
731
+ "default": null,
732
+ "name": "molecule_column",
733
+ "type": {
734
+ "type": "<class 'str'>"
735
+ }
736
+ },
737
+ "molecule_table": {
738
+ "default": null,
739
+ "name": "molecule_table",
740
+ "type": {
741
+ "type": "<class 'str'>"
742
+ }
743
+ }
744
+ },
745
+ "position": {
746
+ "x": 859.0,
747
+ "y": 225.0
748
+ },
749
+ "type": "visualization"
750
+ },
751
+ "params": {
752
+ "color": "spectrum",
753
+ "molecule_column": null,
754
+ "molecule_table": null
755
+ },
756
+ "status": "done",
757
+ "title": "View molecules"
758
+ },
759
+ "dragHandle": ".bg-primary",
760
+ "height": 200.0,
761
+ "id": "View molecules 1",
762
+ "position": {
763
+ "x": 2230.0,
764
+ "y": 1598.3333333333333
765
+ },
766
+ "type": "visualization",
767
+ "width": 200.0
768
+ },
769
+ {
770
+ "data": {
771
+ "display": null,
772
+ "error": null,
773
+ "meta": {
774
+ "inputs": {
775
+ "bundle": {
776
+ "name": "bundle",
777
+ "position": "left",
778
+ "type": {
779
+ "type": "<class 'lynxkite_graph_analytics.core.Bundle'>"
780
+ }
781
+ }
782
+ },
783
+ "name": "Query OpenFold2",
784
+ "outputs": {
785
+ "output": {
786
+ "name": "output",
787
+ "position": "right",
788
+ "type": {
789
+ "type": "None"
790
+ }
791
+ }
792
+ },
793
+ "params": {
794
+ "alignment_column": {
795
+ "default": null,
796
+ "name": "alignment_column",
797
+ "type": {
798
+ "type": "<class 'str'>"
799
+ }
800
+ },
801
+ "alignment_table": {
802
+ "default": null,
803
+ "name": "alignment_table",
804
+ "type": {
805
+ "type": "<class 'str'>"
806
+ }
807
+ },
808
+ "databases": {
809
+ "default": "[\"Uniref30_2302\", \"colabfold_envdb_202108\", \"PDB70_220313\"]",
810
+ "name": "databases",
811
+ "type": {
812
+ "type": "<class 'str'>"
813
+ }
814
+ },
815
+ "protein_column": {
816
+ "default": null,
817
+ "name": "protein_column",
818
+ "type": {
819
+ "type": "<class 'str'>"
820
+ }
821
+ },
822
+ "protein_table": {
823
+ "default": null,
824
+ "name": "protein_table",
825
+ "type": {
826
+ "type": "<class 'str'>"
827
+ }
828
+ },
829
+ "relaxed_prediction": {
830
+ "default": false,
831
+ "name": "relaxed_prediction",
832
+ "type": {
833
+ "type": "<class 'bool'>"
834
+ }
835
+ },
836
+ "use_templates": {
837
+ "default": false,
838
+ "name": "use_templates",
839
+ "type": {
840
+ "type": "<class 'bool'>"
841
+ }
842
+ }
843
+ },
844
+ "position": {
845
+ "x": 628.0,
846
+ "y": 184.0
847
+ },
848
+ "type": "basic"
849
+ },
850
+ "params": {
851
+ "alignment_column": null,
852
+ "alignment_table": null,
853
+ "databases": "[\"Uniref30_2302\", \"colabfold_envdb_202108\", \"PDB70_220313\"]",
854
+ "protein_column": null,
855
+ "protein_table": null,
856
+ "relaxed_prediction": false,
857
+ "use_templates": false
858
+ },
859
+ "status": "done",
860
+ "title": "Query OpenFold2"
861
+ },
862
+ "dragHandle": ".bg-primary",
863
+ "height": 653.0,
864
+ "id": "Query OpenFold2 1",
865
+ "position": {
866
+ "x": 750.0,
867
+ "y": 480.0
868
+ },
869
+ "type": "basic",
870
+ "width": 523.0
871
+ }
872
+ ]
873
+ }
lynxkite-bio/src/lynxkite_bio/__init__.py CHANGED
@@ -1,68 +1,4 @@
1
  """An expansion for `lynxkite-graph-analytics` that provides algorithms for biological applications."""
2
 
3
- from lynxkite_graph_analytics import Bundle, RelationDefinition
4
- from lynxkite.core import ops
5
- import joblib
6
- import numpy as np
7
- import pandas as pd
8
- import rdkit.Chem
9
- import rdkit.Chem.rdFingerprintGenerator
10
- import rdkit.Chem.Fingerprints.ClusterMols
11
- import scipy
12
-
13
- mem = joblib.Memory(".joblib-cache")
14
- ENV = "LynxKite Graph Analytics"
15
- op = ops.op_registration(ENV)
16
-
17
-
18
- @op("Parse SMILES")
19
- def parse_smiles(bundle: Bundle, *, table="df", smiles_column="SMILES", save_as="mols"):
20
- """Parse SMILES strings into RDKit molecules."""
21
- df = bundle.dfs[table]
22
- mols = [rdkit.Chem.MolFromSmiles(smiles) for smiles in df[smiles_column].dropna()]
23
- mols = [mol for mol in mols if mol is not None]
24
- bundle = bundle.copy()
25
- bundle.dfs[table] = df.assign(**{save_as: mols})
26
- return bundle
27
-
28
-
29
- def _get_similarity_matrix(mols):
30
- mfpgen = rdkit.Chem.rdFingerprintGenerator.GetMorganGenerator(radius=2, fpSize=2048)
31
- fps = [(0, mfpgen.GetFingerprint(mol)) for mol in mols]
32
- similarity_matrix = rdkit.Chem.Fingerprints.ClusterMols.GetDistanceMatrix(
33
- fps, metric=rdkit.Chem.DataStructs.TanimotoSimilarity, isSimilarity=1
34
- )
35
- return scipy.spatial.distance.squareform(similarity_matrix)
36
-
37
-
38
- @op("Graph from molecule similarity")
39
- def graph_from_similarity(
40
- bundle: Bundle, *, table="df", mols_column="mols", average_degree=10
41
- ):
42
- """Creates edges for pairs of molecules that are the most similar."""
43
- df = bundle.dfs[table]
44
- mols = df[mols_column]
45
- similarity_matrix = _get_similarity_matrix(mols)
46
- i_idx, j_idx = np.triu_indices_from(similarity_matrix, k=1)
47
- sim_values = similarity_matrix[i_idx, j_idx]
48
- N = int(average_degree * len(mols))
49
- top_n_idx = np.argsort(sim_values)[-N:]
50
- top_n_pairs = [(i_idx[k], j_idx[k], sim_values[k]) for k in top_n_idx]
51
- edges = pd.DataFrame(top_n_pairs, columns=["source", "target", "similarity"])
52
- nodes = df.copy()
53
- nodes.index.name = "id"
54
- bundle = Bundle(
55
- dfs={"edges": edges, "nodes": nodes},
56
- relations=[
57
- RelationDefinition(
58
- df="edges",
59
- source_column="source",
60
- target_column="target",
61
- source_table="nodes",
62
- target_table="nodes",
63
- source_key="id",
64
- target_key="id",
65
- )
66
- ],
67
- )
68
- return bundle
 
1
  """An expansion for `lynxkite-graph-analytics` that provides algorithms for biological applications."""
2
 
3
+ from . import nims # noqa (imported to trigger registration)
4
+ from . import rdkit # noqa (imported to trigger registration)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
lynxkite-bio/src/lynxkite_bio/nims.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Wrappers for BioNeMo NIMs."""
2
+
3
+ from enum import Enum
4
+ from lynxkite_graph_analytics import Bundle
5
+ from lynxkite.core import ops
6
+ import joblib
7
+ import os
8
+
9
# NIM endpoint URLs: the NIM_URLS environment variable split on commas,
# falling back to a single local endpoint when the variable is not set.
NIM_URLS = os.environ.get("NIM_URLS", "http://localhost:8000").split(",")

# joblib cache located at ".joblib-cache"; applied to operations below via @mem.cache.
mem = joblib.Memory(".joblib-cache")
# Name of the environment that the operations of this module are registered under.
ENV = "LynxKite Graph Analytics"
# Decorator factory used as @op("<operation name>") to register operations in ENV.
op = ops.op_registration(ENV)
14
+
15
+
16
class MSASearchTypes(Enum):
    """Choices for the ``search_type`` parameter of the "MSA-search" operation."""

    # The value of every member is the same string as its name.
    ALPHAFOLD2 = "ALPHAFOLD2"
    ESM2 = "ESM2"
19
+
20
+
21
class AlignmentFormats(Enum):
    """Alignment output formats selectable in the "MSA-search" operation.

    Member names are upper-case; the values are the lower-case format names.
    """

    FASTA = "fasta"
    A3M = "a3m"
    STOCKHOLM = "stockholm"
    CLUSTAL = "clustal"
    PDB = "pdb"
    PIR = "pir"
    MSF = "msf"
    TSV = "tsv"
30
+
31
+
32
@op("MSA-search")
@mem.cache
def msa_search(
    bundle: Bundle,
    *,
    protein_table: str,
    protein_column: str,
    e_value: float = 0.0001,
    iterations: int = 1,
    search_type: MSASearchTypes = MSASearchTypes.ALPHAFOLD2,
    output_alignment_formats: list[AlignmentFormats] = [
        AlignmentFormats.FASTA,
        AlignmentFormats.A3M,
    ],
    databases: str = '["Uniref30_2302", "colabfold_envdb_202108", "PDB70_220313"]',
):
    """Placeholder for the "MSA-search" operation.

    No search is performed yet: every keyword parameter is accepted but
    ignored, and the result is simply a copy of the input bundle.
    """
    # NOTE: the list default of `output_alignment_formats` is one shared object
    # across calls. That is harmless here because it is never mutated.
    return bundle.copy()
50
+
51
+
52
@op("Query OpenFold2")
@mem.cache
def query_openfold2(
    bundle: Bundle,
    *,
    protein_table: str,
    protein_column: str,
    alignment_table: str,
    alignment_column: str,
    use_templates: bool = False,
    relaxed_prediction: bool = False,
    databases: str = '["Uniref30_2302", "colabfold_envdb_202108", "PDB70_220313"]',
):
    """Placeholder for the "Query OpenFold2" operation.

    The keyword parameters are accepted but not used yet; the input bundle
    is copied and the copy is returned unchanged.
    """
    return bundle.copy()
67
+
68
+
69
@op("View molecules", view="visualization")
def view_molecules(
    bundle: Bundle,
    *,
    molecule_table: str,
    molecule_column: str,
    color="spectrum",
):
    """Placeholder for the "View molecules" visualization.

    The bundle and all parameters are ignored for now; a fixed pie-chart
    specification with three hard-coded slices is returned.
    """
    slices = [
        {"value": 2, "name": "Hydrogen"},
        {"value": 1, "name": "Sulfur"},
        {"value": 4, "name": "Oxygen"},
    ]
    slice_style = {
        "borderRadius": 10,
        "borderColor": "#fff",
        "borderWidth": 2,
    }
    pie = {
        "type": "pie",
        "radius": ["40%", "70%"],
        "itemStyle": slice_style,
        "data": slices,
    }
    return {"series": [pie]}
95
+
96
+
97
@op("Known drug")
def known_drug(*, drug_name: str):
    """Placeholder for the "Known drug" operation.

    `drug_name` is not used yet; a freshly constructed default `Bundle` is returned.
    """
    empty_bundle = Bundle()
    return empty_bundle
100
+
101
+
102
@op("Query GenMol")
@mem.cache
def query_genmol(
    bundle: Bundle,
    *,
    molecule_table: str,
    molecule_column: str,
    num_molecules: int = 5,
    temperature: float = 1.0,
    noise: float = 0.2,
    step_size: int = 4,
    scoring: str = "QED",
):
    """Placeholder for the "Query GenMol" operation.

    Nothing is generated yet: the keyword parameters are ignored and a copy
    of the input bundle is returned.
    """
    return bundle.copy()
117
+
118
+
119
@op("Query DiffDock")
@mem.cache
def query_diffdock(
    proteins: Bundle,
    ligands: Bundle,
    *,
    protein_table: str,
    protein_column: str,
    ligand_table: str,
    ligand_column: str,
    num_poses=10,
    time_divisions=20,
    num_steps=18,
):
    """Placeholder for the "Query DiffDock" operation.

    No docking is performed yet. `ligands` and all keyword parameters are
    ignored, and the `proteins` bundle is handed back as-is (not copied).
    """
    result = proteins
    return result
lynxkite-bio/src/lynxkite_bio/rdkit.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """An expansion for `lynxkite-graph-analytics` that provides algorithms for biological applications."""
2
+
3
+ from lynxkite_graph_analytics import Bundle, RelationDefinition
4
+ from lynxkite.core import ops
5
+ import joblib
6
+ import numpy as np
7
+ import pandas as pd
8
+ import rdkit.Chem
9
+ import rdkit.Chem.rdFingerprintGenerator
10
+ import rdkit.Chem.Fingerprints.ClusterMols
11
+ import scipy
12
+
13
# joblib cache located at ".joblib-cache" (not referenced by the operations visible in this file).
mem = joblib.Memory(".joblib-cache")
# Name of the environment that the operations of this module are registered under.
ENV = "LynxKite Graph Analytics"
# Decorator factory used as @op("<operation name>") to register operations in ENV.
op = ops.op_registration(ENV)
16
+
17
+
18
@op("Parse SMILES")
def parse_smiles(bundle: Bundle, *, table="df", smiles_column="SMILES", save_as="mols"):
    """Parse SMILES strings into RDKit molecules.

    The molecules are stored in a new column named `save_as` of the table
    `table`. Rows whose SMILES value is missing or cannot be parsed are
    dropped, so every remaining row is paired with its own molecule.
    The input bundle is not modified; an updated copy is returned.
    """
    df = bundle.dfs[table]
    # Parse row by row (instead of on a filtered list) so that each molecule
    # stays aligned with the row it came from.
    mols = [
        None if pd.isna(smiles) else rdkit.Chem.MolFromSmiles(smiles)
        for smiles in df[smiles_column]
    ]
    # MolFromSmiles yields None for strings it cannot parse; such rows are dropped
    # together with the rows that had no SMILES value at all.
    valid = np.array([mol is not None for mol in mols], dtype=bool)
    parsed = df.loc[valid].assign(
        **{save_as: [mol for mol in mols if mol is not None]}
    )
    bundle = bundle.copy()
    bundle.dfs[table] = parsed
    return bundle
27
+
28
+
29
def _get_similarity_matrix(mols):
    """Return the square matrix of pairwise Tanimoto similarities of `mols`.

    Morgan fingerprints (radius 2, 2048 bits) are computed for every molecule.
    The diagonal of the result is zero because `squareform` only fills in the
    off-diagonal entries from the condensed pairwise vector.
    """
    mfpgen = rdkit.Chem.rdFingerprintGenerator.GetMorganGenerator(radius=2, fpSize=2048)
    # GetDistanceMatrix expects tuples with the fingerprint in position 1.
    fps = [(0, mfpgen.GetFingerprint(mol)) for mol in mols]
    # isSimilarity=0 keeps the raw metric values. With isSimilarity=1 RDKit
    # converts each value to a distance (1 - similarity), which would make the
    # largest entries belong to the *least* similar pairs.
    similarities = rdkit.Chem.Fingerprints.ClusterMols.GetDistanceMatrix(
        fps, metric=rdkit.Chem.DataStructs.TanimotoSimilarity, isSimilarity=0
    )
    return scipy.spatial.distance.squareform(similarities)
36
+
37
+
38
@op("Graph from molecule similarity")
def graph_from_similarity(
    bundle: Bundle, *, table="df", mols_column="mols", average_degree=10
):
    """Creates edges for pairs of molecules that are the most similar.

    The `average_degree * len(mols)` pairs with the highest values in the
    pairwise matrix are kept (fewer if there are not that many pairs, none
    if the product is zero or negative). The result is a new bundle with an
    "edges" table (`source`, `target`, `similarity`) and a "nodes" table that
    is a copy of the input table with its index named "id". `source` and
    `target` hold index labels of the nodes table.
    """
    df = bundle.dfs[table]
    mols = df[mols_column]
    similarity_matrix = _get_similarity_matrix(mols)
    # Strict upper triangle: every unordered pair exactly once, no self-pairs.
    i_idx, j_idx = np.triu_indices_from(similarity_matrix, k=1)
    sim_values = similarity_matrix[i_idx, j_idx]
    n_edges = max(0, int(average_degree * len(mols)))
    ranked = np.argsort(sim_values)
    # A plain [-n_edges:] slice would select *every* pair when n_edges is 0.
    top = ranked[-n_edges:] if n_edges > 0 else ranked[:0]
    # Translate row positions to index labels so the edges match the "id" key
    # of the nodes table even when the index is not 0..n-1.
    node_ids = df.index.to_numpy()
    edges = pd.DataFrame(
        {
            "source": node_ids[i_idx[top]],
            "target": node_ids[j_idx[top]],
            "similarity": sim_values[top],
        }
    )
    nodes = df.copy()
    nodes.index.name = "id"
    bundle = Bundle(
        dfs={"edges": edges, "nodes": nodes},
        relations=[
            RelationDefinition(
                df="edges",
                source_column="source",
                target_column="target",
                source_table="nodes",
                target_table="nodes",
                source_key="id",
                target_key="id",
            )
        ],
    )
    return bundle