Spaces:

lynx-analytics
/

lynxkite

Running

App Files Files Community

abhik1368

darabos committed on Apr 28

Commit

9d40ea2

unverified ·

1 Parent(s): 11146c1

Update Cheminformatics use cases (#155)

Browse files

* Update Cheminformatics use cases

Add _cheminfo_tools.py with a Lipinski filter, "View mol image", and "View mol filter" with SMARTS/SMILES filtering and substructure highlighting.

* Delete binary files. Create "Cheminformatics" folder for examples.

* MACCSkeys was undefined. rows was unused.

* Give ops human-readable names.

* Example workspace without images.

* Collapse parameters on "image" type boxes.

* Allow slow visualization boxes too.

* Simpler gallery drawing.

* Output for visualizations is now left empty.

---------

Co-authored-by: Daniel Darabos <[email protected]>

Files changed (10) hide show

examples/Cheminformatics/Example workspace.lynxkite.json +986 -0
examples/Cheminformatics/cheminfo_tools.py +305 -0
examples/Image table.lynxkite.json +13 -13
examples/draw_molecules.py +2 -0
examples/uploads/CHEMBL313_sel.csv +109 -0
lynxkite-app/web/src/workspace/nodes/NodeWithImage.tsx +1 -1
lynxkite-core/src/lynxkite/core/ops.py +9 -9
lynxkite-core/src/lynxkite/core/workspace.py +8 -5
lynxkite-core/tests/test_ops.py +1 -1
lynxkite-graph-analytics/src/lynxkite_graph_analytics/core.py +1 -0

examples/Cheminformatics/Example workspace.lynxkite.json ADDED Viewed

	@@ -0,0 +1,986 @@

+{
+  "edges": [
+    {
+      "id": "Import CSV 1 Draw molecules 1",
+      "source": "Import CSV 1",
+      "sourceHandle": "output",
+      "target": "Draw molecules 1",
+      "targetHandle": "df"
+    },
+    {
+      "id": "Draw molecules 1 View tables 1",
+      "source": "Draw molecules 1",
+      "sourceHandle": "output",
+      "target": "View tables 1",
+      "targetHandle": "bundle"
+    },
+    {
+      "id": "Import file 1 View mol filter 1",
+      "source": "Import file 1",
+      "sourceHandle": "output",
+      "target": "View mol filter 1",
+      "targetHandle": "bundle"
+    },
+    {
+      "id": "Draw molecules 2 View tables 2",
+      "source": "Draw molecules 2",
+      "sourceHandle": "output",
+      "target": "View tables 2",
+      "targetHandle": "bundle"
+    },
+    {
+      "id": "Import file 1 Train QSAR model 1",
+      "source": "Import file 1",
+      "sourceHandle": "output",
+      "target": "Train QSAR model 1",
+      "targetHandle": "bundle"
+    },
+    {
+      "id": "Train QSAR model 1 View tables 3",
+      "source": "Train QSAR model 1",
+      "sourceHandle": "output",
+      "target": "View tables 3",
+      "targetHandle": "bundle"
+    },
+    {
+      "id": "Import file 1 Lipinski filter 1",
+      "source": "Import file 1",
+      "sourceHandle": "output",
+      "target": "Lipinski filter 1",
+      "targetHandle": "bundle"
+    },
+    {
+      "id": "Lipinski filter 1 Draw molecules 2",
+      "source": "Lipinski filter 1",
+      "sourceHandle": "output",
+      "target": "Draw molecules 2",
+      "targetHandle": "df"
+    },
+    {
+      "id": "Import file 1 View mol image 1",
+      "source": "Import file 1",
+      "sourceHandle": "output",
+      "target": "View mol image 1",
+      "targetHandle": "bundle"
+    }
+  ],
+  "env": "LynxKite Graph Analytics",
+  "nodes": [
+    {
+      "data": {
+        "__execution_delay": 0.0,
+        "collapsed": false,
+        "display": null,
+        "error": null,
+        "input_metadata": [],
+        "meta": {
+          "color": "orange",
+          "inputs": [],
+          "name": "Import CSV",
+          "outputs": [
+            {
+              "name": "output",
+              "position": "right",
+              "type": {
+                "type": "None"
+              }
+            }
+          ],
+          "params": [
+            {
+              "default": null,
+              "name": "filename",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            },
+            {
+              "default": "<from file>",
+              "name": "columns",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            },
+            {
+              "default": "<auto>",
+              "name": "separator",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            }
+          ],
+          "type": "basic"
+        },
+        "params": {
+          "columns": "<from file>",
+          "filename": "uploads/CHEMBL313_sel.csv",
+          "separator": "<auto>"
+        },
+        "status": "done",
+        "title": "Import CSV"
+      },
+      "dragHandle": ".bg-primary",
+      "height": 206.0,
+      "id": "Import CSV 1",
+      "position": {
+        "x": -1645.1133255451734,
+        "y": -601.1570975227908
+      },
+      "type": "basic",
+      "width": 314.0
+    },
+    {
+      "data": {
+        "__execution_delay": null,
+        "collapsed": false,
+        "error": null,
+        "input_metadata": [
+          {
+            "dataframes": {
+              "df": {
+                "columns": [
+                  "Name",
+                  "SMILES",
+                  "image",
+                  "pIC50"
+                ]
+              }
+            },
+            "other": {},
+            "relations": []
+          }
+        ],
+        "meta": {
+          "color": "orange",
+          "inputs": [
+            {
+              "name": "bundle",
+              "position": "left",
+              "type": {
+                "type": "<class 'lynxkite_graph_analytics.core.Bundle'>"
+              }
+            }
+          ],
+          "name": "View tables",
+          "outputs": [],
+          "params": [
+            {
+              "default": 100,
+              "name": "limit",
+              "type": {
+                "type": "<class 'int'>"
+              }
+            }
+          ],
+          "type": "table_view"
+        },
+        "params": {},
+        "status": "done",
+        "title": "View tables"
+      },
+      "dragHandle": ".bg-primary",
+      "height": 296.0,
+      "id": "View tables 1",
+      "position": {
+        "x": -870.3210345250492,
+        "y": -671.6118451917321
+      },
+      "type": "table_view",
+      "width": 682.0
+    },
+    {
+      "data": {
+        "__execution_delay": 0.0,
+        "collapsed": null,
+        "display": null,
+        "error": null,
+        "input_metadata": [],
+        "meta": {
+          "color": "orange",
+          "inputs": [],
+          "name": "Import file",
+          "outputs": [
+            {
+              "name": "output",
+              "position": "right",
+              "type": {
+                "type": "None"
+              }
+            }
+          ],
+          "params": [
+            {
+              "default": null,
+              "name": "file_path",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            },
+            {
+              "default": null,
+              "name": "table_name",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            },
+            {
+              "default": null,
+              "name": "file_format",
+              "type": {
+                "enum": [
+                  "csv",
+                  "parquet",
+                  "json",
+                  "excel"
+                ]
+              }
+            },
+            {
+              "default": "csv",
+              "groups": {
+                "csv": [
+                  {
+                    "default": "<from file>",
+                    "name": "columns",
+                    "type": {
+                      "type": "<class 'str'>"
+                    }
+                  },
+                  {
+                    "default": "<auto>",
+                    "name": "separator",
+                    "type": {
+                      "type": "<class 'str'>"
+                    }
+                  }
+                ],
+                "excel": [
+                  {
+                    "default": "Sheet1",
+                    "name": "sheet_name",
+                    "type": {
+                      "type": "<class 'str'>"
+                    }
+                  }
+                ],
+                "json": [],
+                "parquet": []
+              },
+              "name": "file_format_group",
+              "selector": {
+                "default": "csv",
+                "name": "file_format",
+                "type": {
+                  "enum": [
+                    "csv",
+                    "parquet",
+                    "json",
+                    "excel"
+                  ]
+                }
+              },
+              "type": "group"
+            }
+          ],
+          "type": "basic"
+        },
+        "params": {
+          "columns": "<from file>",
+          "file_format": "csv",
+          "file_format_group": "csv",
+          "file_path": "uploads/CHEMBL313_sel.csv",
+          "table_name": "data"
+        },
+        "status": "done",
+        "title": "Import file"
+      },
+      "dragHandle": ".bg-primary",
+      "height": 331.0,
+      "id": "Import file 1",
+      "position": {
+        "x": -1690.536661950572,
+        "y": -192.47882875357882
+      },
+      "type": "basic",
+      "width": 326.0
+    },
+    {
+      "data": {
+        "__execution_delay": 0.0,
+        "collapsed": null,
+        "display": null,
+        "error": null,
+        "input_metadata": [
+          {}
+        ],
+        "meta": {
+          "color": "orange",
+          "inputs": [
+            {
+              "name": "df",
+              "position": "left",
+              "type": {
+                "type": "<class 'pandas.core.frame.DataFrame'>"
+              }
+            }
+          ],
+          "name": "Draw molecules",
+          "outputs": [
+            {
+              "name": "output",
+              "position": "right",
+              "type": {
+                "type": "None"
+              }
+            }
+          ],
+          "params": [
+            {
+              "default": null,
+              "name": "smiles_column",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            },
+            {
+              "default": "image",
+              "name": "image_column",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            }
+          ],
+          "type": "basic"
+        },
+        "params": {
+          "image_column": "image",
+          "smiles_column": "SMILES"
+        },
+        "status": "done",
+        "title": "Draw molecules"
+      },
+      "dragHandle": ".bg-primary",
+      "height": 225.0,
+      "id": "Draw molecules 1",
+      "position": {
+        "x": -1168.2624512141447,
+        "y": -616.8610881973451
+      },
+      "type": "basic",
+      "width": 234.0
+    },
+    {
+      "data": {
+        "__execution_delay": 0.0,
+        "collapsed": null,
+        "error": null,
+        "input_metadata": [
+          {
+            "dataframes": {
+              "data": {
+                "columns": [
+                  "Name",
+                  "SMILES",
+                  "mol",
+                  "pIC50"
+                ]
+              }
+            },
+            "other": {},
+            "relations": []
+          }
+        ],
+        "meta": {
+          "color": "orange",
+          "inputs": [
+            {
+              "name": "bundle",
+              "position": "left",
+              "type": {
+                "type": "<class 'inspect._empty'>"
+              }
+            }
+          ],
+          "name": "View mol filter",
+          "outputs": [],
+          "params": [
+            {
+              "default": null,
+              "name": "table_name",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            },
+            {
+              "default": null,
+              "name": "SMILES_Column",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            },
+            {
+              "default": null,
+              "name": "mols_per_row",
+              "type": {
+                "type": "<class 'int'>"
+              }
+            },
+            {
+              "default": null,
+              "name": "filter_smarts",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            },
+            {
+              "default": null,
+              "name": "filter_smiles",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            },
+            {
+              "default": true,
+              "name": "highlight",
+              "type": {
+                "type": "<class 'bool'>"
+              }
+            }
+          ],
+          "type": "image"
+        },
+        "params": {
+          "SMILES_Column": "SMILES",
+          "filter_smarts": "",
+          "filter_smiles": "CN1C2CC(CC1CC2)c1ccccc1",
+          "highlight": true,
+          "mols_per_row": "4",
+          "table_name": "data"
+        },
+        "status": "done",
+        "title": "View mol filter"
+      },
+      "dragHandle": ".bg-primary",
+      "height": 229.0,
+      "id": "View mol filter 1",
+      "position": {
+        "x": -796.5179679488858,
+        "y": -20.974048336481403
+      },
+      "type": "image",
+      "width": 416.0
+    },
+    {
+      "data": {
+        "__execution_delay": 0.0,
+        "collapsed": null,
+        "display": null,
+        "error": null,
+        "input_metadata": [
+          {}
+        ],
+        "meta": {
+          "color": "orange",
+          "inputs": [
+            {
+              "name": "df",
+              "position": "left",
+              "type": {
+                "type": "<class 'pandas.core.frame.DataFrame'>"
+              }
+            }
+          ],
+          "name": "Draw molecules",
+          "outputs": [
+            {
+              "name": "output",
+              "position": "right",
+              "type": {
+                "type": "None"
+              }
+            }
+          ],
+          "params": [
+            {
+              "default": null,
+              "name": "smiles_column",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            },
+            {
+              "default": "image",
+              "name": "image_column",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            }
+          ],
+          "type": "basic"
+        },
+        "params": {
+          "image_column": "image",
+          "smiles_column": "SMILES"
+        },
+        "status": "done",
+        "title": "Draw molecules"
+      },
+      "dragHandle": ".bg-primary",
+      "height": 184.0,
+      "id": "Draw molecules 2",
+      "position": {
+        "x": -280.6043043496203,
+        "y": 151.45994649399827
+      },
+      "type": "basic",
+      "width": 234.0
+    },
+    {
+      "data": {
+        "error": null,
+        "input_metadata": [
+          {
+            "dataframes": {
+              "df": {
+                "columns": [
+                  "HBA",
+                  "HBD",
+                  "MW",
+                  "Name",
+                  "SMILES",
+                  "image",
+                  "logP",
+                  "pIC50",
+                  "pass_lipinski"
+                ]
+              }
+            },
+            "other": {},
+            "relations": []
+          }
+        ],
+        "meta": {
+          "color": "orange",
+          "inputs": [
+            {
+              "name": "bundle",
+              "position": "left",
+              "type": {
+                "type": "<class 'lynxkite_graph_analytics.core.Bundle'>"
+              }
+            }
+          ],
+          "name": "View tables",
+          "outputs": [],
+          "params": [
+            {
+              "default": 100,
+              "name": "limit",
+              "type": {
+                "type": "<class 'int'>"
+              }
+            }
+          ],
+          "type": "table_view"
+        },
+        "params": {
+          "limit": 100.0
+        },
+        "status": "done",
+        "title": "View tables"
+      },
+      "dragHandle": ".bg-primary",
+      "height": 429.0,
+      "id": "View tables 2",
+      "position": {
+        "x": 50.96521878343134,
+        "y": 102.82896600710055
+      },
+      "type": "table_view",
+      "width": 821.0
+    },
+    {
+      "data": {
+        "__execution_delay": 0.0,
+        "collapsed": null,
+        "display": null,
+        "error": null,
+        "input_metadata": [
+          {
+            "dataframes": {
+              "data": {
+                "columns": [
+                  "Name",
+                  "SMILES",
+                  "pIC50"
+                ]
+              }
+            },
+            "other": {},
+            "relations": []
+          }
+        ],
+        "meta": {
+          "color": "orange",
+          "inputs": [
+            {
+              "name": "bundle",
+              "position": "left",
+              "type": {
+                "type": "<class 'inspect._empty'>"
+              }
+            }
+          ],
+          "name": "Train QSAR model",
+          "outputs": [
+            {
+              "name": "output",
+              "position": "right",
+              "type": {
+                "type": "None"
+              }
+            }
+          ],
+          "params": [
+            {
+              "default": null,
+              "name": "table_name",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            },
+            {
+              "default": null,
+              "name": "smiles_col",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            },
+            {
+              "default": null,
+              "name": "target_col",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            },
+            {
+              "default": null,
+              "name": "fp_type",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            },
+            {
+              "default": 2,
+              "name": "radius",
+              "type": {
+                "type": "<class 'int'>"
+              }
+            },
+            {
+              "default": 2048,
+              "name": "n_bits",
+              "type": {
+                "type": "<class 'int'>"
+              }
+            },
+            {
+              "default": 0.2,
+              "name": "test_size",
+              "type": {
+                "type": "<class 'float'>"
+              }
+            },
+            {
+              "default": 42,
+              "name": "random_state",
+              "type": {
+                "type": "<class 'int'>"
+              }
+            },
+            {
+              "default": "Models",
+              "name": "out_dir",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            }
+          ],
+          "type": "basic"
+        },
+        "params": {
+          "fp_type": "ecfp",
+          "n_bits": 2048.0,
+          "out_dir": "Models",
+          "radius": 2.0,
+          "random_state": 42.0,
+          "smiles_col": "SMILES",
+          "table_name": "data",
+          "target_col": "pIC50",
+          "test_size": 0.2
+        },
+        "status": "done",
+        "title": "Train QSAR model"
+      },
+      "dragHandle": ".bg-primary",
+      "height": 288.0,
+      "id": "Train QSAR model 1",
+      "position": {
+        "x": -1159.342067985799,
+        "y": -309.43299496669476
+      },
+      "type": "basic",
+      "width": 329.0
+    },
+    {
+      "data": {
+        "display": {
+          "dataframes": {
+            "df": {
+              "columns": [
+                "split",
+                "R2",
+                "MAE",
+                "RMSE"
+              ],
+              "data": [
+                [
+                  "train",
+                  0.9417380311136551,
+                  0.24223013565891363,
+                  0.3009354647304392
+                ],
+                [
+                  "test",
+                  0.27586659127848734,
+                  0.8443154545454546,
+                  1.098891477150744
+                ]
+              ]
+            }
+          },
+          "other": {},
+          "relations": []
+        },
+        "error": null,
+        "input_metadata": [
+          {
+            "dataframes": {
+              "df": {
+                "columns": [
+                  "MAE",
+                  "R2",
+                  "RMSE",
+                  "split"
+                ]
+              }
+            },
+            "other": {},
+            "relations": []
+          }
+        ],
+        "meta": {
+          "color": "orange",
+          "inputs": [
+            {
+              "name": "bundle",
+              "position": "left",
+              "type": {
+                "type": "<class 'lynxkite_graph_analytics.core.Bundle'>"
+              }
+            }
+          ],
+          "name": "View tables",
+          "outputs": [],
+          "params": [
+            {
+              "default": 100,
+              "name": "limit",
+              "type": {
+                "type": "<class 'int'>"
+              }
+            }
+          ],
+          "type": "table_view"
+        },
+        "params": {
+          "limit": 100.0
+        },
+        "status": "done",
+        "title": "View tables"
+      },
+      "dragHandle": ".bg-primary",
+      "height": 291.0,
+      "id": "View tables 3",
+      "position": {
+        "x": -757.0926975107354,
+        "y": -321.469271323077
+      },
+      "type": "table_view",
+      "width": 496.0
+    },
+    {
+      "data": {
+        "__execution_delay": 0.0,
+        "collapsed": null,
+        "display": null,
+        "error": null,
+        "input_metadata": [
+          {
+            "dataframes": {
+              "data": {
+                "columns": [
+                  "Name",
+                  "SMILES",
+                  "mol",
+                  "pIC50"
+                ]
+              }
+            },
+            "other": {},
+            "relations": []
+          }
+        ],
+        "meta": {
+          "color": "orange",
+          "inputs": [
+            {
+              "name": "bundle",
+              "position": "left",
+              "type": {
+                "type": "<class 'inspect._empty'>"
+              }
+            }
+          ],
+          "name": "Lipinski filter",
+          "outputs": [
+            {
+              "name": "output",
+              "position": "right",
+              "type": {
+                "type": "None"
+              }
+            }
+          ],
+          "params": [
+            {
+              "default": null,
+              "name": "table_name",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            },
+            {
+              "default": null,
+              "name": "column_name",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            },
+            {
+              "default": true,
+              "name": "strict_lipinski",
+              "type": {
+                "type": "<class 'bool'>"
+              }
+            }
+          ],
+          "type": "basic"
+        },
+        "params": {
+          "column_name": "SMILES",
+          "strict_lipinski": true,
+          "table_name": "data"
+        },
+        "status": "done",
+        "title": "Lipinski filter"
+      },
+      "dragHandle": ".bg-primary",
+      "height": 299.0,
+      "id": "Lipinski filter 1",
+      "position": {
+        "x": -720.0507400376052,
+        "y": 276.1650594718383
+      },
+      "type": "basic",
+      "width": 402.0
+    },
+    {
+      "data": {
+        "__execution_delay": 0.0,
+        "collapsed": null,
+        "error": null,
+        "input_metadata": [
+          {
+            "dataframes": {
+              "data": {
+                "columns": [
+                  "Name",
+                  "SMILES",
+                  "mol",
+                  "pIC50"
+                ]
+              }
+            },
+            "other": {},
+            "relations": []
+          }
+        ],
+        "meta": {
+          "color": "orange",
+          "inputs": [
+            {
+              "name": "bundle",
+              "position": "left",
+              "type": {
+                "type": "<class 'inspect._empty'>"
+              }
+            }
+          ],
+          "name": "View mol image",
+          "outputs": [],
+          "params": [
+            {
+              "default": null,
+              "name": "table_name",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            },
+            {
+              "default": null,
+              "name": "smiles_column",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            },
+            {
+              "default": null,
+              "name": "mols_per_row",
+              "type": {
+                "type": "<class 'int'>"
+              }
+            }
+          ],
+          "type": "image"
+        },
+        "params": {
+          "mols_per_row": "4",
+          "smiles_column": "SMILES",
+          "table_name": "data"
+        },
+        "status": "done",
+        "title": "View mol image"
+      },
+      "dragHandle": ".bg-primary",
+      "height": 309.0,
+      "id": "View mol image 1",
+      "position": {
+        "x": -1378.339896172849,
+        "y": 280.2327514185724
+      },
+      "type": "image",
+      "width": 363.0
+    }
+  ]
+}

examples/Cheminformatics/cheminfo_tools.py ADDED Viewed

	@@ -0,0 +1,305 @@

+import os
+import pickle
+from lynxkite.core.ops import op
+from matplotlib import pyplot as plt
+import pandas as pd
+from rdkit.Chem.Draw import rdMolDraw2D
+from PIL import Image
+from rdkit import Chem
+from rdkit.Chem import Descriptors
+from rdkit.Chem import Crippen, Lipinski
+from rdkit import DataStructs
+import math
+import io
+from rdkit.Chem import AllChem
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
+from sklearn.model_selection import train_test_split
+import numpy as np
+@op("LynxKite Graph Analytics", "View mol filter", view="matplotlib", slow=True)
+def mol_filter(
+    bundle,
+    *,
+    table_name: str,
+    SMILES_Column: str,
+    mols_per_row: int,
+    filter_smarts: str = None,
+    filter_smiles: str = None,
+    highlight: bool = True,
+):
+    """
+    Draws a grid of molecules in square boxes, with optional filtering and substructure highlighting.
+    Parameters:
+    - bundle: data bundle containing a DataFrame in bundle.dfs[table_name]
+    - table_name: name of the table in bundle.dfs
+    - column_name: column containing SMILES strings
+    - mols_per_row: number of molecules per row in the grid
+    - filter_smarts: SMARTS pattern to filter and highlight
+    - filter_smiles: SMILES substructure to filter and highlight (if filter_smarts is None)
+    - highlight: whether to highlight matching substructures
+    """
+    # get DataFrame
+    df = bundle.dfs[table_name].copy()
+    df["mol"] = df[SMILES_Column].apply(Chem.MolFromSmiles)
+    df = df[df["mol"].notnull()].reset_index(drop=True)
+    # compile substructure query if provided
+    query = None
+    if filter_smarts:
+        query = Chem.MolFromSmarts(filter_smarts)
+    elif filter_smiles:
+        query = Chem.MolFromSmiles(filter_smiles)
+    # compute properties and legends
+    df["MW"] = df["mol"].apply(Descriptors.MolWt)
+    df["logP"] = df["mol"].apply(Crippen.MolLogP)
+    df["HBD"] = df["mol"].apply(Lipinski.NumHDonors)
+    df["HBA"] = df["mol"].apply(Lipinski.NumHAcceptors)
+    legends = []
+    for _, row in df.iterrows():
+        mol = row["mol"]
+        # filter by substructure
+        if query and not mol.HasSubstructMatch(query):
+            continue
+        # find atom and bond matches
+        atom_ids, bond_ids = [], []
+        if highlight and query:
+            atom_ids = list(mol.GetSubstructMatch(query))
+            # find bonds where both ends are in atom_ids
+            for bond in mol.GetBonds():
+                a1 = bond.GetBeginAtomIdx()
+                a2 = bond.GetEndAtomIdx()
+                if a1 in atom_ids and a2 in atom_ids:
+                    bond_ids.append(bond.GetIdx())
+        legend = (
+            f"{row['Name']}  pIC50={row['pIC50']:.2f}\n"
+            f"MW={row['MW']:.1f}, logP={row['logP']:.2f}\n"
+            f"HBD={row['HBD']}, HBA={row['HBA']}"
+        )
+        legends.append((mol, legend, atom_ids, bond_ids))
+    if not legends:
+        raise ValueError("No molecules passed the filter.")
+    # draw each filtered molecule
+    images = []
+    for mol, legend, atom_ids, bond_ids in legends:
+        drawer = rdMolDraw2D.MolDraw2DCairo(400, 350)
+        opts = drawer.drawOptions()
+        opts.legendFontSize = 200
+        drawer.DrawMolecule(mol, legend=legend, highlightAtoms=atom_ids, highlightBonds=bond_ids)
+        drawer.FinishDrawing()
+        sub_png = drawer.GetDrawingText()
+        sub_img = Image.open(io.BytesIO(sub_png))
+        images.append(sub_img)
+    plot_gallery(images, num_cols=mols_per_row)
+@op("LynxKite Graph Analytics", "Lipinski filter")
+def lipinski_filter(bundle, *, table_name: str, column_name: str, strict_lipinski: bool = True):
+    # copy bundle and get DataFrame
+    bundle = bundle.copy()
+    df = bundle.dfs[table_name].copy()
+    df["mol"] = df[column_name].apply(Chem.MolFromSmiles)
+    df = df[df["mol"].notnull()].reset_index(drop=True)
+    # compute properties
+    df["MW"] = df["mol"].apply(Descriptors.MolWt)
+    df["logP"] = df["mol"].apply(Crippen.MolLogP)
+    df["HBD"] = df["mol"].apply(Lipinski.NumHDonors)
+    df["HBA"] = df["mol"].apply(Lipinski.NumHAcceptors)
+    # compute a boolean pass/fail for Lipinski
+    df["pass_lipinski"] = (
+        (df["MW"] <= 500) & (df["logP"] <= 5) & (df["HBD"] <= 5) & (df["HBA"] <= 10)
+    )
+    df = df.drop("mol", axis=1)
+    # if strict_lipinski, drop those that fail
+    if strict_lipinski:
+        failed = df.loc[~df["pass_lipinski"], column_name].tolist()
+        df = df[df["pass_lipinski"]].reset_index(drop=True)
+        if failed:
+            print(f"Dropped {len(failed)} molecules that failed Lipinski: {failed}")
+    return df
+@op("LynxKite Graph Analytics", "View mol image", view="matplotlib", slow=True)
+def mol_image(bundle, *, table_name: str, smiles_column: str, mols_per_row: int):
+    df = bundle.dfs[table_name].copy()
+    df["mol"] = df[smiles_column].apply(Chem.MolFromSmiles)
+    df = df[df["mol"].notnull()].reset_index(drop=True)
+    df["MW"] = df["mol"].apply(Descriptors.MolWt)
+    df["logP"] = df["mol"].apply(Crippen.MolLogP)
+    df["HBD"] = df["mol"].apply(Lipinski.NumHDonors)
+    df["HBA"] = df["mol"].apply(Lipinski.NumHAcceptors)
+    legends = []
+    for _, row in df.iterrows():
+        legends.append(
+            f"{row['Name']}  pIC50={row['pIC50']:.2f}\n"
+            f"MW={row['MW']:.1f}, logP={row['logP']:.2f}\n"
+            f"HBD={row['HBD']}, HBA={row['HBA']}"
+        )
+    mols = df["mol"].tolist()
+    if not mols:
+        raise ValueError("No valid molecules to draw.")
+    # --- draw each molecule into its own sub‐image and paste ---
+    images = []
+    for mol, legend in zip(mols, legends):
+        # draw one molecule
+        drawer = rdMolDraw2D.MolDraw2DCairo(400, 350)
+        opts = drawer.drawOptions()
+        opts.legendFontSize = 200
+        drawer.DrawMolecule(mol, legend=legend)
+        drawer.FinishDrawing()
+        sub_png = drawer.GetDrawingText()
+        sub_img = Image.open(io.BytesIO(sub_png))
+        images.append(sub_img)
+    plot_gallery(images, num_cols=mols_per_row)
+def plot_gallery(images, num_cols):
+    num_rows = math.ceil(len(images) / num_cols)
+    fig, axes = plt.subplots(num_rows, num_cols, figsize=(num_cols * 4, num_rows * 3.5))
+    axes = axes.flatten()
+    for i, ax in enumerate(axes):
+        if i < len(images):
+            ax.imshow(images[i])
+        ax.set_xticks([])
+        ax.set_yticks([])
+    plt.tight_layout()
+@op("LynxKite Graph Analytics", "Train QSAR model")
+def build_qsar_model(
+    bundle,
+    *,
+    table_name: str,
+    smiles_col: str,
+    target_col: str,
+    fp_type: str,
+    radius: int = 2,
+    n_bits: int = 2048,
+    test_size: float = 0.2,
+    random_state: int = 42,
+    out_dir: str = "Models",
+):
+    """
+    Train and save a RandomForest QSAR model using one fingerprint type.
+    Parameters
+    ----------
+    bundle : any
+        An object with a dict‐like attribute `.dfs` mapping table names to DataFrames.
+    table_name : str
+        Key into bundle.dfs to get the DataFrame.
+    smiles_col : str
+        Name of the column containing SMILES strings.
+    target_col : str
+        Name of the column containing the numeric response.
+    fp_type : str
+        Fingerprint to compute: "ecfp", "rdkit", "torsion", "atompair", or "maccs".
+    radius : int
+        Radius for the Morgan (ECFP) fingerprint.
+    n_bits : int
+        Bit‐vector length for all fp types except MACCS (167).
+    test_size : float
+        Fraction of data held out for testing.
+    random_state : int
+        Random seed for reproducibility.
+    out_dir : str
+        Directory in which to save `qsar_model_<fp_type>.pkl`.
+    Returns
+    -------
+    model : RandomForestRegressor
+        The trained QSAR model.
+    metrics_df : pandas.DataFrame
+        R², MAE and RMSE on train and test splits.
+    """
+    # 1) load and sanitize data
+    df = bundle.dfs.get(table_name)
+    if df is None:
+        raise KeyError(f"Table '{table_name}' not found in bundle.dfs")
+    df = df.copy()
+    df["mol"] = df[smiles_col].apply(Chem.MolFromSmiles)
+    df = df[df["mol"].notnull()].reset_index(drop=True)
+    if df.empty:
+        raise ValueError(f"No valid molecules in '{smiles_col}'")
+    # 2) create a fixed train/test split
+    indices = np.arange(len(df))
+    train_idx, test_idx = train_test_split(indices, test_size=test_size, random_state=random_state)
+    # 3) featurize
+    fps = []
+    for mol in df["mol"]:
+        if fp_type == "ecfp":
+            bv = AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=n_bits)
+            arr = np.zeros((n_bits,), dtype=np.int8)
+            DataStructs.ConvertToNumpyArray(bv, arr)
+        elif fp_type == "rdkit":
+            bv = Chem.RDKFingerprint(mol, fpSize=n_bits)
+            arr = np.zeros((n_bits,), dtype=np.int8)
+            DataStructs.ConvertToNumpyArray(bv, arr)
+        elif fp_type == "torsion":
+            bv = AllChem.GetHashedTopologicalTorsionFingerprintAsBitVect(mol, nBits=n_bits)
+            arr = np.zeros((n_bits,), dtype=np.int8)
+            DataStructs.ConvertToNumpyArray(bv, arr)
+        elif fp_type == "atompair":
+            bv = AllChem.GetHashedAtomPairFingerprintAsBitVect(mol, nBits=n_bits)
+            arr = np.zeros((n_bits,), dtype=np.int8)
+            DataStructs.ConvertToNumpyArray(bv, arr)
+        elif fp_type == "maccs":
+            bv = Chem.MACCSkeys.GenMACCSKeys(mol)  # 167 bits
+            arr = np.zeros((167,), dtype=np.int8)
+            DataStructs.ConvertToNumpyArray(bv, arr)
+        else:
+            raise ValueError(f"Unsupported fingerprint type: '{fp_type}'")
+        fps.append(arr)
+    X = np.vstack(fps)
+    y = df[target_col].values
+    # 4) split features/labels
+    X_train, y_train = X[train_idx], y[train_idx]
+    X_test, y_test = X[test_idx], y[test_idx]
+    # 5) train RandomForest
+    model = RandomForestRegressor(random_state=random_state)
+    model.fit(X_train, y_train)
+    # 6) compute performance metrics
+    def _metrics(y_true, y_pred):
+        mse = mean_squared_error(y_true, y_pred)
+        return {
+            "R2": r2_score(y_true, y_pred),
+            "MAE": mean_absolute_error(y_true, y_pred),
+            "RMSE": np.sqrt(mse),
+        }
+    train_m = _metrics(y_train, model.predict(X_train))
+    test_m = _metrics(y_test, model.predict(X_test))
+    metrics_df = pd.DataFrame([{"split": "train", **train_m}, {"split": "test", **test_m}])
+    # 7) save the model
+    os.makedirs(out_dir, exist_ok=True)
+    model_file = os.path.join(out_dir, f"qsar_model_{fp_type}.pkl")
+    with open(model_file, "wb") as fout:
+        pickle.dump(model, fout)
+    print(f"Trained & saved QSAR model for '{fp_type}' → {model_file}")
+    return metrics_df

examples/Image table.lynxkite.json CHANGED Viewed

@@ -27,7 +27,7 @@
     {
       "data": {
         "__execution_delay": null,
-        "collapsed": true,
         "display": null,
         "error": null,
         "input_metadata": [],
@@ -55,8 +55,8 @@
       "height": 200.0,
       "id": "Example image table 1",
       "position": {
-        "x": 213.60043945845376,
-        "y": 306.98088700969373
       },
       "type": "basic",
       "width": 280.0
@@ -138,8 +138,8 @@
       "height": 440.0,
       "id": "View tables 1",
       "position": {
-        "x": 626.2831607914023,
-        "y": 109.55448939208392
       },
       "type": "table_view",
       "width": 376.0
@@ -198,14 +198,14 @@
         "title": "Import CSV"
       },
       "dragHandle": ".bg-primary",
-      "height": 313.0,
       "id": "Import CSV 1",
       "position": {
         "x": 13.802068621055497,
         "y": -269.65065144888104
       },
       "type": "basic",
-      "width": 216.0
     },
     {
       "data": {
@@ -282,15 +282,15 @@
         "params": {
           "limit": 100.0
         },
-        "status": "done",
         "title": "View tables"
       },
       "dragHandle": ".bg-primary",
       "height": 418.0,
       "id": "View tables 2",
       "position": {
-        "x": 548.7706661684929,
-        "y": -316.9626796191875
       },
       "type": "table_view",
       "width": 1116.0
@@ -300,7 +300,7 @@
         "__execution_delay": 0.0,
         "collapsed": null,
         "display": null,
-        "error": null,
         "input_metadata": [
           {}
         ],
@@ -354,8 +354,8 @@
       "height": 296.0,
       "id": "Draw molecules 1",
       "position": {
-        "x": 289.42386787591244,
-        "y": -258.0823121715324
       },
       "type": "basic",
       "width": 212.0

     {
       "data": {
         "__execution_delay": null,
+        "collapsed": false,
         "display": null,
         "error": null,
         "input_metadata": [],
       "height": 200.0,
       "id": "Example image table 1",
       "position": {
+        "x": 356.1935187064265,
+        "y": 224.8472733628614
       },
       "type": "basic",
       "width": 280.0
       "height": 440.0,
       "id": "View tables 1",
       "position": {
+        "x": 757.4687936995374,
+        "y": 116.39895719598661
       },
       "type": "table_view",
       "width": 376.0
         "title": "Import CSV"
       },
       "dragHandle": ".bg-primary",
+      "height": 339.0,
       "id": "Import CSV 1",
       "position": {
         "x": 13.802068621055497,
         "y": -269.65065144888104
       },
       "type": "basic",
+      "width": 296.0
     },
     {
       "data": {
         "params": {
           "limit": 100.0
         },
+        "status": "planned",
         "title": "View tables"
       },
       "dragHandle": ".bg-primary",
       "height": 418.0,
       "id": "View tables 2",
       "position": {
+        "x": 815.4121289519509,
+        "y": -330.8232285057863
       },
       "type": "table_view",
       "width": 1116.0
         "__execution_delay": 0.0,
         "collapsed": null,
         "display": null,
+        "error": "module 'rdkit.Chem' has no attribute 'Draw'",
         "input_metadata": [
           {}
         ],
       "height": 296.0,
       "id": "Draw molecules 1",
       "position": {
+        "x": 351.1956913898301,
+        "y": -235.00831568554486
       },
       "type": "basic",
       "width": 212.0

examples/draw_molecules.py CHANGED Viewed

@@ -15,6 +15,8 @@ def smiles_to_data(smiles):
     import rdkit
     m = rdkit.Chem.MolFromSmiles(smiles)
     img = rdkit.Chem.Draw.MolToImage(m)
     data = pil_to_data(img)
     return data

     import rdkit
     m = rdkit.Chem.MolFromSmiles(smiles)
+    if m is None:
+        return None
     img = rdkit.Chem.Draw.MolToImage(m)
     data = pil_to_data(img)
     return data

examples/uploads/CHEMBL313_sel.csv ADDED Viewed

	@@ -0,0 +1,109 @@

+SMILES,Name,pIC50
+Cc1ccc(C2CC3CCC(C2C(=O)OC(C)C)N3C)cc1,CHEMBL321806,5.99
+Cc1ccc(C2CC3CCC(C2C(=O)Oc2ccccc2)N3C)cc1,CHEMBL340912,5.81
+CN1C2CCC1C(C(=O)Oc1ccccc1)C(c1ccc(I)cc1)C2,CHEMBL340761,7.29
+CC(C)OC(=O)C1C(c2ccc(I)cc2)CC2CCC1N2C,CHEMBL127546,7.9
+COC(=O)C1C(c2ccc(Br)cc2)CC2CCC1N2C,CHEMBL97887,8.31
+CC(C)OC(=O)C1C(c2ccc(Cl)cc2)CC2CCC1N2C,CHEMBL127040,6.89
+COC(=O)C1C2CCC(CC1c1ccc(F)cc1)N2,CHEMBL80515,8.14
+COC(=O)C1C(c2cccc(I)c2)CC2CCC1N2C,CHEMBL67387,8.01
+CCc1cc(C2C(c3ccc(C)cc3)CC3CCC2N3C)on1,CHEMBL317904,6.24
+Cc1ccc(C2CC3CCC(C2c2cc(C(C)C)no2)N3C)cc1,CHEMBL322400,5.41
+Cc1ccc(C2CC3CCC(C2c2cc(C)no2)N3C)cc1,CHEMBL103228,6.46
+CC(C)c1cc(C2C(c3ccc(Cl)cc3)CC3CCC2N3C)on1,CHEMBL321780,6.68
+Cc1ccc(C2CC3CCC(C2c2ccno2)N3C)cc1,CHEMBL317905,7.42
+CN1C2CCC1C(c1ccno1)C(c1ccc(Cl)cc1)C2,CHEMBL103523,8.09
+Cc1cc(C2C(c3ccc(Cl)cc3)CC3CCC2N3C)on1,CHEMBL316528,7.28
+CCc1cc(C2C(c3ccc(Cl)cc3)CC3CCC2N3C)on1,CHEMBL103227,6.55
+CN1C2CCC1C(c1cc(C(C)(C)C)no1)C(c1ccc(Cl)cc1)C2,CHEMBL100652,5.47
+COC(=O)C1C(c2ccc(C)cc2)CC2CCC1N2CCF,CHEMBL105089,6.43
+CN1C2CCC1C(C(=O)OCCF)C(c1ccc(I)cc1)C2,CHEMBL318961,8.59
+O=C(OCCF)C1C(c2ccc(I)cc2)CC2CCC1N2CCCF,CHEMBL317444,7.88
+Cc1ccc(C2CC3CCC(C2C(=O)OCCCF)N3C)cc1,CHEMBL430504,6.49
+CN1C2CCC1C(C(=O)OCCCF)C(c1ccc(I)cc1)C2,CHEMBL105693,8.78
+COC(=O)C1C(c2ccc(Br)cc2)CC2CCC1N2CCCF,CHEMBL433159,7.44
+COC(=O)C1C2CCC(CC1c1ccc(I)cc1)N2,CHEMBL14613,10.215
+COC(=O)C1C(c2ccc(C)cc2)CC2CCC1N2CCCF,CHEMBL319052,5.88
+COC(=O)[C@@H]1C2CCC(C[C@@H]1c1ccc(C)cc1)N2C/C=C/I,CHEMBL2113648,6.3
+C=Cc1ccc(C2CC3CCC(N3)C2C(=O)CC)cc1,CHEMBL85492,9.49
+CCC(=O)C1C(c2ccc(C(C)C)cc2)CC2CCC1N2C,CHEMBL278122,7.443
+C=C(C)c1ccc(C2CC3CCC(N3)C2C(=O)CC)cc1,CHEMBL85877,9.96
+CCC(=O)C1C2CCC(CC1c1ccc(C)cc1)N2,CHEMBL87983,7.72
+CCC(=O)C1C(c2ccc(C(CC)CC)cc2)CC2CCC1N2C,CHEMBL314919,6.27
+C=C(C)c1ccc(C2CC3CCC(C2C(=O)CC)N3C)cc1,CHEMBL82807,9.09
+C=Cc1ccc(C2CC3CCC(C2C(=O)CC)N3C)cc1,CHEMBL314361,8.49
+CCC(=O)C1C2CCC(CC1c1ccc(C(CC)CC)cc1)N2,CHEMBL87678,6.82
+CCC(=O)C1C(c2ccc(C3CCCCC3)cc2)CC2CCC1N2C,CHEMBL87739,7.01
+CCC(=O)C1C2CCC(CC1c1ccc(C(C)C)cc1)N2,CHEMBL85256,8.28
+O[C@H]1CCCC[C@@H]1N1C2CCC1CC(c1ccccc1)C2,CHEMBL338411,5.47
+Cc1ccc(C2CC3CCC(C2c2cc(C(C)(C)C)no2)N3C)cc1,CHEMBL317909,4.59
+COC(=O)C1C(c2cccc(-c3ccco3)c2)CC2CCC1N2C,CHEMBL303494,7.38
+COC(=O)C1C2CCC(CC1c1cccc(I)c1)N2,CHEMBL66068,8.87
+COC(=O)C1C(c2cccc(-c3ccccc3)c2)CC2CCC1N2C,CHEMBL294733,7.45
+COC(=O)C1C(c2cccc(-c3ccsc3)c2)CC2CCC1N2C,CHEMBL303232,7.08
+CC(=O)C1C(c2ccc(C)cc2)CC2CCC1N2C,CHEMBL23141,6.91
+CC(=O)C1C(c2ccc(F)cc2)CC2CCC1N2C,CHEMBL22665,6.07
+CCC(=O)C1C(c2ccc(-c3ccccc3)cc2)CC2CCC1N2C,CHEMBL22518,8.37
+CCC(=O)C1C(c2ccccc2)CC2CCC1N2C,CHEMBL23875,6.0
+CCC(=O)C1C(c2ccc(C(C)(C)C)cc2)CC2CCC1N2C,CHEMBL22377,5.75
+CC(=O)C1C(c2ccccc2)CC2CCC1N2C,CHEMBL416007,5.87
+CCc1ccc(C2CC3CCC(C2C(C)=O)N3C)cc1,CHEMBL23974,7.11
+CCC(=O)C1C(c2ccc(F)cc2)CC2CCC1N2C,CHEMBL23655,6.2
+COC(=O)C1C(c2ccc(I)cc2)CC2CCC1N2CCF,CHEMBL358006,9.85
+COC(=O)C1C(c2ccc(I)cc2)CC2CCC1N2CCCI,CHEMBL153200,8.05
+COC(=O)C1C(c2ccc(I)cc2)CC2CCC1N2CC1CC1,CHEMBL150084,8.89
+COC(=O)C1C(c2ccc(I)cc2)CC2CCC1N2CC(F)F,CHEMBL356166,8.02
+COC(=O)C1C(c2ccc(I)cc2)CC2CCC1N2CCCO,CHEMBL357300,8.6
+COC(=O)C1C(c2ccc(I)cc2)CC2CCC1N2CCCCl,CHEMBL345553,9.49
+CC(C)OC(=O)C1C(c2ccc(I)cc2)CC2CCC1N2CCF,CHEMBL346872,7.66
+COC(=O)C1C(c2ccc(I)cc2)CC2CCC1N2CC(OC)OC,CHEMBL153361,8.77
+COC(=O)CN1C2CCC1C(C(=O)OC)C(c1ccc(I)cc1)C2,CHEMBL149801,9.09
+CC(C)OC(=O)C1C(c2ccc(I)cc2)CC2CCC1N2CCCF,CHEMBL345760,7.31
+COC(=O)[C@@H]1C2CCC(C[C@@H]1c1ccc(I)cc1)N2CC(=O)N(C)C,CHEMBL2112890,8.19
+COC(=O)C1C(c2ccc(I)cc2)CC2CCC1N2CCCBr,CHEMBL356652,9.46
+C[C@H](CF)OC(=O)C1C(c2ccc(Cl)cc2)CC2CCC1N2C,CHEMBL2112915,6.88
+C[C@@H](CF)OC(=O)C1C(c2ccc(Cl)cc2)CC2CCC1N2C,CHEMBL2112916,6.86
+Fc1ccc(C2C3CCC(C[C@H]2c2ccc(F)cc2)N3)cc1,CHEMBL325330,7.39
+CCC(=O)C1C2CCC(CC1c1cccc(I)c1)N2,CHEMBL171565,7.8
+CCC(=O)C1C2CCC(CC1c1ccc(/C=C/I)cc1)N2,CHEMBL352913,9.21
+Cc1ccc(C2CC3CCC(C2C(=O)CC/C=C/I)N3C)cc1,CHEMBL169320,7.29
+CCC(=O)C1C(c2cccc(I)c2)CC2CCC1N2C,CHEMBL168305,7.71
+CCC(=O)C1C2CCC(CC1c1ccc(I)c(Cl)c1)N2,CHEMBL169117,8.97
+CCC(=O)C1C2CCC(CC1c1ccc(/C=C(\C)I)cc1)N2,CHEMBL423275,9.24
+CCC(=O)C1C(c2ccc(I)cc2)CC2CCC1N2C,CHEMBL352698,8.73
+CCC(=O)C1C2CCC(CC1c1ccc(I)c(F)c1)N2,CHEMBL355734,8.14
+CCC(=O)C1C2CCC(CC1c1ccc(/C=C(/C)I)cc1)N2,CHEMBL355259,9.89
+CCC(=O)C1C2CCC(CC1c1ccc(I)cc1)N2,CHEMBL354725,8.61
+COC(=O)C1C(c2ccc(-c3cscc3Br)cc2)CC2CCC1N2C,CHEMBL433560,8.4
+COC(=O)C1C(c2ccc(-c3ccc(I)s3)cc2)CC2CCC1N2C,CHEMBL178773,8.34
+COC(=O)C1C(c2ccc(-c3ccc(N)s3)cc2)CC2CCC1N2C,CHEMBL181613,7.19
+COC(=O)C1C(c2ccc(-c3ccsc3)cc2)CC2CCC1N2C,CHEMBL435287,10.77
+COC(=O)C1C2CCC(CC1c1ccc(-c3cccs3)cc1)N2,CHEMBL181557,9.96
+COC(=O)C1C(c2ccc(-c3cccs3)cc2)CC2CCC1N2C,CHEMBL181609,9.82
+COC(=O)C1C2CCC(CC1c1ccc(-c3ccsc3)cc1)N2,CHEMBL179498,9.64
+COC(=O)C1C(c2ccc(-c3ccc(Br)s3)cc2)CC2CCC1N2C,CHEMBL180918,9.42
+COC(=O)C1C(c2ccc(-c3ccc(Cl)s3)cc2)CC2CCC1N2C,CHEMBL369098,9.19
+COC(=O)C1C2CCC(CC1c1ccc(C)c(F)c1)N2,CHEMBL365738,7.62
+COC(=O)C1C2CCC(CC1c1ccc(F)c(C)c1)N2,CHEMBL195738,7.77
+COC(=O)C1C2CCC(CC1c1cccc(F)c1)N2,CHEMBL192924,7.57
+COC(=O)C1C2CCC(CC1c1ccc(F)c(F)c1)N2,CHEMBL366159,7.26
+COC(=O)C1C2CCC(CC1c1cc(F)cc(F)c1)N2,CHEMBL371607,7.86
+CN1C2CCC1C(C(=O)OCCCF)C(c1ccc(Br)cc1)C2,CHEMBL365649,8.54
+O=C(OCCCF)C1C2CCC(CC1c1ccc(Br)cc1)N2,CHEMBL184807,9.52
+CN1C2CCC1C(C(=O)OCCF)C(c1ccc(Br)cc1)C2,CHEMBL185608,8.29
+O=C(OCCF)C1C2CCC(CC1c1ccc(Br)cc1)N2,CHEMBL186119,9.62
+O=C(OCCF)C1C2CCC(CC1c1ccc(I)cc1)N2,CHEMBL186306,9.74
+O=C(OCCF)C1C(c2ccc(Br)cc2)CC2CCC1N2CCCF,CHEMBL184123,7.74
+CN1C2CCC1C(C(=O)NCCF)C(c1ccc(Br)cc1)C2,CHEMBL365413,7.41
+CN1C2CCC1C(C(=O)NCCF)C(c1ccc(I)cc1)C2,CHEMBL183259,7.51
+COC(=O)C1C2CCC(CC1c1ccc(Br)cc1)N2,CHEMBL365623,9.02
+COC(=O)C1C2CCC(CC1c1ccc(-c3ccco3)cc1)N2,CHEMBL200044,9.82
+COC(=O)C1C(c2ccc(-c3ccoc3)cc2)CC2CCC1N2C,CHEMBL200698,9.46
+COC(=O)C1C(c2ccc(-c3ccco3)cc2)CC2CCC1N2C,CHEMBL199704,8.95
+COC(=O)C1C(c2ccc(-c3nccs3)cc2)CC2CCC1N2C,CHEMBL382943,8.78
+COC(=O)C1C(c2ccc(-c3cccnc3)cc2)CC2CCC1N2C,CHEMBL199634,8.45
+COC(=O)C1C2CCC(CC1c1ccc(-c3nccs3)cc1)N2,CHEMBL381418,8.29
+COC(=O)C1C(c2ccc(-c3cnccn3)cc2)CC2CCC1N2C,CHEMBL383572,7.95
+COC(=O)C1C(c2ccc(-c3cncnc3)cc2)CC2CCC1N2C,CHEMBL372121,7.48
+COC(=O)C1C(c2ccc(-c3ccccn3)cc2)CC2CCC1N2C,CHEMBL199407,6.79
+COC(=O)[C@@H]1C2CCC(C[C@@H]1c1ccc(Br)cc1)N2C,CHEMBL218082,8.39

lynxkite-app/web/src/workspace/nodes/NodeWithImage.tsx CHANGED Viewed

@@ -3,7 +3,7 @@ import { NodeWithParams } from "./NodeWithParams";
 const NodeWithImage = (props: any) => {
   return (
-    <NodeWithParams {...props}>
       {props.data.display && <img src={props.data.display} alt="Node Display" />}
     </NodeWithParams>
   );

 const NodeWithImage = (props: any) => {
   return (
+    <NodeWithParams collapsed {...props}>
       {props.data.display && <img src={props.data.display} alt="Node Display" />}
     </NodeWithParams>
   );

lynxkite-core/src/lynxkite/core/ops.py CHANGED Viewed

@@ -129,7 +129,7 @@ class Result:
     `input_metadata` is a list of JSON objects describing each input.
     """
-    output: typing.Any = None
     display: ReadOnlyJSON | None = None
     error: str | None = None
     input_metadata: ReadOnlyJSON | None = None
@@ -187,7 +187,6 @@ class Op(BaseConfig):
         res = self.func(*inputs, **params)
         if not isinstance(res, Result):
             # Automatically wrap the result in a Result object, if it isn't already.
-            res = Result(output=res)
             if self.type in [
                 "visualization",
                 "table_view",
@@ -195,9 +194,10 @@ class Op(BaseConfig):
                 "image",
                 "molecule",
             ]:
-                # If the operation is some kind of visualization, we use the output as the
-                # value to display by default.
-                res.display = res.output
         return res
     def get_input(self, name: str):
@@ -237,6 +237,10 @@ def op(
     def decorator(func):
         sig = inspect.signature(func)
         if slow:
             func = mem.cache(func)
             func = _global_slow(func)
@@ -256,10 +260,6 @@ def op(
             _outputs = [Output(name=name, type=None) for name in outputs]
         else:
             _outputs = [Output(name="output", type=None)] if view == "basic" else []
-        _view = view
-        if view == "matplotlib":
-            _view = "image"
-            func = matplotlib_to_image(func)
         op = Op(
             func=func,
             name=name,

     `input_metadata` is a list of JSON objects describing each input.
     """
+    output: typing.Any | None = None
     display: ReadOnlyJSON | None = None
     error: str | None = None
     input_metadata: ReadOnlyJSON | None = None
         res = self.func(*inputs, **params)
         if not isinstance(res, Result):
             # Automatically wrap the result in a Result object, if it isn't already.
             if self.type in [
                 "visualization",
                 "table_view",
                 "image",
                 "molecule",
             ]:
+                # If the operation is a visualization, we use the returned value for display.
+                res = Result(display=res)
+            else:
+                res = Result(output=res)
         return res
     def get_input(self, name: str):
     def decorator(func):
         sig = inspect.signature(func)
+        _view = view
+        if view == "matplotlib":
+            _view = "image"
+            func = matplotlib_to_image(func)
         if slow:
             func = mem.cache(func)
             func = _global_slow(func)
             _outputs = [Output(name=name, type=None) for name in outputs]
         else:
             _outputs = [Output(name="output", type=None)] if view == "basic" else []
         op = Op(
             func=func,
             name=name,

lynxkite-core/src/lynxkite/core/workspace.py CHANGED Viewed

@@ -65,10 +65,14 @@ class WorkspaceNode(BaseConfig):
         self.data.status = NodeStatus.done
         if hasattr(self, "_crdt"):
             with self._crdt.doc.transaction():
-                self._crdt["data"]["display"] = self.data.display
-                self._crdt["data"]["input_metadata"] = self.data.input_metadata
-                self._crdt["data"]["error"] = self.data.error
-                self._crdt["data"]["status"] = NodeStatus.done
     def publish_error(self, error: Exception | str | None):
         """Can be called with None to clear the error state."""
@@ -176,7 +180,6 @@ class Workspace(BaseConfig):
                     # If the node is connected to a CRDT, update that too.
                     if hasattr(node, "_crdt"):
                         node._crdt["data"]["meta"] = op.model_dump()
-                        print("set metadata to", op)
                 if node.type != op.type:
                     node.type = op.type
                     if hasattr(node, "_crdt"):

         self.data.status = NodeStatus.done
         if hasattr(self, "_crdt"):
             with self._crdt.doc.transaction():
+                try:
+                    self._crdt["data"]["status"] = NodeStatus.done
+                    self._crdt["data"]["display"] = self.data.display
+                    self._crdt["data"]["input_metadata"] = self.data.input_metadata
+                    self._crdt["data"]["error"] = self.data.error
+                except Exception as e:
+                    self._crdt["data"]["error"] = str(e)
+                    raise e
     def publish_error(self, error: Exception | str | None):
         """Can be called with None to clear the error state."""
                     # If the node is connected to a CRDT, update that too.
                     if hasattr(node, "_crdt"):
                         node._crdt["data"]["meta"] = op.model_dump()
                 if node.type != op.type:
                     node.type = op.type
                     if hasattr(node, "_crdt"):

lynxkite-core/tests/test_ops.py CHANGED Viewed

@@ -104,4 +104,4 @@ def test_visualization_operations_display_is_populated_automatically():
     result = ops.CATALOGS["test"]["display_op"]()
     assert isinstance(result, ops.Result)
-    assert result.output == result.display == {"display_value": 1}

     result = ops.CATALOGS["test"]["display_op"]()
     assert isinstance(result, ops.Result)
+    assert result.display == {"display_value": 1}

lynxkite-graph-analytics/src/lynxkite_graph_analytics/core.py CHANGED Viewed

@@ -222,6 +222,7 @@ async def _execute_node(node, ws, catalog, outputs):
     try:
         result = op(*inputs, **params)
         result.output = await await_if_needed(result.output)
     except Exception as e:
         if not os.environ.get("LYNXKITE_SUPPRESS_OP_ERRORS"):
             traceback.print_exc()

     try:
         result = op(*inputs, **params)
         result.output = await await_if_needed(result.output)
+        result.display = await await_if_needed(result.display)
     except Exception as e:
         if not os.environ.get("LYNXKITE_SUPPRESS_OP_ERRORS"):
             traceback.print_exc()