DeepSEQreen_fast_build

Running on CPU Upgrade

App Files Files Community

libokj committed on May 12, 2024

Commit

489390d

verified ·

1 Parent(s): b5f72e9

Update app.py

Browse files

Files changed (1) hide show

app.py +71 -8

app.py CHANGED Viewed

@@ -6,7 +6,7 @@ import textwrap
 from email.mime.multipart import MIMEMultipart
 from email.mime.text import MIMEText
 from email.utils import formatdate, make_msgid
-from functools import cache, partial
 from math import pi
 from time import sleep, time
 from uuid import uuid4
@@ -30,14 +30,16 @@ from requests.adapters import HTTPAdapter, Retry
 from markdown import markdown
 from rdkit import Chem
 from rdkit.Chem import AllChem, Draw, RDConfig, PandasTools, Descriptors, rdMolDescriptors, rdmolops, Lipinski, Crippen
 from rdkit.Chem.Scaffolds import MurckoScaffold
-import seaborn as sns
 from bokeh.models import Legend, NumberFormatter, BooleanFormatter, HTMLTemplateFormatter, LegendItem
 from bokeh.palettes import Category20c_20
 from bokeh.plotting import figure
 from bokeh.transform import cumsum
 from bokeh.resources import INLINE
 import panel as pn
 from apscheduler.schedulers.background import BackgroundScheduler
@@ -158,6 +160,63 @@ visibility: hidden
 """
 class HelpTip:
     def __new__(cls, text):
         return gr.HTML(
@@ -917,7 +976,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
             def max_sim(smiles):
                 return max_tanimoto_similarity(smiles, seen_smiles_with_fp=pos_compounds_df)
-            prediction_df[['Max. Tanimoto Similarity to Known Target Ligands',
                            'Max. Tanimoto Similarity Target Ligand']] = (
                 prediction_df['X1'].parallel_apply(max_sim).apply(pd.Series)
             )
@@ -932,7 +991,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
                 compound_targets = df_training.loc[df_training['X1'] == compound]
                 return max_sequence_identity(x2, seen_fastas=compound_targets)
-            prediction_df[['Max. Sequence Identity to Known Ligand Targets',
                            'Max. Sequence Identity Ligand Target']] = (
                 prediction_df['X1^'].parallel_apply(calculate_max_sequence_identity).apply(pd.Series)
             )
@@ -1147,6 +1206,10 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
                 f'<div style="overflow:auto; height: 300px; font-family: Courier !important;">{table_html}</div>')
     else:
         image_zoom_formatter = HTMLTemplateFormatter(template='<div class="image-zoom-viewer"><%= value %></div>')
         uniprot_id_formatter = HTMLTemplateFormatter(
             template='<% if (value == value) { '  # Check if value is not NaN
@@ -1167,11 +1230,12 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
             'Interaction Probability': {'type': 'progress', 'max': 1.0, 'legend': True},
             'Compound': image_zoom_formatter,
             'Scaffold': image_zoom_formatter,
             'Target FASTA': {'type': 'textarea', 'width': 60},
             'Target ID': uniprot_id_formatter,
             'Compound ID': pubchem_id_formatter,
-            'Max. Sequence Identity Target': uniprot_id_formatter,
-            'Max. Tanimoto Similarity Compound': pubchem_id_formatter,
         }
         formatters = {**bool_formatters, **float_formatters, **other_formatters}
@@ -1181,7 +1245,7 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
         report_table = pn.widgets.Tabulator(
             df_html, formatters=formatters,
             frozen_columns=[col for col in df_html.columns if col in [
-                'Target ID', 'Compound ID', 'Compound', 'Scaffold'
             ]],
             disabled=True, sizing_mode='stretch_both', pagination='local', page_size=30)
@@ -2215,7 +2279,6 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
                     infer_csv_prompt: gr.Button(visible=False),
                     infer_library_prompt: gr.Button(visible=True),
                 }
-        match upload_type:
             case "Upload a CSV file containing paired compound-protein data":
                 return {
                     pair_upload: gr.Column(visible=True),

 from email.mime.multipart import MIMEMultipart
 from email.mime.text import MIMEText
 from email.utils import formatdate, make_msgid
+from functools import cache
 from math import pi
 from time import sleep, time
 from uuid import uuid4
 from markdown import markdown
 from rdkit import Chem
 from rdkit.Chem import AllChem, Draw, RDConfig, PandasTools, Descriptors, rdMolDescriptors, rdmolops, Lipinski, Crippen
+from rdkit.Chem.Features.ShowFeats import _featColors
 from rdkit.Chem.Scaffolds import MurckoScaffold
+import py3Dmol
 from bokeh.models import Legend, NumberFormatter, BooleanFormatter, HTMLTemplateFormatter, LegendItem
 from bokeh.palettes import Category20c_20
 from bokeh.plotting import figure
 from bokeh.transform import cumsum
 from bokeh.resources import INLINE
+import seaborn as sns
 import panel as pn
 from apscheduler.schedulers.background import BackgroundScheduler
 """
+class View3DmolCell(py3Dmol.view):
+    def __init__(self, width=640, height=480):
+        divid = "3dmolviewer_UNIQUEID"
+        self.uniqueid = None
+        if type(width) == int:
+            width = '%dpx'%width
+        if type(height) == int:
+            height = '%dpx'%height
+        self.startjs = '''<div id="%s"  style="position: relative; width: %s; height: %s;">
+            </div>\n''' % (divid, width, height)
+        self.startjs += '<script>\n'
+        self.endjs = '</script>'
+        self.updatejs = ''
+        self.viewergrid = None
+        self.startjs += 'viewer_UNIQUEID = $3Dmol.createViewer(document.getElementById("%s"),{backgroundColor:"white"});\n' % divid
+        self.startjs += "viewer_UNIQUEID.zoomTo();\n"
+        self.endjs = "viewer_UNIQUEID.render();\n" + self.endjs
+FEAT_FACTORY = AllChem.BuildFeatureFactory(os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))  # module-level feature factory loaded from BaseFeatures.fdef in RDKit's data directory
+def rgb_to_hex(rgb):
+    rgb = tuple(round(i * 255) for i in rgb)
+    return '#{:02x}{:02x}{:02x}'.format(rgb[0], rgb[1], rgb[2])
+def mol_to_pharm3d(mol, mode='html'):
+    AllChem.Compute2DCoords(mol)
+    feat_factory = AllChem.BuildFeatureFactory(os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
+    feats = feat_factory.GetFeaturesForMol(mol)
+    view = View3DmolCell(width=400, height=400)
+    for feat in feats:
+        pos = feat.GetPos()
+        color = _featColors.get(feat.GetFamily(), (.5, .5, .5))
+        view.addSphere({
+            'center': {'x': pos.x,'y': pos.y,'z': pos.z},
+            'radius': 0.5,
+            'color': rgb_to_hex(color)
+        })
+    mol_block = Chem.MolToMolBlock(mol)
+    view.addModel(mol_block, 'sdf')
+    view.setStyle({'stick': {}})
+    view.zoomTo()
+    match mode:
+        case 'html':
+            return view.write_html()
+        # case 'png':
+        #     return view.png()
 class HelpTip:
     def __new__(cls, text):
         return gr.HTML(
             def max_sim(smiles):
                 return max_tanimoto_similarity(smiles, seen_smiles_with_fp=pos_compounds_df)
+            prediction_df[['Max. Tanimoto Similarity to Known Target Ligands',
                            'Max. Tanimoto Similarity Target Ligand']] = (
                 prediction_df['X1'].parallel_apply(max_sim).apply(pd.Series)
             )
                 compound_targets = df_training.loc[df_training['X1'] == compound]
                 return max_sequence_identity(x2, seen_fastas=compound_targets)
+            prediction_df[['Max. Sequence Identity to Known Ligand Targets',
                            'Max. Sequence Identity Ligand Target']] = (
                 prediction_df['X1^'].parallel_apply(calculate_max_sequence_identity).apply(pd.Series)
             )
                 f'<div style="overflow:auto; height: 300px; font-family: Courier !important;">{table_html}</div>')
     else:
+        if 'Exclude Pharmacophore 3D' not in opts:
+            df_html['Pharmacophore'] = df_html['Compound'].parallel_apply(
+                lambda x: mol_to_pharm3d(x, mode='html') if not pd.isna(x) else x)
         image_zoom_formatter = HTMLTemplateFormatter(template='<div class="image-zoom-viewer"><%= value %></div>')
         uniprot_id_formatter = HTMLTemplateFormatter(
             template='<% if (value == value) { '  # Check if value is not NaN
             'Interaction Probability': {'type': 'progress', 'max': 1.0, 'legend': True},
             'Compound': image_zoom_formatter,
             'Scaffold': image_zoom_formatter,
+            'Pharmacophore': {'type': 'executeScriptFormatter'},
             'Target FASTA': {'type': 'textarea', 'width': 60},
             'Target ID': uniprot_id_formatter,
             'Compound ID': pubchem_id_formatter,
+            'Max. Tanimoto Similarity Target Ligand': uniprot_id_formatter,
+            'Max. Sequence Identity Ligand Target': pubchem_id_formatter,
         }
         formatters = {**bool_formatters, **float_formatters, **other_formatters}
         report_table = pn.widgets.Tabulator(
             df_html, formatters=formatters,
             frozen_columns=[col for col in df_html.columns if col in [
+                'Target ID', 'Compound ID', 'Compound', 'Scaffold', 'Pharmacophore'
             ]],
             disabled=True, sizing_mode='stretch_both', pagination='local', page_size=30)
                     infer_csv_prompt: gr.Button(visible=False),
                     infer_library_prompt: gr.Button(visible=True),
                 }
             case "Upload a CSV file containing paired compound-protein data":
                 return {
                     pair_upload: gr.Column(visible=True),