libokj committed on
Commit
489390d
·
verified ·
1 Parent(s): b5f72e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -8
app.py CHANGED
@@ -6,7 +6,7 @@ import textwrap
6
  from email.mime.multipart import MIMEMultipart
7
  from email.mime.text import MIMEText
8
  from email.utils import formatdate, make_msgid
9
- from functools import cache, partial
10
  from math import pi
11
  from time import sleep, time
12
  from uuid import uuid4
@@ -30,14 +30,16 @@ from requests.adapters import HTTPAdapter, Retry
30
  from markdown import markdown
31
  from rdkit import Chem
32
  from rdkit.Chem import AllChem, Draw, RDConfig, PandasTools, Descriptors, rdMolDescriptors, rdmolops, Lipinski, Crippen
 
33
  from rdkit.Chem.Scaffolds import MurckoScaffold
34
- import seaborn as sns
35
 
36
  from bokeh.models import Legend, NumberFormatter, BooleanFormatter, HTMLTemplateFormatter, LegendItem
37
  from bokeh.palettes import Category20c_20
38
  from bokeh.plotting import figure
39
  from bokeh.transform import cumsum
40
  from bokeh.resources import INLINE
 
41
  import panel as pn
42
 
43
  from apscheduler.schedulers.background import BackgroundScheduler
@@ -158,6 +160,63 @@ visibility: hidden
158
  """
159
 
160
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
  class HelpTip:
162
  def __new__(cls, text):
163
  return gr.HTML(
@@ -917,7 +976,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
917
  def max_sim(smiles):
918
  return max_tanimoto_similarity(smiles, seen_smiles_with_fp=pos_compounds_df)
919
 
920
- prediction_df[['Max. Tanimoto Similarity to Known Target Ligands',
921
  'Max. Tanimoto Similarity Target Ligand']] = (
922
  prediction_df['X1'].parallel_apply(max_sim).apply(pd.Series)
923
  )
@@ -932,7 +991,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
932
  compound_targets = df_training.loc[df_training['X1'] == compound]
933
  return max_sequence_identity(x2, seen_fastas=compound_targets)
934
 
935
- prediction_df[['Max. Sequence Identity to Known Ligand Targets',
936
  'Max. Sequence Identity Ligand Target']] = (
937
  prediction_df['X1^'].parallel_apply(calculate_max_sequence_identity).apply(pd.Series)
938
  )
@@ -1147,6 +1206,10 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
1147
  f'<div style="overflow:auto; height: 300px; font-family: Courier !important;">{table_html}</div>')
1148
 
1149
  else:
 
 
 
 
1150
  image_zoom_formatter = HTMLTemplateFormatter(template='<div class="image-zoom-viewer"><%= value %></div>')
1151
  uniprot_id_formatter = HTMLTemplateFormatter(
1152
  template='<% if (value == value) { ' # Check if value is not NaN
@@ -1167,11 +1230,12 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
1167
  'Interaction Probability': {'type': 'progress', 'max': 1.0, 'legend': True},
1168
  'Compound': image_zoom_formatter,
1169
  'Scaffold': image_zoom_formatter,
 
1170
  'Target FASTA': {'type': 'textarea', 'width': 60},
1171
  'Target ID': uniprot_id_formatter,
1172
  'Compound ID': pubchem_id_formatter,
1173
- 'Max. Sequence Identity Target': uniprot_id_formatter,
1174
- 'Max. Tanimoto Similarity Compound': pubchem_id_formatter,
1175
  }
1176
  formatters = {**bool_formatters, **float_formatters, **other_formatters}
1177
 
@@ -1181,7 +1245,7 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
1181
  report_table = pn.widgets.Tabulator(
1182
  df_html, formatters=formatters,
1183
  frozen_columns=[col for col in df_html.columns if col in [
1184
- 'Target ID', 'Compound ID', 'Compound', 'Scaffold'
1185
  ]],
1186
  disabled=True, sizing_mode='stretch_both', pagination='local', page_size=30)
1187
 
@@ -2215,7 +2279,6 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
2215
  infer_csv_prompt: gr.Button(visible=False),
2216
  infer_library_prompt: gr.Button(visible=True),
2217
  }
2218
- match upload_type:
2219
  case "Upload a CSV file containing paired compound-protein data":
2220
  return {
2221
  pair_upload: gr.Column(visible=True),
 
6
  from email.mime.multipart import MIMEMultipart
7
  from email.mime.text import MIMEText
8
  from email.utils import formatdate, make_msgid
9
+ from functools import cache
10
  from math import pi
11
  from time import sleep, time
12
  from uuid import uuid4
 
30
  from markdown import markdown
31
  from rdkit import Chem
32
  from rdkit.Chem import AllChem, Draw, RDConfig, PandasTools, Descriptors, rdMolDescriptors, rdmolops, Lipinski, Crippen
33
+ from rdkit.Chem.Features.ShowFeats import _featColors
34
  from rdkit.Chem.Scaffolds import MurckoScaffold
35
+ import py3Dmol
36
 
37
  from bokeh.models import Legend, NumberFormatter, BooleanFormatter, HTMLTemplateFormatter, LegendItem
38
  from bokeh.palettes import Category20c_20
39
  from bokeh.plotting import figure
40
  from bokeh.transform import cumsum
41
  from bokeh.resources import INLINE
42
+ import seaborn as sns
43
  import panel as pn
44
 
45
  from apscheduler.schedulers.background import BackgroundScheduler
 
160
  """
161
 
162
 
163
class View3DmolCell(py3Dmol.view):
    """A ``py3Dmol.view`` variant whose HTML is a bare viewer ``<div>`` plus a script.

    The parent constructor is not invoked; the attributes assigned below
    (``uniqueid``, ``startjs``, ``endjs``, ``updatejs``, ``viewergrid``) are
    set directly instead. The generated markup refers to a global ``$3Dmol``
    object and does not load the 3Dmol.js library itself.

    NOTE(review): the ``UNIQUEID`` placeholders are presumably substituted by
    py3Dmol when the HTML is generated -- confirm against the installed
    py3Dmol version.
    """

    def __init__(self, width=640, height=480):
        """Build the opening and closing JavaScript fragments for one viewer.

        Args:
            width: Viewer width. An ``int`` is treated as pixels; any other
                value is interpolated into the CSS as-is (e.g. ``'100%'``).
            height: Viewer height, handled the same way as ``width``.
        """
        divid = "3dmolviewer_UNIQUEID"
        self.uniqueid = None
        # isinstance (rather than an exact type comparison) so that int
        # subclasses are also converted to a pixel size.
        if isinstance(width, int):
            width = '%dpx' % width
        if isinstance(height, int):
            height = '%dpx' % height
        self.startjs = '''<div id="%s" style="position: relative; width: %s; height: %s;">
        </div>\n''' % (divid, width, height)
        self.startjs += '<script>\n'
        self.endjs = '</script>'

        self.updatejs = ''
        self.viewergrid = None

        # Create the viewer inside the div above; render() is prepended to
        # endjs so it runs after any commands emitted between startjs and endjs.
        self.startjs += 'viewer_UNIQUEID = $3Dmol.createViewer(document.getElementById("%s"),{backgroundColor:"white"});\n' % divid
        self.startjs += "viewer_UNIQUEID.zoomTo();\n"
        self.endjs = "viewer_UNIQUEID.render();\n" + self.endjs
182
+
183
+
184
# RDKit chemical-feature factory loaded from the BaseFeatures.fdef definitions found under RDConfig.RDDataDir.
+ FEAT_FACTORY = AllChem.BuildFeatureFactory(os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
185
+
186
+
187
def rgb_to_hex(rgb):
    """Convert an RGB colour with float components to a ``#rrggbb`` string.

    Each component is scaled by 255 and rounded. Results are clamped to
    0..255 so that every channel is always exactly two hex digits, even for
    inputs slightly outside the nominal [0, 1] range. Only the first three
    components are used, so a trailing alpha value is ignored.

    Args:
        rgb: Iterable of at least three numbers, nominally in [0, 1].

    Returns:
        The colour as a lowercase ``'#rrggbb'`` string.

    Raises:
        IndexError: If ``rgb`` has fewer than three components.
    """
    channels = [min(255, max(0, round(component * 255))) for component in rgb]
    return '#{:02x}{:02x}{:02x}'.format(channels[0], channels[1], channels[2])
190
+
191
+
192
def mol_to_pharm3d(mol, mode='html'):
    """Render a molecule and its pharmacophore features as a 3Dmol.js view.

    2D coordinates are computed on ``mol`` in place, one sphere is added per
    feature found by the BaseFeatures factory (coloured by feature family,
    grey when the family has no entry in ``_featColors``), and the molecule
    itself is drawn as sticks.

    Args:
        mol: RDKit molecule to display. It is modified: its coordinates are
            replaced by ``AllChem.Compute2DCoords``.
        mode: Output format. Only ``'html'`` is supported.

    Returns:
        The value of ``view.write_html()`` for the assembled view.

    Raises:
        ValueError: If ``mode`` is not ``'html'``. Previously an unknown mode
            silently returned ``None``.
    """
    # TODO: a 'png' mode (view.png()) was sketched here but never enabled.
    if mode != 'html':
        raise ValueError(f"Unsupported mode {mode!r}; expected 'html'")

    AllChem.Compute2DCoords(mol)

    # NOTE(review): the factory is rebuilt on every call although a
    # FEAT_FACTORY constant exists in this file; confirm whether sharing it
    # is safe under parallel_apply before reusing it here.
    feat_factory = AllChem.BuildFeatureFactory(os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
    feats = feat_factory.GetFeaturesForMol(mol)

    view = View3DmolCell(width=400, height=400)
    for feat in feats:
        pos = feat.GetPos()
        color = _featColors.get(feat.GetFamily(), (.5, .5, .5))
        view.addSphere({
            'center': {'x': pos.x, 'y': pos.y, 'z': pos.z},
            'radius': 0.5,
            'color': rgb_to_hex(color),
        })

    mol_block = Chem.MolToMolBlock(mol)
    view.addModel(mol_block, 'sdf')
    view.setStyle({'stick': {}})
    view.zoomTo()

    return view.write_html()
218
+
219
+
220
  class HelpTip:
221
  def __new__(cls, text):
222
  return gr.HTML(
 
976
  def max_sim(smiles):
977
  return max_tanimoto_similarity(smiles, seen_smiles_with_fp=pos_compounds_df)
978
 
979
+ prediction_df[['Max. Tanimoto Similarity to Known Target Ligands',
980
  'Max. Tanimoto Similarity Target Ligand']] = (
981
  prediction_df['X1'].parallel_apply(max_sim).apply(pd.Series)
982
  )
 
991
  compound_targets = df_training.loc[df_training['X1'] == compound]
992
  return max_sequence_identity(x2, seen_fastas=compound_targets)
993
 
994
+ prediction_df[['Max. Sequence Identity to Known Ligand Targets',
995
  'Max. Sequence Identity Ligand Target']] = (
996
  prediction_df['X1^'].parallel_apply(calculate_max_sequence_identity).apply(pd.Series)
997
  )
 
1206
  f'<div style="overflow:auto; height: 300px; font-family: Courier !important;">{table_html}</div>')
1207
 
1208
  else:
1209
+ if 'Exclude Pharmacophore 3D' not in opts:
1210
+ df_html['Pharmacophore'] = df_html['Compound'].parallel_apply(
1211
+ lambda x: mol_to_pharm3d(x, mode='html') if not pd.isna(x) else x)
1212
+
1213
  image_zoom_formatter = HTMLTemplateFormatter(template='<div class="image-zoom-viewer"><%= value %></div>')
1214
  uniprot_id_formatter = HTMLTemplateFormatter(
1215
  template='<% if (value == value) { ' # Check if value is not NaN
 
1230
  'Interaction Probability': {'type': 'progress', 'max': 1.0, 'legend': True},
1231
  'Compound': image_zoom_formatter,
1232
  'Scaffold': image_zoom_formatter,
1233
+ 'Pharmacophore': {'type': 'executeScriptFormatter'},
1234
  'Target FASTA': {'type': 'textarea', 'width': 60},
1235
  'Target ID': uniprot_id_formatter,
1236
  'Compound ID': pubchem_id_formatter,
1237
+ 'Max. Tanimoto Similarity Target Ligand': uniprot_id_formatter,
1238
+ 'Max. Sequence Identity Ligand Target': pubchem_id_formatter,
1239
  }
1240
  formatters = {**bool_formatters, **float_formatters, **other_formatters}
1241
 
 
1245
  report_table = pn.widgets.Tabulator(
1246
  df_html, formatters=formatters,
1247
  frozen_columns=[col for col in df_html.columns if col in [
1248
+ 'Target ID', 'Compound ID', 'Compound', 'Scaffold', 'Pharmacophore'
1249
  ]],
1250
  disabled=True, sizing_mode='stretch_both', pagination='local', page_size=30)
1251
 
 
2279
  infer_csv_prompt: gr.Button(visible=False),
2280
  infer_library_prompt: gr.Button(visible=True),
2281
  }
 
2282
  case "Upload a CSV file containing paired compound-protein data":
2283
  return {
2284
  pair_upload: gr.Column(visible=True),