Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Update app.py
Browse files
app.py
CHANGED
@@ -6,7 +6,7 @@ import textwrap
|
|
6 |
from email.mime.multipart import MIMEMultipart
|
7 |
from email.mime.text import MIMEText
|
8 |
from email.utils import formatdate, make_msgid
|
9 |
-
from functools import cache
|
10 |
from math import pi
|
11 |
from time import sleep, time
|
12 |
from uuid import uuid4
|
@@ -30,14 +30,16 @@ from requests.adapters import HTTPAdapter, Retry
|
|
30 |
from markdown import markdown
|
31 |
from rdkit import Chem
|
32 |
from rdkit.Chem import AllChem, Draw, RDConfig, PandasTools, Descriptors, rdMolDescriptors, rdmolops, Lipinski, Crippen
|
|
|
33 |
from rdkit.Chem.Scaffolds import MurckoScaffold
|
34 |
-
import
|
35 |
|
36 |
from bokeh.models import Legend, NumberFormatter, BooleanFormatter, HTMLTemplateFormatter, LegendItem
|
37 |
from bokeh.palettes import Category20c_20
|
38 |
from bokeh.plotting import figure
|
39 |
from bokeh.transform import cumsum
|
40 |
from bokeh.resources import INLINE
|
|
|
41 |
import panel as pn
|
42 |
|
43 |
from apscheduler.schedulers.background import BackgroundScheduler
|
@@ -158,6 +160,63 @@ visibility: hidden
|
|
158 |
"""
|
159 |
|
160 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
161 |
class HelpTip:
|
162 |
def __new__(cls, text):
|
163 |
return gr.HTML(
|
@@ -917,7 +976,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
|
|
917 |
def max_sim(smiles):
|
918 |
return max_tanimoto_similarity(smiles, seen_smiles_with_fp=pos_compounds_df)
|
919 |
|
920 |
-
prediction_df[['Max. Tanimoto Similarity to Known Target Ligands',
|
921 |
'Max. Tanimoto Similarity Target Ligand']] = (
|
922 |
prediction_df['X1'].parallel_apply(max_sim).apply(pd.Series)
|
923 |
)
|
@@ -932,7 +991,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
|
|
932 |
compound_targets = df_training.loc[df_training['X1'] == compound]
|
933 |
return max_sequence_identity(x2, seen_fastas=compound_targets)
|
934 |
|
935 |
-
prediction_df[['Max. Sequence Identity to Known Ligand Targets',
|
936 |
'Max. Sequence Identity Ligand Target']] = (
|
937 |
prediction_df['X1^'].parallel_apply(calculate_max_sequence_identity).apply(pd.Series)
|
938 |
)
|
@@ -1147,6 +1206,10 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
|
|
1147 |
f'<div style="overflow:auto; height: 300px; font-family: Courier !important;">{table_html}</div>')
|
1148 |
|
1149 |
else:
|
|
|
|
|
|
|
|
|
1150 |
image_zoom_formatter = HTMLTemplateFormatter(template='<div class="image-zoom-viewer"><%= value %></div>')
|
1151 |
uniprot_id_formatter = HTMLTemplateFormatter(
|
1152 |
template='<% if (value == value) { ' # Check if value is not NaN
|
@@ -1167,11 +1230,12 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
|
|
1167 |
'Interaction Probability': {'type': 'progress', 'max': 1.0, 'legend': True},
|
1168 |
'Compound': image_zoom_formatter,
|
1169 |
'Scaffold': image_zoom_formatter,
|
|
|
1170 |
'Target FASTA': {'type': 'textarea', 'width': 60},
|
1171 |
'Target ID': uniprot_id_formatter,
|
1172 |
'Compound ID': pubchem_id_formatter,
|
1173 |
-
'Max.
|
1174 |
-
'Max.
|
1175 |
}
|
1176 |
formatters = {**bool_formatters, **float_formatters, **other_formatters}
|
1177 |
|
@@ -1181,7 +1245,7 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
|
|
1181 |
report_table = pn.widgets.Tabulator(
|
1182 |
df_html, formatters=formatters,
|
1183 |
frozen_columns=[col for col in df_html.columns if col in [
|
1184 |
-
'Target ID', 'Compound ID', 'Compound', 'Scaffold'
|
1185 |
]],
|
1186 |
disabled=True, sizing_mode='stretch_both', pagination='local', page_size=30)
|
1187 |
|
@@ -2215,7 +2279,6 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
2215 |
infer_csv_prompt: gr.Button(visible=False),
|
2216 |
infer_library_prompt: gr.Button(visible=True),
|
2217 |
}
|
2218 |
-
match upload_type:
|
2219 |
case "Upload a CSV file containing paired compound-protein data":
|
2220 |
return {
|
2221 |
pair_upload: gr.Column(visible=True),
|
|
|
6 |
from email.mime.multipart import MIMEMultipart
|
7 |
from email.mime.text import MIMEText
|
8 |
from email.utils import formatdate, make_msgid
|
9 |
+
from functools import cache
|
10 |
from math import pi
|
11 |
from time import sleep, time
|
12 |
from uuid import uuid4
|
|
|
30 |
from markdown import markdown
|
31 |
from rdkit import Chem
|
32 |
from rdkit.Chem import AllChem, Draw, RDConfig, PandasTools, Descriptors, rdMolDescriptors, rdmolops, Lipinski, Crippen
|
33 |
+
from rdkit.Chem.Features.ShowFeats import _featColors
|
34 |
from rdkit.Chem.Scaffolds import MurckoScaffold
|
35 |
+
import py3Dmol
|
36 |
|
37 |
from bokeh.models import Legend, NumberFormatter, BooleanFormatter, HTMLTemplateFormatter, LegendItem
|
38 |
from bokeh.palettes import Category20c_20
|
39 |
from bokeh.plotting import figure
|
40 |
from bokeh.transform import cumsum
|
41 |
from bokeh.resources import INLINE
|
42 |
+
import seaborn as sns
|
43 |
import panel as pn
|
44 |
|
45 |
from apscheduler.schedulers.background import BackgroundScheduler
|
|
|
160 |
"""
|
161 |
|
162 |
|
163 |
+
class View3DmolCell(py3Dmol.view):
    """A ``py3Dmol.view`` variant that builds its own ``<div>`` + ``<script>`` markup.

    The constructor does not call ``py3Dmol.view.__init__``; it only sets the
    attributes assigned below (``uniqueid``, ``startjs``, ``endjs``,
    ``updatejs``, ``viewergrid``).  The literal token ``UNIQUEID`` is left in
    the generated markup as a placeholder.
    NOTE(review): presumably py3Dmol substitutes ``UNIQUEID`` when the view is
    rendered/written -- confirm against the installed py3Dmol version.
    """

    def __init__(self, width=640, height=480):
        """Prepare the opening and closing JavaScript for one viewer.

        Args:
            width: Viewer width.  An ``int`` is treated as a pixel count; any
                other value is inserted into the CSS ``width`` as given.
            height: Viewer height, handled the same way as ``width``.
        """
        divid = "3dmolviewer_UNIQUEID"
        self.uniqueid = None

        # Bare integers are pixel counts; CSS strings such as '100%' pass through.
        if isinstance(width, int):
            width = '%dpx' % width
        if isinstance(height, int):
            height = '%dpx' % height

        # Container element the viewer is attached to, followed by the script
        # that creates the viewer inside it.
        self.startjs = (
            '<div id="%s" style="position: relative; width: %s; height: %s;">\n'
            '</div>\n'
        ) % (divid, width, height)
        self.startjs += '<script>\n'
        self.endjs = '</script>'

        self.updatejs = ''
        self.viewergrid = None

        self.startjs += (
            'viewer_UNIQUEID = $3Dmol.createViewer('
            'document.getElementById("%s"),{backgroundColor:"white"});\n' % divid
        )
        self.startjs += "viewer_UNIQUEID.zoomTo();\n"
        # render() must run after every command queued between startjs and
        # endjs, so it is placed at the front of the closing snippet.
        self.endjs = "viewer_UNIQUEID.render();\n" + self.endjs
|
182 |
+
|
183 |
+
|
184 |
+
# Chemical-feature factory built once at import time from the
# 'BaseFeatures.fdef' definition file located under RDConfig.RDDataDir.
# NOTE(review): mol_to_pharm3d builds its own factory from the same path and
# this constant is not referenced in the visible hunks -- confirm it is used.
FEAT_FACTORY = AllChem.BuildFeatureFactory(os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
|
185 |
+
|
186 |
+
|
187 |
+
def rgb_to_hex(rgb):
    """Convert an RGB colour with float channels into a ``#rrggbb`` hex string.

    Args:
        rgb: Iterable of at least three numbers (red, green, blue), each
            nominally in the range 0.0-1.0.  Components after the third are
            ignored.

    Returns:
        A lowercase seven-character string such as ``'#808080'``.

    Raises:
        IndexError: If ``rgb`` has fewer than three components.
    """
    # Scale to 0..255 and clamp: an out-of-range channel would otherwise be
    # formatted as a negative number or as three hex digits, yielding a string
    # that is not a valid colour.
    channels = tuple(min(255, max(0, round(c * 255))) for c in rgb)
    return '#{:02x}{:02x}{:02x}'.format(channels[0], channels[1], channels[2])
|
190 |
+
|
191 |
+
|
192 |
+
def mol_to_pharm3d(mol, mode='html'):
    """Render a molecule and its pharmacophore features in a 3Dmol.js viewer.

    A 2D layout is computed for a copy of the molecule, one sphere per
    chemical feature is added at the feature's position (coloured by feature
    family, grey when the family has no entry in ``_featColors``), and the
    molecule itself is added as a stick model.

    Args:
        mol: RDKit molecule to display.  It is copied before coordinates are
            computed, so the caller's object keeps its existing conformers.
        mode: Output format.  Only ``'html'`` is supported.

    Returns:
        The result of ``view.write_html()`` for the populated viewer.

    Raises:
        ValueError: If ``mode`` is not ``'html'``.
    """
    # Reject unsupported modes before doing any work; previously such calls
    # built the whole view and then silently returned None.
    if mode != 'html':
        raise ValueError(f"Unsupported mode {mode!r}; only 'html' is available.")

    # Compute2DCoords writes coordinates onto the molecule it is given, so
    # operate on a copy rather than on the caller's object.
    mol = Chem.Mol(mol)
    AllChem.Compute2DCoords(mol)

    # NOTE(review): a module-level FEAT_FACTORY is built from the same .fdef
    # path; reusing it here would avoid re-parsing the file on every call --
    # confirm that is safe with the parallel_apply workers before switching.
    feat_factory = AllChem.BuildFeatureFactory(os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
    feats = feat_factory.GetFeaturesForMol(mol)

    view = View3DmolCell(width=400, height=400)
    for feat in feats:
        pos = feat.GetPos()
        color = _featColors.get(feat.GetFamily(), (.5, .5, .5))
        view.addSphere({
            'center': {'x': pos.x, 'y': pos.y, 'z': pos.z},
            'radius': 0.5,
            'color': rgb_to_hex(color),
        })

    mol_block = Chem.MolToMolBlock(mol)
    view.addModel(mol_block, 'sdf')
    view.setStyle({'stick': {}})
    view.zoomTo()

    return view.write_html()
|
218 |
+
|
219 |
+
|
220 |
class HelpTip:
|
221 |
def __new__(cls, text):
|
222 |
return gr.HTML(
|
|
|
976 |
def max_sim(smiles):
|
977 |
return max_tanimoto_similarity(smiles, seen_smiles_with_fp=pos_compounds_df)
|
978 |
|
979 |
+
prediction_df[['Max. Tanimoto Similarity to Known Target Ligands',
|
980 |
'Max. Tanimoto Similarity Target Ligand']] = (
|
981 |
prediction_df['X1'].parallel_apply(max_sim).apply(pd.Series)
|
982 |
)
|
|
|
991 |
compound_targets = df_training.loc[df_training['X1'] == compound]
|
992 |
return max_sequence_identity(x2, seen_fastas=compound_targets)
|
993 |
|
994 |
+
prediction_df[['Max. Sequence Identity to Known Ligand Targets',
|
995 |
'Max. Sequence Identity Ligand Target']] = (
|
996 |
prediction_df['X1^'].parallel_apply(calculate_max_sequence_identity).apply(pd.Series)
|
997 |
)
|
|
|
1206 |
f'<div style="overflow:auto; height: 300px; font-family: Courier !important;">{table_html}</div>')
|
1207 |
|
1208 |
else:
|
1209 |
+
if 'Exclude Pharmacophore 3D' not in opts:
|
1210 |
+
df_html['Pharmacophore'] = df_html['Compound'].parallel_apply(
|
1211 |
+
lambda x: mol_to_pharm3d(x, mode='html') if not pd.isna(x) else x)
|
1212 |
+
|
1213 |
image_zoom_formatter = HTMLTemplateFormatter(template='<div class="image-zoom-viewer"><%= value %></div>')
|
1214 |
uniprot_id_formatter = HTMLTemplateFormatter(
|
1215 |
template='<% if (value == value) { ' # Check if value is not NaN
|
|
|
1230 |
'Interaction Probability': {'type': 'progress', 'max': 1.0, 'legend': True},
|
1231 |
'Compound': image_zoom_formatter,
|
1232 |
'Scaffold': image_zoom_formatter,
|
1233 |
+
'Pharmacophore': {'type': 'executeScriptFormatter'},
|
1234 |
'Target FASTA': {'type': 'textarea', 'width': 60},
|
1235 |
'Target ID': uniprot_id_formatter,
|
1236 |
'Compound ID': pubchem_id_formatter,
|
1237 |
+
'Max. Tanimoto Similarity Target Ligand': uniprot_id_formatter,
|
1238 |
+
'Max. Sequence Identity Ligand Target': pubchem_id_formatter,
|
1239 |
}
|
1240 |
formatters = {**bool_formatters, **float_formatters, **other_formatters}
|
1241 |
|
|
|
1245 |
report_table = pn.widgets.Tabulator(
|
1246 |
df_html, formatters=formatters,
|
1247 |
frozen_columns=[col for col in df_html.columns if col in [
|
1248 |
+
'Target ID', 'Compound ID', 'Compound', 'Scaffold', 'Pharmacophore'
|
1249 |
]],
|
1250 |
disabled=True, sizing_mode='stretch_both', pagination='local', page_size=30)
|
1251 |
|
|
|
2279 |
infer_csv_prompt: gr.Button(visible=False),
|
2280 |
infer_library_prompt: gr.Button(visible=True),
|
2281 |
}
|
|
|
2282 |
case "Upload a CSV file containing paired compound-protein data":
|
2283 |
return {
|
2284 |
pair_upload: gr.Column(visible=True),
|