Spaces:
Sleeping
Sleeping
from datetime import datetime | |
from email.mime.multipart import MIMEMultipart | |
from email.mime.text import MIMEText | |
from email.utils import formatdate, make_msgid | |
from functools import cache | |
import os | |
from pathlib import Path | |
import smtplib | |
import sys | |
import tempfile | |
import pandas as pd | |
from bokeh.models import NumberFormatter, BooleanFormatter, HTMLTemplateFormatter | |
import gradio as gr | |
import pytz | |
import panel as pn | |
import seaborn as sns | |
from markdown import markdown | |
from rdkit import Chem, RDConfig | |
from rdkit.Chem import PandasTools, Crippen, Descriptors, rdMolDescriptors, Lipinski, rdmolops | |
import requests | |
from app import static | |
sys.path.append(str(Path(RDConfig.RDContribDir) / 'SA_Score')) | |
import sascorer | |
def lipinski(mol):
    """
    Check a molecule against Lipinski's Rule of Five.

    A molecule passes only when all of the following hold:
        Hydrogen bond donors <= 5
        Hydrogen bond acceptors <= 10
        Molecular weight <= 500 daltons
        logP <= 5

    Returns True when every criterion is met, otherwise False.
    """
    # Each guard bails out as soon as one criterion fails, so later
    # descriptors are only computed when the earlier ones passed.
    if not Lipinski.NumHDonors(mol) <= 5:
        return False
    if not Lipinski.NumHAcceptors(mol) <= 10:
        return False
    if not Descriptors.MolWt(mol) <= 500:
        return False
    return Crippen.MolLogP(mol) <= 5
def reos(mol):
    """
    Apply the Rapid Elimination Of Swill (REOS) filter.

    A molecule passes only when all of the following hold:
        Molecular weight between 200 and 500
        LogP between -5.0 and +5.0
        H-bond donor count between 0 and 5
        H-bond acceptor count between 0 and 10
        Formal charge between -2 and +2
        Rotatable bond count between 0 and 8
        Heavy atom count between 15 and 50

    Returns True when every criterion is met, otherwise False.
    """
    # Guards are evaluated in the listed order and stop at the first failure.
    if not 200 <= Descriptors.MolWt(mol) <= 500:
        return False
    if not -5.0 <= Crippen.MolLogP(mol) <= 5.0:
        return False
    if not 0 <= Lipinski.NumHDonors(mol) <= 5:
        return False
    if not 0 <= Lipinski.NumHAcceptors(mol) <= 10:
        return False
    if not -2 <= rdmolops.GetFormalCharge(mol) <= 2:
        return False
    if not 0 <= rdMolDescriptors.CalcNumRotatableBonds(mol) <= 8:
        return False
    return 15 <= rdMolDescriptors.CalcNumHeavyAtoms(mol) <= 50
def ghose(mol):
    """
    Apply the Ghose drug-likeness filter.

    A molecule passes only when all of the following hold:
        Molecular weight between 160 and 480
        LogP between -0.4 and +5.6
        Atom count between 20 and 70
        Molar refractivity between 40 and 130

    Returns True when every criterion is met, otherwise False.
    """
    # Guards are evaluated in the listed order and stop at the first failure.
    if not 160 <= Descriptors.MolWt(mol) <= 480:
        return False
    if not -0.4 <= Crippen.MolLogP(mol) <= 5.6:
        return False
    if not 20 <= rdMolDescriptors.CalcNumAtoms(mol) <= 70:
        return False
    return 40 <= Crippen.MolMR(mol) <= 130
def veber(mol):
    """
    Apply the Veber rule-of-thumb filter for orally active drugs
    (Veber et al., J Med Chem. 2002; 45(12): 2615-23).

    A molecule passes only when both of the following hold:
        Rotatable bonds <= 10
        Topological polar surface area <= 140

    Returns True when both criteria are met, otherwise False.
    """
    few_rotatable_bonds = rdMolDescriptors.CalcNumRotatableBonds(mol) <= 10
    # TPSA is only computed when the rotatable-bond criterion passed.
    return few_rotatable_bonds and rdMolDescriptors.CalcTPSA(mol) <= 140
def rule_of_three(mol):
    """
    Apply the Rule of Three filter
    (Congreve et al., Drug Discov. Today. 8 (19): 876–7, (2003)).

    A molecule passes only when all of the following hold:
        Molecular weight <= 300
        LogP <= 3
        H-bond donor count <= 3
        H-bond acceptor count <= 3
        Rotatable bond count <= 3

    Returns True when every criterion is met, otherwise False.
    """
    # Guards are evaluated in the listed order and stop at the first failure.
    if not Descriptors.MolWt(mol) <= 300:
        return False
    if not Crippen.MolLogP(mol) <= 3:
        return False
    if not Lipinski.NumHDonors(mol) <= 3:
        return False
    if not Lipinski.NumHAcceptors(mol) <= 3:
        return False
    return rdMolDescriptors.CalcNumRotatableBonds(mol) <= 3
@cache
def load_smarts_patterns(smarts_path):
    """
    Load and compile the SMARTS patterns listed in a CSV file.

    The CSV must contain a ``smarts`` column. Each entry is compiled with
    ``Chem.MolFromSmarts``; entries that fail to compile are kept as ``None``.

    The result is memoized per ``smarts_path`` so the file is read and the
    patterns are compiled only once, instead of once per filtered molecule.
    Because the same list object is handed back on every call, callers must
    treat it as read-only. ``smarts_path`` must be hashable (a ``str`` or
    ``Path``).

    Returns:
        A list of compiled pattern molecules (or ``None`` placeholders), in
        file order.
    """
    # Load the CSV file containing SMARTS patterns
    smarts_df = pd.read_csv(Path(smarts_path))
    # Convert all SMARTS patterns to molecules
    return [Chem.MolFromSmarts(smarts) for smarts in smarts_df['smarts']]
def smarts_filter(mol, smarts_mols):
    """
    Return True when ``mol`` matches none of the given substructure patterns.

    ``None`` entries in ``smarts_mols`` are skipped. The scan stops at the
    first matching pattern, in which case False is returned.
    """
    has_hit = any(
        pattern is not None and mol.HasSubstructMatch(pattern)
        for pattern in smarts_mols
    )
    return not has_hit
def pains(mol):
    """Return True when ``mol`` matches no pattern from data/filters/pains.csv."""
    return smarts_filter(mol, load_smarts_patterns("data/filters/pains.csv"))
def mlsmr(mol):
    """Return True when ``mol`` matches no pattern from data/filters/mlsmr.csv."""
    return smarts_filter(mol, load_smarts_patterns("data/filters/mlsmr.csv"))
def dundee(mol):
    """Return True when ``mol`` matches no pattern from data/filters/dundee.csv."""
    return smarts_filter(mol, load_smarts_patterns("data/filters/dundee.csv"))
def glaxo(mol):
    """Return True when ``mol`` matches no pattern from data/filters/glaxo.csv."""
    return smarts_filter(mol, load_smarts_patterns("data/filters/glaxo.csv"))
def bms(mol):
    """Return True when ``mol`` matches no pattern from data/filters/bms.csv."""
    return smarts_filter(mol, load_smarts_patterns("data/filters/bms.csv"))
# Display name -> descriptor function. The RDKit entries are the same
# functions the filter predicates in this file call with a single molecule.
# NOTE(review): how this map is consumed is not visible here; insertion order
# is presumably the order shown to users, so keep it stable - confirm.
SCORE_MAP = {
    'SAscore': sascorer.calculateScore,  # from RDKit Contrib's SA_Score module
    'LogP': Crippen.MolLogP,
    'Molecular Weight': Descriptors.MolWt,
    'Number of Atoms': rdMolDescriptors.CalcNumAtoms,
    'Number of Heavy Atoms': rdMolDescriptors.CalcNumHeavyAtoms,
    'Molar Refractivity': Crippen.MolMR,
    'H-Bond Donor Count': Lipinski.NumHDonors,
    'H-Bond Acceptor Count': Lipinski.NumHAcceptors,
    'Rotatable Bond Count': rdMolDescriptors.CalcNumRotatableBonds,
    'Topological Polar Surface Area': rdMolDescriptors.CalcTPSA,
}
# Display name -> filter predicate defined in this file. Each predicate takes
# a molecule and returns True when the molecule passes the filter.
FILTER_MAP = {
    # TODO support number_of_violations
    # Property-range filters.
    'REOS': reos,
    "Lipinski's Rule of Five": lipinski,
    'Ghose': ghose,
    'Rule of Three': rule_of_three,
    'Veber': veber,
    # Substructure filters backed by SMARTS lists under data/filters/.
    'PAINS': pains,
    'MLSMR': mlsmr,
    'Dundee': dundee,
    'Glaxo': glaxo,
    'BMS': bms,
}
def validate_columns(df, mandatory_cols):
    """
    Ensure that every mandatory column is present in ``df``.

    Args:
        df: Object exposing a ``columns`` attribute (e.g. a DataFrame).
        mandatory_cols: Iterable of column names that must be present.

    Returns:
        None when all mandatory columns are present.

    Raises:
        ValueError: If at least one mandatory column is absent. The message
            lists only the columns that are actually missing.
    """
    missing_cols = [col for col in mandatory_cols if col not in df.columns]
    if missing_cols:
        # Report the missing columns, not the full mandatory list, so the
        # user can see exactly what has to be added to the dataset.
        raise ValueError(
            f"The following mandatory columns are missing "
            f"in the uploaded dataset: {str(missing_cols).strip('[]')}."
        )
def get_timezone_by_ip(ip, session):
    """
    Look up the timezone name for an IP address via worldtimeapi.org.

    ``session`` is any object with a ``get(url)`` method whose result has a
    ``json()`` method. This is a best-effort lookup: any failure (request
    error, bad payload, missing key) yields ``'UTC'``.
    """
    try:
        payload = session.get(f'https://worldtimeapi.org/api/ip/{ip}').json()
        tz_name = payload['timezone']
    except Exception:
        tz_name = 'UTC'
    return tz_name
def ts_to_str(timestamp, timezone):
    """
    Render a UNIX timestamp as a string in the named timezone.

    The timestamp is interpreted as UTC, converted to ``timezone`` (a name
    accepted by ``pytz.timezone``) and formatted as
    ``YYYY-MM-DD HH:MM:SS (TZNAME+offset)``.
    """
    utc_moment = datetime.fromtimestamp(timestamp, pytz.utc)
    local_moment = utc_moment.astimezone(pytz.timezone(timezone))
    return local_moment.strftime('%Y-%m-%d %H:%M:%S (%Z%z)')
def send_email(job_info):
    """
    Send a job notification email to ``job_info['email']``, if one is set.

    SMTP settings and the message templates are read from the environment
    variables EMAIL_SERV, EMAIL_PORT, EMAIL_ADDR, EMAIL_PASS, EMAIL_FORM and
    EMAIL_SUBJ. The subject and body templates are filled in with
    ``str.format(**job_info)`` after every truthy value whose key ends with
    ``"time"`` has been converted from a UNIX timestamp to a local-time string
    (timezone resolved from ``job_info['ip']``, falling back to UTC).

    The body is sent both as plain text and as HTML rendered from Markdown.

    Nothing is raised: success is reported through ``gr.Info`` and any failure
    through ``gr.Warning``. Returns None.
    """
    if not job_info.get('email'):
        return

    try:
        email_info = job_info.copy()
        email_serv = os.getenv('EMAIL_SERV')
        email_port = os.getenv('EMAIL_PORT')
        email_addr = os.getenv('EMAIL_ADDR')
        email_pass = os.getenv('EMAIL_PASS')
        email_form = os.getenv('EMAIL_FORM')
        email_subj = os.getenv('EMAIL_SUBJ')

        # Resolve the timezone lazily and only once, rather than issuing one
        # HTTP lookup per time field.
        timezone = None
        for key, value in list(email_info.items()):
            if key.endswith("time") and value:
                if timezone is None:
                    # get_timezone_by_ip needs an object exposing .get(url);
                    # the requests module itself satisfies that.
                    timezone = get_timezone_by_ip(email_info.get('ip'), requests)
                email_info[key] = ts_to_str(value, timezone)

        # Build the whole message before connecting, so that template errors
        # never leave an SMTP connection open.
        body_text = email_form.format(**email_info)
        msg = MIMEMultipart("alternative")
        msg["From"] = email_addr
        msg["To"] = email_info['email']
        msg["Subject"] = email_subj.format(**email_info)
        msg["Date"] = formatdate(localtime=True)
        msg["Message-ID"] = make_msgid()
        # In multipart/alternative the LAST part is the preferred one
        # (RFC 2046), so attach plain text first and the HTML rendering last.
        msg.attach(MIMEText(body_text, 'plain'))
        msg.attach(MIMEText(markdown(body_text), 'html'))

        # The context manager issues QUIT and closes the socket even when
        # login or sending fails.
        with smtplib.SMTP(email_serv, int(email_port)) as server:
            # server.starttls()
            server.login(email_addr, email_pass)
            server.sendmail(email_addr, email_info['email'], msg.as_string())
        gr.Info('Email notification sent.')
    except Exception as e:
        gr.Warning('Failed to send email notification due to error: ' + str(e))
def read_molecule(path):
    """
    Read a molecule file, choosing the reader from the file extension.

    Supported extensions:
        .pdb, .mol, .mol2 -> RDKit molecule (unsanitized, hydrogens removed)
        .sdf              -> first record of the file, read the same way
        .pdr, .pqr        -> raw file text (no parsing)

    Args:
        path: File path as a string.

    Returns:
        An RDKit molecule (possibly None if RDKit fails to parse the file),
        or the file contents as ``str`` for the raw-text formats.

    Raises:
        Exception: If the extension is not one of the supported ones.
    """
    if path.endswith('.pdb'):
        return Chem.MolFromPDBFile(path, sanitize=False, removeHs=True)
    # NOTE(review): '.pdr' looks like a typo of '.pqr'; both are accepted so
    # that existing callers keep working - confirm and drop '.pdr' if unused.
    if path.endswith(('.pdr', '.pqr')):
        # Use a context manager so the file handle is always closed.
        with open(path, 'r') as handle:
            return handle.read()
    if path.endswith('.mol'):
        return Chem.MolFromMolFile(path, sanitize=False, removeHs=True)
    if path.endswith('.mol2'):
        return Chem.MolFromMol2File(path, sanitize=False, removeHs=True)
    if path.endswith('.sdf'):
        return Chem.SDMolSupplier(path, sanitize=False, removeHs=True)[0]
    raise Exception('Unknown file extension')
def read_molecule_file(in_file, allowed_extentions):
    """
    Read a molecule file and return its content as a text block for rendering.

    Args:
        in_file: A path string, or an object with a ``name`` attribute holding
            the path (e.g. an uploaded-file object).
        allowed_extentions: Collection of accepted extensions without the
            leading dot (e.g. ``['pdb']``).

    Returns:
        A ``(content, extension, error_message)`` tuple. On success
        ``content`` is a PDB block (for ``pdb``) or a MOL block (for ``mol``,
        ``mol2``, ``sdf``; the extension is then reported as ``'mol'``) and
        ``error_message`` is None. On failure ``content`` and ``extension``
        are None and ``error_message`` is a formatted message.

    Raises:
        NotImplementedError: If the extension is allowed by the caller but has
            no block writer here.
    """
    path = in_file if isinstance(in_file, str) else in_file.name
    extension = path.split('.')[-1]

    if extension not in allowed_extentions:
        msg = static.INVALID_FORMAT_MSG.format(extension=extension)
        return None, None, msg

    try:
        mol = read_molecule(path)
    except Exception as e:
        message = str(e).replace('\'', '')
        msg = static.ERROR_FORMAT_MSG.format(message=message)
        return None, None, msg

    # The RDKit readers return None instead of raising when a file cannot be
    # parsed; report that as a format error rather than crashing below.
    if mol is None:
        msg = static.ERROR_FORMAT_MSG.format(message='Failed to parse the molecule file')
        return None, None, msg

    # Compare by equality: `extension in 'pdb'` was a substring test that also
    # accepted '', 'p', 'd', 'b', 'pd' and 'db'.
    if extension == 'pdb':
        content = Chem.MolToPDBBlock(mol)
    elif extension in ['mol', 'mol2', 'sdf']:
        content = Chem.MolToMolBlock(mol, kekulize=False)
        extension = 'mol'
    else:
        raise NotImplementedError
    return content, extension, None
def show_target(in_protein):
    """
    Build the iframe HTML that renders a target protein from a PDB file.

    When the file cannot be read, the error message returned by
    ``read_molecule_file`` is embedded in the iframe instead.
    """
    content, fmt, message = read_molecule_file(in_protein, allowed_extentions=['pdb'])
    if content is None:
        inner_html = message
    else:
        inner_html = static.TARGET_RENDERING_TEMPLATE.format(molecule=content, fmt=fmt)
    return static.IFRAME_TEMPLATE.format(html=inner_html)
def show_complex(complex_path):
    """
    Build the iframe HTML that renders a protein complex from a PDB file.

    When the file cannot be read, the error message returned by
    ``read_molecule_file`` is embedded in the iframe instead.
    """
    content, fmt, message = read_molecule_file(complex_path, allowed_extentions=['pdb'])
    if content is None:
        inner_html = message
    else:
        inner_html = static.COMPLEX_RENDERING_TEMPLATE.format(complex=content, fmt=fmt)
    return static.IFRAME_TEMPLATE.format(html=inner_html)
# def create_complex_view_html( | |
# complex_path, pocket_path_dict=None, | |
# interactive_ligands=True, interactive_pockets=True | |
# ): | |
# """Generates HTML for complex visualization.""" | |
# model_i = -1 | |
# viewer_models = "" | |
# if complex_path: | |
# complex_data, extension, html = read_molecule_file(complex_path, allowed_extentions=['pdb']) | |
# viewer_models += f'viewer.addModel(`{complex_data}`, "pdb");' | |
# model_i += 1 | |
# viewer_models += f"viewer.getModel({model_i}).setStyle({{ hetflag: false }}, proteinStyle);" | |
# viewer_models += f"viewer.getModel({model_i}).setStyle({{ hetflag: true }}, ligandStyle);" | |
# if interactive_ligands: | |
# # return ligand residue info when the ligand is clicked | |
# viewer_models += f""" | |
# let selectedLigand = null; | |
# viewer.getModel({model_i}).setClickable( | |
# {{ hetflag: true, byres: true }}, | |
# true, | |
# function (_atom, _viewer, _event, _container) {{ | |
# let currentLigand = {{ resn: _atom.resn, chain: _atom.chain, resi: _atom.resi }}; | |
# | |
# if (selectedLigand === currentLigand) {{ | |
# // Deselect ligand | |
# selectedLigand = null; | |
# _viewer.setStyle( | |
# {{ resn: _atom.resn, chain: _atom.chain, resi: _atom.resi }}, | |
# ligandStyle | |
# ); | |
# console.log("Deselected Residue:", currentLigand); | |
# window.parent.postMessage({{ | |
# name: "ligand_selection", | |
# data: {{ residue: currentLigand, add: false }} | |
# }}, "*"); | |
# }} else {{ | |
# // Select ligand and deselect previous | |
# if (selectedLigand) {{ | |
# _viewer.setStyle( | |
# {{ | |
# resn: selectedLigand.resn, | |
# chain: selectedLigand.chain, | |
# resi: selectedLigand.resi | |
# }}, | |
# ligandStyle | |
# ); | |
# }} | |
# selectedLigand = currentLigand; | |
# _viewer.setStyle( | |
# {{ resn: _atom.resn, chain: _atom.chain, resi: _atom.resi }}, | |
# {{ stick: {{ color: "red", radius: 0.4}} }} | |
# ); | |
# console.log("Selected Residue:", currentLigand); | |
# window.parent.postMessage({{ | |
# name: "ligand_selection", | |
# data: {{ residue: currentLigand, add: true }} | |
# }}, "*"); | |
# }} | |
# _viewer.render(); | |
# }} | |
# ); | |
# """ | |
# if pocket_path_dict: | |
# pocket_data_dict = {k: open(v, 'r').read() for k, v in pocket_path_dict.items()} | |
# for pocket_name, pocket_data in pocket_data_dict.items(): | |
# viewer_models += f'viewer.addModel(`{pocket_data}`, "pqr");' | |
# model_i += 1 | |
# viewer_models += f'viewer.getModel({model_i}).setStyle(pocketStyle);' | |
# if interactive_pockets: | |
# # return the pocket name when the pocket is clicked | |
# viewer_models += f""" | |
# let selectedPocket = null; | |
# viewer.getModel({model_i}).setClickable( | |
# {{ byres: true }}, | |
# true, | |
# function (_atom, _viewer, _event, _container) {{ | |
# let currentPocket = "{pocket_name}"; | |
# | |
# if (selectedPocket == currentPocket) {{ | |
# // Deselect pocket | |
# selectedPocket = null; | |
# _viewer.getModel({model_i}).setStyle( pocketStyle ); | |
# console.log("Deselected Pocket:", currentPocket); | |
# window.parent.postMessage({{ | |
# name: "pocket_selection", | |
# data: {{ pocket: currentPocket, add: false }} | |
# }}, "*"); | |
# }} else {{ | |
# // Select pocket and deselect previous | |
# if (selectedPocket) {{ | |
# _viewer.getModel(selectedPocket).setStyle( pocketStyle ); | |
# }} | |
# selectedPocket = currentPocket; | |
# _viewer.getModel({model_i}).setStyle( | |
# {{ sphere: {{ color: "red", opacity: 0.9}} }} | |
# ); | |
# console.log("Selected Pocket:", currentPocket); | |
# window.parent.postMessage({{ | |
# name: "pocket_selection", | |
# data: {{ pocket: currentPocket, add: true }} | |
# }}, "*"); | |
# }} | |
# _viewer.render(); | |
# }} | |
# ); | |
# """ | |
# | |
# html = static.COMPLEX_RENDERING_TEMPLATE.format(viewer_models=viewer_models) | |
# return static.IFRAME_TEMPLATE.format(html=html) | |
def create_result_table_html(summary_df, opts=(), progress=gr.Progress(track_tqdm=True)):
    """
    Render a result DataFrame as an interactive Panel/Tabulator report.

    The table is built from a copy of ``summary_df``: known columns are
    renamed to display names, the ``Compound`` column is turned into inline
    images (or dropped), numeric columns get a per-column background gradient,
    and the report is saved to a temporary HTML file whose content is embedded
    in ``static.IFRAME_TEMPLATE``.

    Args:
        summary_df: DataFrame of results. It is not modified.
        opts: Collection of option strings. ``'Exclude Molecular Graph'``
            removes the ``Compound`` image column.
        progress: Gradio progress tracker; not used directly in the body.

    Returns:
        The iframe HTML string containing the rendered report.
    """
    html_df = summary_df.copy()
    column_aliases = {
        'ID1': 'Compound ID',
        'ID2': 'Target ID',
        'X1': 'Compound SMILES',
        'ligand_conf_path': 'Pose',
        'output_path': 'Pose'
    }
    # Hide any columns ending with '_path'. The list is collected BEFORE the
    # rename below, so path columns that get aliased to 'Pose' stay visible.
    hidden_cols = [col for col in html_df.columns if col.endswith('_path')]
    html_df.rename(columns=column_aliases, inplace=True)

    if 'Compound' in html_df.columns and 'Exclude Molecular Graph' not in opts:
        html_df['Compound'] = html_df['Compound'].apply(PandasTools.PrintAsImageString)
    else:
        # errors='ignore': this branch is also reached when the column does
        # not exist at all, where a plain drop would raise KeyError.
        html_df.drop(columns=['Compound'], errors='ignore', inplace=True)

    # if 'Scaffold' in html_df.columns and 'Exclude Scaffold Graph' not in opts:
    #     html_df['Scaffold'] = html_df['Scaffold'].parallel_apply(
    #         lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
    # else:
    #     html_df.drop(['Scaffold'], axis=1, inplace=True)

    # html_df.index.name = 'Index'
    num_cols = html_df.select_dtypes('number').columns
    num_col_colors = sns.color_palette('husl', len(num_cols))
    bool_cols = html_df.select_dtypes(bool).columns

    image_zoom_formatter = HTMLTemplateFormatter(template='<div class="image-zoom-viewer"><%= value %></div>')
    uniprot_id_formatter = HTMLTemplateFormatter(
        template='<% if (value == value) { '  # Check if value is not NaN
                 # The alternation is wrapped in a group so that ^ and $ anchor
                 # BOTH accession formats, not one alternative each.
                 'if (/^(?:[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9](?:[A-Z][A-Z0-9]{2}[0-9]){1,2})$/.test(value)) '
                 # Check if value is a valid UniProt ID
                 '{ %><a href="https://www.uniprot.org/uniprotkb/<%= value %>" target="_blank"><%= value %></a><% '
                 # Else treat it as a sequence or other plain-text string, line-wrapping every 60 characters
                 '} else { %><div style="white-space: pre-wrap;"><%= value.match(/.{1,60}/g).join("<br>") '
                 '%></div><% } %><% } else { %><% } %>'  # Output empty string if value is NaN
    )
    # Currently unused; kept for the 'Compound ID' TODO in other_formatters.
    pubchem_id_formatter = HTMLTemplateFormatter(
        template='<% if (value == value) { '  # Check if value is not NaN
                 '%><a href="https://pubchem.ncbi.nlm.nih.gov/#query=<%= value %>" '
                 'target="_blank"><%= value %></a>'
                 '<% } else { %><% } %>'  # Output empty string if value is NaN
    )
    bool_formatters = {col: BooleanFormatter() for col in bool_cols}
    float_formatters = {col: NumberFormatter(format='0.000') for col in html_df.select_dtypes('floating').columns}
    other_formatters = {
        'Compound': image_zoom_formatter,
        # 'Scaffold': image_zoom_formatter,
        # 'Target FASTA': {'type': 'textarea', 'width': 60},
        'Target ID': uniprot_id_formatter,
        # 'Compound ID': pubchem_id_formatter, ## TODO: add link to click for adding mol to the viewer
        'Pose': {'type': 'molDisplayButtonFormatter'},
    }
    # Later dicts win on key clashes, so the column-specific formatters
    # override the dtype-based ones.
    formatters = {**bool_formatters, **float_formatters, **other_formatters}

    # html = df.to_html(file)
    # return html

    # Only freeze columns that actually exist ('Compound' may have been dropped).
    frozen_cols = [col for col in ('Compound ID', 'Compound') if col in html_df.columns]
    report_table = pn.widgets.Tabulator(
        html_df, formatters=formatters,
        frozen_columns=frozen_cols,
        hidden_columns=hidden_cols,
        disabled=True, sizing_mode='stretch_both', pagination='local', page_size=10
    )

    for i, col in enumerate(num_cols):
        cmap = sns.light_palette(num_col_colors[i], as_cmap=True)
        cmap.set_bad(color='white')
        report_table.style.background_gradient(
            subset=html_df.columns == col, cmap=cmap)

    # TODO change this to use common substructures
    pie_charts = {}
    # for y in html_df.columns.intersection(['Interaction Probability', 'Binding Affinity (IC50 [nM])']):
    #     for category in categories:
    #         pie_charts[y][category] = []
    #         for k in [10, 30, 100]:
    #             if k < len(html_df):
    #                 pie_charts[y][category].append(create_pie_chart(html_df, category=category, value=y, top_k=k))
    #             else:
    #                 pie_charts[y][category].append(create_pie_chart(html_df, category=category, value=y, top_k=len(html_df)))
    #                 break
    #         # Add 'All' tab regardless of the prediction dataset size
    #         # pie_charts[y].append(create_pie_chart(html_df, category=category, value=y, top_k=len(html_df)))
    #
    #     # Remove key-value pairs with an empty list
    #     pie_charts[y] = {k: v for k, v in pie_charts[y].items() if any(v)}
    # pie_charts = {k: v for k, v in pie_charts.items() if any(v)}

    panel_css = """
    .tabulator {
        font-family: Courier New !important;
        font-weight: normal !important;
        font-size: 12px !important;
    }
    .tabulator-cell {
        overflow: visible !important;
        align-content: center !important;
    }
    .tabulator-cell:hover {
        z-index: 1000 !important;
    }
    .image-zoom-viewer {
        display: inline-block;
        overflow: visible;
        z-index: 1000;
    }
    .image-zoom-viewer::after {
        content: "";
        top: 0;
        left: 0;
        width: 100%;
        height: 100%;
        pointer-events: none;
    }
    .image-zoom-viewer:hover::after {
        pointer-events: all;
    }
    /* When hovering over the container, scale its child (the SVG) */
    .tabulator-cell:hover .image-zoom-viewer svg {
        padding: 3px;
        position: absolute;
        background-color: rgba(250, 250, 250, 0.854);
        box-shadow: 0 0 10px rgba(0, 0, 0, 0.618);
        border-radius: 3px;
        transform: scale(3); /* Scale up the SVG */
        transition: transform 0.3s ease;
        pointer-events: none; /* Prevents the SVG from blocking mouse interactions */
        z-index: 1000;
    }
    """
    pn.extension(
        raw_css=[panel_css],
        js_files={'panel_custom': 'app/panel.js'},
        # js_modules={'3Dmol': 'static/3Dmol-min.js'},
        inline=True,
    )

    template = pn.template.VanillaTemplate(
        sidebar=[],
        header=False,
        busy_indicator=None,
    )

    # stats_pane = pn.Column()
    # if pie_charts:
    #     for score_name, figure_dict in pie_charts.items():
    #         score_row = pn.Row()
    #         for category, figure_list in figure_dict.items():
    #             score_row.append(
    #                 pn.Column(f'### {category} by Top {score_name}', pn.Tabs(*figure_list, tabs_location='above')),
    #                 # pn.Card(pn.Row(v), title=f'{category} by Top {k}')
    #             )
    #         stats_pane.append(
    #             score_row
    #         )
    #
    # if stats_pane:
    #     template.main.append(
    #         pn.Card(stats_pane, sizing_mode='stretch_width', title='Summary Statistics', margin=10)
    #     )

    template.main.append(
        pn.Card(report_table, title='GenFBDD Results',  # width=1200,
                margin=10)
    )

    # The report is written to a throwaway directory and read straight back;
    # only the HTML string outlives this block.
    with tempfile.TemporaryDirectory() as tmpdir:
        file = Path(tmpdir) / 'report.html'
        template.save(file)
        # NOTE(review): assumes the saved report is UTF-8; decoding explicitly
        # avoids depending on the platform's default encoding - confirm.
        html_str = file.read_text(encoding='utf-8')
        iframe_html = static.IFRAME_TEMPLATE.format(html=html_str)

    return iframe_html
def pdb_query(query, method):
    """
    Fetch a PDB file for a protein identified by ID or sequence.

    ``method`` selects how ``query`` is interpreted: ``'PDB ID'``,
    ``'UniProt ID'`` or ``'FASTA Sequence'``. For the latter two, the first
    PDB entry returned by the RCSB search is downloaded.

    Returns the value of ``download_file`` for the chosen entry, or None after
    showing a Gradio warning when the lookup fails, finds nothing, or the
    method is not supported.
    """
    gr.Info(f'Querying protein by {method}...')
    try:
        if method == 'PDB ID':
            return download_file(f"https://files.rcsb.org/download/{query}.pdb")

        if method == 'UniProt ID':
            hits = uniprot_to_pdb(query)
            if not hits:
                raise ValueError(f"No PDB IDs found for UniProt ID: {query}")
            # Download the first associated PDB file
            return download_file(f"https://files.rcsb.org/download/{hits[0]}.pdb")

        if method == 'FASTA Sequence':
            hits = fasta_to_pdb(query)
            if not hits:
                raise ValueError("No PDB IDs found for the provided FASTA sequence.")
            # Download the first associated PDB file
            return download_file(f"https://files.rcsb.org/download/{hits[0]}.pdb")

        raise ValueError(f"Unsupported method: {method}")
    except Exception as err:
        gr.Warning(f"Error downloading PDB file: {err}")
        return None
def download_file(url):
    """
    Download a small file into the system temp directory, keeping its filename.

    Args:
        url: URL of the file. Its last path component becomes the local
            filename.

    Returns:
        The local path as a string, or None when the download fails (a Gradio
        warning is shown in that case).
    """
    try:
        # A timeout keeps a stalled server from blocking the request forever.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        temp_path = Path(tempfile.gettempdir()) / Path(url).name
        temp_path.write_bytes(response.content)
        return str(temp_path)
    except Exception as e:
        # gr.Error is an exception class: constructing it without `raise`
        # displays nothing. gr.Warning shows the message and lets this
        # function keep its "return None on failure" contract.
        gr.Warning(f"Download error: {e}")
        return None
def uniprot_to_pdb(uniprot_id):
    """
    Query the RCSB PDB search API for entries associated with a UniProt ID.

    Args:
        uniprot_id: UniProt accession to look up.

    Returns:
        A list of PDB entry identifiers, in the order returned by the API.
        The list is empty when nothing is found or the request fails (the
        error is printed, not raised).
    """
    base_url = "https://search.rcsb.org/rcsbsearch/v2/query"
    reference_attr = "rcsb_polymer_entity_container_identifiers.reference_sequence_identifiers"
    query_payload = {
        "query": {
            "type": "group",
            "logical_operator": "and",
            "nodes": [
                {
                    "type": "terminal",
                    "service": "text",
                    "parameters": {
                        "operator": "exact_match",
                        "value": uniprot_id,
                        "attribute": f"{reference_attr}.database_accession"
                    }
                },
                {
                    "type": "terminal",
                    "service": "text",
                    "parameters": {
                        "operator": "exact_match",
                        "value": "UniProt",
                        "attribute": f"{reference_attr}.database_name"
                    }
                }
            ]
        },
        "return_type": "entry"
    }

    try:
        # Send POST request with JSON payload; the timeout keeps a stalled
        # server from blocking the caller forever.
        response = requests.post(base_url, json=query_payload, timeout=30)
        response.raise_for_status()
        # An empty body (HTTP 204) means "no hits"; treat it as an empty
        # result rather than letting JSON decoding fail.
        if response.status_code == 204 or not response.content:
            return []
        data = response.json()
        return [entry["identifier"] for entry in data.get("result_set", [])]
    except Exception as e:
        print(f"Error querying UniProt ID: {e}")
        return []
def fasta_to_pdb(fasta_sequence):
    """
    Query the RCSB PDB search API for entries matching a protein sequence.

    Args:
        fasta_sequence: Protein sequence, either as a bare residue string or
            as FASTA text. Header lines (starting with ``>``) and line breaks
            are removed before the query is sent.

    Returns:
        A list of PDB entry identifiers, in the order returned by the API.
        The list is empty when nothing is found or the request fails (the
        error is printed, not raised).
    """
    base_url = "https://search.rcsb.org/rcsbsearch/v2/query"

    try:
        # Reduce FASTA text to the bare residue string: drop '>' header lines
        # and join the wrapped sequence lines. A bare sequence is unchanged.
        sequence = "".join(
            line.strip()
            for line in fasta_sequence.splitlines()
            if line.strip() and not line.lstrip().startswith(">")
        )
        query_payload = {
            "query": {
                "type": "terminal",
                "service": "sequence",
                "parameters": {
                    "evalue_cutoff": 1,
                    "identity_cutoff": 0.9,
                    "sequence_type": "protein",
                    "value": sequence
                }
            },
            "request_options": {
                "scoring_strategy": "sequence"
            },
            "return_type": "entry"
        }

        # Send POST request with JSON payload; the timeout keeps a stalled
        # server from blocking the caller forever.
        response = requests.post(base_url, json=query_payload, timeout=30)
        response.raise_for_status()
        # An empty body (HTTP 204) means "no hits"; treat it as an empty
        # result rather than letting JSON decoding fail.
        if response.status_code == 204 or not response.content:
            return []
        data = response.json()
        return [entry["identifier"] for entry in data.get("result_set", [])]
    except Exception as e:
        print(f"Error querying FASTA sequence: {e}")
        return []