{ "cells": [ { "cell_type": "code", "execution_count": 18, "id": "2b84eb4e-3f91-4a28-8e4f-322a34a9fb55", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "* Running on local URL: http://127.0.0.1:7877\n", "* Running on public URL: https://a35567ec94eccaf8d1.gradio.live\n", "\n", "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n" ] }, { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from Bio.PDB import PDBParser, MMCIFParser, MMCIF2Dict, PDBIO\n", "from Bio.PDB.Polypeptide import is_aa\n", "from Bio.SeqUtils import seq1\n", "import gradio as gr\n", "import numpy as np\n", "import os\n", "import requests\n", "from gradio_molecule3d import Molecule3D\n", "from scipy.special import expit\n", "from typing import Optional\n", "\n", "def normalize_scores(scores):\n", " min_score = np.min(scores)\n", " max_score = np.max(scores)\n", " return (scores - min_score) / (max_score - min_score) if max_score > min_score else scores\n", "\n", "def read_mol(pdb_path):\n", " \"\"\"Read PDB file and return its content as a string\"\"\"\n", " with open(pdb_path, 'r') as f:\n", " return f.read()\n", "\n", "def fetch_structure(pdb_id: str, output_dir: str = \".\") -> Optional[str]:\n", " \"\"\"\n", " Fetch the structure file for a given PDB ID. Prioritizes CIF files.\n", " If a structure file already exists locally, it uses that.\n", " \"\"\"\n", " file_path = download_structure(pdb_id, output_dir)\n", " if file_path:\n", " return file_path\n", " else:\n", " return None\n", "\n", "def download_structure(pdb_id: str, output_dir: str) -> Optional[str]:\n", " \"\"\"\n", " Attempt to download the structure file in CIF or PDB format.\n", " Returns the path to the downloaded file, or None if download fails.\n", " \"\"\"\n", " for ext in ['.cif', '.pdb']:\n", " file_path = os.path.join(output_dir, f\"{pdb_id}{ext}\")\n", " if os.path.exists(file_path):\n", " return file_path\n", " url = f\"https://files.rcsb.org/download/{pdb_id}{ext}\"\n", " try:\n", " response = requests.get(url, timeout=10)\n", " if response.status_code == 200:\n", " with open(file_path, 'wb') as f:\n", " f.write(response.content)\n", " return file_path\n", " except Exception as e:\n", " print(f\"Download error for {pdb_id}{ext}: {e}\")\n", " return None\n", "\n", "def convert_cif_to_pdb(cif_path: str, output_dir: str = \".\") -> str:\n", " \"\"\"\n", " Convert a CIF file to PDB format using BioPython and return the PDB file path.\n", " \"\"\"\n", " pdb_path = os.path.join(output_dir, os.path.basename(cif_path).replace('.cif', '.pdb'))\n", " parser = MMCIFParser(QUIET=True)\n", " structure = parser.get_structure('protein', cif_path)\n", " io = PDBIO()\n", " io.set_structure(structure)\n", " io.save(pdb_path)\n", " return pdb_path\n", "\n", "def fetch_pdb(pdb_id):\n", " pdb_path = fetch_structure(pdb_id)\n", " if not pdb_path:\n", " return None\n", " _, ext = os.path.splitext(pdb_path)\n", " if ext == '.cif':\n", " pdb_path = convert_cif_to_pdb(pdb_path)\n", " return pdb_path\n", "\n", "def process_pdb(pdb_id, segment):\n", " # Fetch the PDB or CIF file\n", " pdb_path = fetch_pdb(pdb_id)\n", " if not pdb_path:\n", " return \"Failed to fetch PDB file\", None, None\n", " \n", " # Determine the file format and choose the appropriate parser\n", " _, ext = os.path.splitext(pdb_path)\n", " parser = MMCIFParser(QUIET=True) if ext == '.cif' else PDBParser(QUIET=True)\n", " \n", " try:\n", " # Parse the structure file\n", " structure = parser.get_structure('protein', pdb_path)\n", " except Exception as e:\n", " return f\"Error parsing structure file: {e}\", None, None\n", " \n", " # Extract the specified chain\n", " try:\n", " chain = structure[0][segment]\n", " except KeyError:\n", " return \"Invalid Chain ID\", None, None\n", " \n", " protein_residues = [res for res in chain if is_aa(res)]\n", " sequence = \"\".join(seq1(res.resname) for res in protein_residues)\n", " sequence_id = [res.id[1] for res in protein_residues]\n", " \n", " # Generate random scores for residues\n", " scores = np.random.rand(len(sequence))\n", " normalized_scores = normalize_scores(scores)\n", " \n", " # Zip residues with scores to track the residue ID and score\n", " residue_scores = [(resi, score) for resi, score in zip(sequence_id, normalized_scores)]\n", "\n", " # Generate the result string\n", " result_str = \"\\n\".join([\n", " f\"{res.resname} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}\" \n", " for i, res in enumerate(protein_residues)])\n", " \n", " # Save the predictions to a file\n", " prediction_file = f\"{pdb_id}_predictions.txt\"\n", " with open(prediction_file, \"w\") as f:\n", " f.write(result_str)\n", "\n", " _, ext = os.path.splitext(pdb_path)\n", " if ext == '.cif':\n", " pdb_path = convert_cif_to_pdb(pdb_path)\n", "\n", " return result_str, molecule(pdb_path, residue_scores, segment), prediction_file\n", "\n", "def molecule(input_pdb, residue_scores=None, segment='A'):\n", " mol = read_mol(input_pdb) # Read PDB file content\n", " \n", " # Prepare high-scoring residues script if scores are provided\n", " high_score_script = \"\"\n", " if residue_scores is not None:\n", " # Sort residues based on their scores\n", " high_score_residues = [resi for resi, score in residue_scores if score > 0.75]\n", " mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]\n", " \n", " high_score_script = \"\"\"\n", " // Reset all styles first\n", " viewer.getModel(0).setStyle({}, {});\n", " \n", " // Show only the selected chain\n", " viewer.getModel(0).setStyle(\n", " {\"chain\": \"%s\"}, \n", " { cartoon: {colorscheme:\"whiteCarbon\"} }\n", " );\n", " \n", " // Highlight high-scoring residues only for the selected chain\n", " let highScoreResidues = [%s];\n", " viewer.getModel(0).setStyle(\n", " {\"chain\": \"%s\", \"resi\": highScoreResidues}, \n", " {\"stick\": {\"color\": \"red\"}}\n", " );\n", "\n", " // Highlight medium-scoring residues only for the selected chain\n", " let midScoreResidues = [%s];\n", " viewer.getModel(0).setStyle(\n", " {\"chain\": \"%s\", \"resi\": midScoreResidues}, \n", " {\"stick\": {\"color\": \"orange\"}}\n", " );\n", " \"\"\" % (segment, \n", " \", \".join(str(resi) for resi in high_score_residues),\n", " segment,\n", " \", \".join(str(resi) for resi in mid_score_residues),\n", " segment)\n", " \n", " html_content = f\"\"\"\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", " \n", " \n", " \n", " \"\"\"\n", " \n", " # Return the HTML content within an iframe safely encoded for special characters\n", " return f''\n", "\n", "reps = [\n", " {\n", " \"model\": 0,\n", " \"style\": \"cartoon\",\n", " \"color\": \"whiteCarbon\",\n", " \"residue_range\": \"\",\n", " \"around\": 0,\n", " \"byres\": False,\n", " }\n", "]\n", "\n", "# Gradio UI\n", "with gr.Blocks() as demo:\n", " gr.Markdown(\"# Protein Binding Site Prediction\")\n", " with gr.Row():\n", " pdb_input = gr.Textbox(value=\"4BDU\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n", " visualize_btn = gr.Button(\"Visualize Structure\")\n", "\n", " molecule_output2 = Molecule3D(label=\"Protein Structure\", reps=reps)\n", "\n", " with gr.Row():\n", " segment_input = gr.Textbox(value=\"A\", label=\"Chain ID\", placeholder=\"Enter Chain ID here...\")\n", " prediction_btn = gr.Button(\"Predict Binding Site\")\n", "\n", " molecule_output = gr.HTML(label=\"Protein Structure\")\n", " predictions_output = gr.Textbox(label=\"Binding Site Predictions\")\n", " download_output = gr.File(label=\"Download Predictions\")\n", " \n", " visualize_btn.click(fetch_pdb, inputs=[pdb_input], outputs=molecule_output2)\n", " \n", " prediction_btn.click(process_pdb, inputs=[pdb_input, segment_input], outputs=[predictions_output, molecule_output, download_output])\n", " \n", " gr.Markdown(\"## Examples\")\n", " gr.Examples(\n", " examples=[\n", " [\"7RPZ\", \"A\"],\n", " [\"2IWI\", \"B\"],\n", " [\"2F6V\", \"A\"]\n", " ],\n", " inputs=[pdb_input, segment_input],\n", " outputs=[predictions_output, molecule_output, download_output]\n", " )\n", "\n", "demo.launch(share=True)" ] }, { "cell_type": "code", "execution_count": 20, "id": "a2f1ca04-7a27-4e4f-b44d-39b20c5d034a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "* Running on local URL: http://127.0.0.1:7878\n", "* Running on public URL: https://fbfb00e893a2d7c6ae.gradio.live\n", "\n", "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n" ] }, { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import os\n", "from datetime import datetime\n", "import gradio as gr\n", "import numpy as np\n", "import requests\n", "from Bio.PDB import PDBParser, MMCIFParser, PDBIO\n", "from Bio.PDB.Polypeptide import is_aa\n", "from Bio.SeqUtils import seq1\n", "from gradio_molecule3d import Molecule3D\n", "from typing import Optional, Tuple\n", "\n", "def normalize_scores(scores):\n", " min_score = np.min(scores)\n", " max_score = np.max(scores)\n", " return (scores - min_score) / (max_score - min_score) if max_score > min_score else scores\n", "\n", "def read_mol(pdb_path):\n", " \"\"\"Read PDB file and return its content as a string\"\"\"\n", " with open(pdb_path, 'r') as f:\n", " return f.read()\n", "\n", "def fetch_structure(pdb_id: str, output_dir: str = \".\") -> Optional[str]:\n", " \"\"\"\n", " Fetch the structure file for a given PDB ID. Prioritizes CIF files.\n", " If a structure file already exists locally, it uses that.\n", " \"\"\"\n", " file_path = download_structure(pdb_id, output_dir)\n", " if file_path:\n", " return file_path\n", " else:\n", " return None\n", "\n", "def download_structure(pdb_id: str, output_dir: str) -> Optional[str]:\n", " \"\"\"\n", " Attempt to download the structure file in CIF or PDB format.\n", " Returns the path to the downloaded file, or None if download fails.\n", " \"\"\"\n", " for ext in ['.cif', '.pdb']:\n", " file_path = os.path.join(output_dir, f\"{pdb_id}{ext}\")\n", " if os.path.exists(file_path):\n", " return file_path\n", " url = f\"https://files.rcsb.org/download/{pdb_id}{ext}\"\n", " try:\n", " response = requests.get(url, timeout=10)\n", " if response.status_code == 200:\n", " with open(file_path, 'wb') as f:\n", " f.write(response.content)\n", " return file_path\n", " except Exception as e:\n", " print(f\"Download error for {pdb_id}{ext}: {e}\")\n", " return None\n", "\n", "def convert_cif_to_pdb(cif_path: str, output_dir: str = \".\") -> str:\n", " \"\"\"\n", " Convert a CIF file to PDB format using BioPython and return the PDB file path.\n", " \"\"\"\n", " pdb_path = os.path.join(output_dir, os.path.basename(cif_path).replace('.cif', '.pdb'))\n", " parser = MMCIFParser(QUIET=True)\n", " structure = parser.get_structure('protein', cif_path)\n", " io = PDBIO()\n", " io.set_structure(structure)\n", " io.save(pdb_path)\n", " return pdb_path\n", "\n", "def fetch_pdb(pdb_id):\n", " pdb_path = fetch_structure(pdb_id)\n", " if not pdb_path:\n", " return None\n", " _, ext = os.path.splitext(pdb_path)\n", " if ext == '.cif':\n", " pdb_path = convert_cif_to_pdb(pdb_path)\n", " return pdb_path\n", "\n", "def create_chain_specific_pdb(input_pdb: str, chain_id: str, residue_scores: list) -> str:\n", " \"\"\"\n", " Create a PDB file with only the specified chain and replace B-factor with prediction scores\n", " \"\"\"\n", " # Read the original PDB file\n", " parser = PDBParser(QUIET=True)\n", " structure = parser.get_structure('protein', input_pdb)\n", " \n", " # Prepare a new structure with only the specified chain\n", " new_structure = structure.copy()\n", " for model in new_structure:\n", " # Remove all chains except the specified one\n", " chains_to_remove = [chain for chain in model if chain.id != chain_id]\n", " for chain in chains_to_remove:\n", " model.detach_child(chain.id)\n", " \n", " # Create a modified PDB with scores in B-factor\n", " scores_dict = {resi: score for resi, score in residue_scores}\n", " for model in new_structure:\n", " for chain in model:\n", " for residue in chain:\n", " if residue.id[1] in scores_dict:\n", " for atom in residue:\n", " atom.bfactor = scores_dict[residue.id[1]] #* 100 # Scale score to B-factor range\n", " \n", " # Save the modified structure\n", " output_pdb = f\"{os.path.splitext(input_pdb)[0]}_{chain_id}_scored.pdb\"\n", " io = PDBIO()\n", " io.set_structure(new_structure)\n", " io.save(output_pdb)\n", " \n", " return output_pdb\n", "\n", "def calculate_geometric_center(pdb_path: str, high_score_residues: list, chain_id: str):\n", " \"\"\"\n", " Calculate the geometric center of high-scoring residues\n", " \"\"\"\n", " parser = PDBParser(QUIET=True)\n", " structure = parser.get_structure('protein', pdb_path)\n", " \n", " # Collect coordinates of CA atoms from high-scoring residues\n", " coords = []\n", " for model in structure:\n", " for chain in model:\n", " if chain.id == chain_id:\n", " for residue in chain:\n", " if residue.id[1] in high_score_residues:\n", " if 'CA' in residue: # Use alpha carbon as representative\n", " ca_atom = residue['CA']\n", " coords.append(ca_atom.coord)\n", " \n", " # Calculate geometric center\n", " if coords:\n", " center = np.mean(coords, axis=0)\n", " return center\n", " return None\n", "\n", "def process_pdb(pdb_id_or_file, segment):\n", " # Determine if input is a PDB ID or file path\n", " if pdb_id_or_file.endswith('.pdb'):\n", " pdb_path = pdb_id_or_file\n", " pdb_id = os.path.splitext(os.path.basename(pdb_path))[0]\n", " else:\n", " pdb_id = pdb_id_or_file\n", " pdb_path = fetch_pdb(pdb_id)\n", " \n", " if not pdb_path:\n", " return \"Failed to fetch PDB file\", None, None\n", " \n", " # Determine the file format and choose the appropriate parser\n", " _, ext = os.path.splitext(pdb_path)\n", " parser = MMCIFParser(QUIET=True) if ext == '.cif' else PDBParser(QUIET=True)\n", " \n", " try:\n", " # Parse the structure file\n", " structure = parser.get_structure('protein', pdb_path)\n", " except Exception as e:\n", " return f\"Error parsing structure file: {e}\", None, None\n", " \n", " # Extract the specified chain\n", " try:\n", " chain = structure[0][segment]\n", " except KeyError:\n", " return \"Invalid Chain ID\", None, None\n", " \n", " protein_residues = [res for res in chain if is_aa(res)]\n", " sequence = \"\".join(seq1(res.resname) for res in protein_residues)\n", " sequence_id = [res.id[1] for res in protein_residues]\n", " \n", " # Generate random scores for residues\n", " scores = np.random.rand(len(sequence))\n", " normalized_scores = normalize_scores(scores)\n", " \n", " # Zip residues with scores to track the residue ID and score\n", " residue_scores = [(resi, score) for resi, score in zip(sequence_id, normalized_scores)]\n", "\n", " # Identify high and mid scoring residues\n", " high_score_residues = [resi for resi, score in residue_scores if score > 0.75]\n", " mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]\n", "\n", " # Calculate geometric center of high-scoring residues\n", " geo_center = calculate_geometric_center(pdb_path, high_score_residues, segment)\n", " pymol_selection = f\"select high_score_residues, resi {'+'.join(map(str, high_score_residues))} and chain {segment}\"\n", " pymol_center_cmd = f\"show spheres, resi {'+'.join(map(str, high_score_residues))} and chain {segment}\" if geo_center is not None else \"\"\n", "\n", " # Generate the result string\n", " current_time = datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")\n", " result_str = f\"Prediction for PDB: {pdb_id}, Chain: {segment}\\nDate: {current_time}\\n\\n\"\n", " result_str += \"Columns: Residue Name, Residue Number, One-letter Code, Normalized Score\\n\\n\"\n", " result_str += \"\\n\".join([\n", " f\"{res.resname} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}\" \n", " for i, res in enumerate(protein_residues)])\n", " \n", " # Create prediction and scored PDB files\n", " prediction_file = f\"{pdb_id}_predictions.txt\"\n", " with open(prediction_file, \"w\") as f:\n", " f.write(result_str)\n", "\n", " # Create chain-specific PDB with scores in B-factor\n", " scored_pdb = create_chain_specific_pdb(pdb_path, segment, residue_scores)\n", "\n", " # Molecule visualization with updated script\n", " mol_vis = molecule(pdb_path, residue_scores, segment)\n", "\n", " # Construct PyMOL command suggestions\n", " pymol_commands = f\"\"\"\n", "PyMOL Visualization Commands:\n", "1. Load PDB: load {os.path.abspath(pdb_path)}\n", "2. Select high-scoring residues: {pymol_selection}\n", "3. Highlight high-scoring residues: show sticks, high_score_residues\n", "{pymol_center_cmd}\n", "\"\"\"\n", " \n", " return result_str + \"\\n\\n\" + pymol_commands, mol_vis, [prediction_file, scored_pdb]\n", "\n", "# molecule() function remains the same as in the previous script, \n", "# but modify the visualization script to ensure cartoon is below stick representations\n", "\n", "def molecule(input_pdb, residue_scores=None, segment='A'):\n", " mol = read_mol(input_pdb) # Read PDB file content\n", " \n", " # Prepare high-scoring residues script if scores are provided\n", " high_score_script = \"\"\n", " if residue_scores is not None:\n", " # Sort residues based on their scores\n", " high_score_residues = [resi for resi, score in residue_scores if score > 0.75]\n", " mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]\n", " \n", " high_score_script = \"\"\"\n", " // Reset all styles first\n", " viewer.getModel(0).setStyle({}, {});\n", " \n", " // First, set background cartoon style for the entire chain (underneath)\n", " viewer.getModel(0).setStyle(\n", " {\"chain\": \"%s\"}, \n", " { cartoon: {colorscheme:\"whiteCarbon\", opacity:0.7} }\n", " );\n", " \n", " // Highlight high-scoring residues with sticks on top\n", " let highScoreResidues = [%s];\n", " viewer.getModel(0).setStyle(\n", " {\"chain\": \"%s\", \"resi\": highScoreResidues}, \n", " {\"stick\": {\"color\": \"red\", \"opacity\": 1}}\n", " );\n", "\n", " // Highlight medium-scoring residues\n", " let midScoreResidues = [%s];\n", " viewer.getModel(0).setStyle(\n", " {\"chain\": \"%s\", \"resi\": midScoreResidues}, \n", " {\"stick\": {\"color\": \"orange\", \"opacity\": 0.8}}\n", " );\n", " \"\"\" % (segment, \n", " \", \".join(str(resi) for resi in high_score_residues),\n", " segment,\n", " \", \".join(str(resi) for resi in mid_score_residues),\n", " segment)\n", " \n", " # Rest of the molecule() function remains the same as in the previous script\n", " \n", " html_content = f\"\"\"\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", " \n", " \n", " \n", " \"\"\"\n", " \n", " # Return the HTML content within an iframe safely encoded for special characters\n", " return f''\n", "\n", "# Gradio UI\n", "with gr.Blocks() as demo:\n", " gr.Markdown(\"# Protein Binding Site Prediction\")\n", " \n", " with gr.Row():\n", " pdb_input = gr.Textbox(value=\"4BDU\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n", " file_input = gr.File(label=\"Or Upload PDB File\", file_types=['.pdb'], type=\"filepath\")\n", " visualize_btn = gr.Button(\"Visualize Structure\")\n", "\n", " molecule_output2 = Molecule3D(label=\"Protein Structure\", reps=[\n", " {\n", " \"model\": 0,\n", " \"style\": \"cartoon\",\n", " \"color\": \"whiteCarbon\",\n", " \"residue_range\": \"\",\n", " \"around\": 0,\n", " \"byres\": False,\n", " }\n", " ])\n", "\n", " with gr.Row():\n", " segment_input = gr.Textbox(value=\"A\", label=\"Chain ID\", placeholder=\"Enter Chain ID here...\")\n", " prediction_btn = gr.Button(\"Predict Binding Site\")\n", "\n", " def process_input(pdb_id, uploaded_file):\n", " \"\"\"\n", " Determine whether to use PDB ID or uploaded file\n", " \"\"\"\n", " if uploaded_file and uploaded_file.endswith('.pdb'):\n", " return uploaded_file\n", " return pdb_id\n", "\n", " molecule_output = gr.HTML(label=\"Protein Structure\")\n", " predictions_output = gr.Textbox(label=\"Binding Site Predictions\")\n", " download_output = gr.File(label=\"Download Files\", file_count=\"multiple\")\n", " \n", " prediction_btn.click(\n", " process_pdb, \n", " inputs=[\n", " gr.State(lambda: process_input(pdb_input.value, file_input.value)), \n", " segment_input\n", " ], \n", " outputs=[predictions_output, molecule_output, download_output]\n", " )\n", "\n", " visualize_btn.click(\n", " fetch_pdb, \n", " inputs=[pdb_input], \n", " outputs=molecule_output2\n", " )\n", "\n", " gr.Markdown(\"## Examples\")\n", " gr.Examples(\n", " examples=[\n", " [\"7RPZ\", \"A\"],\n", " [\"2IWI\", \"B\"],\n", " [\"2F6V\", \"A\"]\n", " ],\n", " inputs=[pdb_input, segment_input],\n", " outputs=[predictions_output, molecule_output, download_output]\n", " )\n", "\n", "demo.launch(share=True)" ] }, { "cell_type": "code", "execution_count": 32, "id": "5b266025-7503-48f5-9371-3642d09f7e93", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "* Running on local URL: http://127.0.0.1:7890\n", "* Running on public URL: https://70a6e80d8deb42ddd0.gradio.live\n", "\n", "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n" ] }, { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import os\n", "from datetime import datetime\n", "import gradio as gr\n", "import numpy as np\n", "import requests\n", "from Bio.PDB import PDBParser, MMCIFParser, PDBIO\n", "from Bio.PDB.Polypeptide import is_aa\n", "from Bio.SeqUtils import seq1\n", "from gradio_molecule3d import Molecule3D\n", "from typing import Optional, Tuple\n", "\n", "def normalize_scores(scores):\n", " min_score = np.min(scores)\n", " max_score = np.max(scores)\n", " return (scores - min_score) / (max_score - min_score) if max_score > min_score else scores\n", "\n", "def read_mol(pdb_path):\n", " \"\"\"Read PDB file and return its content as a string\"\"\"\n", " with open(pdb_path, 'r') as f:\n", " return f.read()\n", "\n", "def fetch_structure(pdb_id: str, output_dir: str = \".\") -> Optional[str]:\n", " \"\"\"\n", " Fetch the structure file for a given PDB ID. Prioritizes CIF files.\n", " If a structure file already exists locally, it uses that.\n", " \"\"\"\n", " file_path = download_structure(pdb_id, output_dir)\n", " if file_path:\n", " return file_path\n", " else:\n", " return None\n", "\n", "def download_structure(pdb_id: str, output_dir: str) -> Optional[str]:\n", " \"\"\"\n", " Attempt to download the structure file in CIF or PDB format.\n", " Returns the path to the downloaded file, or None if download fails.\n", " \"\"\"\n", " for ext in ['.cif', '.pdb']:\n", " file_path = os.path.join(output_dir, f\"{pdb_id}{ext}\")\n", " if os.path.exists(file_path):\n", " return file_path\n", " url = f\"https://files.rcsb.org/download/{pdb_id}{ext}\"\n", " try:\n", " response = requests.get(url, timeout=10)\n", " if response.status_code == 200:\n", " with open(file_path, 'wb') as f:\n", " f.write(response.content)\n", " return file_path\n", " except Exception as e:\n", " print(f\"Download error for {pdb_id}{ext}: {e}\")\n", " return None\n", "\n", "def convert_cif_to_pdb(cif_path: str, output_dir: str = \".\") -> str:\n", " \"\"\"\n", " Convert a CIF file to PDB format using BioPython and return the PDB file path.\n", " \"\"\"\n", " pdb_path = os.path.join(output_dir, os.path.basename(cif_path).replace('.cif', '.pdb'))\n", " parser = MMCIFParser(QUIET=True)\n", " structure = parser.get_structure('protein', cif_path)\n", " io = PDBIO()\n", " io.set_structure(structure)\n", " io.save(pdb_path)\n", " return pdb_path\n", "\n", "def fetch_pdb(pdb_id):\n", " pdb_path = fetch_structure(pdb_id)\n", " if not pdb_path:\n", " return None\n", " _, ext = os.path.splitext(pdb_path)\n", " if ext == '.cif':\n", " pdb_path = convert_cif_to_pdb(pdb_path)\n", " return pdb_path\n", "\n", "def create_chain_specific_pdb(input_pdb: str, chain_id: str, residue_scores: list) -> str:\n", " \"\"\"\n", " Create a PDB file with only the specified chain and replace B-factor with prediction scores\n", " \"\"\"\n", " # Read the original PDB file\n", " parser = PDBParser(QUIET=True)\n", " structure = parser.get_structure('protein', input_pdb)\n", " \n", " # Prepare a new structure with only the specified chain\n", " new_structure = structure.copy()\n", " for model in new_structure:\n", " # Remove all chains except the specified one\n", " chains_to_remove = [chain for chain in model if chain.id != chain_id]\n", " for chain in chains_to_remove:\n", " model.detach_child(chain.id)\n", " \n", " # Create a modified PDB with scores in B-factor\n", " scores_dict = {resi: score for resi, score in residue_scores}\n", " for model in new_structure:\n", " for chain in model:\n", " for residue in chain:\n", " if residue.id[1] in scores_dict:\n", " for atom in residue:\n", " atom.bfactor = scores_dict[residue.id[1]] #* 100 # Scale score to B-factor range\n", " \n", " # Save the modified structure\n", " output_pdb = f\"{os.path.splitext(input_pdb)[0]}_{chain_id}_scored.pdb\"\n", " io = PDBIO()\n", " io.set_structure(new_structure)\n", " io.save(output_pdb)\n", " \n", " return output_pdb\n", "\n", "def calculate_geometric_center(pdb_path: str, high_score_residues: list, chain_id: str):\n", " \"\"\"\n", " Calculate the geometric center of high-scoring residues\n", " \"\"\"\n", " parser = PDBParser(QUIET=True)\n", " structure = parser.get_structure('protein', pdb_path)\n", " \n", " # Collect coordinates of CA atoms from high-scoring residues\n", " coords = []\n", " for model in structure:\n", " for chain in model:\n", " if chain.id == chain_id:\n", " for residue in chain:\n", " if residue.id[1] in high_score_residues:\n", " if 'CA' in residue: # Use alpha carbon as representative\n", " ca_atom = residue['CA']\n", " coords.append(ca_atom.coord)\n", " \n", " # Calculate geometric center\n", " if coords:\n", " center = np.mean(coords, axis=0)\n", " return center\n", " return None\n", "\n", "def process_pdb(pdb_id_or_file, segment):\n", " # Determine if input is a PDB ID or file path\n", " if pdb_id_or_file.endswith('.pdb'):\n", " pdb_path = pdb_id_or_file\n", " pdb_id = os.path.splitext(os.path.basename(pdb_path))[0]\n", " else:\n", " pdb_id = pdb_id_or_file\n", " pdb_path = fetch_pdb(pdb_id)\n", " \n", " if not pdb_path:\n", " return \"Failed to fetch PDB file\", None, None\n", " \n", " # Determine the file format and choose the appropriate parser\n", " _, ext = os.path.splitext(pdb_path)\n", " parser = MMCIFParser(QUIET=True) if ext == '.cif' else PDBParser(QUIET=True)\n", " \n", " try:\n", " # Parse the structure file\n", " structure = parser.get_structure('protein', pdb_path)\n", " except Exception as e:\n", " return f\"Error parsing structure file: {e}\", None, None\n", " \n", " # Extract the specified chain\n", " try:\n", " chain = structure[0][segment]\n", " except KeyError:\n", " return \"Invalid Chain ID\", None, None\n", " \n", " protein_residues = [res for res in chain if is_aa(res)]\n", " sequence = \"\".join(seq1(res.resname) for res in protein_residues)\n", " sequence_id = [res.id[1] for res in protein_residues]\n", " \n", " # Generate random scores for residues\n", " scores = np.random.rand(len(sequence))\n", " normalized_scores = normalize_scores(scores)\n", " \n", " # Zip residues with scores to track the residue ID and score\n", " residue_scores = [(resi, score) for resi, score in zip(sequence_id, normalized_scores)]\n", "\n", " # Identify high and mid scoring residues\n", " high_score_residues = [resi for resi, score in residue_scores if score > 0.75]\n", " mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]\n", "\n", " # Calculate geometric center of high-scoring residues\n", " geo_center = calculate_geometric_center(pdb_path, high_score_residues, segment)\n", " pymol_selection = f\"select high_score_residues, resi {'+'.join(map(str, high_score_residues))} and chain {segment}\"\n", " pymol_center_cmd = f\"show spheres, resi {'+'.join(map(str, high_score_residues))} and chain {segment}\" if geo_center is not None else \"\"\n", "\n", " # Generate the result string\n", " current_time = datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")\n", " result_str = f\"Prediction for PDB: {pdb_id}, Chain: {segment}\\nDate: {current_time}\\n\\n\"\n", " result_str += \"Columns: Residue Name, Residue Number, One-letter Code, Normalized Score\\n\\n\"\n", " result_str += \"\\n\".join([\n", " f\"{res.resname} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}\" \n", " for i, res in enumerate(protein_residues)])\n", " \n", " # Create prediction and scored PDB files\n", " prediction_file = f\"{pdb_id}_predictions.txt\"\n", " with open(prediction_file, \"w\") as f:\n", " f.write(result_str)\n", "\n", " # Create chain-specific PDB with scores in B-factor\n", " scored_pdb = create_chain_specific_pdb(pdb_path, segment, residue_scores)\n", "\n", " # Molecule visualization with updated script\n", " mol_vis = molecule(pdb_path, residue_scores, segment)\n", "\n", " # Construct PyMOL command suggestions\n", " pymol_commands = f\"\"\"\n", "PyMOL Visualization Commands:\n", "1. Load PDB: load {os.path.abspath(pdb_path)}\n", "2. Select high-scoring residues: {pymol_selection}\n", "3. Highlight high-scoring residues: show sticks, high_score_residues\n", "{pymol_center_cmd}\n", "\"\"\"\n", " \n", " return result_str + \"\\n\\n\" + pymol_commands, mol_vis, [prediction_file, scored_pdb]\n", "\n", "# molecule() function remains the same as in the previous script, \n", "# but modify the visualization script to ensure cartoon is below stick representations\n", "\n", "def molecule(input_pdb, residue_scores=None, segment='A'):\n", " mol = read_mol(input_pdb) # Read PDB file content\n", "\n", " # Prepare high-scoring residues script if scores are provided\n", " high_score_script = \"\"\n", " if residue_scores is not None:\n", " # Filter residues based on their scores\n", " high_score_residues = [resi for resi, score in residue_scores if score > 0.75]\n", " mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]\n", " \n", " high_score_script = \"\"\"\n", " // Load the original model and apply white cartoon style\n", " let chainModel = viewer.addModel(pdb, \"pdb\");\n", " chainModel.setStyle(\n", " {\"chain\": \"%s\"}, \n", " {\"cartoon\": {\"color\": \"white\"}}\n", " );\n", "\n", " // Create a new model for high-scoring residues and apply red sticks style\n", " let highScoreModel = viewer.addModel(pdb, \"pdb\");\n", " highScoreModel.setStyle(\n", " {\"chain\": \"%s\", \"resi\": [%s]}, \n", " {\"stick\": {\"color\": \"red\"}}\n", " );\n", "\n", " // Create a new model for medium-scoring residues and apply orange sticks style\n", " let midScoreModel = viewer.addModel(pdb, \"pdb\");\n", " midScoreModel.setStyle(\n", " {\"chain\": \"%s\", \"resi\": [%s]}, \n", " {\"stick\": {\"color\": \"orange\"}}\n", " );\n", " \"\"\" % (\n", " segment,\n", " segment,\n", " \", \".join(str(resi) for resi in high_score_residues),\n", " segment,\n", " \", \".join(str(resi) for resi in mid_score_residues)\n", " )\n", " \n", " # Generate the full HTML content\n", " html_content = f\"\"\"\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", " \n", " \n", " \n", " \"\"\"\n", " \n", " # Return the HTML content within an iframe safely encoded for special characters\n", " return f''\n", "\n", "\n", "# Gradio UI\n", "with gr.Blocks() as demo:\n", " gr.Markdown(\"# Protein Binding Site Prediction\")\n", " \n", " with gr.Row():\n", " pdb_input = gr.Textbox(value=\"4BDU\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n", " visualize_btn = gr.Button(\"Visualize Structure\")\n", "\n", " molecule_output2 = Molecule3D(label=\"Protein Structure\", reps=[\n", " {\n", " \"model\": 0,\n", " \"style\": \"cartoon\",\n", " \"color\": \"whiteCarbon\",\n", " \"residue_range\": \"\",\n", " \"around\": 0,\n", " \"byres\": False,\n", " }\n", " ])\n", "\n", " with gr.Row():\n", " segment_input = gr.Textbox(value=\"A\", label=\"Chain ID\", placeholder=\"Enter Chain ID here...\")\n", " prediction_btn = gr.Button(\"Predict Binding Site\")\n", "\n", "\n", " molecule_output = gr.HTML(label=\"Protein Structure\")\n", " predictions_output = gr.Textbox(label=\"Binding Site Predictions\")\n", " download_output = gr.File(label=\"Download Files\", file_count=\"multiple\")\n", " \n", " prediction_btn.click(\n", " process_pdb, \n", " inputs=[\n", " pdb_input, \n", " segment_input\n", " ], \n", " outputs=[predictions_output, molecule_output, download_output]\n", " )\n", "\n", " visualize_btn.click(\n", " fetch_pdb, \n", " inputs=[pdb_input], \n", " outputs=molecule_output2\n", " )\n", "\n", " gr.Markdown(\"## Examples\")\n", " gr.Examples(\n", " examples=[\n", " [\"7RPZ\", \"A\"],\n", " [\"2IWI\", \"B\"],\n", " [\"2F6V\", \"A\"]\n", " ],\n", " inputs=[pdb_input, segment_input],\n", " outputs=[predictions_output, molecule_output, download_output]\n", " )\n", "\n", "demo.launch(share=True)" ] }, { "cell_type": "code", "execution_count": 39, "id": "514fad12-a31a-495f-af9e-04a18e11175e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "* Running on local URL: http://127.0.0.1:7897\n", "* Running on public URL: https://0d9b5d36fa5302e0df.gradio.live\n", "\n", "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n" ] }, { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import os\n", "from datetime import datetime\n", "import gradio as gr\n", "import numpy as np\n", "import requests\n", "from Bio.PDB import PDBParser, MMCIFParser, PDBIO\n", "from Bio.PDB.Polypeptide import is_aa\n", "from Bio.SeqUtils import seq1\n", "from gradio_molecule3d import Molecule3D\n", "from typing import Optional, Tuple\n", "\n", "def normalize_scores(scores):\n", " min_score = np.min(scores)\n", " max_score = np.max(scores)\n", " return (scores - min_score) / (max_score - min_score) if max_score > min_score else scores\n", "\n", "def read_mol(pdb_path):\n", " \"\"\"Read PDB file and return its content as a string\"\"\"\n", " with open(pdb_path, 'r') as f:\n", " return f.read()\n", "\n", "def fetch_structure(pdb_id: str, output_dir: str = \".\") -> Optional[str]:\n", " \"\"\"\n", " Fetch the structure file for a given PDB ID. Prioritizes CIF files.\n", " If a structure file already exists locally, it uses that.\n", " \"\"\"\n", " file_path = download_structure(pdb_id, output_dir)\n", " if file_path:\n", " return file_path\n", " else:\n", " return None\n", "\n", "def download_structure(pdb_id: str, output_dir: str) -> Optional[str]:\n", " \"\"\"\n", " Attempt to download the structure file in CIF or PDB format.\n", " Returns the path to the downloaded file, or None if download fails.\n", " \"\"\"\n", " for ext in ['.cif', '.pdb']:\n", " file_path = os.path.join(output_dir, f\"{pdb_id}{ext}\")\n", " if os.path.exists(file_path):\n", " return file_path\n", " url = f\"https://files.rcsb.org/download/{pdb_id}{ext}\"\n", " try:\n", " response = requests.get(url, timeout=10)\n", " if response.status_code == 200:\n", " with open(file_path, 'wb') as f:\n", " f.write(response.content)\n", " return file_path\n", " except Exception as e:\n", " print(f\"Download error for {pdb_id}{ext}: {e}\")\n", " return None\n", "\n", "def convert_cif_to_pdb(cif_path: str, output_dir: str = \".\") -> str:\n", " \"\"\"\n", " Convert a CIF file to PDB format using BioPython and return the PDB file path.\n", " \"\"\"\n", " pdb_path = os.path.join(output_dir, os.path.basename(cif_path).replace('.cif', '.pdb'))\n", " parser = MMCIFParser(QUIET=True)\n", " structure = parser.get_structure('protein', cif_path)\n", " io = PDBIO()\n", " io.set_structure(structure)\n", " io.save(pdb_path)\n", " return pdb_path\n", "\n", "def fetch_pdb(pdb_id):\n", " pdb_path = fetch_structure(pdb_id)\n", " if not pdb_path:\n", " return None\n", " _, ext = os.path.splitext(pdb_path)\n", " if ext == '.cif':\n", " pdb_path = convert_cif_to_pdb(pdb_path)\n", " return pdb_path\n", "\n", "def create_chain_specific_pdb(input_pdb: str, chain_id: str, residue_scores: list) -> str:\n", " \"\"\"\n", " Create a PDB file with only the specified chain and replace B-factor with prediction scores\n", " \"\"\"\n", " # Read the original PDB file\n", " parser = PDBParser(QUIET=True)\n", " structure = parser.get_structure('protein', input_pdb)\n", " \n", " # Prepare a new structure with only the specified chain\n", " new_structure = structure.copy()\n", " for model in new_structure:\n", " # Remove all chains except the specified one\n", " chains_to_remove = [chain for chain in model if chain.id != chain_id]\n", " for chain in chains_to_remove:\n", " model.detach_child(chain.id)\n", " \n", " # Create a modified PDB with scores in B-factor\n", " scores_dict = {resi: score for resi, score in residue_scores}\n", " for model in new_structure:\n", " for chain in model:\n", " for residue in chain:\n", " if residue.id[1] in scores_dict:\n", " for atom in residue:\n", " atom.bfactor = scores_dict[residue.id[1]] #* 100 # Scale score to B-factor range\n", " \n", " # Save the modified structure\n", " output_pdb = f\"{os.path.splitext(input_pdb)[0]}_{chain_id}_scored.pdb\"\n", " io = PDBIO()\n", " io.set_structure(new_structure)\n", " io.save(output_pdb)\n", " \n", " return output_pdb\n", "\n", "def calculate_geometric_center(pdb_path: str, high_score_residues: list, chain_id: str):\n", " \"\"\"\n", " Calculate the geometric center of high-scoring residues\n", " \"\"\"\n", " parser = PDBParser(QUIET=True)\n", " structure = parser.get_structure('protein', pdb_path)\n", " \n", " # Collect coordinates of CA atoms from high-scoring residues\n", " coords = []\n", " for model in structure:\n", " for chain in model:\n", " if chain.id == chain_id:\n", " for residue in chain:\n", " if residue.id[1] in high_score_residues:\n", " if 'CA' in residue: # Use alpha carbon as representative\n", " ca_atom = residue['CA']\n", " coords.append(ca_atom.coord)\n", " \n", " # Calculate geometric center\n", " if coords:\n", " center = np.mean(coords, axis=0)\n", " return center\n", " return None\n", "\n", "def process_pdb(pdb_id_or_file, segment):\n", " # Determine if input is a PDB ID or file path\n", " if pdb_id_or_file.endswith('.pdb'):\n", " pdb_path = pdb_id_or_file\n", " pdb_id = os.path.splitext(os.path.basename(pdb_path))[0]\n", " else:\n", " pdb_id = pdb_id_or_file\n", " pdb_path = fetch_pdb(pdb_id)\n", " \n", " if not pdb_path:\n", " return \"Failed to fetch PDB file\", None, None\n", " \n", " # Determine the file format and choose the appropriate parser\n", " _, ext = os.path.splitext(pdb_path)\n", " parser = MMCIFParser(QUIET=True) if ext == '.cif' else PDBParser(QUIET=True)\n", " \n", " try:\n", " # Parse the structure file\n", " structure = parser.get_structure('protein', pdb_path)\n", " except Exception as e:\n", " return f\"Error parsing structure file: {e}\", None, None\n", " \n", " # Extract the specified chain\n", " try:\n", " chain = structure[0][segment]\n", " except KeyError:\n", " return \"Invalid Chain ID\", None, None\n", " \n", " protein_residues = [res for res in chain if is_aa(res)]\n", " sequence = \"\".join(seq1(res.resname) for res in protein_residues)\n", " sequence_id = [res.id[1] for res in protein_residues]\n", " \n", " # Generate random scores for residues\n", " scores = np.random.rand(len(sequence))\n", " normalized_scores = normalize_scores(scores)\n", " \n", " # Zip residues with scores to track the residue ID and score\n", " residue_scores = [(resi, score) for resi, score in zip(sequence_id, normalized_scores)]\n", "\n", " # Identify high and mid scoring residues\n", " high_score_residues = [resi for resi, score in residue_scores if score > 0.75]\n", " mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]\n", "\n", " # Calculate geometric center of high-scoring residues\n", " geo_center = calculate_geometric_center(pdb_path, high_score_residues, segment)\n", " pymol_selection = f\"select high_score_residues, resi {'+'.join(map(str, high_score_residues))} and chain {segment}\"\n", " pymol_center_cmd = f\"show spheres, resi {'+'.join(map(str, high_score_residues))} and chain {segment}\" if geo_center is not None else \"\"\n", "\n", " # Generate the result string\n", " current_time = datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")\n", " result_str = f\"Prediction for PDB: {pdb_id}, Chain: {segment}\\nDate: {current_time}\\n\\n\"\n", " result_str += \"Columns: Residue Name, Residue Number, One-letter Code, Normalized Score\\n\\n\"\n", " result_str += \"\\n\".join([\n", " f\"{res.resname} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}\" \n", " for i, res in enumerate(protein_residues)])\n", " \n", " # Create prediction and scored PDB files\n", " prediction_file = f\"{pdb_id}_predictions.txt\"\n", " with open(prediction_file, \"w\") as f:\n", " f.write(result_str)\n", "\n", " # Create chain-specific PDB with scores in B-factor\n", " scored_pdb = create_chain_specific_pdb(pdb_path, segment, residue_scores)\n", "\n", " # Molecule visualization with updated script\n", " mol_vis = molecule(pdb_path, residue_scores, segment)\n", "\n", " # Construct PyMOL command suggestions\n", " pymol_commands = f\"\"\"\n", "PyMOL Visualization Commands:\n", "1. Load PDB: load {os.path.abspath(pdb_path)}\n", "2. Select high-scoring residues: {pymol_selection}\n", "3. Highlight high-scoring residues: show sticks, high_score_residues\n", "{pymol_center_cmd}\n", "\"\"\"\n", " \n", " return result_str + \"\\n\\n\" + pymol_commands, mol_vis, [prediction_file, scored_pdb]\n", "\n", "def molecule(input_pdb, residue_scores=None, segment='A'):\n", " mol = read_mol(input_pdb) # Read PDB file content\n", "\n", " # Prepare high-scoring residues script if scores are provided\n", " high_score_script = \"\"\n", " if residue_scores is not None:\n", " # Filter residues based on their scores\n", " high_score_residues = [resi for resi, score in residue_scores if score > 0.75]\n", " mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]\n", " \n", " high_score_script = \"\"\"\n", " // Load the original model and apply white cartoon style\n", " let chainModel = viewer.addModel(pdb, \"pdb\");\n", " chainModel.setStyle({}, {});\n", " chainModel.setStyle(\n", " {\"chain\": \"%s\"}, \n", " {\"cartoon\": {\"color\": \"white\"}}\n", " );\n", "\n", " // Create a new model for high-scoring residues and apply red sticks style\n", " let highScoreModel = viewer.addModel(pdb, \"pdb\");\n", " highScoreModel.setStyle({}, {});\n", " highScoreModel.setStyle(\n", " {\"chain\": \"%s\", \"resi\": [%s]}, \n", " {\"stick\": {\"color\": \"red\"}}\n", " );\n", "\n", " // Create a new model for medium-scoring residues and apply orange sticks style\n", " let midScoreModel = viewer.addModel(pdb, \"pdb\");\n", " highScormidScoreModeleModel.setStyle({}, {});\n", " midScoreModel.setStyle(\n", " {\"chain\": \"%s\", \"resi\": [%s]}, \n", " {\"stick\": {\"color\": \"orange\"}}\n", " );\n", " \"\"\" % (\n", " segment,\n", " segment,\n", " \", \".join(str(resi) for resi in high_score_residues),\n", " segment,\n", " \", \".join(str(resi) for resi in mid_score_residues)\n", " )\n", " \n", " # Generate the full HTML content\n", " html_content = f\"\"\"\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", " \n", " \n", " \n", " \"\"\"\n", " \n", " # Return the HTML content within an iframe safely encoded for special characters\n", " return f''\n", "\n", "\n", "# Gradio UI\n", "with gr.Blocks() as demo:\n", " gr.Markdown(\"# Protein Binding Site Prediction\")\n", " \n", " with gr.Row():\n", " pdb_input = gr.Textbox(value=\"4BDU\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n", " visualize_btn = gr.Button(\"Visualize Structure\")\n", "\n", " molecule_output2 = Molecule3D(label=\"Protein Structure\", reps=[\n", " {\n", " \"model\": 0,\n", " \"style\": \"cartoon\",\n", " \"color\": \"whiteCarbon\",\n", " \"residue_range\": \"\",\n", " \"around\": 0,\n", " \"byres\": False,\n", " }\n", " ])\n", "\n", " with gr.Row():\n", " segment_input = gr.Textbox(value=\"A\", label=\"Chain ID\", placeholder=\"Enter Chain ID here...\")\n", " prediction_btn = gr.Button(\"Predict Binding Site\")\n", "\n", "\n", " molecule_output = gr.HTML(label=\"Protein Structure\")\n", " predictions_output = gr.Textbox(label=\"Binding Site Predictions\")\n", " download_output = gr.File(label=\"Download Files\", file_count=\"multiple\")\n", " \n", " prediction_btn.click(\n", " process_pdb, \n", " inputs=[\n", " pdb_input, \n", " segment_input\n", " ], \n", " outputs=[predictions_output, molecule_output, download_output]\n", " )\n", "\n", " visualize_btn.click(\n", " fetch_pdb, \n", " inputs=[pdb_input], \n", " outputs=molecule_output2\n", " )\n", "\n", " gr.Markdown(\"## Examples\")\n", " gr.Examples(\n", " examples=[\n", " [\"7RPZ\", \"A\"],\n", " [\"2IWI\", \"B\"],\n", " [\"2F6V\", \"A\"]\n", " ],\n", " inputs=[pdb_input, segment_input],\n", " outputs=[predictions_output, molecule_output, download_output]\n", " )\n", "\n", "demo.launch(share=True)" ] }, { "cell_type": "code", "execution_count": null, "id": "2f960cc2-8330-40f1-b54d-693ce922fa74", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "cec41eef-c414-440f-a0ea-63fc8d3acf0b", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python (LLM)", "language": "python", "name": "llm" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.7" } }, "nbformat": 4, "nbformat_minor": 5 }