ThorbenFroehlking committed on
Commit
64f6421
·
1 Parent(s): 66f964e
.ipynb_checkpoints/app-checkpoint.py CHANGED
@@ -234,7 +234,7 @@ reps = [
234
 
235
  # Gradio UI
236
  with gr.Blocks() as demo:
237
- gr.Markdown("# Protein Binding Site Prediction (Random Scores)")
238
  with gr.Row():
239
  pdb_input = gr.Textbox(value="2IWI", label="PDB ID", placeholder="Enter PDB ID here...")
240
  visualize_btn = gr.Button("Visualize Structure")
@@ -244,7 +244,7 @@ with gr.Blocks() as demo:
244
  with gr.Row():
245
  #pdb_input = gr.Textbox(value="2IWI", label="PDB ID", placeholder="Enter PDB ID here...")
246
  segment_input = gr.Textbox(value="A", label="Chain ID", placeholder="Enter Chain ID here...")
247
- prediction_btn = gr.Button("Predict Random Binding Site Scores")
248
 
249
  molecule_output = gr.HTML(label="Protein Structure")
250
  predictions_output = gr.Textbox(label="Binding Site Predictions")
 
234
 
235
  # Gradio UI
236
  with gr.Blocks() as demo:
237
+ gr.Markdown("# Protein Binding Site Prediction")
238
  with gr.Row():
239
  pdb_input = gr.Textbox(value="2IWI", label="PDB ID", placeholder="Enter PDB ID here...")
240
  visualize_btn = gr.Button("Visualize Structure")
 
244
  with gr.Row():
245
  #pdb_input = gr.Textbox(value="2IWI", label="PDB ID", placeholder="Enter PDB ID here...")
246
  segment_input = gr.Textbox(value="A", label="Chain ID", placeholder="Enter Chain ID here...")
247
+ prediction_btn = gr.Button("Predict Binding Site")
248
 
249
  molecule_output = gr.HTML(label="Protein Structure")
250
  predictions_output = gr.Textbox(label="Binding Site Predictions")
.ipynb_checkpoints/test-checkpoint.ipynb CHANGED
@@ -2,15 +2,15 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 5,
6
- "id": "d2208d17-47b6-4ff1-b6b6-ba09a9d490c7",
7
  "metadata": {},
8
  "outputs": [
9
  {
10
  "name": "stdout",
11
  "output_type": "stream",
12
  "text": [
13
- "* Running on local URL: http://127.0.0.1:7864\n",
14
  "\n",
15
  "To create a public link, set `share=True` in `launch()`.\n"
16
  ]
@@ -18,7 +18,7 @@
18
  {
19
  "data": {
20
  "text/html": [
21
- "<div><iframe src=\"http://127.0.0.1:7864/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
22
  ],
23
  "text/plain": [
24
  "<IPython.core.display.HTML object>"
@@ -31,7 +31,7 @@
31
  "data": {
32
  "text/plain": []
33
  },
34
- "execution_count": 5,
35
  "metadata": {},
36
  "output_type": "execute_result"
37
  }
@@ -145,33 +145,9 @@
145
  "source": []
146
  },
147
  {
148
- "cell_type": "code",
149
- "execution_count": 4,
150
- "id": "a1088e14-f09c-48ff-8632-cc4685306d7c",
151
  "metadata": {},
152
- "outputs": [
153
- {
154
- "name": "stdout",
155
- "output_type": "stream",
156
- "text": [
157
- "* Running on local URL: http://127.0.0.1:7863\n",
158
- "\n",
159
- "To create a public link, set `share=True` in `launch()`.\n"
160
- ]
161
- },
162
- {
163
- "data": {
164
- "text/html": [
165
- "<div><iframe src=\"http://127.0.0.1:7863/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
166
- ],
167
- "text/plain": [
168
- "<IPython.core.display.HTML object>"
169
- ]
170
- },
171
- "metadata": {},
172
- "output_type": "display_data"
173
- }
174
- ],
175
  "source": [
176
  "import gradio as gr\n",
177
  "from gradio_molecule3d import Molecule3D\n",
@@ -248,17 +224,409 @@
248
  "outputs": [],
249
  "source": []
250
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
251
  {
252
  "cell_type": "code",
253
- "execution_count": 8,
254
- "id": "cdf7fd26-0464-40d9-9107-71c29dbcaef8",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
  "metadata": {},
256
  "outputs": [
257
  {
258
  "name": "stdout",
259
  "output_type": "stream",
260
  "text": [
261
- "* Running on local URL: http://127.0.0.1:7867\n",
262
  "\n",
263
  "To create a public link, set `share=True` in `launch()`.\n"
264
  ]
@@ -266,7 +634,7 @@
266
  {
267
  "data": {
268
  "text/html": [
269
- "<div><iframe src=\"http://127.0.0.1:7867/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
270
  ],
271
  "text/plain": [
272
  "<IPython.core.display.HTML object>"
@@ -279,49 +647,24 @@
279
  "data": {
280
  "text/plain": []
281
  },
282
- "execution_count": 8,
283
  "metadata": {},
284
  "output_type": "execute_result"
285
- },
286
- {
287
- "name": "stderr",
288
- "output_type": "stream",
289
- "text": [
290
- "/var/folders/tm/ym2tckv54b96ws82y3b7cqhh0000gn/T/ipykernel_11794/4072855226.py:39: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed in 3.11. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap()`` or ``pyplot.get_cmap()`` instead.\n",
291
- " colors = [cm.get_cmap('coolwarm')(score)[:3] for score in normalized_scores]\n",
292
- "Traceback (most recent call last):\n",
293
- " File \"/Users/thorben_froehlking/anaconda3/envs/LLM/lib/python3.12/site-packages/gradio/queueing.py\", line 622, in process_events\n",
294
- " response = await route_utils.call_process_api(\n",
295
- " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
296
- " File \"/Users/thorben_froehlking/anaconda3/envs/LLM/lib/python3.12/site-packages/gradio/route_utils.py\", line 323, in call_process_api\n",
297
- " output = await app.get_blocks().process_api(\n",
298
- " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
299
- " File \"/Users/thorben_froehlking/anaconda3/envs/LLM/lib/python3.12/site-packages/gradio/blocks.py\", line 2024, in process_api\n",
300
- " data = await self.postprocess_data(block_fn, result[\"prediction\"], state)\n",
301
- " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
302
- " File \"/Users/thorben_froehlking/anaconda3/envs/LLM/lib/python3.12/site-packages/gradio/blocks.py\", line 1830, in postprocess_data\n",
303
- " prediction_value = block.postprocess(prediction_value)\n",
304
- " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
305
- " File \"/Users/thorben_froehlking/anaconda3/envs/LLM/lib/python3.12/site-packages/gradio_molecule3d/molecule3d.py\", line 210, in postprocess\n",
306
- " orig_name=Path(file).name,\n",
307
- " ^^^^^^^^^^\n",
308
- " File \"/Users/thorben_froehlking/anaconda3/envs/LLM/lib/python3.12/pathlib.py\", line 1162, in __init__\n",
309
- " super().__init__(*args)\n",
310
- " File \"/Users/thorben_froehlking/anaconda3/envs/LLM/lib/python3.12/pathlib.py\", line 373, in __init__\n",
311
- " raise TypeError(\n",
312
- "TypeError: argument should be a str or an os.PathLike object where __fspath__ returns a str, not 'dict'\n"
313
- ]
314
  }
315
  ],
316
  "source": [
317
  "import gradio as gr\n",
318
  "import requests\n",
319
  "from Bio.PDB import PDBParser\n",
320
- "from gradio_molecule3d import Molecule3D\n",
321
  "import numpy as np\n",
322
- "from matplotlib import cm\n",
 
 
 
 
 
 
323
  "\n",
324
- "# Function to fetch a PDB file from RCSB PDB\n",
325
  "def fetch_pdb(pdb_id):\n",
326
  " pdb_url = f'https://files.rcsb.org/download/{pdb_id}.pdb'\n",
327
  " pdb_path = f'{pdb_id}.pdb'\n",
@@ -333,78 +676,129 @@
333
  " else:\n",
334
  " return None\n",
335
  "\n",
336
- "# Function to process the PDB file and return random predictions\n",
337
  "def process_pdb(pdb_id, segment):\n",
338
  " pdb_path = fetch_pdb(pdb_id)\n",
339
  " if not pdb_path:\n",
340
- " return \"Failed to fetch PDB file\", None, None, None\n",
341
- "\n",
342
  " parser = PDBParser(QUIET=True)\n",
343
  " structure = parser.get_structure('protein', pdb_path)\n",
344
- "\n",
345
  " try:\n",
346
  " chain = structure[0][segment]\n",
347
  " except KeyError:\n",
348
- " return \"Invalid Chain ID\", None, None, None\n",
349
- "\n",
350
  " sequence = [residue.get_resname() for residue in chain if residue.id[0] == ' ']\n",
351
  " random_scores = np.random.rand(len(sequence))\n",
352
- "\n",
353
- " # Normalize scores for coloring (0 = blue, 1 = red)\n",
354
- " normalized_scores = (random_scores - np.min(random_scores)) / (np.max(random_scores) - np.min(random_scores))\n",
355
- " colors = [cm.get_cmap('coolwarm')(score)[:3] for score in normalized_scores]\n",
356
- " hex_colors = [f'#{int(r*255):02x}{int(g*255):02x}{int(b*255):02x}' for r, g, b in colors]\n",
357
- "\n",
358
- " # Result string and representation\n",
359
  " result_str = \"\\n\".join(\n",
360
  " f\"{seq} {res.id[1]} {score:.2f}\" \n",
361
  " for seq, res, score in zip(sequence, chain, random_scores)\n",
362
  " )\n",
363
- "\n",
364
- " # Representation for the protein structure\n",
365
- " reps = [\n",
366
- " {\n",
367
- " \"model\": 0,\n",
368
- " \"style\": \"cartoon\",\n",
369
- " \"color\": \"whiteCarbon\"\n",
370
- " }\n",
371
- " ] + [\n",
372
- " {\n",
373
- " \"model\": 0,\n",
374
- " \"style\": \"cartoon\",\n",
375
- " \"residue_index\": i,\n",
376
- " \"color\": color\n",
377
- " }\n",
378
- " for i, color in enumerate(hex_colors)\n",
379
- " ]\n",
380
- "\n",
381
  " # Save the predictions to a file\n",
382
  " prediction_file = f\"{pdb_id}_predictions.txt\"\n",
383
  " with open(prediction_file, \"w\") as f:\n",
384
  " f.write(result_str)\n",
385
  " \n",
386
- " return result_str, reps, prediction_file\n",
387
  "\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
388
  "# Gradio UI\n",
389
  "with gr.Blocks() as demo:\n",
390
  " gr.Markdown(\"# Protein Binding Site Prediction (Random Scores)\")\n",
391
- "\n",
392
  " with gr.Row():\n",
393
  " pdb_input = gr.Textbox(value=\"2IWI\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
394
  " segment_input = gr.Textbox(value=\"A\", label=\"Chain ID\", placeholder=\"Enter Chain ID here...\")\n",
395
  " visualize_btn = gr.Button(\"Visualize Structure\")\n",
 
 
 
 
 
 
 
396
  " prediction_btn = gr.Button(\"Predict Random Binding Site Scores\")\n",
397
  "\n",
398
- " molecule_output = Molecule3D(label=\"Protein Structure\", reps=reps)\n",
399
  " predictions_output = gr.Textbox(label=\"Binding Site Predictions\")\n",
400
  " download_output = gr.File(label=\"Download Predictions\")\n",
401
- "\n",
402
- " prediction_btn.click(\n",
403
- " fn=process_pdb,\n",
404
- " inputs=[pdb_input, segment_input],\n",
405
- " outputs=[predictions_output, molecule_output, download_output]\n",
406
- " )\n",
407
- "\n",
 
 
 
 
408
  " gr.Markdown(\"## Examples\")\n",
409
  " gr.Examples(\n",
410
  " examples=[\n",
@@ -422,7 +816,7 @@
422
  {
423
  "cell_type": "code",
424
  "execution_count": null,
425
- "id": "ee215c16-a1fb-450f-bb93-37aaee6fb3f1",
426
  "metadata": {},
427
  "outputs": [],
428
  "source": []
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 3,
6
+ "id": "1f8ea359-674c-4263-9c2a-7a8e7e464249",
7
  "metadata": {},
8
  "outputs": [
9
  {
10
  "name": "stdout",
11
  "output_type": "stream",
12
  "text": [
13
+ "* Running on local URL: http://127.0.0.1:7862\n",
14
  "\n",
15
  "To create a public link, set `share=True` in `launch()`.\n"
16
  ]
 
18
  {
19
  "data": {
20
  "text/html": [
21
+ "<div><iframe src=\"http://127.0.0.1:7862/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
22
  ],
23
  "text/plain": [
24
  "<IPython.core.display.HTML object>"
 
31
  "data": {
32
  "text/plain": []
33
  },
34
+ "execution_count": 3,
35
  "metadata": {},
36
  "output_type": "execute_result"
37
  }
 
145
  "source": []
146
  },
147
  {
148
+ "cell_type": "raw",
149
+ "id": "88affe12-7c48-4bd6-9e46-32cdffa729fe",
 
150
  "metadata": {},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  "source": [
152
  "import gradio as gr\n",
153
  "from gradio_molecule3d import Molecule3D\n",
 
224
  "outputs": [],
225
  "source": []
226
  },
227
+ {
228
+ "cell_type": "raw",
229
+ "id": "2b970adb-3152-427f-bb58-b92974ff406e",
230
+ "metadata": {},
231
+ "source": [
232
+ "import gradio as gr\n",
233
+ "import os\n",
234
+ "import requests\n",
235
+ "from Bio.PDB import PDBParser, PDBIO\n",
236
+ "import biotite.structure.io as bsio\n",
237
+ "\n",
238
+ "def read_mol(pdb_path):\n",
239
+ " \"\"\"Read PDB file and return its content as a string\"\"\"\n",
240
+ " with open(pdb_path, 'r') as f:\n",
241
+ " return f.read()\n",
242
+ "\n",
243
+ "# Function to fetch or upload the PDB file\n",
244
+ "def get_pdb(pdb_code=\"\", filepath=\"\"):\n",
245
+ " if pdb_code and len(pdb_code) == 4:\n",
246
+ " pdb_file = f\"{pdb_code}.pdb\"\n",
247
+ " if not os.path.exists(pdb_file):\n",
248
+ " os.system(f\"wget -qnc https://files.rcsb.org/view/{pdb_code}.pdb\")\n",
249
+ " return pdb_file\n",
250
+ " elif filepath is not None:\n",
251
+ " return filepath\n",
252
+ " else:\n",
253
+ " return None\n",
254
+ "\n",
255
+ "def molecule(input_pdb):\n",
256
+ " mol = read_mol(input_pdb) # Read PDB file content\n",
257
+ " \n",
258
+ " html_content = f\"\"\"\n",
259
+ " <!DOCTYPE html>\n",
260
+ " <html>\n",
261
+ " <head> \n",
262
+ " <meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\" />\n",
263
+ " <style>\n",
264
+ " .mol-container {{\n",
265
+ " width: 100%;\n",
266
+ " height: 700px;\n",
267
+ " position: relative;\n",
268
+ " }}\n",
269
+ " </style>\n",
270
+ " <script src=\"https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.3/jquery.min.js\"></script>\n",
271
+ " <script src=\"https://3Dmol.csb.pitt.edu/build/3Dmol-min.js\"></script>\n",
272
+ " </head>\n",
273
+ " <body>\n",
274
+ " <div id=\"container\" class=\"mol-container\"></div>\n",
275
+ " <script>\n",
276
+ " let pdb = `{mol}`; // Use template literal to properly escape PDB content\n",
277
+ " $(document).ready(function () {{\n",
278
+ " let element = $(\"#container\");\n",
279
+ " let config = {{ backgroundColor: \"white\" }};\n",
280
+ " let viewer = $3Dmol.createViewer(element, config);\n",
281
+ " viewer.addModel(pdb, \"pdb\");\n",
282
+ " viewer.getModel(0).setStyle({{}}, {{ cartoon: {{ colorscheme:\"whiteCarbon\" }} }});\n",
283
+ " viewer.zoomTo();\n",
284
+ " viewer.render();\n",
285
+ " viewer.zoom(0.8, 2000);\n",
286
+ " }});\n",
287
+ " </script>\n",
288
+ " </body>\n",
289
+ " </html>\n",
290
+ " \"\"\"\n",
291
+ " \n",
292
+ " # Return the HTML content within an iframe safely encoded for special characters\n",
293
+ " return f'<iframe width=\"100%\" height=\"700\" srcdoc=\"{html_content.replace(chr(34), \"&quot;\").replace(chr(39), \"&#39;\")}\"></iframe>'\n",
294
+ "\n",
295
+ "# Gradio function to update the visualization\n",
296
+ "def update(inp, file):\n",
297
+ " pdb_path = get_pdb(inp, file)\n",
298
+ " if pdb_path:\n",
299
+ " return molecule(pdb_path)\n",
300
+ " else:\n",
301
+ " return \"Invalid input. Please provide a valid PDB code or upload a PDB file.\"\n",
302
+ "\n",
303
+ "# Gradio UI\n",
304
+ "demo = gr.Blocks()\n",
305
+ "with demo:\n",
306
+ " gr.Markdown(\"# PDB Viewer using 3Dmol.js\")\n",
307
+ " with gr.Row():\n",
308
+ " with gr.Column():\n",
309
+ " inp = gr.Textbox(\n",
310
+ " placeholder=\"PDB Code or upload file below\", label=\"Input structure\"\n",
311
+ " )\n",
312
+ " file = gr.File(file_count=\"single\")\n",
313
+ " btn = gr.Button(\"View structure\")\n",
314
+ " mol = gr.HTML()\n",
315
+ " btn.click(fn=update, inputs=[inp, file], outputs=mol)\n",
316
+ "\n",
317
+ "# Launch the Gradio interface \n",
318
+ "demo.launch(debug=True)"
319
+ ]
320
+ },
321
  {
322
  "cell_type": "code",
323
+ "execution_count": null,
324
+ "id": "ee215c16-a1fb-450f-bb93-37aaee6fb3f1",
325
+ "metadata": {},
326
+ "outputs": [],
327
+ "source": []
328
+ },
329
+ {
330
+ "cell_type": "raw",
331
+ "id": "050aa2e8-2dbe-4a28-8692-58ca7c50fccd",
332
+ "metadata": {},
333
+ "source": [
334
+ "import gradio as gr\n",
335
+ "import os\n",
336
+ "import requests\n",
337
+ "import numpy as np\n",
338
+ "from Bio.PDB import PDBParser\n",
339
+ "\n",
340
+ "def read_mol(pdb_path):\n",
341
+ " \"\"\"Read PDB file and return its content as a string\"\"\"\n",
342
+ " with open(pdb_path, 'r') as f:\n",
343
+ " return f.read()\n",
344
+ "\n",
345
+ "# Function to fetch a PDB file from RCSB PDB\n",
346
+ "def fetch_pdb(pdb_id):\n",
347
+ " pdb_url = f'https://files.rcsb.org/download/{pdb_id}.pdb'\n",
348
+ " pdb_path = f'{pdb_id}.pdb'\n",
349
+ " response = requests.get(pdb_url)\n",
350
+ " if response.status_code == 200:\n",
351
+ " with open(pdb_path, 'wb') as f:\n",
352
+ " f.write(response.content)\n",
353
+ " return molecule(pdb_path)\n",
354
+ " else:\n",
355
+ " return None\n",
356
+ "\n",
357
+ "# Function to process the PDB file and return random predictions\n",
358
+ "def process_pdb(pdb_id, segment):\n",
359
+ " pdb_path = fetch_pdb(pdb_id)\n",
360
+ " if not pdb_path:\n",
361
+ " return \"Failed to fetch PDB file\", None, None\n",
362
+ " \n",
363
+ " parser = PDBParser(QUIET=True)\n",
364
+ " structure = parser.get_structure('protein', pdb_path)\n",
365
+ " \n",
366
+ " try:\n",
367
+ " chain = structure[0][segment]\n",
368
+ " except KeyError:\n",
369
+ " return \"Invalid Chain ID\", None, None\n",
370
+ " \n",
371
+ " sequence = [residue.get_resname() for residue in chain if residue.id[0] == ' ']\n",
372
+ " random_scores = np.random.rand(len(sequence))\n",
373
+ " result_str = \"\\n\".join(\n",
374
+ " f\"{seq} {res.id[1]} {score:.2f}\" \n",
375
+ " for seq, res, score in zip(sequence, chain, random_scores)\n",
376
+ " )\n",
377
+ " \n",
378
+ " # Save the predictions to a file\n",
379
+ " prediction_file = f\"{pdb_id}_predictions.txt\"\n",
380
+ " with open(prediction_file, \"w\") as f:\n",
381
+ " f.write(result_str)\n",
382
+ " \n",
383
+ " return result_str, molecule(pdb_path), prediction_file\n",
384
+ "\n",
385
+ "def molecule(input_pdb):\n",
386
+ " mol = read_mol(input_pdb) # Read PDB file content\n",
387
+ " \n",
388
+ " html_content = f\"\"\"\n",
389
+ " <!DOCTYPE html>\n",
390
+ " <html>\n",
391
+ " <head> \n",
392
+ " <meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\" />\n",
393
+ " <style>\n",
394
+ " .mol-container {{\n",
395
+ " width: 100%;\n",
396
+ " height: 700px;\n",
397
+ " position: relative;\n",
398
+ " }}\n",
399
+ " </style>\n",
400
+ " <script src=\"https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.3/jquery.min.js\"></script>\n",
401
+ " <script src=\"https://3Dmol.csb.pitt.edu/build/3Dmol-min.js\"></script>\n",
402
+ " </head>\n",
403
+ " <body>\n",
404
+ " <div id=\"container\" class=\"mol-container\"></div>\n",
405
+ " <script>\n",
406
+ " let pdb = `{mol}`; // Use template literal to properly escape PDB content\n",
407
+ " $(document).ready(function () {{\n",
408
+ " let element = $(\"#container\");\n",
409
+ " let config = {{ backgroundColor: \"white\" }};\n",
410
+ " let viewer = $3Dmol.createViewer(element, config);\n",
411
+ " viewer.addModel(pdb, \"pdb\");\n",
412
+ " \n",
413
+ " // Set cartoon representation with white carbon color scheme\n",
414
+ " viewer.getModel(0).setStyle({{}}, {{ cartoon: {{ colorscheme:\"whiteCarbon\" }} }});\n",
415
+ " \n",
416
+ " // Highlight specific histidine residues in red stick representation\n",
417
+ " viewer.getModel(0).setStyle(\n",
418
+ " {{\"resn\": \"HIS\"}}, \n",
419
+ " {{\"stick\": {{\"color\": \"red\"}}}}\n",
420
+ " );\n",
421
+ " \n",
422
+ " viewer.zoomTo();\n",
423
+ " viewer.render();\n",
424
+ " viewer.zoom(0.8, 2000);\n",
425
+ " }});\n",
426
+ " </script>\n",
427
+ " </body>\n",
428
+ " </html>\n",
429
+ " \"\"\"\n",
430
+ " \n",
431
+ " # Return the HTML content within an iframe safely encoded for special characters\n",
432
+ " return f'<iframe width=\"100%\" height=\"700\" srcdoc=\"{html_content.replace(chr(34), \"&quot;\").replace(chr(39), \"&#39;\")}\"></iframe>'\n",
433
+ "\n",
434
+ "# Gradio UI\n",
435
+ "with gr.Blocks() as demo:\n",
436
+ " gr.Markdown(\"# Protein Binding Site Prediction (Random Scores)\")\n",
437
+ " with gr.Row():\n",
438
+ " pdb_input = gr.Textbox(value=\"2IWI\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
439
+ " segment_input = gr.Textbox(value=\"A\", label=\"Chain ID\", placeholder=\"Enter Chain ID here...\")\n",
440
+ " visualize_btn = gr.Button(\"Visualize Structure\")\n",
441
+ " prediction_btn = gr.Button(\"Predict Random Binding Site Scores\")\n",
442
+ " \n",
443
+ " # Use HTML output instead of Molecule3D\n",
444
+ " molecule_output = gr.HTML(label=\"Protein Structure\")\n",
445
+ " predictions_output = gr.Textbox(label=\"Binding Site Predictions\")\n",
446
+ " download_output = gr.File(label=\"Download Predictions\")\n",
447
+ " \n",
448
+ " visualize_btn.click(fetch_pdb, inputs=[pdb_input], outputs=molecule_output)\n",
449
+ " prediction_btn.click(process_pdb, inputs=[pdb_input, segment_input], outputs=[predictions_output, molecule_output, download_output])\n",
450
+ " \n",
451
+ " gr.Markdown(\"## Examples\")\n",
452
+ " gr.Examples(\n",
453
+ " examples=[\n",
454
+ " [\"2IWI\", \"A\"],\n",
455
+ " [\"7RPZ\", \"B\"],\n",
456
+ " [\"3TJN\", \"C\"]\n",
457
+ " ],\n",
458
+ " inputs=[pdb_input, segment_input],\n",
459
+ " outputs=[predictions_output, molecule_output, download_output]\n",
460
+ " )\n",
461
+ "\n",
462
+ "demo.launch(debug=True)"
463
+ ]
464
+ },
465
+ {
466
+ "cell_type": "code",
467
+ "execution_count": null,
468
+ "id": "9a5facd9-855c-4b35-8dd3-2c0c8c7dd356",
469
+ "metadata": {},
470
+ "outputs": [],
471
+ "source": []
472
+ },
473
+ {
474
+ "cell_type": "raw",
475
+ "id": "a762170f-92a9-473d-b18d-53607a780e3b",
476
+ "metadata": {},
477
+ "source": [
478
+ "import gradio as gr\n",
479
+ "import requests\n",
480
+ "from Bio.PDB import PDBParser\n",
481
+ "import numpy as np\n",
482
+ "import os\n",
483
+ "\n",
484
+ "def read_mol(pdb_path):\n",
485
+ " \"\"\"Read PDB file and return its content as a string\"\"\"\n",
486
+ " with open(pdb_path, 'r') as f:\n",
487
+ " return f.read()\n",
488
+ "\n",
489
+ "# Function to fetch a PDB file from RCSB PDB\n",
490
+ "def fetch_pdb(pdb_id):\n",
491
+ " pdb_url = f'https://files.rcsb.org/download/{pdb_id}.pdb'\n",
492
+ " pdb_path = f'{pdb_id}.pdb'\n",
493
+ " response = requests.get(pdb_url)\n",
494
+ " if response.status_code == 200:\n",
495
+ " with open(pdb_path, 'wb') as f:\n",
496
+ " f.write(response.content)\n",
497
+ " return pdb_path\n",
498
+ " else:\n",
499
+ " return None\n",
500
+ "\n",
501
+ "# Function to process the PDB file and return random predictions\n",
502
+ "def process_pdb(pdb_id, segment):\n",
503
+ " pdb_path = fetch_pdb(pdb_id)\n",
504
+ " if not pdb_path:\n",
505
+ " return \"Failed to fetch PDB file\", None, None\n",
506
+ " parser = PDBParser(QUIET=True)\n",
507
+ " structure = parser.get_structure('protein', pdb_path)\n",
508
+ " \n",
509
+ " try:\n",
510
+ " chain = structure[0][segment]\n",
511
+ " except KeyError:\n",
512
+ " return \"Invalid Chain ID\", None, None\n",
513
+ " sequence = [residue.get_resname() for residue in chain if residue.id[0] == ' ']\n",
514
+ " random_scores = np.random.rand(len(sequence))\n",
515
+ " result_str = \"\\n\".join(\n",
516
+ " f\"{seq} {res.id[1]} {score:.2f}\" \n",
517
+ " for seq, res, score in zip(sequence, chain, random_scores)\n",
518
+ " )\n",
519
+ " # Save the predictions to a file\n",
520
+ " prediction_file = f\"{pdb_id}_predictions.txt\"\n",
521
+ " with open(prediction_file, \"w\") as f:\n",
522
+ " f.write(result_str)\n",
523
+ " \n",
524
+ " return result_str, molecule(pdb_path), prediction_file\n",
525
+ "\n",
526
+ "def molecule(input_pdb):\n",
527
+ " mol = read_mol(input_pdb) # Read PDB file content\n",
528
+ " \n",
529
+ " html_content = f\"\"\"\n",
530
+ " <!DOCTYPE html>\n",
531
+ " <html>\n",
532
+ " <head> \n",
533
+ " <meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\" />\n",
534
+ " <style>\n",
535
+ " .mol-container {{\n",
536
+ " width: 100%;\n",
537
+ " height: 700px;\n",
538
+ " position: relative;\n",
539
+ " }}\n",
540
+ " </style>\n",
541
+ " <script src=\"https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.3/jquery.min.js\"></script>\n",
542
+ " <script src=\"https://3Dmol.csb.pitt.edu/build/3Dmol-min.js\"></script>\n",
543
+ " </head>\n",
544
+ " <body>\n",
545
+ " <div id=\"container\" class=\"mol-container\"></div>\n",
546
+ " <script>\n",
547
+ " let pdb = `{mol}`; // Use template literal to properly escape PDB content\n",
548
+ " $(document).ready(function () {{\n",
549
+ " let element = $(\"#container\");\n",
550
+ " let config = {{ backgroundColor: \"white\" }};\n",
551
+ " let viewer = $3Dmol.createViewer(element, config);\n",
552
+ " viewer.addModel(pdb, \"pdb\");\n",
553
+ " \n",
554
+ " // Set cartoon representation with white carbon color scheme\n",
555
+ " viewer.getModel(0).setStyle({{}}, {{ cartoon: {{ colorscheme:\"whiteCarbon\" }} }});\n",
556
+ " \n",
557
+ " // Highlight specific histidine residues in red stick representation\n",
558
+ " viewer.getModel(0).setStyle(\n",
559
+ " {{\"resn\": \"HIS\"}}, \n",
560
+ " {{\"stick\": {{\"color\": \"red\"}}}}\n",
561
+ " );\n",
562
+ " \n",
563
+ " viewer.zoomTo();\n",
564
+ " viewer.render();\n",
565
+ " viewer.zoom(0.8, 2000);\n",
566
+ " }});\n",
567
+ " </script>\n",
568
+ " </body>\n",
569
+ " </html>\n",
570
+ " \"\"\"\n",
571
+ " \n",
572
+ " # Return the HTML content within an iframe safely encoded for special characters\n",
573
+ " return f'<iframe width=\"100%\" height=\"700\" srcdoc=\"{html_content.replace(chr(34), \"&quot;\").replace(chr(39), \"&#39;\")}\"></iframe>'\n",
574
+ "\n",
575
+ "# Gradio UI\n",
576
+ "with gr.Blocks() as demo:\n",
577
+ " gr.Markdown(\"# Protein Binding Site Prediction (Random Scores)\")\n",
578
+ " with gr.Row():\n",
579
+ " pdb_input = gr.Textbox(value=\"2IWI\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
580
+ " segment_input = gr.Textbox(value=\"A\", label=\"Chain ID\", placeholder=\"Enter Chain ID here...\")\n",
581
+ " visualize_btn = gr.Button(\"Visualize Structure\")\n",
582
+ " prediction_btn = gr.Button(\"Predict Random Binding Site Scores\")\n",
583
+ " \n",
584
+ " molecule_output = gr.HTML(label=\"Protein Structure\")\n",
585
+ " predictions_output = gr.Textbox(label=\"Binding Site Predictions\")\n",
586
+ " download_output = gr.File(label=\"Download Predictions\")\n",
587
+ " \n",
588
+ " # Update to explicitly use molecule() function for visualization\n",
589
+ " visualize_btn.click(\n",
590
+ " fn=lambda pdb_id: molecule(fetch_pdb(pdb_id)), \n",
591
+ " inputs=[pdb_input], \n",
592
+ " outputs=molecule_output\n",
593
+ " )\n",
594
+ " \n",
595
+ " prediction_btn.click(process_pdb, inputs=[pdb_input, segment_input], outputs=[predictions_output, molecule_output, download_output])\n",
596
+ " \n",
597
+ " gr.Markdown(\"## Examples\")\n",
598
+ " gr.Examples(\n",
599
+ " examples=[\n",
600
+ " [\"2IWI\", \"A\"],\n",
601
+ " [\"7RPZ\", \"B\"],\n",
602
+ " [\"3TJN\", \"C\"]\n",
603
+ " ],\n",
604
+ " inputs=[pdb_input, segment_input],\n",
605
+ " outputs=[predictions_output, molecule_output, download_output]\n",
606
+ " )\n",
607
+ "\n",
608
+ "demo.launch()"
609
+ ]
610
+ },
611
+ {
612
+ "cell_type": "code",
613
+ "execution_count": null,
614
+ "id": "15527a58-c449-4da0-8fab-3baaede15e41",
615
+ "metadata": {},
616
+ "outputs": [],
617
+ "source": []
618
+ },
619
+ {
620
+ "cell_type": "code",
621
+ "execution_count": 2,
622
+ "id": "9ef3e330-cb88-4c29-b84a-2f8652883cfc",
623
  "metadata": {},
624
  "outputs": [
625
  {
626
  "name": "stdout",
627
  "output_type": "stream",
628
  "text": [
629
+ "* Running on local URL: http://127.0.0.1:7860\n",
630
  "\n",
631
  "To create a public link, set `share=True` in `launch()`.\n"
632
  ]
 
634
  {
635
  "data": {
636
  "text/html": [
637
+ "<div><iframe src=\"http://127.0.0.1:7860/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
638
  ],
639
  "text/plain": [
640
  "<IPython.core.display.HTML object>"
 
647
  "data": {
648
  "text/plain": []
649
  },
650
+ "execution_count": 2,
651
  "metadata": {},
652
  "output_type": "execute_result"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
653
  }
654
  ],
655
  "source": [
656
  "import gradio as gr\n",
657
  "import requests\n",
658
  "from Bio.PDB import PDBParser\n",
 
659
  "import numpy as np\n",
660
+ "import os\n",
661
+ "from gradio_molecule3d import Molecule3D\n",
662
+ "\n",
663
+ "def read_mol(pdb_path):\n",
664
+ " \"\"\"Read PDB file and return its content as a string\"\"\"\n",
665
+ " with open(pdb_path, 'r') as f:\n",
666
+ " return f.read()\n",
667
  "\n",
 
668
  "def fetch_pdb(pdb_id):\n",
669
  " pdb_url = f'https://files.rcsb.org/download/{pdb_id}.pdb'\n",
670
  " pdb_path = f'{pdb_id}.pdb'\n",
 
676
  " else:\n",
677
  " return None\n",
678
  "\n",
 
679
  "def process_pdb(pdb_id, segment):\n",
680
  " pdb_path = fetch_pdb(pdb_id)\n",
681
  " if not pdb_path:\n",
682
+ " return \"Failed to fetch PDB file\", None, None\n",
 
683
  " parser = PDBParser(QUIET=True)\n",
684
  " structure = parser.get_structure('protein', pdb_path)\n",
685
+ " \n",
686
  " try:\n",
687
  " chain = structure[0][segment]\n",
688
  " except KeyError:\n",
689
+ " return \"Invalid Chain ID\", None, None\n",
 
690
  " sequence = [residue.get_resname() for residue in chain if residue.id[0] == ' ']\n",
691
  " random_scores = np.random.rand(len(sequence))\n",
 
 
 
 
 
 
 
692
  " result_str = \"\\n\".join(\n",
693
  " f\"{seq} {res.id[1]} {score:.2f}\" \n",
694
  " for seq, res, score in zip(sequence, chain, random_scores)\n",
695
  " )\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
696
  " # Save the predictions to a file\n",
697
  " prediction_file = f\"{pdb_id}_predictions.txt\"\n",
698
  " with open(prediction_file, \"w\") as f:\n",
699
  " f.write(result_str)\n",
700
  " \n",
701
+ " return result_str, molecule(pdb_path, random_scores), prediction_file\n",
702
  "\n",
703
+ "def molecule(input_pdb, scores=None):\n",
704
+ " mol = read_mol(input_pdb) # Read PDB file content\n",
705
+ " \n",
706
+ " # Prepare high-scoring residues script if scores are provided\n",
707
+ " high_score_script = \"\"\n",
708
+ " if scores is not None:\n",
709
+ " high_score_script = \"\"\"\n",
710
+ " // Highlight residues with high scores\n",
711
+ " let highScoreResidues = [{}];\n",
712
+ " viewer.getModel(0).setStyle(\n",
713
+ " {{\"resi\": highScoreResidues}}, \n",
714
+ " {{\"stick\": {{\"color\": \"red\"}}}}\n",
715
+ " );\n",
716
+ " \"\"\".format(\n",
717
+ " \", \".join(str(i+1) for i, score in enumerate(scores) if score > 0.8)\n",
718
+ " )\n",
719
+ " \n",
720
+ " html_content = f\"\"\"\n",
721
+ " <!DOCTYPE html>\n",
722
+ " <html>\n",
723
+ " <head> \n",
724
+ " <meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\" />\n",
725
+ " <style>\n",
726
+ " .mol-container {{\n",
727
+ " width: 100%;\n",
728
+ " height: 700px;\n",
729
+ " position: relative;\n",
730
+ " }}\n",
731
+ " </style>\n",
732
+ " <script src=\"https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.3/jquery.min.js\"></script>\n",
733
+ " <script src=\"https://3Dmol.csb.pitt.edu/build/3Dmol-min.js\"></script>\n",
734
+ " </head>\n",
735
+ " <body>\n",
736
+ " <div id=\"container\" class=\"mol-container\"></div>\n",
737
+ " <script>\n",
738
+ " let pdb = `{mol}`; // Use template literal to properly escape PDB content\n",
739
+ " $(document).ready(function () {{\n",
740
+ " let element = $(\"#container\");\n",
741
+ " let config = {{ backgroundColor: \"white\" }};\n",
742
+ " let viewer = $3Dmol.createViewer(element, config);\n",
743
+ " viewer.addModel(pdb, \"pdb\");\n",
744
+ " \n",
745
+ " // Set cartoon representation with white carbon color scheme\n",
746
+ " viewer.getModel(0).setStyle({{}}, {{ cartoon: {{ colorscheme:\"whiteCarbon\" }} }});\n",
747
+ " \n",
748
+ " {high_score_script}\n",
749
+ " \n",
750
+ " viewer.zoomTo();\n",
751
+ " viewer.render();\n",
752
+ " viewer.zoom(0.8, 2000);\n",
753
+ " }});\n",
754
+ " </script>\n",
755
+ " </body>\n",
756
+ " </html>\n",
757
+ " \"\"\"\n",
758
+ " \n",
759
+ " # Return the HTML content within an iframe safely encoded for special characters\n",
760
+ " return f'<iframe width=\"100%\" height=\"700\" srcdoc=\"{html_content.replace(chr(34), \"&quot;\").replace(chr(39), \"&#39;\")}\"></iframe>'\n",
761
+ "\n",
762
+ "reps = [\n",
763
+ " {\n",
764
+ " \"model\": 0,\n",
765
+ " \"style\": \"cartoon\",\n",
766
+ " \"color\": \"whiteCarbon\",\n",
767
+ " \"residue_range\": \"\",\n",
768
+ " \"around\": 0,\n",
769
+ " \"byres\": False,\n",
770
+ " }\n",
771
+ " ]\n",
772
  "# Gradio UI\n",
773
  "with gr.Blocks() as demo:\n",
774
  " gr.Markdown(\"# Protein Binding Site Prediction (Random Scores)\")\n",
 
775
  " with gr.Row():\n",
776
  " pdb_input = gr.Textbox(value=\"2IWI\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
777
  " segment_input = gr.Textbox(value=\"A\", label=\"Chain ID\", placeholder=\"Enter Chain ID here...\")\n",
778
  " visualize_btn = gr.Button(\"Visualize Structure\")\n",
779
+ " #prediction_btn = gr.Button(\"Predict Random Binding Site Scores\")\n",
780
+ "\n",
781
+ " molecule_output2 = Molecule3D(label=\"Protein Structure\", reps=reps)\n",
782
+ "\n",
783
+ " with gr.Row():\n",
784
+ " pdb_input = gr.Textbox(value=\"2IWI\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
785
+ " segment_input = gr.Textbox(value=\"A\", label=\"Chain ID\", placeholder=\"Enter Chain ID here...\")\n",
786
  " prediction_btn = gr.Button(\"Predict Random Binding Site Scores\")\n",
787
  "\n",
788
+ " molecule_output = gr.HTML(label=\"Protein Structure\")\n",
789
  " predictions_output = gr.Textbox(label=\"Binding Site Predictions\")\n",
790
  " download_output = gr.File(label=\"Download Predictions\")\n",
791
+ " \n",
792
+ " #visualize_btn.click(\n",
793
+ " # fn=lambda pdb_id: molecule(fetch_pdb(pdb_id)), \n",
794
+ " # inputs=[pdb_input], \n",
795
+ " # outputs=molecule_output\n",
796
+ " #)\n",
797
+ " visualize_btn.click(fetch_pdb, inputs=[pdb_input], outputs=molecule_output2)\n",
798
+ " \n",
799
+ " \n",
800
+ " prediction_btn.click(process_pdb, inputs=[pdb_input, segment_input], outputs=[predictions_output, molecule_output, download_output])\n",
801
+ " \n",
802
  " gr.Markdown(\"## Examples\")\n",
803
  " gr.Examples(\n",
804
  " examples=[\n",
 
816
  {
817
  "cell_type": "code",
818
  "execution_count": null,
819
+ "id": "14605615-8610-4d9e-841b-db7618cde844",
820
  "metadata": {},
821
  "outputs": [],
822
  "source": []
.ipynb_checkpoints/test2-checkpoint.ipynb CHANGED
@@ -978,7 +978,7 @@
978
  " examples=[\n",
979
  " [\"7RPZ\", \"A\"],\n",
980
  " [\"2IWI\", \"B\"],\n",
981
- " [\"3TJN\", \"C\"]\n",
982
  " ],\n",
983
  " inputs=[pdb_input, segment_input],\n",
984
  " outputs=[predictions_output, molecule_output, download_output]\n",
@@ -1279,7 +1279,7 @@
1279
  " examples=[\n",
1280
  " [\"7RPZ\", \"A\"],\n",
1281
  " [\"2IWI\", \"B\"],\n",
1282
- " [\"3TJN\", \"C\"]\n",
1283
  " ],\n",
1284
  " inputs=[pdb_input, segment_input],\n",
1285
  " outputs=[predictions_output, molecule_output, download_output]\n",
@@ -1387,6 +1387,10 @@
1387
  " for residue in chain \n",
1388
  " if residue.get_resname().strip() in aa_dict\n",
1389
  " )\n",
 
 
 
 
1390
  " \n",
1391
  " # Prepare input for model prediction\n",
1392
  " input_ids = tokenizer(\" \".join(sequence), return_tensors=\"pt\").input_ids.to(device)\n",
@@ -1397,6 +1401,9 @@
1397
  " scores = expit(outputs[:, 1] - outputs[:, 0])\n",
1398
  " normalized_scores = normalize_scores(scores)\n",
1399
  "\n",
 
 
 
1400
  " result_str = \"\\n\".join([\n",
1401
  " f\"{res.get_resname()} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}\" \n",
1402
  " for i, res in enumerate(chain) if res.get_resname().strip() in aa_dict\n",
@@ -1407,14 +1414,18 @@
1407
  " with open(prediction_file, \"w\") as f:\n",
1408
  " f.write(result_str)\n",
1409
  " \n",
1410
- " return result_str, molecule(pdb_path, normalized_scores, segment), prediction_file\n",
1411
  "\n",
1412
- "def molecule(input_pdb, scores=None, segment='A'):\n",
1413
  " mol = read_mol(input_pdb) # Read PDB file content\n",
1414
  " \n",
1415
  " # Prepare high-scoring residues script if scores are provided\n",
1416
  " high_score_script = \"\"\n",
1417
- " if scores is not None:\n",
 
 
 
 
1418
  " high_score_script = \"\"\"\n",
1419
  " // Reset all styles first\n",
1420
  " viewer.getModel(0).setStyle({}, {});\n",
@@ -1432,16 +1443,16 @@
1432
  " {\"stick\": {\"color\": \"red\"}}\n",
1433
  " );\n",
1434
  "\n",
1435
- " // Highlight high-scoring residues only for the selected chain\n",
1436
- " let highScoreResidues2 = [%s];\n",
1437
  " viewer.getModel(0).setStyle(\n",
1438
- " {\"chain\": \"%s\", \"resi\": highScoreResidues2}, \n",
1439
  " {\"stick\": {\"color\": \"orange\"}}\n",
1440
  " );\n",
1441
  " \"\"\" % (segment, \n",
1442
- " \", \".join(str(i+1) for i, score in enumerate(scores) if score > 0.8),\n",
1443
  " segment,\n",
1444
- " \", \".join(str(i+1) for i, score in enumerate(scores) if (score > 0.5) and (score < 0.8)),\n",
1445
  " segment)\n",
1446
  " \n",
1447
  " html_content = f\"\"\"\n",
@@ -1484,7 +1495,7 @@
1484
  " function(atom, viewer, event, container) {{\n",
1485
  " if (!atom.label) {{\n",
1486
  " atom.label = viewer.addLabel(\n",
1487
- " atom.resn + \":\" + atom.atom, \n",
1488
  " {{\n",
1489
  " position: atom, \n",
1490
  " backgroundColor: 'mintcream', \n",
@@ -1528,7 +1539,7 @@
1528
  "\n",
1529
  "# Gradio UI\n",
1530
  "with gr.Blocks() as demo:\n",
1531
- " gr.Markdown(\"# Protein Binding Site Prediction (Random Scores)\")\n",
1532
  " with gr.Row():\n",
1533
  " pdb_input = gr.Textbox(value=\"2IWI\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
1534
  " visualize_btn = gr.Button(\"Visualize Structure\")\n",
@@ -1536,9 +1547,9 @@
1536
  " molecule_output2 = Molecule3D(label=\"Protein Structure\", reps=reps)\n",
1537
  "\n",
1538
  " with gr.Row():\n",
1539
- " pdb_input = gr.Textbox(value=\"2IWI\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
1540
  " segment_input = gr.Textbox(value=\"A\", label=\"Chain ID\", placeholder=\"Enter Chain ID here...\")\n",
1541
- " prediction_btn = gr.Button(\"Predict Random Binding Site Scores\")\n",
1542
  "\n",
1543
  " molecule_output = gr.HTML(label=\"Protein Structure\")\n",
1544
  " predictions_output = gr.Textbox(label=\"Binding Site Predictions\")\n",
@@ -1551,9 +1562,9 @@
1551
  " gr.Markdown(\"## Examples\")\n",
1552
  " gr.Examples(\n",
1553
  " examples=[\n",
1554
- " [\"2IWI\", \"A\"],\n",
1555
- " [\"7RPZ\", \"B\"],\n",
1556
- " [\"3TJN\", \"C\"]\n",
1557
  " ],\n",
1558
  " inputs=[pdb_input, segment_input],\n",
1559
  " outputs=[predictions_output, molecule_output, download_output]\n",
 
978
  " examples=[\n",
979
  " [\"7RPZ\", \"A\"],\n",
980
  " [\"2IWI\", \"B\"],\n",
981
+ " [\"2F6V\", \"A\"]\n",
982
  " ],\n",
983
  " inputs=[pdb_input, segment_input],\n",
984
  " outputs=[predictions_output, molecule_output, download_output]\n",
 
1279
  " examples=[\n",
1280
  " [\"7RPZ\", \"A\"],\n",
1281
  " [\"2IWI\", \"B\"],\n",
1282
+ " [\"2F6V\", \"A\"]\n",
1283
  " ],\n",
1284
  " inputs=[pdb_input, segment_input],\n",
1285
  " outputs=[predictions_output, molecule_output, download_output]\n",
 
1387
  " for residue in chain \n",
1388
  " if residue.get_resname().strip() in aa_dict\n",
1389
  " )\n",
1390
+ " sequence2 = [\n",
1391
+ " (res.id[1], res) for res in chain\n",
1392
+ " if res.get_resname().strip() in aa_dict\n",
1393
+ " ]\n",
1394
  " \n",
1395
  " # Prepare input for model prediction\n",
1396
  " input_ids = tokenizer(\" \".join(sequence), return_tensors=\"pt\").input_ids.to(device)\n",
 
1401
  " scores = expit(outputs[:, 1] - outputs[:, 0])\n",
1402
  " normalized_scores = normalize_scores(scores)\n",
1403
  "\n",
1404
+ " # Zip residues with scores to track the residue ID and score\n",
1405
+ " residue_scores = [(resi, score) for (resi, _), score in zip(sequence2, normalized_scores)]\n",
1406
+ " \n",
1407
  " result_str = \"\\n\".join([\n",
1408
  " f\"{res.get_resname()} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}\" \n",
1409
  " for i, res in enumerate(chain) if res.get_resname().strip() in aa_dict\n",
 
1414
  " with open(prediction_file, \"w\") as f:\n",
1415
  " f.write(result_str)\n",
1416
  " \n",
1417
+ " return result_str, molecule(pdb_path, residue_scores, segment), prediction_file\n",
1418
  "\n",
1419
+ "def molecule(input_pdb, residue_scores=None, segment='A'):\n",
1420
  " mol = read_mol(input_pdb) # Read PDB file content\n",
1421
  " \n",
1422
  " # Prepare high-scoring residues script if scores are provided\n",
1423
  " high_score_script = \"\"\n",
1424
+ " if residue_scores is not None:\n",
1425
+ " # Sort residues based on their scores\n",
1426
+ " high_score_residues = [resi for resi, score in residue_scores if score > 0.75]\n",
1427
+ " mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]\n",
1428
+ " \n",
1429
  " high_score_script = \"\"\"\n",
1430
  " // Reset all styles first\n",
1431
  " viewer.getModel(0).setStyle({}, {});\n",
 
1443
  " {\"stick\": {\"color\": \"red\"}}\n",
1444
  " );\n",
1445
  "\n",
1446
+ " // Highlight medium-scoring residues only for the selected chain\n",
1447
+ " let midScoreResidues = [%s];\n",
1448
  " viewer.getModel(0).setStyle(\n",
1449
+ " {\"chain\": \"%s\", \"resi\": midScoreResidues}, \n",
1450
  " {\"stick\": {\"color\": \"orange\"}}\n",
1451
  " );\n",
1452
  " \"\"\" % (segment, \n",
1453
+ " \", \".join(str(resi) for resi in high_score_residues),\n",
1454
  " segment,\n",
1455
+ " \", \".join(str(resi) for resi in mid_score_residues),\n",
1456
  " segment)\n",
1457
  " \n",
1458
  " html_content = f\"\"\"\n",
 
1495
  " function(atom, viewer, event, container) {{\n",
1496
  " if (!atom.label) {{\n",
1497
  " atom.label = viewer.addLabel(\n",
1498
+ " atom.resn + \":\" +atom.resi + \":\" + atom.atom, \n",
1499
  " {{\n",
1500
  " position: atom, \n",
1501
  " backgroundColor: 'mintcream', \n",
 
1539
  "\n",
1540
  "# Gradio UI\n",
1541
  "with gr.Blocks() as demo:\n",
1542
+ " gr.Markdown(\"# Protein Binding Site Prediction\")\n",
1543
  " with gr.Row():\n",
1544
  " pdb_input = gr.Textbox(value=\"2IWI\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
1545
  " visualize_btn = gr.Button(\"Visualize Structure\")\n",
 
1547
  " molecule_output2 = Molecule3D(label=\"Protein Structure\", reps=reps)\n",
1548
  "\n",
1549
  " with gr.Row():\n",
1550
+ " #pdb_input = gr.Textbox(value=\"2IWI\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
1551
  " segment_input = gr.Textbox(value=\"A\", label=\"Chain ID\", placeholder=\"Enter Chain ID here...\")\n",
1552
+ " prediction_btn = gr.Button(\"Predict Binding Site\")\n",
1553
  "\n",
1554
  " molecule_output = gr.HTML(label=\"Protein Structure\")\n",
1555
  " predictions_output = gr.Textbox(label=\"Binding Site Predictions\")\n",
 
1562
  " gr.Markdown(\"## Examples\")\n",
1563
  " gr.Examples(\n",
1564
  " examples=[\n",
1565
+ " [\"7RPZ\", \"A\"],\n",
1566
+ " [\"2IWI\", \"B\"],\n",
1567
+ " [\"2F6V\", \"A\"]\n",
1568
  " ],\n",
1569
  " inputs=[pdb_input, segment_input],\n",
1570
  " outputs=[predictions_output, molecule_output, download_output]\n",
app.py CHANGED
@@ -234,7 +234,7 @@ reps = [
234
 
235
  # Gradio UI
236
  with gr.Blocks() as demo:
237
- gr.Markdown("# Protein Binding Site Prediction (Random Scores)")
238
  with gr.Row():
239
  pdb_input = gr.Textbox(value="2IWI", label="PDB ID", placeholder="Enter PDB ID here...")
240
  visualize_btn = gr.Button("Visualize Structure")
@@ -244,7 +244,7 @@ with gr.Blocks() as demo:
244
  with gr.Row():
245
  #pdb_input = gr.Textbox(value="2IWI", label="PDB ID", placeholder="Enter PDB ID here...")
246
  segment_input = gr.Textbox(value="A", label="Chain ID", placeholder="Enter Chain ID here...")
247
- prediction_btn = gr.Button("Predict Random Binding Site Scores")
248
 
249
  molecule_output = gr.HTML(label="Protein Structure")
250
  predictions_output = gr.Textbox(label="Binding Site Predictions")
 
234
 
235
  # Gradio UI
236
  with gr.Blocks() as demo:
237
+ gr.Markdown("# Protein Binding Site Prediction")
238
  with gr.Row():
239
  pdb_input = gr.Textbox(value="2IWI", label="PDB ID", placeholder="Enter PDB ID here...")
240
  visualize_btn = gr.Button("Visualize Structure")
 
244
  with gr.Row():
245
  #pdb_input = gr.Textbox(value="2IWI", label="PDB ID", placeholder="Enter PDB ID here...")
246
  segment_input = gr.Textbox(value="A", label="Chain ID", placeholder="Enter Chain ID here...")
247
+ prediction_btn = gr.Button("Predict Binding Site")
248
 
249
  molecule_output = gr.HTML(label="Protein Structure")
250
  predictions_output = gr.Textbox(label="Binding Site Predictions")
test2.ipynb CHANGED
@@ -978,7 +978,7 @@
978
  " examples=[\n",
979
  " [\"7RPZ\", \"A\"],\n",
980
  " [\"2IWI\", \"B\"],\n",
981
- " [\"3TJN\", \"C\"]\n",
982
  " ],\n",
983
  " inputs=[pdb_input, segment_input],\n",
984
  " outputs=[predictions_output, molecule_output, download_output]\n",
@@ -1279,7 +1279,7 @@
1279
  " examples=[\n",
1280
  " [\"7RPZ\", \"A\"],\n",
1281
  " [\"2IWI\", \"B\"],\n",
1282
- " [\"3TJN\", \"C\"]\n",
1283
  " ],\n",
1284
  " inputs=[pdb_input, segment_input],\n",
1285
  " outputs=[predictions_output, molecule_output, download_output]\n",
@@ -1387,6 +1387,10 @@
1387
  " for residue in chain \n",
1388
  " if residue.get_resname().strip() in aa_dict\n",
1389
  " )\n",
 
 
 
 
1390
  " \n",
1391
  " # Prepare input for model prediction\n",
1392
  " input_ids = tokenizer(\" \".join(sequence), return_tensors=\"pt\").input_ids.to(device)\n",
@@ -1397,6 +1401,9 @@
1397
  " scores = expit(outputs[:, 1] - outputs[:, 0])\n",
1398
  " normalized_scores = normalize_scores(scores)\n",
1399
  "\n",
 
 
 
1400
  " result_str = \"\\n\".join([\n",
1401
  " f\"{res.get_resname()} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}\" \n",
1402
  " for i, res in enumerate(chain) if res.get_resname().strip() in aa_dict\n",
@@ -1407,14 +1414,18 @@
1407
  " with open(prediction_file, \"w\") as f:\n",
1408
  " f.write(result_str)\n",
1409
  " \n",
1410
- " return result_str, molecule(pdb_path, normalized_scores, segment), prediction_file\n",
1411
  "\n",
1412
- "def molecule(input_pdb, scores=None, segment='A'):\n",
1413
  " mol = read_mol(input_pdb) # Read PDB file content\n",
1414
  " \n",
1415
  " # Prepare high-scoring residues script if scores are provided\n",
1416
  " high_score_script = \"\"\n",
1417
- " if scores is not None:\n",
 
 
 
 
1418
  " high_score_script = \"\"\"\n",
1419
  " // Reset all styles first\n",
1420
  " viewer.getModel(0).setStyle({}, {});\n",
@@ -1432,16 +1443,16 @@
1432
  " {\"stick\": {\"color\": \"red\"}}\n",
1433
  " );\n",
1434
  "\n",
1435
- " // Highlight high-scoring residues only for the selected chain\n",
1436
- " let highScoreResidues2 = [%s];\n",
1437
  " viewer.getModel(0).setStyle(\n",
1438
- " {\"chain\": \"%s\", \"resi\": highScoreResidues2}, \n",
1439
  " {\"stick\": {\"color\": \"orange\"}}\n",
1440
  " );\n",
1441
  " \"\"\" % (segment, \n",
1442
- " \", \".join(str(i+1) for i, score in enumerate(scores) if score > 0.8),\n",
1443
  " segment,\n",
1444
- " \", \".join(str(i+1) for i, score in enumerate(scores) if (score > 0.5) and (score < 0.8)),\n",
1445
  " segment)\n",
1446
  " \n",
1447
  " html_content = f\"\"\"\n",
@@ -1484,7 +1495,7 @@
1484
  " function(atom, viewer, event, container) {{\n",
1485
  " if (!atom.label) {{\n",
1486
  " atom.label = viewer.addLabel(\n",
1487
- " atom.resn + \":\" + atom.atom, \n",
1488
  " {{\n",
1489
  " position: atom, \n",
1490
  " backgroundColor: 'mintcream', \n",
@@ -1528,7 +1539,7 @@
1528
  "\n",
1529
  "# Gradio UI\n",
1530
  "with gr.Blocks() as demo:\n",
1531
- " gr.Markdown(\"# Protein Binding Site Prediction (Random Scores)\")\n",
1532
  " with gr.Row():\n",
1533
  " pdb_input = gr.Textbox(value=\"2IWI\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
1534
  " visualize_btn = gr.Button(\"Visualize Structure\")\n",
@@ -1536,9 +1547,9 @@
1536
  " molecule_output2 = Molecule3D(label=\"Protein Structure\", reps=reps)\n",
1537
  "\n",
1538
  " with gr.Row():\n",
1539
- " pdb_input = gr.Textbox(value=\"2IWI\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
1540
  " segment_input = gr.Textbox(value=\"A\", label=\"Chain ID\", placeholder=\"Enter Chain ID here...\")\n",
1541
- " prediction_btn = gr.Button(\"Predict Random Binding Site Scores\")\n",
1542
  "\n",
1543
  " molecule_output = gr.HTML(label=\"Protein Structure\")\n",
1544
  " predictions_output = gr.Textbox(label=\"Binding Site Predictions\")\n",
@@ -1551,9 +1562,9 @@
1551
  " gr.Markdown(\"## Examples\")\n",
1552
  " gr.Examples(\n",
1553
  " examples=[\n",
1554
- " [\"2IWI\", \"A\"],\n",
1555
- " [\"7RPZ\", \"B\"],\n",
1556
- " [\"3TJN\", \"C\"]\n",
1557
  " ],\n",
1558
  " inputs=[pdb_input, segment_input],\n",
1559
  " outputs=[predictions_output, molecule_output, download_output]\n",
 
978
  " examples=[\n",
979
  " [\"7RPZ\", \"A\"],\n",
980
  " [\"2IWI\", \"B\"],\n",
981
+ " [\"2F6V\", \"A\"]\n",
982
  " ],\n",
983
  " inputs=[pdb_input, segment_input],\n",
984
  " outputs=[predictions_output, molecule_output, download_output]\n",
 
1279
  " examples=[\n",
1280
  " [\"7RPZ\", \"A\"],\n",
1281
  " [\"2IWI\", \"B\"],\n",
1282
+ " [\"2F6V\", \"A\"]\n",
1283
  " ],\n",
1284
  " inputs=[pdb_input, segment_input],\n",
1285
  " outputs=[predictions_output, molecule_output, download_output]\n",
 
1387
  " for residue in chain \n",
1388
  " if residue.get_resname().strip() in aa_dict\n",
1389
  " )\n",
1390
+ " sequence2 = [\n",
1391
+ " (res.id[1], res) for res in chain\n",
1392
+ " if res.get_resname().strip() in aa_dict\n",
1393
+ " ]\n",
1394
  " \n",
1395
  " # Prepare input for model prediction\n",
1396
  " input_ids = tokenizer(\" \".join(sequence), return_tensors=\"pt\").input_ids.to(device)\n",
 
1401
  " scores = expit(outputs[:, 1] - outputs[:, 0])\n",
1402
  " normalized_scores = normalize_scores(scores)\n",
1403
  "\n",
1404
+ " # Zip residues with scores to track the residue ID and score\n",
1405
+ " residue_scores = [(resi, score) for (resi, _), score in zip(sequence2, normalized_scores)]\n",
1406
+ " \n",
1407
  " result_str = \"\\n\".join([\n",
1408
  " f\"{res.get_resname()} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}\" \n",
1409
  " for i, res in enumerate(chain) if res.get_resname().strip() in aa_dict\n",
 
1414
  " with open(prediction_file, \"w\") as f:\n",
1415
  " f.write(result_str)\n",
1416
  " \n",
1417
+ " return result_str, molecule(pdb_path, residue_scores, segment), prediction_file\n",
1418
  "\n",
1419
+ "def molecule(input_pdb, residue_scores=None, segment='A'):\n",
1420
  " mol = read_mol(input_pdb) # Read PDB file content\n",
1421
  " \n",
1422
  " # Prepare high-scoring residues script if scores are provided\n",
1423
  " high_score_script = \"\"\n",
1424
+ " if residue_scores is not None:\n",
1425
+ " # Sort residues based on their scores\n",
1426
+ " high_score_residues = [resi for resi, score in residue_scores if score > 0.75]\n",
1427
+ " mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]\n",
1428
+ " \n",
1429
  " high_score_script = \"\"\"\n",
1430
  " // Reset all styles first\n",
1431
  " viewer.getModel(0).setStyle({}, {});\n",
 
1443
  " {\"stick\": {\"color\": \"red\"}}\n",
1444
  " );\n",
1445
  "\n",
1446
+ " // Highlight medium-scoring residues only for the selected chain\n",
1447
+ " let midScoreResidues = [%s];\n",
1448
  " viewer.getModel(0).setStyle(\n",
1449
+ " {\"chain\": \"%s\", \"resi\": midScoreResidues}, \n",
1450
  " {\"stick\": {\"color\": \"orange\"}}\n",
1451
  " );\n",
1452
  " \"\"\" % (segment, \n",
1453
+ " \", \".join(str(resi) for resi in high_score_residues),\n",
1454
  " segment,\n",
1455
+ " \", \".join(str(resi) for resi in mid_score_residues),\n",
1456
  " segment)\n",
1457
  " \n",
1458
  " html_content = f\"\"\"\n",
 
1495
  " function(atom, viewer, event, container) {{\n",
1496
  " if (!atom.label) {{\n",
1497
  " atom.label = viewer.addLabel(\n",
1498
+ " atom.resn + \":\" +atom.resi + \":\" + atom.atom, \n",
1499
  " {{\n",
1500
  " position: atom, \n",
1501
  " backgroundColor: 'mintcream', \n",
 
1539
  "\n",
1540
  "# Gradio UI\n",
1541
  "with gr.Blocks() as demo:\n",
1542
+ " gr.Markdown(\"# Protein Binding Site Prediction\")\n",
1543
  " with gr.Row():\n",
1544
  " pdb_input = gr.Textbox(value=\"2IWI\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
1545
  " visualize_btn = gr.Button(\"Visualize Structure\")\n",
 
1547
  " molecule_output2 = Molecule3D(label=\"Protein Structure\", reps=reps)\n",
1548
  "\n",
1549
  " with gr.Row():\n",
1550
+ " #pdb_input = gr.Textbox(value=\"2IWI\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
1551
  " segment_input = gr.Textbox(value=\"A\", label=\"Chain ID\", placeholder=\"Enter Chain ID here...\")\n",
1552
+ " prediction_btn = gr.Button(\"Predict Binding Site\")\n",
1553
  "\n",
1554
  " molecule_output = gr.HTML(label=\"Protein Structure\")\n",
1555
  " predictions_output = gr.Textbox(label=\"Binding Site Predictions\")\n",
 
1562
  " gr.Markdown(\"## Examples\")\n",
1563
  " gr.Examples(\n",
1564
  " examples=[\n",
1565
+ " [\"7RPZ\", \"A\"],\n",
1566
+ " [\"2IWI\", \"B\"],\n",
1567
+ " [\"2F6V\", \"A\"]\n",
1568
  " ],\n",
1569
  " inputs=[pdb_input, segment_input],\n",
1570
  " outputs=[predictions_output, molecule_output, download_output]\n",