ThorbenFroehlking commited on
Commit
09b66ec
·
1 Parent(s): e834a48
.DS_Store ADDED
Binary file (6.15 kB). View file
 
.ipynb_checkpoints/2IWI-checkpoint.pdb DELETED
The diff for this file is too large to render. See raw diff
 
.ipynb_checkpoints/app-checkpoint.py CHANGED
@@ -29,6 +29,22 @@ from datasets import Dataset
29
 
30
  from scipy.special import expit
31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
 
34
  # Load model and move to device
@@ -39,6 +55,24 @@ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
39
  model.to(device)
40
  model.eval()
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  def normalize_scores(scores):
43
  min_score = np.min(scores)
44
  max_score = np.max(scores)
@@ -101,36 +135,44 @@ def fetch_pdb(pdb_id):
101
  pdb_path = convert_cif_to_pdb(pdb_path)
102
  return pdb_path
103
 
104
- def create_chain_specific_pdb(input_pdb: str, chain_id: str, residue_scores: list) -> str:
105
  """
106
- Create a PDB file with only the specified chain and replace B-factor with prediction scores
107
  """
108
  # Read the original PDB file
109
  parser = PDBParser(QUIET=True)
110
  structure = parser.get_structure('protein', input_pdb)
111
 
112
- # Prepare a new structure with only the specified chain
113
- new_structure = structure.copy()
114
- for model in new_structure:
115
- # Remove all chains except the specified one
116
- chains_to_remove = [chain for chain in model if chain.id != chain_id]
117
- for chain in chains_to_remove:
118
- model.detach_child(chain.id)
119
 
120
- # Create a modified PDB with scores in B-factor
121
  scores_dict = {resi: score for resi, score in residue_scores}
122
- for model in new_structure:
123
- for chain in model:
124
- for residue in chain:
125
- if residue.id[1] in scores_dict:
126
- for atom in residue:
127
- atom.bfactor = scores_dict[residue.id[1]] #* 100 # Scale score to B-factor range
128
-
129
- # Save the modified structure
130
- output_pdb = f"{os.path.splitext(input_pdb)[0]}_{chain_id}_scored.pdb"
 
 
 
 
 
 
 
 
 
 
 
131
  io = PDBIO()
132
- io.set_structure(new_structure)
133
- io.save(output_pdb)
 
 
134
 
135
  return output_pdb
136
 
@@ -158,8 +200,6 @@ def calculate_geometric_center(pdb_path: str, high_score_residues: list, chain_i
158
  return center
159
  return None
160
 
161
-
162
-
163
  def process_pdb(pdb_id_or_file, segment):
164
  # Determine if input is a PDB ID or file path
165
  if pdb_id_or_file.endswith('.pdb'):
@@ -192,67 +232,75 @@ def process_pdb(pdb_id_or_file, segment):
192
  sequence = "".join(seq1(res.resname) for res in protein_residues)
193
  sequence_id = [res.id[1] for res in protein_residues]
194
 
195
- # Prepare input for model prediction
196
- input_ids = tokenizer(" ".join(sequence), return_tensors="pt").input_ids.to(device)
197
- with torch.no_grad():
198
- outputs = model(input_ids).logits.detach().cpu().numpy().squeeze()
199
-
200
- # Calculate scores and normalize them
201
- scores = expit(outputs[:, 1] - outputs[:, 0])
202
  normalized_scores = normalize_scores(scores)
203
 
204
  # Zip residues with scores to track the residue ID and score
205
  residue_scores = [(resi, score) for resi, score in zip(sequence_id, normalized_scores)]
206
 
207
- # Identify high and mid scoring residues
208
- high_score_residues = [resi for resi, score in residue_scores if score > 0.75]
209
- mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]
210
-
211
- # Calculate geometric center of high-scoring residues
212
- geo_center = calculate_geometric_center(pdb_path, high_score_residues, segment)
213
- pymol_selection = f"select high_score_residues, resi {'+'.join(map(str, high_score_residues))} and chain {segment}"
214
- pymol_center_cmd = f"show spheres, resi {'+'.join(map(str, high_score_residues))} and chain {segment}" if geo_center is not None else ""
215
-
216
- # Generate the result string
217
  current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
218
  result_str = f"Prediction for PDB: {pdb_id}, Chain: {segment}\nDate: {current_time}\n\n"
 
219
  result_str += "Columns: Residue Name, Residue Number, One-letter Code, Normalized Score\n\n"
220
  result_str += "\n".join([
221
  f"{res.resname} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}"
222
- for i, res in enumerate(protein_residues)])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
 
 
 
 
 
 
 
 
 
 
 
 
224
  # Create prediction and scored PDB files
225
- prediction_file = f"{pdb_id}_predictions.txt"
226
  with open(prediction_file, "w") as f:
227
  f.write(result_str)
228
-
229
- # Create chain-specific PDB with scores in B-factor
230
- scored_pdb = create_chain_specific_pdb(pdb_path, segment, residue_scores)
231
-
232
- # Molecule visualization with updated script
233
- mol_vis = molecule(pdb_path, residue_scores, segment)
234
-
235
- # Construct PyMOL command suggestions
236
- pymol_commands = f"""
237
- PyMOL Visualization Commands:
238
- 1. Load PDB: load {os.path.abspath(pdb_path)}
239
- 2. Select high-scoring residues: {pymol_selection}
240
- 3. Highlight high-scoring residues: show sticks, high_score_residues
241
- {pymol_center_cmd}
242
- """
243
 
244
- return result_str + "\n\n" + pymol_commands, mol_vis, [prediction_file, scored_pdb]
245
-
246
 
247
  def molecule(input_pdb, residue_scores=None, segment='A'):
 
248
  mol = read_mol(input_pdb) # Read PDB file content
249
 
250
  # Prepare high-scoring residues script if scores are provided
251
  high_score_script = ""
252
  if residue_scores is not None:
253
  # Filter residues based on their scores
254
- high_score_residues = [resi for resi, score in residue_scores if score > 0.75]
255
- mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]
 
 
 
256
 
257
  high_score_script = """
258
  // Load the original model and apply white cartoon style
@@ -264,26 +312,57 @@ def molecule(input_pdb, residue_scores=None, segment='A'):
264
  );
265
 
266
  // Create a new model for high-scoring residues and apply red sticks style
267
- let highScoreModel = viewer.addModel(pdb, "pdb");
268
- highScoreModel.setStyle({}, {});
269
- highScoreModel.setStyle(
270
  {"chain": "%s", "resi": [%s]},
271
- {"stick": {"color": "red"}}
272
  );
273
 
274
- // Create a new model for medium-scoring residues and apply orange sticks style
275
- let midScoreModel = viewer.addModel(pdb, "pdb");
276
- midScoreModel.setStyle({}, {});
277
- midScoreModel.setStyle(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
278
  {"chain": "%s", "resi": [%s]},
279
  {"stick": {"color": "orange"}}
280
  );
 
 
 
 
 
 
 
 
 
281
  """ % (
282
  segment,
283
  segment,
284
- ", ".join(str(resi) for resi in high_score_residues),
285
  segment,
286
- ", ".join(str(resi) for resi in mid_score_residues)
 
 
 
 
 
 
287
  )
288
 
289
  # Generate the full HTML content
@@ -351,14 +430,22 @@ def molecule(input_pdb, residue_scores=None, segment='A'):
351
  # Return the HTML content within an iframe safely encoded for special characters
352
  return f'<iframe width="100%" height="700" srcdoc="{html_content.replace(chr(34), "&quot;").replace(chr(39), "&#39;")}"></iframe>'
353
 
354
-
355
  # Gradio UI
356
  with gr.Blocks() as demo:
357
  gr.Markdown("# Protein Binding Site Prediction")
358
 
359
- with gr.Row():
360
- pdb_input = gr.Textbox(value="4BDU", label="PDB ID", placeholder="Enter PDB ID here...")
361
- visualize_btn = gr.Button("Visualize Structure")
 
 
 
 
 
 
 
 
 
362
 
363
  molecule_output2 = Molecule3D(label="Protein Structure", reps=[
364
  {
@@ -375,23 +462,70 @@ with gr.Blocks() as demo:
375
  segment_input = gr.Textbox(value="A", label="Chain ID", placeholder="Enter Chain ID here...")
376
  prediction_btn = gr.Button("Predict Binding Site")
377
 
378
-
379
  molecule_output = gr.HTML(label="Protein Structure")
380
- predictions_output = gr.Textbox(label="Binding Site Predictions")
 
 
 
 
 
 
 
 
 
381
  download_output = gr.File(label="Download Files", file_count="multiple")
382
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
383
  prediction_btn.click(
384
- process_pdb,
385
- inputs=[
386
- pdb_input,
387
- segment_input
388
- ],
389
  outputs=[predictions_output, molecule_output, download_output]
390
  )
391
 
392
  visualize_btn.click(
393
- fetch_pdb,
394
- inputs=[pdb_input],
395
  outputs=molecule_output2
396
  )
397
 
 
29
 
30
  from scipy.special import expit
31
 
32
+ from datetime import datetime
33
+ import gradio as gr
34
+ import requests
35
+ from Bio.PDB import PDBParser, MMCIFParser, PDBIO
36
+ from Bio.PDB.Polypeptide import is_aa
37
+ from Bio.SeqUtils import seq1
38
+ from typing import Optional, Tuple
39
+ import numpy as np
40
+ import os
41
+ from gradio_molecule3d import Molecule3D
42
+
43
+ import re
44
+ import pandas as pd
45
+ import copy
46
+
47
+ from scipy.special import expit
48
 
49
 
50
  # Load model and move to device
 
55
  model.to(device)
56
  model.eval()
57
 
58
+ from datetime import datetime
59
+ import gradio as gr
60
+ import requests
61
+ from Bio.PDB import PDBParser, MMCIFParser, PDBIO
62
+ from Bio.PDB.Polypeptide import is_aa
63
+ from Bio.SeqUtils import seq1
64
+ from Bio.PDB import Select
65
+ from typing import Optional, Tuple
66
+ import numpy as np
67
+ import os
68
+ from gradio_molecule3d import Molecule3D
69
+
70
+ import re
71
+ import pandas as pd
72
+ import copy
73
+
74
+ from scipy.special import expit
75
+
76
  def normalize_scores(scores):
77
  min_score = np.min(scores)
78
  max_score = np.max(scores)
 
135
  pdb_path = convert_cif_to_pdb(pdb_path)
136
  return pdb_path
137
 
138
+ def create_chain_specific_pdb(input_pdb: str, chain_id: str, residue_scores: list, protein_residues: list) -> str:
139
  """
140
+ Create a PDB file with only the selected chain and residues, replacing B-factor with prediction scores
141
  """
142
  # Read the original PDB file
143
  parser = PDBParser(QUIET=True)
144
  structure = parser.get_structure('protein', input_pdb)
145
 
146
+ # Prepare a new structure with only the specified chain and selected residues
147
+ output_pdb = f"{os.path.splitext(input_pdb)[0]}_{chain_id}_predictions_scores.pdb"
 
 
 
 
 
148
 
149
+ # Create scores dictionary for easy lookup
150
  scores_dict = {resi: score for resi, score in residue_scores}
151
+
152
+ # Create a custom Select class
153
+ class ResidueSelector(Select):
154
+ def __init__(self, chain_id, selected_residues, scores_dict):
155
+ self.chain_id = chain_id
156
+ self.selected_residues = selected_residues
157
+ self.scores_dict = scores_dict
158
+
159
+ def accept_chain(self, chain):
160
+ return chain.id == self.chain_id
161
+
162
+ def accept_residue(self, residue):
163
+ return residue.id[1] in self.selected_residues
164
+
165
+ def accept_atom(self, atom):
166
+ if atom.parent.id[1] in self.scores_dict:
167
+ atom.bfactor = self.scores_dict[atom.parent.id[1]] * 100
168
+ return True
169
+
170
+ # Prepare output PDB with selected chain and residues, modified B-factors
171
  io = PDBIO()
172
+ selector = ResidueSelector(chain_id, [res.id[1] for res in protein_residues], scores_dict)
173
+
174
+ io.set_structure(structure[0])
175
+ io.save(output_pdb, selector)
176
 
177
  return output_pdb
178
 
 
200
  return center
201
  return None
202
 
 
 
203
  def process_pdb(pdb_id_or_file, segment):
204
  # Determine if input is a PDB ID or file path
205
  if pdb_id_or_file.endswith('.pdb'):
 
232
  sequence = "".join(seq1(res.resname) for res in protein_residues)
233
  sequence_id = [res.id[1] for res in protein_residues]
234
 
235
+ scores = np.random.rand(len(sequence))
 
 
 
 
 
 
236
  normalized_scores = normalize_scores(scores)
237
 
238
  # Zip residues with scores to track the residue ID and score
239
  residue_scores = [(resi, score) for resi, score in zip(sequence_id, normalized_scores)]
240
 
241
+
242
+ # Identify high scoring residues (> 0.5)
243
+ high_score_residues = [resi for resi, score in residue_scores if score > 0.5]
244
+
245
+ # Preparing the result: only print high scoring residues
 
 
 
 
 
246
  current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
247
  result_str = f"Prediction for PDB: {pdb_id}, Chain: {segment}\nDate: {current_time}\n\n"
248
+ result_str += "High-scoring Residues (Score > 0.5):\n"
249
  result_str += "Columns: Residue Name, Residue Number, One-letter Code, Normalized Score\n\n"
250
  result_str += "\n".join([
251
  f"{res.resname} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}"
252
+ for i, res in enumerate(protein_residues) if res.id[1] in high_score_residues
253
+ ])
254
+
255
+ # Create chain-specific PDB with scores in B-factor
256
+ scored_pdb = create_chain_specific_pdb(pdb_path, segment, residue_scores, protein_residues)
257
+
258
+ # Molecule visualization with updated script with color mapping
259
+ mol_vis = molecule(pdb_path, residue_scores, segment)#, color_map)
260
+
261
+ # Improved PyMOL command suggestions
262
+ current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
263
+ pymol_commands = f"Prediction for PDB: {pdb_id}, Chain: {segment}\nDate: {current_time}\n\n"
264
+
265
+ pymol_commands += f"""
266
+ # PyMOL Visualization Commands
267
+ load {os.path.abspath(pdb_path)}, protein
268
+ hide everything, all
269
+ show cartoon, chain {segment}
270
+ color white, chain {segment}
271
+ """
272
 
273
+ # Color specific residues
274
+ for score_range, color in [
275
+ (high_score_residues, "red")
276
+ ]:
277
+ if score_range:
278
+ resi_list = '+'.join(map(str, score_range))
279
+ pymol_commands += f"""
280
+ select high_score_residues, resi {resi_list} and chain {segment}
281
+ show sticks, high_score_residues
282
+ color {color}, high_score_residues
283
+ """
284
  # Create prediction and scored PDB files
285
+ prediction_file = f"{pdb_id}_binding_site_residues.txt"
286
  with open(prediction_file, "w") as f:
287
  f.write(result_str)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288
 
289
+ return pymol_commands, mol_vis, [prediction_file,scored_pdb]
 
290
 
291
  def molecule(input_pdb, residue_scores=None, segment='A'):
292
+ # More granular scoring for visualization
293
  mol = read_mol(input_pdb) # Read PDB file content
294
 
295
  # Prepare high-scoring residues script if scores are provided
296
  high_score_script = ""
297
  if residue_scores is not None:
298
  # Filter residues based on their scores
299
+ class1_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.6]
300
+ class2_score_residues = [resi for resi, score in residue_scores if 0.6 < score <= 0.7]
301
+ class3_score_residues = [resi for resi, score in residue_scores if 0.7 < score <= 0.8]
302
+ class4_score_residues = [resi for resi, score in residue_scores if 0.8 < score <= 0.9]
303
+ class5_score_residues = [resi for resi, score in residue_scores if 0.9 < score <= 1.0]
304
 
305
  high_score_script = """
306
  // Load the original model and apply white cartoon style
 
312
  );
313
 
314
  // Create a new model for high-scoring residues and apply red sticks style
315
+ let class1Model = viewer.addModel(pdb, "pdb");
316
+ class1Model.setStyle({}, {});
317
+ class1Model.setStyle(
318
  {"chain": "%s", "resi": [%s]},
319
+ {"stick": {"color": "blue"}}
320
  );
321
 
322
+ // Create a new model for high-scoring residues and apply red sticks style
323
+ let class2Model = viewer.addModel(pdb, "pdb");
324
+ class2Model.setStyle({}, {});
325
+ class2Model.setStyle(
326
+ {"chain": "%s", "resi": [%s]},
327
+ {"stick": {"color": "lightblue"}}
328
+ );
329
+
330
+ // Create a new model for high-scoring residues and apply red sticks style
331
+ let class3Model = viewer.addModel(pdb, "pdb");
332
+ class3Model.setStyle({}, {});
333
+ class3Model.setStyle(
334
+ {"chain": "%s", "resi": [%s]},
335
+ {"stick": {"color": "white"}}
336
+ );
337
+
338
+ // Create a new model for high-scoring residues and apply red sticks style
339
+ let class4Model = viewer.addModel(pdb, "pdb");
340
+ class4Model.setStyle({}, {});
341
+ class4Model.setStyle(
342
  {"chain": "%s", "resi": [%s]},
343
  {"stick": {"color": "orange"}}
344
  );
345
+
346
+ // Create a new model for high-scoring residues and apply red sticks style
347
+ let class5Model = viewer.addModel(pdb, "pdb");
348
+ class5Model.setStyle({}, {});
349
+ class5Model.setStyle(
350
+ {"chain": "%s", "resi": [%s]},
351
+ {"stick": {"color": "red"}}
352
+ );
353
+
354
  """ % (
355
  segment,
356
  segment,
357
+ ", ".join(str(resi) for resi in class1_score_residues),
358
  segment,
359
+ ", ".join(str(resi) for resi in class2_score_residues),
360
+ segment,
361
+ ", ".join(str(resi) for resi in class3_score_residues),
362
+ segment,
363
+ ", ".join(str(resi) for resi in class4_score_residues),
364
+ segment,
365
+ ", ".join(str(resi) for resi in class5_score_residues)
366
  )
367
 
368
  # Generate the full HTML content
 
430
  # Return the HTML content within an iframe safely encoded for special characters
431
  return f'<iframe width="100%" height="700" srcdoc="{html_content.replace(chr(34), "&quot;").replace(chr(39), "&#39;")}"></iframe>'
432
 
 
433
  # Gradio UI
434
  with gr.Blocks() as demo:
435
  gr.Markdown("# Protein Binding Site Prediction")
436
 
437
+ # Mode selection
438
+ mode = gr.Radio(
439
+ choices=["PDB ID", "Upload File"],
440
+ value="PDB ID",
441
+ label="Input Mode",
442
+ info="Choose whether to input a PDB ID or upload a PDB/CIF file."
443
+ )
444
+
445
+ # Input components based on mode
446
+ pdb_input = gr.Textbox(value="4BDU", label="PDB ID", placeholder="Enter PDB ID here...")
447
+ pdb_file = gr.File(label="Upload PDB/CIF File", visible=False)
448
+ visualize_btn = gr.Button("Visualize Structure")
449
 
450
  molecule_output2 = Molecule3D(label="Protein Structure", reps=[
451
  {
 
462
  segment_input = gr.Textbox(value="A", label="Chain ID", placeholder="Enter Chain ID here...")
463
  prediction_btn = gr.Button("Predict Binding Site")
464
 
 
465
  molecule_output = gr.HTML(label="Protein Structure")
466
+ explanation_vis = gr.Markdown("""
467
+ Residues with a score > 0.5 are considered binding sites and represented as sticks with the score dependent colorcoding:
468
+ - 0.5-0.6: blue
469
+ - 0.6–0.7: light blue
470
+ - 0.7–0.8: white
471
+ - 0.8–0.9: orange
472
+ - 0.9–1.0: red
473
+ """)
474
+ predictions_output = gr.Textbox(label="Visualize Prediction with PyMol")
475
+ gr.Markdown("### Download:\n- List of predicted binding site residues\n- PDB with score in beta factor column")
476
  download_output = gr.File(label="Download Files", file_count="multiple")
477
 
478
+ def process_interface(mode, pdb_id, pdb_file, chain_id):
479
+ if mode == "PDB ID":
480
+ return process_pdb(pdb_id, chain_id)
481
+ elif mode == "Upload File":
482
+ _, ext = os.path.splitext(pdb_file.name)
483
+ file_path = os.path.join('./', f"{_}{ext}")
484
+ if ext == '.cif':
485
+ pdb_path = convert_cif_to_pdb(file_path)
486
+ else:
487
+ pdb_path= file_path
488
+ return process_pdb(pdb_path, chain_id)
489
+ else:
490
+ return "Error: Invalid mode selected", None, None
491
+
492
+ def fetch_interface(mode, pdb_id, pdb_file):
493
+ if mode == "PDB ID":
494
+ return fetch_pdb(pdb_id)
495
+ elif mode == "Upload File":
496
+ _, ext = os.path.splitext(pdb_file.name)
497
+ file_path = os.path.join('./', f"{_}{ext}")
498
+ #print(ext)
499
+ if ext == '.cif':
500
+ pdb_path = convert_cif_to_pdb(file_path)
501
+ else:
502
+ pdb_path= file_path
503
+ #print(pdb_path)
504
+ return pdb_path
505
+ else:
506
+ return "Error: Invalid mode selected"
507
+
508
+ def toggle_mode(selected_mode):
509
+ if selected_mode == "PDB ID":
510
+ return gr.update(visible=True), gr.update(visible=False)
511
+ else:
512
+ return gr.update(visible=False), gr.update(visible=True)
513
+
514
+ mode.change(
515
+ toggle_mode,
516
+ inputs=[mode],
517
+ outputs=[pdb_input, pdb_file]
518
+ )
519
+
520
  prediction_btn.click(
521
+ process_interface,
522
+ inputs=[mode, pdb_input, pdb_file, segment_input],
 
 
 
523
  outputs=[predictions_output, molecule_output, download_output]
524
  )
525
 
526
  visualize_btn.click(
527
+ fetch_interface,
528
+ inputs=[mode, pdb_input, pdb_file],
529
  outputs=molecule_output2
530
  )
531
 
.ipynb_checkpoints/test3-checkpoint.ipynb CHANGED
@@ -1149,7 +1149,7 @@
1149
  },
1150
  {
1151
  "cell_type": "code",
1152
- "execution_count": 38,
1153
  "id": "514fad12-a31a-495f-af9e-04a18e11175e",
1154
  "metadata": {},
1155
  "outputs": [
@@ -1157,8 +1157,8 @@
1157
  "name": "stdout",
1158
  "output_type": "stream",
1159
  "text": [
1160
- "* Running on local URL: http://127.0.0.1:7896\n",
1161
- "* Running on public URL: https://387fb4706015321f92.gradio.live\n",
1162
  "\n",
1163
  "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n"
1164
  ]
@@ -1166,7 +1166,7 @@
1166
  {
1167
  "data": {
1168
  "text/html": [
1169
- "<div><iframe src=\"https://387fb4706015321f92.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
1170
  ],
1171
  "text/plain": [
1172
  "<IPython.core.display.HTML object>"
@@ -1179,7 +1179,7 @@
1179
  "data": {
1180
  "text/plain": []
1181
  },
1182
- "execution_count": 38,
1183
  "metadata": {},
1184
  "output_type": "execute_result"
1185
  }
@@ -1422,7 +1422,7 @@
1422
  "\n",
1423
  " // Create a new model for medium-scoring residues and apply orange sticks style\n",
1424
  " let midScoreModel = viewer.addModel(pdb, \"pdb\");\n",
1425
- " midScoreModel.setStyle({}, {});\n",
1426
  " midScoreModel.setStyle(\n",
1427
  " {\"chain\": \"%s\", \"resi\": [%s]}, \n",
1428
  " {\"stick\": {\"color\": \"orange\"}}\n",
 
1149
  },
1150
  {
1151
  "cell_type": "code",
1152
+ "execution_count": 39,
1153
  "id": "514fad12-a31a-495f-af9e-04a18e11175e",
1154
  "metadata": {},
1155
  "outputs": [
 
1157
  "name": "stdout",
1158
  "output_type": "stream",
1159
  "text": [
1160
+ "* Running on local URL: http://127.0.0.1:7897\n",
1161
+ "* Running on public URL: https://0d9b5d36fa5302e0df.gradio.live\n",
1162
  "\n",
1163
  "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n"
1164
  ]
 
1166
  {
1167
  "data": {
1168
  "text/html": [
1169
+ "<div><iframe src=\"https://0d9b5d36fa5302e0df.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
1170
  ],
1171
  "text/plain": [
1172
  "<IPython.core.display.HTML object>"
 
1179
  "data": {
1180
  "text/plain": []
1181
  },
1182
+ "execution_count": 39,
1183
  "metadata": {},
1184
  "output_type": "execute_result"
1185
  }
 
1422
  "\n",
1423
  " // Create a new model for medium-scoring residues and apply orange sticks style\n",
1424
  " let midScoreModel = viewer.addModel(pdb, \"pdb\");\n",
1425
+ " highScormidScoreModeleModel.setStyle({}, {});\n",
1426
  " midScoreModel.setStyle(\n",
1427
  " {\"chain\": \"%s\", \"resi\": [%s]}, \n",
1428
  " {\"stick\": {\"color\": \"orange\"}}\n",
test3.ipynb → .ipynb_checkpoints/test4-checkpoint.ipynb RENAMED
The diff for this file is too large to render. See raw diff
 
2IWI.cif DELETED
The diff for this file is too large to render. See raw diff
 
2IWI.pdb DELETED
The diff for this file is too large to render. See raw diff
 
2IWI_predictions.txt DELETED
@@ -1,249 +0,0 @@
1
- GLY 22 G 0.18
2
- LYS 23 K 0.51
3
- ASP 24 D 0.12
4
- ARG 25 R 0.25
5
- GLU 26 E 0.08
6
- ALA 27 A 0.82
7
- PHE 28 F 0.65
8
- GLU 29 E 0.65
9
- ALA 30 A 0.22
10
- GLU 31 E 0.49
11
- TYR 32 Y 0.57
12
- ARG 33 R 0.56
13
- LEU 34 L 0.83
14
- GLY 35 G 0.42
15
- PRO 36 P 0.97
16
- LEU 37 L 0.65
17
- LEU 38 L 0.08
18
- GLY 39 G 0.05
19
- LYS 40 K 0.55
20
- GLY 41 G 0.38
21
- GLY 42 G 0.45
22
- PHE 43 F 0.92
23
- GLY 44 G 0.00
24
- THR 45 T 0.76
25
- VAL 46 V 0.63
26
- PHE 47 F 0.97
27
- ALA 48 A 0.57
28
- GLY 49 G 0.94
29
- HIS 50 H 0.40
30
- ARG 51 R 0.27
31
- LEU 52 L 0.65
32
- THR 53 T 0.84
33
- ASP 54 D 0.85
34
- ARG 55 R 0.46
35
- LEU 56 L 0.87
36
- GLN 57 Q 0.76
37
- VAL 58 V 0.22
38
- ALA 59 A 0.65
39
- ILE 60 I 0.87
40
- LYS 61 K 0.69
41
- VAL 62 V 0.76
42
- ILE 63 I 0.70
43
- PRO 64 P 0.04
44
- ARG 65 R 0.20
45
- THR 79 T 0.80
46
- CYS 80 C 0.82
47
- PRO 81 P 0.72
48
- LEU 82 L 0.17
49
- GLU 83 E 0.70
50
- VAL 84 V 0.21
51
- ALA 85 A 0.15
52
- LEU 86 L 0.28
53
- LEU 87 L 0.03
54
- TRP 88 W 0.18
55
- LYS 89 K 0.01
56
- VAL 90 V 0.43
57
- GLY 91 G 0.25
58
- ALA 92 A 0.65
59
- GLY 93 G 0.00
60
- GLY 94 G 0.52
61
- GLY 95 G 0.22
62
- HIS 96 H 0.03
63
- PRO 97 P 0.57
64
- GLY 98 G 0.32
65
- VAL 99 V 0.89
66
- ILE 100 I 0.14
67
- ARG 101 R 0.66
68
- LEU 102 L 0.18
69
- LEU 103 L 0.30
70
- ASP 104 D 0.36
71
- TRP 105 W 0.83
72
- PHE 106 F 0.77
73
- GLU 107 E 0.95
74
- PHE 112 F 0.04
75
- MET 113 M 0.05
76
- LEU 114 L 0.32
77
- VAL 115 V 1.00
78
- LEU 116 L 0.43
79
- GLU 117 E 0.76
80
- ARG 118 R 0.65
81
- PRO 119 P 0.28
82
- LEU 120 L 0.74
83
- PRO 121 P 0.69
84
- ALA 122 A 0.89
85
- GLN 123 Q 0.68
86
- ASP 124 D 0.67
87
- LEU 125 L 0.89
88
- PHE 126 F 0.33
89
- ASP 127 D 0.05
90
- TYR 128 Y 0.59
91
- ILE 129 I 0.19
92
- THR 130 T 0.88
93
- GLU 131 E 0.24
94
- LYS 132 K 0.04
95
- GLY 133 G 0.99
96
- PRO 134 P 0.43
97
- LEU 135 L 0.31
98
- GLY 136 G 0.83
99
- GLU 137 E 0.12
100
- GLY 138 G 0.02
101
- PRO 139 P 0.71
102
- SER 140 S 0.70
103
- ARG 141 R 0.63
104
- CYS 142 C 0.70
105
- PHE 143 F 0.92
106
- PHE 144 F 0.02
107
- GLY 145 G 0.72
108
- GLN 146 Q 0.03
109
- VAL 147 V 0.70
110
- VAL 148 V 0.34
111
- ALA 149 A 0.95
112
- ALA 150 A 0.39
113
- ILE 151 I 0.21
114
- GLN 152 Q 0.86
115
- HIS 153 H 0.11
116
- CYS 154 C 0.30
117
- HIS 155 H 0.12
118
- SER 156 S 0.55
119
- ARG 157 R 0.20
120
- GLY 158 G 0.32
121
- VAL 159 V 0.80
122
- VAL 160 V 0.43
123
- HIS 161 H 0.99
124
- ARG 162 R 0.13
125
- ASP 163 D 0.73
126
- ILE 164 I 0.70
127
- LYS 165 K 0.88
128
- ASP 166 D 0.56
129
- GLU 167 E 0.61
130
- ASN 168 N 0.01
131
- ILE 169 I 0.48
132
- LEU 170 L 0.18
133
- ILE 171 I 0.28
134
- ASP 172 D 0.79
135
- LEU 173 L 0.33
136
- ARG 174 R 0.31
137
- ARG 175 R 0.39
138
- GLY 176 G 0.19
139
- CYS 177 C 0.57
140
- ALA 178 A 0.99
141
- LYS 179 K 0.47
142
- LEU 180 L 0.02
143
- ILE 181 I 0.81
144
- ASP 182 D 0.59
145
- PHE 183 F 0.74
146
- GLY 184 G 0.43
147
- SER 185 S 0.90
148
- GLY 186 G 0.87
149
- ALA 187 A 0.39
150
- LEU 188 L 0.43
151
- LEU 189 L 0.84
152
- HIS 190 H 0.91
153
- ASP 191 D 0.45
154
- GLU 192 E 0.00
155
- PRO 193 P 0.86
156
- TYR 194 Y 0.11
157
- THR 195 T 0.54
158
- ASP 196 D 0.70
159
- PHE 197 F 0.62
160
- ASP 198 D 0.31
161
- GLY 199 G 0.41
162
- THR 200 T 0.85
163
- ARG 201 R 0.18
164
- VAL 202 V 0.10
165
- TYR 203 Y 0.22
166
- SER 204 S 0.31
167
- PRO 205 P 0.41
168
- PRO 206 P 0.87
169
- GLU 207 E 0.77
170
- TRP 208 W 0.51
171
- ILE 209 I 0.18
172
- SER 210 S 0.03
173
- ARG 211 R 0.41
174
- HIS 212 H 0.83
175
- GLN 213 Q 0.30
176
- TYR 214 Y 0.38
177
- HIS 215 H 0.28
178
- ALA 216 A 0.51
179
- LEU 217 L 0.61
180
- PRO 218 P 0.77
181
- ALA 219 A 0.79
182
- THR 220 T 0.32
183
- VAL 221 V 0.35
184
- TRP 222 W 0.44
185
- SER 223 S 0.35
186
- LEU 224 L 0.67
187
- GLY 225 G 0.21
188
- ILE 226 I 0.88
189
- LEU 227 L 0.38
190
- LEU 228 L 0.27
191
- TYR 229 Y 0.53
192
- ASP 230 D 0.36
193
- MET 231 M 0.76
194
- VAL 232 V 0.59
195
- CYS 233 C 0.44
196
- GLY 234 G 0.88
197
- ASP 235 D 0.54
198
- ILE 236 I 0.63
199
- PRO 237 P 0.41
200
- PHE 238 F 0.84
201
- GLU 239 E 0.66
202
- ARG 240 R 0.20
203
- ASP 241 D 0.08
204
- GLN 242 Q 0.23
205
- GLU 243 E 0.31
206
- ILE 244 I 0.17
207
- LEU 245 L 0.58
208
- GLU 246 E 0.76
209
- ALA 247 A 0.82
210
- GLU 248 E 0.39
211
- LEU 249 L 0.53
212
- HIS 250 H 0.67
213
- PHE 251 F 0.36
214
- PRO 252 P 0.16
215
- ALA 253 A 0.08
216
- HIS 254 H 0.53
217
- VAL 255 V 0.39
218
- SER 256 S 0.24
219
- PRO 257 P 0.06
220
- ASP 258 D 0.79
221
- CYS 259 C 0.54
222
- CYS 260 C 0.46
223
- ALA 261 A 0.29
224
- LEU 262 L 0.60
225
- ILE 263 I 0.33
226
- ARG 264 R 0.56
227
- ARG 265 R 0.95
228
- CYS 266 C 0.63
229
- LEU 267 L 0.83
230
- ALA 268 A 0.22
231
- PRO 269 P 0.18
232
- LYS 270 K 0.71
233
- PRO 271 P 0.91
234
- SER 272 S 0.84
235
- SER 273 S 0.62
236
- ARG 274 R 0.22
237
- PRO 275 P 0.34
238
- SER 276 S 0.74
239
- LEU 277 L 0.41
240
- GLU 278 E 0.78
241
- GLU 279 E 0.76
242
- ILE 280 I 0.40
243
- LEU 281 L 0.27
244
- LEU 282 L 0.23
245
- ASP 283 D 0.65
246
- PRO 284 P 0.45
247
- TRP 285 W 0.72
248
- MET 286 M 0.57
249
- GLN 287 Q 0.29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4BDU.cif DELETED
The diff for this file is too large to render. See raw diff
 
4BDU.pdb DELETED
The diff for this file is too large to render. See raw diff
 
4BDU_A_scored.pdb DELETED
The diff for this file is too large to render. See raw diff
 
4BDU_C_scored.pdb DELETED
The diff for this file is too large to render. See raw diff
 
4BDU_predictions.txt DELETED
@@ -1,300 +0,0 @@
1
- Prediction for PDB: 4BDU, Chain: A
2
- Date: 2024-12-11 16:57:50
3
-
4
- Columns: Residue Name, Residue Number, One-letter Code, Normalized Score
5
-
6
- SER 2 S 0.05
7
- LYS 3 K 0.39
8
- GLY 4 G 0.24
9
- GLU 5 E 0.26
10
- GLU 6 E 0.35
11
- LEU 7 L 0.45
12
- PHE 8 F 0.82
13
- THR 9 T 0.32
14
- GLY 10 G 0.73
15
- VAL 11 V 0.42
16
- VAL 12 V 0.33
17
- PRO 13 P 0.96
18
- ILE 14 I 0.68
19
- LEU 15 L 0.71
20
- VAL 16 V 0.84
21
- GLU 17 E 0.26
22
- LEU 18 L 0.54
23
- ASP 19 D 0.46
24
- GLY 20 G 0.12
25
- ASP 21 D 0.57
26
- VAL 22 V 0.32
27
- ASN 23 N 0.18
28
- GLY 24 G 0.48
29
- HIS 25 H 0.95
30
- LYS 26 K 0.88
31
- PHE 27 F 0.13
32
- SER 28 S 0.12
33
- VAL 29 V 0.58
34
- SER 30 S 0.19
35
- GLY 31 G 0.09
36
- GLU 32 E 0.17
37
- GLY 33 G 0.60
38
- GLU 34 E 0.92
39
- GLY 35 G 0.48
40
- ASP 36 D 0.35
41
- ALA 37 A 0.72
42
- THR 38 T 0.47
43
- TYR 39 Y 0.11
44
- GLY 40 G 0.57
45
- LYS 41 K 0.86
46
- LEU 42 L 0.42
47
- THR 43 T 0.98
48
- LEU 44 L 0.27
49
- LYS 45 K 0.05
50
- PHE 46 F 0.54
51
- ILE 47 I 0.25
52
- CYS 48 C 0.73
53
- THR 49 T 0.44
54
- THR 50 T 0.85
55
- GLY 51 G 0.17
56
- LYS 52 K 0.72
57
- LEU 53 L 0.03
58
- PRO 54 P 0.26
59
- VAL 55 V 0.64
60
- PRO 56 P 0.88
61
- TRP 57 W 0.84
62
- PRO 58 P 0.71
63
- THR 59 T 0.41
64
- LEU 60 L 0.18
65
- VAL 61 V 0.32
66
- THR 62 T 0.87
67
- THR 63 T 0.87
68
- PHE 64 F 1.00
69
- VAL 68 V 0.50
70
- GLN 69 Q 0.10
71
- CYS 70 C 0.71
72
- PHE 71 F 0.47
73
- SER 72 S 0.46
74
- ARG 73 R 0.99
75
- TYR 74 Y 0.40
76
- PRO 75 P 0.78
77
- ASP 76 D 0.42
78
- HIS 77 H 0.93
79
- MET 78 M 0.47
80
- LYS 79 K 0.51
81
- GLN 80 Q 0.85
82
- HIS 81 H 0.11
83
- ASP 82 D 0.87
84
- PHE 83 F 0.13
85
- PHE 84 F 0.56
86
- LYS 85 K 0.44
87
- SER 86 S 0.44
88
- ALA 87 A 0.20
89
- MET 88 M 0.33
90
- PRO 89 P 0.77
91
- GLU 90 E 0.32
92
- GLY 91 G 0.80
93
- TYR 92 Y 0.52
94
- VAL 93 V 0.46
95
- GLN 94 Q 0.26
96
- GLU 95 E 0.03
97
- ARG 96 R 0.99
98
- THR 97 T 0.72
99
- ILE 98 I 0.38
100
- PHE 99 F 0.63
101
- PHE 100 F 0.03
102
- LYS 101 K 0.10
103
- ASP 102 D 0.52
104
- ASP 103 D 0.41
105
- GLY 104 G 0.91
106
- ASN 105 N 0.17
107
- TYR 106 Y 0.75
108
- LYS 107 K 0.07
109
- THR 108 T 0.78
110
- ARG 109 R 0.21
111
- ALA 110 A 0.93
112
- GLU 111 E 0.34
113
- VAL 112 V 0.06
114
- LYS 113 K 0.92
115
- PHE 114 F 0.43
116
- GLU 115 E 0.22
117
- GLY 116 G 0.67
118
- ASP 117 D 0.54
119
- THR 118 T 0.18
120
- LEU 119 L 0.33
121
- VAL 120 V 0.52
122
- ASN 121 N 0.23
123
- ARG 122 R 0.18
124
- ILE 123 I 0.52
125
- GLU 124 E 0.85
126
- LEU 125 L 0.66
127
- LYS 126 K 0.69
128
- GLY 127 G 0.46
129
- ILE 128 I 0.48
130
- ASP 129 D 0.55
131
- PHE 130 F 0.90
132
- LYS 131 K 1.00
133
- GLU 132 E 0.98
134
- ASP 133 D 0.41
135
- GLY 134 G 0.78
136
- ASN 135 N 0.12
137
- ILE 136 I 0.06
138
- LEU 137 L 0.80
139
- GLY 138 G 0.70
140
- HIS 139 H 0.52
141
- LYS 140 K 0.40
142
- LEU 141 L 0.97
143
- GLU 142 E 0.25
144
- TYR 143 Y 0.53
145
- ASN 144 N 0.26
146
- TYR 145 Y 0.67
147
- ASN 146 N 0.65
148
- SER 147 S 0.91
149
- HIS 148 H 0.82
150
- ASN 149 N 0.93
151
- VAL 150 V 0.67
152
- TYR 151 Y 0.87
153
- ILE 152 I 0.02
154
- MET 153 M 0.37
155
- ALA 154 A 0.50
156
- ASP 155 D 0.89
157
- LYS 156 K 1.00
158
- GLN 157 Q 0.96
159
- LYS 158 K 0.83
160
- ASN 159 N 0.95
161
- GLY 160 G 0.02
162
- ILE 161 I 0.57
163
- LYS 162 K 0.82
164
- VAL 163 V 0.66
165
- ASN 164 N 0.32
166
- PHE 165 F 0.50
167
- LYS 166 K 0.11
168
- ILE 167 I 0.49
169
- ARG 168 R 0.20
170
- HIS 169 H 0.82
171
- ASN 170 N 0.34
172
- ILE 171 I 0.91
173
- GLU 172 E 0.28
174
- ASP 173 D 0.02
175
- GLY 174 G 0.09
176
- SER 175 S 0.44
177
- VAL 176 V 0.87
178
- GLN 177 Q 0.65
179
- LEU 178 L 0.88
180
- ALA 179 A 0.89
181
- ASP 180 D 0.53
182
- HIS 181 H 0.89
183
- TYR 182 Y 0.44
184
- GLN 183 Q 0.02
185
- GLN 184 Q 0.91
186
- ASN 185 N 0.57
187
- THR 186 T 0.00
188
- PRO 187 P 0.97
189
- ILE 188 I 0.17
190
- GLY 189 G 0.57
191
- ASP 190 D 0.46
192
- GLY 191 G 0.08
193
- PRO 192 P 0.85
194
- VAL 193 V 0.09
195
- LEU 194 L 0.79
196
- LEU 195 L 0.61
197
- PRO 196 P 0.72
198
- ASP 197 D 0.29
199
- ASN 198 N 0.95
200
- HIS 199 H 0.78
201
- TYR 200 Y 0.02
202
- LEU 201 L 0.55
203
- SER 202 S 0.63
204
- THR 203 T 0.38
205
- GLN 204 Q 0.18
206
- SER 205 S 0.48
207
- ASN 206 N 0.19
208
- LEU 207 L 0.71
209
- SER 208 S 0.56
210
- LYS 209 K 0.56
211
- ASP 210 D 0.98
212
- PRO 211 P 0.43
213
- ASN 212 N 0.91
214
- GLU 213 E 0.76
215
- LYS 214 K 0.58
216
- ARG 215 R 0.42
217
- ASP 216 D 0.81
218
- HIS 217 H 0.96
219
- MET 218 M 0.26
220
- VAL 219 V 0.01
221
- LEU 220 L 0.27
222
- LEU 221 L 0.26
223
- GLU 222 E 0.92
224
- PHE 223 F 0.84
225
- VAL 224 V 0.72
226
- THR 225 T 1.00
227
- ALA 226 A 0.55
228
- ALA 227 A 0.72
229
- GLY 228 G 0.44
230
- ILE 229 I 0.01
231
- THR 230 T 0.98
232
- ALA 1054 A 0.83
233
- SER 1055 S 0.78
234
- THR 1056 T 0.55
235
- LYS 1057 K 0.40
236
- LYS 1058 K 0.06
237
- LEU 1059 L 0.82
238
- SER 1060 S 0.59
239
- GLU 1061 E 0.68
240
- SER 1062 S 0.28
241
- LEU 1063 L 0.79
242
- LYS 1064 K 0.94
243
- ARG 1065 R 0.32
244
- ILE 1066 I 0.28
245
- GLY 1067 G 0.94
246
- ASP 1068 D 0.19
247
- GLU 1069 E 0.76
248
- LEU 1070 L 0.19
249
- ASP 1071 D 0.14
250
- SER 1072 S 0.04
251
- ASN 1073 N 0.39
252
- MET 1074 M 0.50
253
- GLU 1075 E 0.92
254
- LEU 1076 L 0.81
255
- GLN 1077 Q 0.04
256
- ARG 1078 R 0.97
257
- MET 1079 M 0.20
258
- ILE 1080 I 0.90
259
- ALA 1081 A 0.43
260
- ALA 1082 A 0.93
261
- VAL 1083 V 0.28
262
- ASP 1084 D 0.29
263
- THR 1085 T 0.83
264
- ASP 1086 D 0.79
265
- SER 1087 S 0.39
266
- PRO 1088 P 0.85
267
- ARG 1089 R 0.41
268
- GLU 1090 E 0.08
269
- VAL 1091 V 0.10
270
- PHE 1092 F 0.15
271
- PHE 1093 F 0.10
272
- ARG 1094 R 0.59
273
- VAL 1095 V 0.69
274
- ALA 1096 A 0.50
275
- ALA 1097 A 0.86
276
- ASP 1098 D 0.77
277
- MET 1099 M 0.60
278
- PHE 1100 F 0.13
279
- SER 1101 S 0.22
280
- ASP 1102 D 0.29
281
- GLY 1103 G 0.22
282
- ASN 1104 N 0.01
283
- PHE 1105 F 0.24
284
- ASN 1106 N 0.48
285
- TRP 1107 W 0.45
286
- GLY 1108 G 0.52
287
- ARG 1109 R 0.86
288
- VAL 1110 V 0.68
289
- VAL 1111 V 0.96
290
- ALA 1112 A 0.01
291
- LEU 1113 L 0.88
292
- PHE 1114 F 0.66
293
- TYR 1115 Y 0.11
294
- PHE 1116 F 0.62
295
- ALA 1117 A 0.62
296
- SER 1118 S 0.26
297
- LYS 1119 K 0.58
298
- LEU 1120 L 0.18
299
- VAL 1121 V 0.85
300
- LEU 1122 L 0.27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
__pycache__/model_loader.cpython-312.pyc DELETED
Binary file (32.5 kB)
 
app.py CHANGED
@@ -29,6 +29,22 @@ from datasets import Dataset
29
 
30
  from scipy.special import expit
31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
 
34
  # Load model and move to device
@@ -39,6 +55,24 @@ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
39
  model.to(device)
40
  model.eval()
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  def normalize_scores(scores):
43
  min_score = np.min(scores)
44
  max_score = np.max(scores)
@@ -101,36 +135,44 @@ def fetch_pdb(pdb_id):
101
  pdb_path = convert_cif_to_pdb(pdb_path)
102
  return pdb_path
103
 
104
- def create_chain_specific_pdb(input_pdb: str, chain_id: str, residue_scores: list) -> str:
105
  """
106
- Create a PDB file with only the specified chain and replace B-factor with prediction scores
107
  """
108
  # Read the original PDB file
109
  parser = PDBParser(QUIET=True)
110
  structure = parser.get_structure('protein', input_pdb)
111
 
112
- # Prepare a new structure with only the specified chain
113
- new_structure = structure.copy()
114
- for model in new_structure:
115
- # Remove all chains except the specified one
116
- chains_to_remove = [chain for chain in model if chain.id != chain_id]
117
- for chain in chains_to_remove:
118
- model.detach_child(chain.id)
119
 
120
- # Create a modified PDB with scores in B-factor
121
  scores_dict = {resi: score for resi, score in residue_scores}
122
- for model in new_structure:
123
- for chain in model:
124
- for residue in chain:
125
- if residue.id[1] in scores_dict:
126
- for atom in residue:
127
- atom.bfactor = scores_dict[residue.id[1]] #* 100 # Scale score to B-factor range
128
-
129
- # Save the modified structure
130
- output_pdb = f"{os.path.splitext(input_pdb)[0]}_{chain_id}_scored.pdb"
 
 
 
 
 
 
 
 
 
 
 
131
  io = PDBIO()
132
- io.set_structure(new_structure)
133
- io.save(output_pdb)
 
 
134
 
135
  return output_pdb
136
 
@@ -158,8 +200,6 @@ def calculate_geometric_center(pdb_path: str, high_score_residues: list, chain_i
158
  return center
159
  return None
160
 
161
-
162
-
163
  def process_pdb(pdb_id_or_file, segment):
164
  # Determine if input is a PDB ID or file path
165
  if pdb_id_or_file.endswith('.pdb'):
@@ -192,67 +232,75 @@ def process_pdb(pdb_id_or_file, segment):
192
  sequence = "".join(seq1(res.resname) for res in protein_residues)
193
  sequence_id = [res.id[1] for res in protein_residues]
194
 
195
- # Prepare input for model prediction
196
- input_ids = tokenizer(" ".join(sequence), return_tensors="pt").input_ids.to(device)
197
- with torch.no_grad():
198
- outputs = model(input_ids).logits.detach().cpu().numpy().squeeze()
199
-
200
- # Calculate scores and normalize them
201
- scores = expit(outputs[:, 1] - outputs[:, 0])
202
  normalized_scores = normalize_scores(scores)
203
 
204
  # Zip residues with scores to track the residue ID and score
205
  residue_scores = [(resi, score) for resi, score in zip(sequence_id, normalized_scores)]
206
 
207
- # Identify high and mid scoring residues
208
- high_score_residues = [resi for resi, score in residue_scores if score > 0.75]
209
- mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]
210
-
211
- # Calculate geometric center of high-scoring residues
212
- geo_center = calculate_geometric_center(pdb_path, high_score_residues, segment)
213
- pymol_selection = f"select high_score_residues, resi {'+'.join(map(str, high_score_residues))} and chain {segment}"
214
- pymol_center_cmd = f"show spheres, resi {'+'.join(map(str, high_score_residues))} and chain {segment}" if geo_center is not None else ""
215
-
216
- # Generate the result string
217
  current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
218
  result_str = f"Prediction for PDB: {pdb_id}, Chain: {segment}\nDate: {current_time}\n\n"
 
219
  result_str += "Columns: Residue Name, Residue Number, One-letter Code, Normalized Score\n\n"
220
  result_str += "\n".join([
221
  f"{res.resname} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}"
222
- for i, res in enumerate(protein_residues)])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
 
 
 
 
 
 
 
 
 
 
 
 
224
  # Create prediction and scored PDB files
225
- prediction_file = f"{pdb_id}_predictions.txt"
226
  with open(prediction_file, "w") as f:
227
  f.write(result_str)
228
-
229
- # Create chain-specific PDB with scores in B-factor
230
- scored_pdb = create_chain_specific_pdb(pdb_path, segment, residue_scores)
231
-
232
- # Molecule visualization with updated script
233
- mol_vis = molecule(pdb_path, residue_scores, segment)
234
-
235
- # Construct PyMOL command suggestions
236
- pymol_commands = f"""
237
- PyMOL Visualization Commands:
238
- 1. Load PDB: load {os.path.abspath(pdb_path)}
239
- 2. Select high-scoring residues: {pymol_selection}
240
- 3. Highlight high-scoring residues: show sticks, high_score_residues
241
- {pymol_center_cmd}
242
- """
243
 
244
- return result_str + "\n\n" + pymol_commands, mol_vis, [prediction_file, scored_pdb]
245
-
246
 
247
  def molecule(input_pdb, residue_scores=None, segment='A'):
 
248
  mol = read_mol(input_pdb) # Read PDB file content
249
 
250
  # Prepare high-scoring residues script if scores are provided
251
  high_score_script = ""
252
  if residue_scores is not None:
253
  # Filter residues based on their scores
254
- high_score_residues = [resi for resi, score in residue_scores if score > 0.75]
255
- mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]
 
 
 
256
 
257
  high_score_script = """
258
  // Load the original model and apply white cartoon style
@@ -264,26 +312,57 @@ def molecule(input_pdb, residue_scores=None, segment='A'):
264
  );
265
 
266
  // Create a new model for high-scoring residues and apply red sticks style
267
- let highScoreModel = viewer.addModel(pdb, "pdb");
268
- highScoreModel.setStyle({}, {});
269
- highScoreModel.setStyle(
270
  {"chain": "%s", "resi": [%s]},
271
- {"stick": {"color": "red"}}
272
  );
273
 
274
- // Create a new model for medium-scoring residues and apply orange sticks style
275
- let midScoreModel = viewer.addModel(pdb, "pdb");
276
- midScoreModel.setStyle({}, {});
277
- midScoreModel.setStyle(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
278
  {"chain": "%s", "resi": [%s]},
279
  {"stick": {"color": "orange"}}
280
  );
 
 
 
 
 
 
 
 
 
281
  """ % (
282
  segment,
283
  segment,
284
- ", ".join(str(resi) for resi in high_score_residues),
285
  segment,
286
- ", ".join(str(resi) for resi in mid_score_residues)
 
 
 
 
 
 
287
  )
288
 
289
  # Generate the full HTML content
@@ -351,14 +430,22 @@ def molecule(input_pdb, residue_scores=None, segment='A'):
351
  # Return the HTML content within an iframe safely encoded for special characters
352
  return f'<iframe width="100%" height="700" srcdoc="{html_content.replace(chr(34), "&quot;").replace(chr(39), "&#39;")}"></iframe>'
353
 
354
-
355
  # Gradio UI
356
  with gr.Blocks() as demo:
357
  gr.Markdown("# Protein Binding Site Prediction")
358
 
359
- with gr.Row():
360
- pdb_input = gr.Textbox(value="4BDU", label="PDB ID", placeholder="Enter PDB ID here...")
361
- visualize_btn = gr.Button("Visualize Structure")
 
 
 
 
 
 
 
 
 
362
 
363
  molecule_output2 = Molecule3D(label="Protein Structure", reps=[
364
  {
@@ -375,23 +462,70 @@ with gr.Blocks() as demo:
375
  segment_input = gr.Textbox(value="A", label="Chain ID", placeholder="Enter Chain ID here...")
376
  prediction_btn = gr.Button("Predict Binding Site")
377
 
378
-
379
  molecule_output = gr.HTML(label="Protein Structure")
380
- predictions_output = gr.Textbox(label="Binding Site Predictions")
 
 
 
 
 
 
 
 
 
381
  download_output = gr.File(label="Download Files", file_count="multiple")
382
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
383
  prediction_btn.click(
384
- process_pdb,
385
- inputs=[
386
- pdb_input,
387
- segment_input
388
- ],
389
  outputs=[predictions_output, molecule_output, download_output]
390
  )
391
 
392
  visualize_btn.click(
393
- fetch_pdb,
394
- inputs=[pdb_input],
395
  outputs=molecule_output2
396
  )
397
 
 
29
 
30
  from scipy.special import expit
31
 
32
+ from datetime import datetime
33
+ import gradio as gr
34
+ import requests
35
+ from Bio.PDB import PDBParser, MMCIFParser, PDBIO
36
+ from Bio.PDB.Polypeptide import is_aa
37
+ from Bio.SeqUtils import seq1
38
+ from typing import Optional, Tuple
39
+ import numpy as np
40
+ import os
41
+ from gradio_molecule3d import Molecule3D
42
+
43
+ import re
44
+ import pandas as pd
45
+ import copy
46
+
47
+ from scipy.special import expit
48
 
49
 
50
  # Load model and move to device
 
55
  model.to(device)
56
  model.eval()
57
 
58
+ from datetime import datetime
59
+ import gradio as gr
60
+ import requests
61
+ from Bio.PDB import PDBParser, MMCIFParser, PDBIO
62
+ from Bio.PDB.Polypeptide import is_aa
63
+ from Bio.SeqUtils import seq1
64
+ from Bio.PDB import Select
65
+ from typing import Optional, Tuple
66
+ import numpy as np
67
+ import os
68
+ from gradio_molecule3d import Molecule3D
69
+
70
+ import re
71
+ import pandas as pd
72
+ import copy
73
+
74
+ from scipy.special import expit
75
+
76
  def normalize_scores(scores):
77
  min_score = np.min(scores)
78
  max_score = np.max(scores)
 
135
  pdb_path = convert_cif_to_pdb(pdb_path)
136
  return pdb_path
137
 
138
+ def create_chain_specific_pdb(input_pdb: str, chain_id: str, residue_scores: list, protein_residues: list) -> str:
139
  """
140
+ Create a PDB file with only the selected chain and residues, replacing B-factor with prediction scores
141
  """
142
  # Read the original PDB file
143
  parser = PDBParser(QUIET=True)
144
  structure = parser.get_structure('protein', input_pdb)
145
 
146
+ # Prepare a new structure with only the specified chain and selected residues
147
+ output_pdb = f"{os.path.splitext(input_pdb)[0]}_{chain_id}_predictions_scores.pdb"
 
 
 
 
 
148
 
149
+ # Create scores dictionary for easy lookup
150
  scores_dict = {resi: score for resi, score in residue_scores}
151
+
152
+ # Create a custom Select class
153
+ class ResidueSelector(Select):
154
+ def __init__(self, chain_id, selected_residues, scores_dict):
155
+ self.chain_id = chain_id
156
+ self.selected_residues = selected_residues
157
+ self.scores_dict = scores_dict
158
+
159
+ def accept_chain(self, chain):
160
+ return chain.id == self.chain_id
161
+
162
+ def accept_residue(self, residue):
163
+ return residue.id[1] in self.selected_residues
164
+
165
+ def accept_atom(self, atom):
166
+ if atom.parent.id[1] in self.scores_dict:
167
+ atom.bfactor = self.scores_dict[atom.parent.id[1]] * 100
168
+ return True
169
+
170
+ # Prepare output PDB with selected chain and residues, modified B-factors
171
  io = PDBIO()
172
+ selector = ResidueSelector(chain_id, [res.id[1] for res in protein_residues], scores_dict)
173
+
174
+ io.set_structure(structure[0])
175
+ io.save(output_pdb, selector)
176
 
177
  return output_pdb
178
 
 
200
  return center
201
  return None
202
 
 
 
203
  def process_pdb(pdb_id_or_file, segment):
204
  # Determine if input is a PDB ID or file path
205
  if pdb_id_or_file.endswith('.pdb'):
 
232
  sequence = "".join(seq1(res.resname) for res in protein_residues)
233
  sequence_id = [res.id[1] for res in protein_residues]
234
 
235
+ scores = np.random.rand(len(sequence))
 
 
 
 
 
 
236
  normalized_scores = normalize_scores(scores)
237
 
238
  # Zip residues with scores to track the residue ID and score
239
  residue_scores = [(resi, score) for resi, score in zip(sequence_id, normalized_scores)]
240
 
241
+
242
+ # Identify high scoring residues (> 0.5)
243
+ high_score_residues = [resi for resi, score in residue_scores if score > 0.5]
244
+
245
+ # Preparing the result: only print high scoring residues
 
 
 
 
 
246
  current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
247
  result_str = f"Prediction for PDB: {pdb_id}, Chain: {segment}\nDate: {current_time}\n\n"
248
+ result_str += "High-scoring Residues (Score > 0.5):\n"
249
  result_str += "Columns: Residue Name, Residue Number, One-letter Code, Normalized Score\n\n"
250
  result_str += "\n".join([
251
  f"{res.resname} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}"
252
+ for i, res in enumerate(protein_residues) if res.id[1] in high_score_residues
253
+ ])
254
+
255
+ # Create chain-specific PDB with scores in B-factor
256
+ scored_pdb = create_chain_specific_pdb(pdb_path, segment, residue_scores, protein_residues)
257
+
258
+ # Molecule visualization with updated script with color mapping
259
+ mol_vis = molecule(pdb_path, residue_scores, segment)#, color_map)
260
+
261
+ # Improved PyMOL command suggestions
262
+ current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
263
+ pymol_commands = f"Prediction for PDB: {pdb_id}, Chain: {segment}\nDate: {current_time}\n\n"
264
+
265
+ pymol_commands += f"""
266
+ # PyMOL Visualization Commands
267
+ load {os.path.abspath(pdb_path)}, protein
268
+ hide everything, all
269
+ show cartoon, chain {segment}
270
+ color white, chain {segment}
271
+ """
272
 
273
+ # Color specific residues
274
+ for score_range, color in [
275
+ (high_score_residues, "red")
276
+ ]:
277
+ if score_range:
278
+ resi_list = '+'.join(map(str, score_range))
279
+ pymol_commands += f"""
280
+ select high_score_residues, resi {resi_list} and chain {segment}
281
+ show sticks, high_score_residues
282
+ color {color}, high_score_residues
283
+ """
284
  # Create prediction and scored PDB files
285
+ prediction_file = f"{pdb_id}_binding_site_residues.txt"
286
  with open(prediction_file, "w") as f:
287
  f.write(result_str)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288
 
289
+ return pymol_commands, mol_vis, [prediction_file,scored_pdb]
 
290
 
291
  def molecule(input_pdb, residue_scores=None, segment='A'):
292
+ # More granular scoring for visualization
293
  mol = read_mol(input_pdb) # Read PDB file content
294
 
295
  # Prepare high-scoring residues script if scores are provided
296
  high_score_script = ""
297
  if residue_scores is not None:
298
  # Filter residues based on their scores
299
+ class1_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.6]
300
+ class2_score_residues = [resi for resi, score in residue_scores if 0.6 < score <= 0.7]
301
+ class3_score_residues = [resi for resi, score in residue_scores if 0.7 < score <= 0.8]
302
+ class4_score_residues = [resi for resi, score in residue_scores if 0.8 < score <= 0.9]
303
+ class5_score_residues = [resi for resi, score in residue_scores if 0.9 < score <= 1.0]
304
 
305
  high_score_script = """
306
  // Load the original model and apply white cartoon style
 
312
  );
313
 
314
  // Create a new model for high-scoring residues and apply red sticks style
315
+ let class1Model = viewer.addModel(pdb, "pdb");
316
+ class1Model.setStyle({}, {});
317
+ class1Model.setStyle(
318
  {"chain": "%s", "resi": [%s]},
319
+ {"stick": {"color": "blue"}}
320
  );
321
 
322
+ // Create a new model for high-scoring residues and apply red sticks style
323
+ let class2Model = viewer.addModel(pdb, "pdb");
324
+ class2Model.setStyle({}, {});
325
+ class2Model.setStyle(
326
+ {"chain": "%s", "resi": [%s]},
327
+ {"stick": {"color": "lightblue"}}
328
+ );
329
+
330
+ // Create a new model for high-scoring residues and apply red sticks style
331
+ let class3Model = viewer.addModel(pdb, "pdb");
332
+ class3Model.setStyle({}, {});
333
+ class3Model.setStyle(
334
+ {"chain": "%s", "resi": [%s]},
335
+ {"stick": {"color": "white"}}
336
+ );
337
+
338
+ // Create a new model for high-scoring residues and apply red sticks style
339
+ let class4Model = viewer.addModel(pdb, "pdb");
340
+ class4Model.setStyle({}, {});
341
+ class4Model.setStyle(
342
  {"chain": "%s", "resi": [%s]},
343
  {"stick": {"color": "orange"}}
344
  );
345
+
346
+ // Create a new model for high-scoring residues and apply red sticks style
347
+ let class5Model = viewer.addModel(pdb, "pdb");
348
+ class5Model.setStyle({}, {});
349
+ class5Model.setStyle(
350
+ {"chain": "%s", "resi": [%s]},
351
+ {"stick": {"color": "red"}}
352
+ );
353
+
354
  """ % (
355
  segment,
356
  segment,
357
+ ", ".join(str(resi) for resi in class1_score_residues),
358
  segment,
359
+ ", ".join(str(resi) for resi in class2_score_residues),
360
+ segment,
361
+ ", ".join(str(resi) for resi in class3_score_residues),
362
+ segment,
363
+ ", ".join(str(resi) for resi in class4_score_residues),
364
+ segment,
365
+ ", ".join(str(resi) for resi in class5_score_residues)
366
  )
367
 
368
  # Generate the full HTML content
 
430
  # Return the HTML content within an iframe safely encoded for special characters
431
  return f'<iframe width="100%" height="700" srcdoc="{html_content.replace(chr(34), "&quot;").replace(chr(39), "&#39;")}"></iframe>'
432
 
 
433
  # Gradio UI
434
  with gr.Blocks() as demo:
435
  gr.Markdown("# Protein Binding Site Prediction")
436
 
437
+ # Mode selection
438
+ mode = gr.Radio(
439
+ choices=["PDB ID", "Upload File"],
440
+ value="PDB ID",
441
+ label="Input Mode",
442
+ info="Choose whether to input a PDB ID or upload a PDB/CIF file."
443
+ )
444
+
445
+ # Input components based on mode
446
+ pdb_input = gr.Textbox(value="4BDU", label="PDB ID", placeholder="Enter PDB ID here...")
447
+ pdb_file = gr.File(label="Upload PDB/CIF File", visible=False)
448
+ visualize_btn = gr.Button("Visualize Structure")
449
 
450
  molecule_output2 = Molecule3D(label="Protein Structure", reps=[
451
  {
 
462
  segment_input = gr.Textbox(value="A", label="Chain ID", placeholder="Enter Chain ID here...")
463
  prediction_btn = gr.Button("Predict Binding Site")
464
 
 
465
  molecule_output = gr.HTML(label="Protein Structure")
466
+ explanation_vis = gr.Markdown("""
467
+ Residues with a score > 0.5 are considered binding sites and represented as sticks with the score dependent colorcoding:
468
+ - 0.5-0.6: blue
469
+ - 0.6–0.7: light blue
470
+ - 0.7–0.8: white
471
+ - 0.8–0.9: orange
472
+ - 0.9–1.0: red
473
+ """)
474
+ predictions_output = gr.Textbox(label="Visualize Prediction with PyMol")
475
+ gr.Markdown("### Download:\n- List of predicted binding site residues\n- PDB with score in beta factor column")
476
  download_output = gr.File(label="Download Files", file_count="multiple")
477
 
478
+ def process_interface(mode, pdb_id, pdb_file, chain_id):
479
+ if mode == "PDB ID":
480
+ return process_pdb(pdb_id, chain_id)
481
+ elif mode == "Upload File":
482
+ _, ext = os.path.splitext(pdb_file.name)
483
+ file_path = os.path.join('./', f"{_}{ext}")
484
+ if ext == '.cif':
485
+ pdb_path = convert_cif_to_pdb(file_path)
486
+ else:
487
+ pdb_path= file_path
488
+ return process_pdb(pdb_path, chain_id)
489
+ else:
490
+ return "Error: Invalid mode selected", None, None
491
+
492
+ def fetch_interface(mode, pdb_id, pdb_file):
493
+ if mode == "PDB ID":
494
+ return fetch_pdb(pdb_id)
495
+ elif mode == "Upload File":
496
+ _, ext = os.path.splitext(pdb_file.name)
497
+ file_path = os.path.join('./', f"{_}{ext}")
498
+ #print(ext)
499
+ if ext == '.cif':
500
+ pdb_path = convert_cif_to_pdb(file_path)
501
+ else:
502
+ pdb_path= file_path
503
+ #print(pdb_path)
504
+ return pdb_path
505
+ else:
506
+ return "Error: Invalid mode selected"
507
+
508
+ def toggle_mode(selected_mode):
509
+ if selected_mode == "PDB ID":
510
+ return gr.update(visible=True), gr.update(visible=False)
511
+ else:
512
+ return gr.update(visible=False), gr.update(visible=True)
513
+
514
+ mode.change(
515
+ toggle_mode,
516
+ inputs=[mode],
517
+ outputs=[pdb_input, pdb_file]
518
+ )
519
+
520
  prediction_btn.click(
521
+ process_interface,
522
+ inputs=[mode, pdb_input, pdb_file, segment_input],
 
 
 
523
  outputs=[predictions_output, molecule_output, download_output]
524
  )
525
 
526
  visualize_btn.click(
527
+ fetch_interface,
528
+ inputs=[mode, pdb_input, pdb_file],
529
  outputs=molecule_output2
530
  )
531
 
test.ipynb DELETED
@@ -1,846 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 3,
6
- "id": "1f8ea359-674c-4263-9c2a-7a8e7e464249",
7
- "metadata": {},
8
- "outputs": [
9
- {
10
- "name": "stdout",
11
- "output_type": "stream",
12
- "text": [
13
- "* Running on local URL: http://127.0.0.1:7862\n",
14
- "\n",
15
- "To create a public link, set `share=True` in `launch()`.\n"
16
- ]
17
- },
18
- {
19
- "data": {
20
- "text/html": [
21
- "<div><iframe src=\"http://127.0.0.1:7862/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
22
- ],
23
- "text/plain": [
24
- "<IPython.core.display.HTML object>"
25
- ]
26
- },
27
- "metadata": {},
28
- "output_type": "display_data"
29
- },
30
- {
31
- "data": {
32
- "text/plain": []
33
- },
34
- "execution_count": 3,
35
- "metadata": {},
36
- "output_type": "execute_result"
37
- }
38
- ],
39
- "source": [
40
- "import gradio as gr\n",
41
- "import requests\n",
42
- "from Bio.PDB import PDBParser\n",
43
- "from gradio_molecule3d import Molecule3D\n",
44
- "import numpy as np\n",
45
- "\n",
46
- "# Function to fetch a PDB file from RCSB PDB\n",
47
- "def fetch_pdb(pdb_id):\n",
48
- " pdb_url = f'https://files.rcsb.org/download/{pdb_id}.pdb'\n",
49
- " pdb_path = f'{pdb_id}.pdb'\n",
50
- " response = requests.get(pdb_url)\n",
51
- " if response.status_code == 200:\n",
52
- " with open(pdb_path, 'wb') as f:\n",
53
- " f.write(response.content)\n",
54
- " return pdb_path\n",
55
- " else:\n",
56
- " return None\n",
57
- "\n",
58
- "# Function to process the PDB file and return random predictions\n",
59
- "def process_pdb(pdb_id, segment):\n",
60
- " pdb_path = fetch_pdb(pdb_id)\n",
61
- " if not pdb_path:\n",
62
- " return \"Failed to fetch PDB file\", None, None\n",
63
- "\n",
64
- " parser = PDBParser(QUIET=True)\n",
65
- " structure = parser.get_structure('protein', pdb_path)\n",
66
- " \n",
67
- " try:\n",
68
- " chain = structure[0][segment]\n",
69
- " except KeyError:\n",
70
- " return \"Invalid Chain ID\", None, None\n",
71
- "\n",
72
- " sequence = [residue.get_resname() for residue in chain if residue.id[0] == ' ']\n",
73
- " random_scores = np.random.rand(len(sequence))\n",
74
- "\n",
75
- " result_str = \"\\n\".join(\n",
76
- " f\"{seq} {res.id[1]} {score:.2f}\" \n",
77
- " for seq, res, score in zip(sequence, chain, random_scores)\n",
78
- " )\n",
79
- "\n",
80
- " # Save the predictions to a file\n",
81
- " prediction_file = f\"{pdb_id}_predictions.txt\"\n",
82
- " with open(prediction_file, \"w\") as f:\n",
83
- " f.write(result_str)\n",
84
- " \n",
85
- " return result_str, pdb_path, prediction_file\n",
86
- "\n",
87
- "#reps = [{\"model\": 0, \"style\": \"cartoon\", \"color\": \"spectrum\"}]\n",
88
- "\n",
89
- "reps = [\n",
90
- " {\n",
91
- " \"model\": 0,\n",
92
- " \"style\": \"cartoon\",\n",
93
- " \"color\": \"whiteCarbon\",\n",
94
- " \"residue_range\": \"\",\n",
95
- " \"around\": 0,\n",
96
- " \"byres\": False,\n",
97
- " },\n",
98
- " {\n",
99
- " \"model\": 0,\n",
100
- " \"chain\": \"A\",\n",
101
- " \"resname\": \"HIS\",\n",
102
- " \"style\": \"stick\",\n",
103
- " \"color\": \"red\"\n",
104
- " }\n",
105
- " ]\n",
106
- "\n",
107
- "\n",
108
- "# Gradio UI\n",
109
- "with gr.Blocks() as demo:\n",
110
- " gr.Markdown(\"# Protein Binding Site Prediction (Random Scores)\")\n",
111
- "\n",
112
- " with gr.Row():\n",
113
- " pdb_input = gr.Textbox(value=\"2IWI\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
114
- " segment_input = gr.Textbox(value=\"A\", label=\"Chain ID\", placeholder=\"Enter Chain ID here...\")\n",
115
- " visualize_btn = gr.Button(\"Visualize Structure\")\n",
116
- " prediction_btn = gr.Button(\"Predict Random Binding Site Scores\")\n",
117
- "\n",
118
- " molecule_output = Molecule3D(label=\"Protein Structure\", reps=reps)\n",
119
- " predictions_output = gr.Textbox(label=\"Binding Site Predictions\")\n",
120
- " download_output = gr.File(label=\"Download Predictions\")\n",
121
- "\n",
122
- " visualize_btn.click(fetch_pdb, inputs=[pdb_input], outputs=molecule_output)\n",
123
- " prediction_btn.click(process_pdb, inputs=[pdb_input, segment_input], outputs=[predictions_output, molecule_output, download_output])\n",
124
- "\n",
125
- " gr.Markdown(\"## Examples\")\n",
126
- " gr.Examples(\n",
127
- " examples=[\n",
128
- " [\"2IWI\", \"A\"],\n",
129
- " [\"7RPZ\", \"B\"],\n",
130
- " [\"3TJN\", \"C\"]\n",
131
- " ],\n",
132
- " inputs=[pdb_input, segment_input],\n",
133
- " outputs=[predictions_output, molecule_output, download_output]\n",
134
- " )\n",
135
- "\n",
136
- "demo.launch()"
137
- ]
138
- },
139
- {
140
- "cell_type": "code",
141
- "execution_count": null,
142
- "id": "bd50ff2e-ed03-498e-8af2-73c0fb8ea07e",
143
- "metadata": {},
144
- "outputs": [],
145
- "source": []
146
- },
147
- {
148
- "cell_type": "raw",
149
- "id": "88affe12-7c48-4bd6-9e46-32cdffa729fe",
150
- "metadata": {},
151
- "source": [
152
- "import gradio as gr\n",
153
- "from gradio_molecule3d import Molecule3D\n",
154
- "\n",
155
- "\n",
156
- "example = Molecule3D().example_value()\n",
157
- "\n",
158
- "\n",
159
- "reps = [\n",
160
- " {\n",
161
- " \"model\": 0,\n",
162
- " \"style\": \"cartoon\",\n",
163
- " \"color\": \"whiteCarbon\",\n",
164
- " \"residue_range\": \"\",\n",
165
- " \"around\": 0,\n",
166
- " \"byres\": False,\n",
167
- " },\n",
168
- " {\n",
169
- " \"model\": 0,\n",
170
- " \"chain\": \"A\",\n",
171
- " \"resname\": \"HIS\",\n",
172
- " \"style\": \"stick\",\n",
173
- " \"color\": \"red\"\n",
174
- " }\n",
175
- " ]\n",
176
- "\n",
177
- "\n",
178
- "\n",
179
- "def predict(x):\n",
180
- " print(\"predict function\", x)\n",
181
- " print(x.name)\n",
182
- " return x\n",
183
- "\n",
184
- "with gr.Blocks() as demo:\n",
185
- " gr.Markdown(\"# Molecule3D\")\n",
186
- " inp = Molecule3D(label=\"Molecule3D\", reps=reps)\n",
187
- " out = Molecule3D(label=\"Output\", reps=reps)\n",
188
- "\n",
189
- " btn = gr.Button(\"Predict\")\n",
190
- " gr.Markdown(\"\"\" \n",
191
- " You can configure the default rendering of the molecule by adding a list of representations\n",
192
- " <pre>\n",
193
- " reps = [\n",
194
- " {\n",
195
- " \"model\": 0,\n",
196
- " \"style\": \"cartoon\",\n",
197
- " \"color\": \"whiteCarbon\",\n",
198
- " \"residue_range\": \"\",\n",
199
- " \"around\": 0,\n",
200
- " \"byres\": False,\n",
201
- " },\n",
202
- " {\n",
203
- " \"model\": 0,\n",
204
- " \"chain\": \"A\",\n",
205
- " \"resname\": \"HIS\",\n",
206
- " \"style\": \"stick\",\n",
207
- " \"color\": \"red\"\n",
208
- " }\n",
209
- " ]\n",
210
- " </pre>\n",
211
- " \"\"\")\n",
212
- " btn.click(predict, inputs=inp, outputs=out)\n",
213
- "\n",
214
- "\n",
215
- "if __name__ == \"__main__\":\n",
216
- " demo.launch()"
217
- ]
218
- },
219
- {
220
- "cell_type": "code",
221
- "execution_count": null,
222
- "id": "d27cc368-26a0-42c2-a68a-8833de7bb4a0",
223
- "metadata": {},
224
- "outputs": [],
225
- "source": []
226
- },
227
- {
228
- "cell_type": "raw",
229
- "id": "2b970adb-3152-427f-bb58-b92974ff406e",
230
- "metadata": {},
231
- "source": [
232
- "import gradio as gr\n",
233
- "import os\n",
234
- "import requests\n",
235
- "from Bio.PDB import PDBParser, PDBIO\n",
236
- "import biotite.structure.io as bsio\n",
237
- "\n",
238
- "def read_mol(pdb_path):\n",
239
- " \"\"\"Read PDB file and return its content as a string\"\"\"\n",
240
- " with open(pdb_path, 'r') as f:\n",
241
- " return f.read()\n",
242
- "\n",
243
- "# Function to fetch or upload the PDB file\n",
244
- "def get_pdb(pdb_code=\"\", filepath=\"\"):\n",
245
- " if pdb_code and len(pdb_code) == 4:\n",
246
- " pdb_file = f\"{pdb_code}.pdb\"\n",
247
- " if not os.path.exists(pdb_file):\n",
248
- " os.system(f\"wget -qnc https://files.rcsb.org/view/{pdb_code}.pdb\")\n",
249
- " return pdb_file\n",
250
- " elif filepath is not None:\n",
251
- " return filepath\n",
252
- " else:\n",
253
- " return None\n",
254
- "\n",
255
- "def molecule(input_pdb):\n",
256
- " mol = read_mol(input_pdb) # Read PDB file content\n",
257
- " \n",
258
- " html_content = f\"\"\"\n",
259
- " <!DOCTYPE html>\n",
260
- " <html>\n",
261
- " <head> \n",
262
- " <meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\" />\n",
263
- " <style>\n",
264
- " .mol-container {{\n",
265
- " width: 100%;\n",
266
- " height: 700px;\n",
267
- " position: relative;\n",
268
- " }}\n",
269
- " </style>\n",
270
- " <script src=\"https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.3/jquery.min.js\"></script>\n",
271
- " <script src=\"https://3Dmol.csb.pitt.edu/build/3Dmol-min.js\"></script>\n",
272
- " </head>\n",
273
- " <body>\n",
274
- " <div id=\"container\" class=\"mol-container\"></div>\n",
275
- " <script>\n",
276
- " let pdb = `{mol}`; // Use template literal to properly escape PDB content\n",
277
- " $(document).ready(function () {{\n",
278
- " let element = $(\"#container\");\n",
279
- " let config = {{ backgroundColor: \"white\" }};\n",
280
- " let viewer = $3Dmol.createViewer(element, config);\n",
281
- " viewer.addModel(pdb, \"pdb\");\n",
282
- " viewer.getModel(0).setStyle({{}}, {{ cartoon: {{ colorscheme:\"whiteCarbon\" }} }});\n",
283
- " viewer.zoomTo();\n",
284
- " viewer.render();\n",
285
- " viewer.zoom(0.8, 2000);\n",
286
- " }});\n",
287
- " </script>\n",
288
- " </body>\n",
289
- " </html>\n",
290
- " \"\"\"\n",
291
- " \n",
292
- " # Return the HTML content within an iframe safely encoded for special characters\n",
293
- " return f'<iframe width=\"100%\" height=\"700\" srcdoc=\"{html_content.replace(chr(34), \"&quot;\").replace(chr(39), \"&#39;\")}\"></iframe>'\n",
294
- "\n",
295
- "# Gradio function to update the visualization\n",
296
- "def update(inp, file):\n",
297
- " pdb_path = get_pdb(inp, file)\n",
298
- " if pdb_path:\n",
299
- " return molecule(pdb_path)\n",
300
- " else:\n",
301
- " return \"Invalid input. Please provide a valid PDB code or upload a PDB file.\"\n",
302
- "\n",
303
- "# Gradio UI\n",
304
- "demo = gr.Blocks()\n",
305
- "with demo:\n",
306
- " gr.Markdown(\"# PDB Viewer using 3Dmol.js\")\n",
307
- " with gr.Row():\n",
308
- " with gr.Column():\n",
309
- " inp = gr.Textbox(\n",
310
- " placeholder=\"PDB Code or upload file below\", label=\"Input structure\"\n",
311
- " )\n",
312
- " file = gr.File(file_count=\"single\")\n",
313
- " btn = gr.Button(\"View structure\")\n",
314
- " mol = gr.HTML()\n",
315
- " btn.click(fn=update, inputs=[inp, file], outputs=mol)\n",
316
- "\n",
317
- "# Launch the Gradio interface \n",
318
- "demo.launch(debug=True)"
319
- ]
320
- },
321
- {
322
- "cell_type": "code",
323
- "execution_count": null,
324
- "id": "ee215c16-a1fb-450f-bb93-37aaee6fb3f1",
325
- "metadata": {},
326
- "outputs": [],
327
- "source": []
328
- },
329
- {
330
- "cell_type": "raw",
331
- "id": "050aa2e8-2dbe-4a28-8692-58ca7c50fccd",
332
- "metadata": {},
333
- "source": [
334
- "import gradio as gr\n",
335
- "import os\n",
336
- "import requests\n",
337
- "import numpy as np\n",
338
- "from Bio.PDB import PDBParser\n",
339
- "\n",
340
- "def read_mol(pdb_path):\n",
341
- " \"\"\"Read PDB file and return its content as a string\"\"\"\n",
342
- " with open(pdb_path, 'r') as f:\n",
343
- " return f.read()\n",
344
- "\n",
345
- "# Function to fetch a PDB file from RCSB PDB\n",
346
- "def fetch_pdb(pdb_id):\n",
347
- " pdb_url = f'https://files.rcsb.org/download/{pdb_id}.pdb'\n",
348
- " pdb_path = f'{pdb_id}.pdb'\n",
349
- " response = requests.get(pdb_url)\n",
350
- " if response.status_code == 200:\n",
351
- " with open(pdb_path, 'wb') as f:\n",
352
- " f.write(response.content)\n",
353
- " return molecule(pdb_path)\n",
354
- " else:\n",
355
- " return None\n",
356
- "\n",
357
- "# Function to process the PDB file and return random predictions\n",
358
- "def process_pdb(pdb_id, segment):\n",
359
- " pdb_path = fetch_pdb(pdb_id)\n",
360
- " if not pdb_path:\n",
361
- " return \"Failed to fetch PDB file\", None, None\n",
362
- " \n",
363
- " parser = PDBParser(QUIET=True)\n",
364
- " structure = parser.get_structure('protein', pdb_path)\n",
365
- " \n",
366
- " try:\n",
367
- " chain = structure[0][segment]\n",
368
- " except KeyError:\n",
369
- " return \"Invalid Chain ID\", None, None\n",
370
- " \n",
371
- " sequence = [residue.get_resname() for residue in chain if residue.id[0] == ' ']\n",
372
- " random_scores = np.random.rand(len(sequence))\n",
373
- " result_str = \"\\n\".join(\n",
374
- " f\"{seq} {res.id[1]} {score:.2f}\" \n",
375
- " for seq, res, score in zip(sequence, chain, random_scores)\n",
376
- " )\n",
377
- " \n",
378
- " # Save the predictions to a file\n",
379
- " prediction_file = f\"{pdb_id}_predictions.txt\"\n",
380
- " with open(prediction_file, \"w\") as f:\n",
381
- " f.write(result_str)\n",
382
- " \n",
383
- " return result_str, molecule(pdb_path), prediction_file\n",
384
- "\n",
385
- "def molecule(input_pdb):\n",
386
- " mol = read_mol(input_pdb) # Read PDB file content\n",
387
- " \n",
388
- " html_content = f\"\"\"\n",
389
- " <!DOCTYPE html>\n",
390
- " <html>\n",
391
- " <head> \n",
392
- " <meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\" />\n",
393
- " <style>\n",
394
- " .mol-container {{\n",
395
- " width: 100%;\n",
396
- " height: 700px;\n",
397
- " position: relative;\n",
398
- " }}\n",
399
- " </style>\n",
400
- " <script src=\"https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.3/jquery.min.js\"></script>\n",
401
- " <script src=\"https://3Dmol.csb.pitt.edu/build/3Dmol-min.js\"></script>\n",
402
- " </head>\n",
403
- " <body>\n",
404
- " <div id=\"container\" class=\"mol-container\"></div>\n",
405
- " <script>\n",
406
- " let pdb = `{mol}`; // Use template literal to properly escape PDB content\n",
407
- " $(document).ready(function () {{\n",
408
- " let element = $(\"#container\");\n",
409
- " let config = {{ backgroundColor: \"white\" }};\n",
410
- " let viewer = $3Dmol.createViewer(element, config);\n",
411
- " viewer.addModel(pdb, \"pdb\");\n",
412
- " \n",
413
- " // Set cartoon representation with white carbon color scheme\n",
414
- " viewer.getModel(0).setStyle({{}}, {{ cartoon: {{ colorscheme:\"whiteCarbon\" }} }});\n",
415
- " \n",
416
- " // Highlight specific histidine residues in red stick representation\n",
417
- " viewer.getModel(0).setStyle(\n",
418
- " {{\"resn\": \"HIS\"}}, \n",
419
- " {{\"stick\": {{\"color\": \"red\"}}}}\n",
420
- " );\n",
421
- " \n",
422
- " viewer.zoomTo();\n",
423
- " viewer.render();\n",
424
- " viewer.zoom(0.8, 2000);\n",
425
- " }});\n",
426
- " </script>\n",
427
- " </body>\n",
428
- " </html>\n",
429
- " \"\"\"\n",
430
- " \n",
431
- " # Return the HTML content within an iframe safely encoded for special characters\n",
432
- " return f'<iframe width=\"100%\" height=\"700\" srcdoc=\"{html_content.replace(chr(34), \"&quot;\").replace(chr(39), \"&#39;\")}\"></iframe>'\n",
433
- "\n",
434
- "# Gradio UI\n",
435
- "with gr.Blocks() as demo:\n",
436
- " gr.Markdown(\"# Protein Binding Site Prediction (Random Scores)\")\n",
437
- " with gr.Row():\n",
438
- " pdb_input = gr.Textbox(value=\"2IWI\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
439
- " segment_input = gr.Textbox(value=\"A\", label=\"Chain ID\", placeholder=\"Enter Chain ID here...\")\n",
440
- " visualize_btn = gr.Button(\"Visualize Structure\")\n",
441
- " prediction_btn = gr.Button(\"Predict Random Binding Site Scores\")\n",
442
- " \n",
443
- " # Use HTML output instead of Molecule3D\n",
444
- " molecule_output = gr.HTML(label=\"Protein Structure\")\n",
445
- " predictions_output = gr.Textbox(label=\"Binding Site Predictions\")\n",
446
- " download_output = gr.File(label=\"Download Predictions\")\n",
447
- " \n",
448
- " visualize_btn.click(fetch_pdb, inputs=[pdb_input], outputs=molecule_output)\n",
449
- " prediction_btn.click(process_pdb, inputs=[pdb_input, segment_input], outputs=[predictions_output, molecule_output, download_output])\n",
450
- " \n",
451
- " gr.Markdown(\"## Examples\")\n",
452
- " gr.Examples(\n",
453
- " examples=[\n",
454
- " [\"2IWI\", \"A\"],\n",
455
- " [\"7RPZ\", \"B\"],\n",
456
- " [\"3TJN\", \"C\"]\n",
457
- " ],\n",
458
- " inputs=[pdb_input, segment_input],\n",
459
- " outputs=[predictions_output, molecule_output, download_output]\n",
460
- " )\n",
461
- "\n",
462
- "demo.launch(debug=True)"
463
- ]
464
- },
465
- {
466
- "cell_type": "code",
467
- "execution_count": null,
468
- "id": "9a5facd9-855c-4b35-8dd3-2c0c8c7dd356",
469
- "metadata": {},
470
- "outputs": [],
471
- "source": []
472
- },
473
- {
474
- "cell_type": "raw",
475
- "id": "a762170f-92a9-473d-b18d-53607a780e3b",
476
- "metadata": {},
477
- "source": [
478
- "import gradio as gr\n",
479
- "import requests\n",
480
- "from Bio.PDB import PDBParser\n",
481
- "import numpy as np\n",
482
- "import os\n",
483
- "\n",
484
- "def read_mol(pdb_path):\n",
485
- " \"\"\"Read PDB file and return its content as a string\"\"\"\n",
486
- " with open(pdb_path, 'r') as f:\n",
487
- " return f.read()\n",
488
- "\n",
489
- "# Function to fetch a PDB file from RCSB PDB\n",
490
- "def fetch_pdb(pdb_id):\n",
491
- " pdb_url = f'https://files.rcsb.org/download/{pdb_id}.pdb'\n",
492
- " pdb_path = f'{pdb_id}.pdb'\n",
493
- " response = requests.get(pdb_url)\n",
494
- " if response.status_code == 200:\n",
495
- " with open(pdb_path, 'wb') as f:\n",
496
- " f.write(response.content)\n",
497
- " return pdb_path\n",
498
- " else:\n",
499
- " return None\n",
500
- "\n",
501
- "# Function to process the PDB file and return random predictions\n",
502
- "def process_pdb(pdb_id, segment):\n",
503
- " pdb_path = fetch_pdb(pdb_id)\n",
504
- " if not pdb_path:\n",
505
- " return \"Failed to fetch PDB file\", None, None\n",
506
- " parser = PDBParser(QUIET=True)\n",
507
- " structure = parser.get_structure('protein', pdb_path)\n",
508
- " \n",
509
- " try:\n",
510
- " chain = structure[0][segment]\n",
511
- " except KeyError:\n",
512
- " return \"Invalid Chain ID\", None, None\n",
513
- " sequence = [residue.get_resname() for residue in chain if residue.id[0] == ' ']\n",
514
- " random_scores = np.random.rand(len(sequence))\n",
515
- " result_str = \"\\n\".join(\n",
516
- " f\"{seq} {res.id[1]} {score:.2f}\" \n",
517
- " for seq, res, score in zip(sequence, chain, random_scores)\n",
518
- " )\n",
519
- " # Save the predictions to a file\n",
520
- " prediction_file = f\"{pdb_id}_predictions.txt\"\n",
521
- " with open(prediction_file, \"w\") as f:\n",
522
- " f.write(result_str)\n",
523
- " \n",
524
- " return result_str, molecule(pdb_path), prediction_file\n",
525
- "\n",
526
- "def molecule(input_pdb):\n",
527
- " mol = read_mol(input_pdb) # Read PDB file content\n",
528
- " \n",
529
- " html_content = f\"\"\"\n",
530
- " <!DOCTYPE html>\n",
531
- " <html>\n",
532
- " <head> \n",
533
- " <meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\" />\n",
534
- " <style>\n",
535
- " .mol-container {{\n",
536
- " width: 100%;\n",
537
- " height: 700px;\n",
538
- " position: relative;\n",
539
- " }}\n",
540
- " </style>\n",
541
- " <script src=\"https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.3/jquery.min.js\"></script>\n",
542
- " <script src=\"https://3Dmol.csb.pitt.edu/build/3Dmol-min.js\"></script>\n",
543
- " </head>\n",
544
- " <body>\n",
545
- " <div id=\"container\" class=\"mol-container\"></div>\n",
546
- " <script>\n",
547
- " let pdb = `{mol}`; // Use template literal to properly escape PDB content\n",
548
- " $(document).ready(function () {{\n",
549
- " let element = $(\"#container\");\n",
550
- " let config = {{ backgroundColor: \"white\" }};\n",
551
- " let viewer = $3Dmol.createViewer(element, config);\n",
552
- " viewer.addModel(pdb, \"pdb\");\n",
553
- " \n",
554
- " // Set cartoon representation with white carbon color scheme\n",
555
- " viewer.getModel(0).setStyle({{}}, {{ cartoon: {{ colorscheme:\"whiteCarbon\" }} }});\n",
556
- " \n",
557
- " // Highlight specific histidine residues in red stick representation\n",
558
- " viewer.getModel(0).setStyle(\n",
559
- " {{\"resn\": \"HIS\"}}, \n",
560
- " {{\"stick\": {{\"color\": \"red\"}}}}\n",
561
- " );\n",
562
- " \n",
563
- " viewer.zoomTo();\n",
564
- " viewer.render();\n",
565
- " viewer.zoom(0.8, 2000);\n",
566
- " }});\n",
567
- " </script>\n",
568
- " </body>\n",
569
- " </html>\n",
570
- " \"\"\"\n",
571
- " \n",
572
- " # Return the HTML content within an iframe safely encoded for special characters\n",
573
- " return f'<iframe width=\"100%\" height=\"700\" srcdoc=\"{html_content.replace(chr(34), \"&quot;\").replace(chr(39), \"&#39;\")}\"></iframe>'\n",
574
- "\n",
575
- "# Gradio UI\n",
576
- "with gr.Blocks() as demo:\n",
577
- " gr.Markdown(\"# Protein Binding Site Prediction (Random Scores)\")\n",
578
- " with gr.Row():\n",
579
- " pdb_input = gr.Textbox(value=\"2IWI\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
580
- " segment_input = gr.Textbox(value=\"A\", label=\"Chain ID\", placeholder=\"Enter Chain ID here...\")\n",
581
- " visualize_btn = gr.Button(\"Visualize Structure\")\n",
582
- " prediction_btn = gr.Button(\"Predict Random Binding Site Scores\")\n",
583
- " \n",
584
- " molecule_output = gr.HTML(label=\"Protein Structure\")\n",
585
- " predictions_output = gr.Textbox(label=\"Binding Site Predictions\")\n",
586
- " download_output = gr.File(label=\"Download Predictions\")\n",
587
- " \n",
588
- " # Update to explicitly use molecule() function for visualization\n",
589
- " visualize_btn.click(\n",
590
- " fn=lambda pdb_id: molecule(fetch_pdb(pdb_id)), \n",
591
- " inputs=[pdb_input], \n",
592
- " outputs=molecule_output\n",
593
- " )\n",
594
- " \n",
595
- " prediction_btn.click(process_pdb, inputs=[pdb_input, segment_input], outputs=[predictions_output, molecule_output, download_output])\n",
596
- " \n",
597
- " gr.Markdown(\"## Examples\")\n",
598
- " gr.Examples(\n",
599
- " examples=[\n",
600
- " [\"2IWI\", \"A\"],\n",
601
- " [\"7RPZ\", \"B\"],\n",
602
- " [\"3TJN\", \"C\"]\n",
603
- " ],\n",
604
- " inputs=[pdb_input, segment_input],\n",
605
- " outputs=[predictions_output, molecule_output, download_output]\n",
606
- " )\n",
607
- "\n",
608
- "demo.launch()"
609
- ]
610
- },
611
- {
612
- "cell_type": "code",
613
- "execution_count": null,
614
- "id": "15527a58-c449-4da0-8fab-3baaede15e41",
615
- "metadata": {},
616
- "outputs": [],
617
- "source": []
618
- },
619
- {
620
- "cell_type": "code",
621
- "execution_count": 2,
622
- "id": "9ef3e330-cb88-4c29-b84a-2f8652883cfc",
623
- "metadata": {},
624
- "outputs": [
625
- {
626
- "name": "stdout",
627
- "output_type": "stream",
628
- "text": [
629
- "* Running on local URL: http://127.0.0.1:7860\n",
630
- "\n",
631
- "To create a public link, set `share=True` in `launch()`.\n"
632
- ]
633
- },
634
- {
635
- "data": {
636
- "text/html": [
637
- "<div><iframe src=\"http://127.0.0.1:7860/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
638
- ],
639
- "text/plain": [
640
- "<IPython.core.display.HTML object>"
641
- ]
642
- },
643
- "metadata": {},
644
- "output_type": "display_data"
645
- },
646
- {
647
- "data": {
648
- "text/plain": []
649
- },
650
- "execution_count": 2,
651
- "metadata": {},
652
- "output_type": "execute_result"
653
- }
654
- ],
655
- "source": [
656
- "import gradio as gr\n",
657
- "import requests\n",
658
- "from Bio.PDB import PDBParser\n",
659
- "import numpy as np\n",
660
- "import os\n",
661
- "from gradio_molecule3d import Molecule3D\n",
662
- "\n",
663
- "def read_mol(pdb_path):\n",
664
- " \"\"\"Read PDB file and return its content as a string\"\"\"\n",
665
- " with open(pdb_path, 'r') as f:\n",
666
- " return f.read()\n",
667
- "\n",
668
- "def fetch_pdb(pdb_id):\n",
669
- " pdb_url = f'https://files.rcsb.org/download/{pdb_id}.pdb'\n",
670
- " pdb_path = f'{pdb_id}.pdb'\n",
671
- " response = requests.get(pdb_url)\n",
672
- " if response.status_code == 200:\n",
673
- " with open(pdb_path, 'wb') as f:\n",
674
- " f.write(response.content)\n",
675
- " return pdb_path\n",
676
- " else:\n",
677
- " return None\n",
678
- "\n",
679
- "def process_pdb(pdb_id, segment):\n",
680
- " pdb_path = fetch_pdb(pdb_id)\n",
681
- " if not pdb_path:\n",
682
- " return \"Failed to fetch PDB file\", None, None\n",
683
- " parser = PDBParser(QUIET=True)\n",
684
- " structure = parser.get_structure('protein', pdb_path)\n",
685
- " \n",
686
- " try:\n",
687
- " chain = structure[0][segment]\n",
688
- " except KeyError:\n",
689
- " return \"Invalid Chain ID\", None, None\n",
690
- " sequence = [residue.get_resname() for residue in chain if residue.id[0] == ' ']\n",
691
- " random_scores = np.random.rand(len(sequence))\n",
692
- " result_str = \"\\n\".join(\n",
693
- " f\"{seq} {res.id[1]} {score:.2f}\" \n",
694
- " for seq, res, score in zip(sequence, chain, random_scores)\n",
695
- " )\n",
696
- " # Save the predictions to a file\n",
697
- " prediction_file = f\"{pdb_id}_predictions.txt\"\n",
698
- " with open(prediction_file, \"w\") as f:\n",
699
- " f.write(result_str)\n",
700
- " \n",
701
- " return result_str, molecule(pdb_path, random_scores), prediction_file\n",
702
- "\n",
703
- "def molecule(input_pdb, scores=None):\n",
704
- " mol = read_mol(input_pdb) # Read PDB file content\n",
705
- " \n",
706
- " # Prepare high-scoring residues script if scores are provided\n",
707
- " high_score_script = \"\"\n",
708
- " if scores is not None:\n",
709
- " high_score_script = \"\"\"\n",
710
- " // Highlight residues with high scores\n",
711
- " let highScoreResidues = [{}];\n",
712
- " viewer.getModel(0).setStyle(\n",
713
- " {{\"resi\": highScoreResidues}}, \n",
714
- " {{\"stick\": {{\"color\": \"red\"}}}}\n",
715
- " );\n",
716
- " \"\"\".format(\n",
717
- " \", \".join(str(i+1) for i, score in enumerate(scores) if score > 0.8)\n",
718
- " )\n",
719
- " \n",
720
- " html_content = f\"\"\"\n",
721
- " <!DOCTYPE html>\n",
722
- " <html>\n",
723
- " <head> \n",
724
- " <meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\" />\n",
725
- " <style>\n",
726
- " .mol-container {{\n",
727
- " width: 100%;\n",
728
- " height: 700px;\n",
729
- " position: relative;\n",
730
- " }}\n",
731
- " </style>\n",
732
- " <script src=\"https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.3/jquery.min.js\"></script>\n",
733
- " <script src=\"https://3Dmol.csb.pitt.edu/build/3Dmol-min.js\"></script>\n",
734
- " </head>\n",
735
- " <body>\n",
736
- " <div id=\"container\" class=\"mol-container\"></div>\n",
737
- " <script>\n",
738
- " let pdb = `{mol}`; // Use template literal to properly escape PDB content\n",
739
- " $(document).ready(function () {{\n",
740
- " let element = $(\"#container\");\n",
741
- " let config = {{ backgroundColor: \"white\" }};\n",
742
- " let viewer = $3Dmol.createViewer(element, config);\n",
743
- " viewer.addModel(pdb, \"pdb\");\n",
744
- " \n",
745
- " // Set cartoon representation with white carbon color scheme\n",
746
- " viewer.getModel(0).setStyle({{}}, {{ cartoon: {{ colorscheme:\"whiteCarbon\" }} }});\n",
747
- " \n",
748
- " {high_score_script}\n",
749
- " \n",
750
- " viewer.zoomTo();\n",
751
- " viewer.render();\n",
752
- " viewer.zoom(0.8, 2000);\n",
753
- " }});\n",
754
- " </script>\n",
755
- " </body>\n",
756
- " </html>\n",
757
- " \"\"\"\n",
758
- " \n",
759
- " # Return the HTML content within an iframe safely encoded for special characters\n",
760
- " return f'<iframe width=\"100%\" height=\"700\" srcdoc=\"{html_content.replace(chr(34), \"&quot;\").replace(chr(39), \"&#39;\")}\"></iframe>'\n",
761
- "\n",
762
- "reps = [\n",
763
- " {\n",
764
- " \"model\": 0,\n",
765
- " \"style\": \"cartoon\",\n",
766
- " \"color\": \"whiteCarbon\",\n",
767
- " \"residue_range\": \"\",\n",
768
- " \"around\": 0,\n",
769
- " \"byres\": False,\n",
770
- " }\n",
771
- " ]\n",
772
- "# Gradio UI\n",
773
- "with gr.Blocks() as demo:\n",
774
- " gr.Markdown(\"# Protein Binding Site Prediction (Random Scores)\")\n",
775
- " with gr.Row():\n",
776
- " pdb_input = gr.Textbox(value=\"2IWI\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
777
- " segment_input = gr.Textbox(value=\"A\", label=\"Chain ID\", placeholder=\"Enter Chain ID here...\")\n",
778
- " visualize_btn = gr.Button(\"Visualize Structure\")\n",
779
- " #prediction_btn = gr.Button(\"Predict Random Binding Site Scores\")\n",
780
- "\n",
781
- " molecule_output2 = Molecule3D(label=\"Protein Structure\", reps=reps)\n",
782
- "\n",
783
- " with gr.Row():\n",
784
- " pdb_input = gr.Textbox(value=\"2IWI\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
785
- " segment_input = gr.Textbox(value=\"A\", label=\"Chain ID\", placeholder=\"Enter Chain ID here...\")\n",
786
- " prediction_btn = gr.Button(\"Predict Random Binding Site Scores\")\n",
787
- "\n",
788
- " molecule_output = gr.HTML(label=\"Protein Structure\")\n",
789
- " predictions_output = gr.Textbox(label=\"Binding Site Predictions\")\n",
790
- " download_output = gr.File(label=\"Download Predictions\")\n",
791
- " \n",
792
- " #visualize_btn.click(\n",
793
- " # fn=lambda pdb_id: molecule(fetch_pdb(pdb_id)), \n",
794
- " # inputs=[pdb_input], \n",
795
- " # outputs=molecule_output\n",
796
- " #)\n",
797
- " visualize_btn.click(fetch_pdb, inputs=[pdb_input], outputs=molecule_output2)\n",
798
- " \n",
799
- " \n",
800
- " prediction_btn.click(process_pdb, inputs=[pdb_input, segment_input], outputs=[predictions_output, molecule_output, download_output])\n",
801
- " \n",
802
- " gr.Markdown(\"## Examples\")\n",
803
- " gr.Examples(\n",
804
- " examples=[\n",
805
- " [\"2IWI\", \"A\"],\n",
806
- " [\"7RPZ\", \"B\"],\n",
807
- " [\"3TJN\", \"C\"]\n",
808
- " ],\n",
809
- " inputs=[pdb_input, segment_input],\n",
810
- " outputs=[predictions_output, molecule_output, download_output]\n",
811
- " )\n",
812
- "\n",
813
- "demo.launch()"
814
- ]
815
- },
816
- {
817
- "cell_type": "code",
818
- "execution_count": null,
819
- "id": "14605615-8610-4d9e-841b-db7618cde844",
820
- "metadata": {},
821
- "outputs": [],
822
- "source": []
823
- }
824
- ],
825
- "metadata": {
826
- "kernelspec": {
827
- "display_name": "Python (LLM)",
828
- "language": "python",
829
- "name": "llm"
830
- },
831
- "language_info": {
832
- "codemirror_mode": {
833
- "name": "ipython",
834
- "version": 3
835
- },
836
- "file_extension": ".py",
837
- "mimetype": "text/x-python",
838
- "name": "python",
839
- "nbconvert_exporter": "python",
840
- "pygments_lexer": "ipython3",
841
- "version": "3.12.7"
842
- }
843
- },
844
- "nbformat": 4,
845
- "nbformat_minor": 5
846
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
test2.ipynb DELETED
@@ -1,1598 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 2,
6
- "id": "f3b7f6b0-6685-4a5c-9529-45e0ca905a3b",
7
- "metadata": {},
8
- "outputs": [
9
- {
10
- "name": "stdout",
11
- "output_type": "stream",
12
- "text": [
13
- "* Running on local URL: http://127.0.0.1:7860\n",
14
- "\n",
15
- "To create a public link, set `share=True` in `launch()`.\n"
16
- ]
17
- },
18
- {
19
- "data": {
20
- "text/html": [
21
- "<div><iframe src=\"http://127.0.0.1:7860/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
22
- ],
23
- "text/plain": [
24
- "<IPython.core.display.HTML object>"
25
- ]
26
- },
27
- "metadata": {},
28
- "output_type": "display_data"
29
- },
30
- {
31
- "data": {
32
- "text/plain": []
33
- },
34
- "execution_count": 2,
35
- "metadata": {},
36
- "output_type": "execute_result"
37
- }
38
- ],
39
- "source": [
40
- "import gradio as gr\n",
41
- "import requests\n",
42
- "from Bio.PDB import PDBParser\n",
43
- "import numpy as np\n",
44
- "import os\n",
45
- "from gradio_molecule3d import Molecule3D\n",
46
- "\n",
47
- "def read_mol(pdb_path):\n",
48
- " \"\"\"Read PDB file and return its content as a string\"\"\"\n",
49
- " with open(pdb_path, 'r') as f:\n",
50
- " return f.read()\n",
51
- "\n",
52
- "def fetch_pdb(pdb_id):\n",
53
- " pdb_url = f'https://files.rcsb.org/download/{pdb_id}.pdb'\n",
54
- " pdb_path = f'{pdb_id}.pdb'\n",
55
- " response = requests.get(pdb_url)\n",
56
- " if response.status_code == 200:\n",
57
- " with open(pdb_path, 'wb') as f:\n",
58
- " f.write(response.content)\n",
59
- " return pdb_path\n",
60
- " else:\n",
61
- " return None\n",
62
- "\n",
63
- "def process_pdb(pdb_id, segment):\n",
64
- " pdb_path = fetch_pdb(pdb_id)\n",
65
- " if not pdb_path:\n",
66
- " return \"Failed to fetch PDB file\", None, None\n",
67
- " \n",
68
- " parser = PDBParser(QUIET=1)\n",
69
- " structure = parser.get_structure('protein', pdb_path)\n",
70
- " \n",
71
- " try:\n",
72
- " chain = structure[0][segment]\n",
73
- " except KeyError:\n",
74
- " return \"Invalid Chain ID\", None, None\n",
75
- " \n",
76
- " # Comprehensive amino acid mapping\n",
77
- " aa_dict = {\n",
78
- " 'ALA': 'A', 'CYS': 'C', 'ASP': 'D', 'GLU': 'E', 'PHE': 'F',\n",
79
- " 'GLY': 'G', 'HIS': 'H', 'ILE': 'I', 'LYS': 'K', 'LEU': 'L',\n",
80
- " 'MET': 'M', 'ASN': 'N', 'PRO': 'P', 'GLN': 'Q', 'ARG': 'R',\n",
81
- " 'SER': 'S', 'THR': 'T', 'VAL': 'V', 'TRP': 'W', 'TYR': 'Y',\n",
82
- " 'MSE': 'M', 'SEP': 'S', 'TPO': 'T', 'CSO': 'C', 'PTR': 'Y', 'HYP': 'P'\n",
83
- " }\n",
84
- " \n",
85
- " # Exclude non-amino acid residues\n",
86
- " sequence = [\n",
87
- " residue for residue in chain \n",
88
- " if residue.get_resname().strip() in aa_dict\n",
89
- " ]\n",
90
- " \n",
91
- " random_scores = np.random.rand(len(sequence))\n",
92
- " result_str = \"\\n\".join(\n",
93
- " f\"{aa_dict[res.get_resname()]} {res.id[1]} {score:.2f}\" \n",
94
- " for res, score in zip(sequence, random_scores)\n",
95
- " )\n",
96
- " \n",
97
- " # Save the predictions to a file\n",
98
- " prediction_file = f\"{pdb_id}_predictions.txt\"\n",
99
- " with open(prediction_file, \"w\") as f:\n",
100
- " f.write(result_str)\n",
101
- " \n",
102
- " return result_str, molecule(pdb_path, random_scores, segment), prediction_file\n",
103
- "\n",
104
- "def molecule(input_pdb, scores=None, segment='A'):\n",
105
- " mol = read_mol(input_pdb) # Read PDB file content\n",
106
- " \n",
107
- " # Prepare high-scoring residues script if scores are provided\n",
108
- " high_score_script = \"\"\n",
109
- " if scores is not None:\n",
110
- " high_score_script = \"\"\"\n",
111
- " // Reset all styles first\n",
112
- " viewer.getModel(0).setStyle({}, {});\n",
113
- " \n",
114
- " // Show only the selected chain\n",
115
- " viewer.getModel(0).setStyle(\n",
116
- " {\"chain\": \"%s\"}, \n",
117
- " { cartoon: {colorscheme:\"whiteCarbon\"} }\n",
118
- " );\n",
119
- " \n",
120
- " // Highlight high-scoring residues only for the selected chain\n",
121
- " let highScoreResidues = [%s];\n",
122
- " viewer.getModel(0).setStyle(\n",
123
- " {\"chain\": \"%s\", \"resi\": highScoreResidues}, \n",
124
- " {\"stick\": {\"color\": \"red\"}}\n",
125
- " );\n",
126
- " \"\"\" % (segment, \n",
127
- " \", \".join(str(i+1) for i, score in enumerate(scores) if score > 0.8),\n",
128
- " segment)\n",
129
- " \n",
130
- " html_content = f\"\"\"\n",
131
- " <!DOCTYPE html>\n",
132
- " <html>\n",
133
- " <head> \n",
134
- " <meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\" />\n",
135
- " <style>\n",
136
- " .mol-container {{\n",
137
- " width: 100%;\n",
138
- " height: 700px;\n",
139
- " position: relative;\n",
140
- " }}\n",
141
- " </style>\n",
142
- " <script src=\"https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.3/jquery.min.js\"></script>\n",
143
- " <script src=\"https://3Dmol.csb.pitt.edu/build/3Dmol-min.js\"></script>\n",
144
- " </head>\n",
145
- " <body>\n",
146
- " <div id=\"container\" class=\"mol-container\"></div>\n",
147
- " <script>\n",
148
- " let pdb = `{mol}`; // Use template literal to properly escape PDB content\n",
149
- " $(document).ready(function () {{\n",
150
- " let element = $(\"#container\");\n",
151
- " let config = {{ backgroundColor: \"white\" }};\n",
152
- " let viewer = $3Dmol.createViewer(element, config);\n",
153
- " viewer.addModel(pdb, \"pdb\");\n",
154
- " \n",
155
- " // Reset all styles and show only selected chain\n",
156
- " viewer.getModel(0).setStyle(\n",
157
- " {{\"chain\": \"{segment}\"}}, \n",
158
- " {{ cartoon: {{ colorscheme:\"whiteCarbon\" }} }}\n",
159
- " );\n",
160
- " \n",
161
- " {high_score_script}\n",
162
- " \n",
163
- " viewer.zoomTo();\n",
164
- " viewer.render();\n",
165
- " viewer.zoom(0.8, 2000);\n",
166
- " }});\n",
167
- " </script>\n",
168
- " </body>\n",
169
- " </html>\n",
170
- " \"\"\"\n",
171
- " \n",
172
- " # Return the HTML content within an iframe safely encoded for special characters\n",
173
- " return f'<iframe width=\"100%\" height=\"700\" srcdoc=\"{html_content.replace(chr(34), \"&quot;\").replace(chr(39), \"&#39;\")}\"></iframe>'\n",
174
- "\n",
175
- "reps = [\n",
176
- " {\n",
177
- " \"model\": 0,\n",
178
- " \"style\": \"cartoon\",\n",
179
- " \"color\": \"whiteCarbon\",\n",
180
- " \"residue_range\": \"\",\n",
181
- " \"around\": 0,\n",
182
- " \"byres\": False,\n",
183
- " }\n",
184
- " ]\n",
185
- "# Gradio UI\n",
186
- "with gr.Blocks() as demo:\n",
187
- " gr.Markdown(\"# Protein Binding Site Prediction (Random Scores)\")\n",
188
- " with gr.Row():\n",
189
- " pdb_input = gr.Textbox(value=\"2IWI\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
190
- " visualize_btn = gr.Button(\"Visualize Structure\")\n",
191
- "\n",
192
- " molecule_output2 = Molecule3D(label=\"Protein Structure\", reps=reps)\n",
193
- "\n",
194
- " with gr.Row():\n",
195
- " pdb_input = gr.Textbox(value=\"2IWI\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
196
- " segment_input = gr.Textbox(value=\"A\", label=\"Chain ID\", placeholder=\"Enter Chain ID here...\")\n",
197
- " prediction_btn = gr.Button(\"Predict Random Binding Site Scores\")\n",
198
- "\n",
199
- " molecule_output = gr.HTML(label=\"Protein Structure\")\n",
200
- " predictions_output = gr.Textbox(label=\"Binding Site Predictions\")\n",
201
- " download_output = gr.File(label=\"Download Predictions\")\n",
202
- " \n",
203
- " visualize_btn.click(fetch_pdb, inputs=[pdb_input], outputs=molecule_output2)\n",
204
- " \n",
205
- " prediction_btn.click(process_pdb, inputs=[pdb_input, segment_input], outputs=[predictions_output, molecule_output, download_output])\n",
206
- " \n",
207
- " gr.Markdown(\"## Examples\")\n",
208
- " gr.Examples(\n",
209
- " examples=[\n",
210
- " [\"2IWI\", \"A\"],\n",
211
- " [\"7RPZ\", \"B\"],\n",
212
- " [\"3TJN\", \"C\"]\n",
213
- " ],\n",
214
- " inputs=[pdb_input, segment_input],\n",
215
- " outputs=[predictions_output, molecule_output, download_output]\n",
216
- " )\n",
217
- "\n",
218
- "demo.launch()"
219
- ]
220
- },
221
- {
222
- "cell_type": "code",
223
- "execution_count": 6,
224
- "id": "28f8f28c-48d3-4e35-9766-3de9882179b5",
225
- "metadata": {},
226
- "outputs": [
227
- {
228
- "name": "stdout",
229
- "output_type": "stream",
230
- "text": [
231
- "* Running on local URL: http://127.0.0.1:7864\n",
232
- "\n",
233
- "To create a public link, set `share=True` in `launch()`.\n"
234
- ]
235
- },
236
- {
237
- "data": {
238
- "text/html": [
239
- "<div><iframe src=\"http://127.0.0.1:7864/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
240
- ],
241
- "text/plain": [
242
- "<IPython.core.display.HTML object>"
243
- ]
244
- },
245
- "metadata": {},
246
- "output_type": "display_data"
247
- },
248
- {
249
- "data": {
250
- "text/plain": []
251
- },
252
- "execution_count": 6,
253
- "metadata": {},
254
- "output_type": "execute_result"
255
- }
256
- ],
257
- "source": [
258
- "import gradio as gr\n",
259
- "import requests\n",
260
- "from Bio.PDB import PDBParser\n",
261
- "import numpy as np\n",
262
- "import os\n",
263
- "from gradio_molecule3d import Molecule3D\n",
264
- "\n",
265
- "def read_mol(pdb_path):\n",
266
- " \"\"\"Read PDB file and return its content as a string\"\"\"\n",
267
- " with open(pdb_path, 'r') as f:\n",
268
- " return f.read()\n",
269
- "\n",
270
- "def fetch_pdb(pdb_id):\n",
271
- " pdb_url = f'https://files.rcsb.org/download/{pdb_id}.pdb'\n",
272
- " pdb_path = f'{pdb_id}.pdb'\n",
273
- " response = requests.get(pdb_url)\n",
274
- " if response.status_code == 200:\n",
275
- " with open(pdb_path, 'wb') as f:\n",
276
- " f.write(response.content)\n",
277
- " return pdb_path\n",
278
- " else:\n",
279
- " return None\n",
280
- "\n",
281
- "def process_pdb(pdb_id, segment):\n",
282
- " pdb_path = fetch_pdb(pdb_id)\n",
283
- " if not pdb_path:\n",
284
- " return \"Failed to fetch PDB file\", None, None\n",
285
- " \n",
286
- " parser = PDBParser(QUIET=1)\n",
287
- " structure = parser.get_structure('protein', pdb_path)\n",
288
- " \n",
289
- " try:\n",
290
- " chain = structure[0][segment]\n",
291
- " except KeyError:\n",
292
- " return \"Invalid Chain ID\", None, None\n",
293
- " \n",
294
- " # Comprehensive amino acid mapping\n",
295
- " aa_dict = {\n",
296
- " 'ALA': 'A', 'CYS': 'C', 'ASP': 'D', 'GLU': 'E', 'PHE': 'F',\n",
297
- " 'GLY': 'G', 'HIS': 'H', 'ILE': 'I', 'LYS': 'K', 'LEU': 'L',\n",
298
- " 'MET': 'M', 'ASN': 'N', 'PRO': 'P', 'GLN': 'Q', 'ARG': 'R',\n",
299
- " 'SER': 'S', 'THR': 'T', 'VAL': 'V', 'TRP': 'W', 'TYR': 'Y',\n",
300
- " 'MSE': 'M', 'SEP': 'S', 'TPO': 'T', 'CSO': 'C', 'PTR': 'Y', 'HYP': 'P'\n",
301
- " }\n",
302
- " \n",
303
- " # Exclude non-amino acid residues\n",
304
- " sequence = [\n",
305
- " residue for residue in chain \n",
306
- " if residue.get_resname().strip() in aa_dict\n",
307
- " ]\n",
308
- " \n",
309
- " random_scores = np.random.rand(len(sequence))\n",
310
- " result_str = \"\\n\".join(\n",
311
- " f\"{aa_dict[res.get_resname()]} {res.id[1]} {score:.2f}\" \n",
312
- " for res, score in zip(sequence, random_scores)\n",
313
- " )\n",
314
- " \n",
315
- " # Save the predictions to a file\n",
316
- " prediction_file = f\"{pdb_id}_predictions.txt\"\n",
317
- " with open(prediction_file, \"w\") as f:\n",
318
- " f.write(result_str)\n",
319
- " \n",
320
- " return result_str, molecule(pdb_path, random_scores, segment), prediction_file\n",
321
- "\n",
322
- "def molecule(input_pdb, scores=None, segment='A'):\n",
323
- " mol = read_mol(input_pdb) # Read PDB file content\n",
324
- " \n",
325
- " # Prepare high-scoring residues script if scores are provided\n",
326
- " high_score_script = \"\"\n",
327
- " if scores is not None:\n",
328
- " high_score_script = \"\"\"\n",
329
- " // Reset all styles first\n",
330
- " viewer.getModel(0).setStyle({}, {});\n",
331
- " \n",
332
- " // Show only the selected chain\n",
333
- " viewer.getModel(0).setStyle(\n",
334
- " {\"chain\": \"%s\"}, \n",
335
- " { cartoon: {colorscheme:\"whiteCarbon\"} }\n",
336
- " );\n",
337
- " \n",
338
- " // Highlight high-scoring residues only for the selected chain\n",
339
- " let highScoreResidues = [%s];\n",
340
- " viewer.getModel(0).setStyle(\n",
341
- " {\"chain\": \"%s\", \"resi\": highScoreResidues}, \n",
342
- " {\"stick\": {\"color\": \"red\"}}\n",
343
- " );\n",
344
- " \"\"\" % (segment, \n",
345
- " \", \".join(str(i+1) for i, score in enumerate(scores) if score > 0.8),\n",
346
- " segment)\n",
347
- " \n",
348
- " html_content = f\"\"\"\n",
349
- " <!DOCTYPE html>\n",
350
- " <html>\n",
351
- " <head> \n",
352
- " <meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\" />\n",
353
- " <style>\n",
354
- " .mol-container {{\n",
355
- " width: 100%;\n",
356
- " height: 700px;\n",
357
- " position: relative;\n",
358
- " }}\n",
359
- " </style>\n",
360
- " <script src=\"https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.3/jquery.min.js\"></script>\n",
361
- " <script src=\"https://3Dmol.csb.pitt.edu/build/3Dmol-min.js\"></script>\n",
362
- " </head>\n",
363
- " <body>\n",
364
- " <div id=\"container\" class=\"mol-container\"></div>\n",
365
- " <script>\n",
366
- " let pdb = `{mol}`; // Use template literal to properly escape PDB content\n",
367
- " $(document).ready(function () {{\n",
368
- " let element = $(\"#container\");\n",
369
- " let config = {{ backgroundColor: \"white\" }};\n",
370
- " let viewer = $3Dmol.createViewer(element, config);\n",
371
- " viewer.addModel(pdb, \"pdb\");\n",
372
- " \n",
373
- " // Reset all styles and show only selected chain\n",
374
- " viewer.getModel(0).setStyle(\n",
375
- " {{\"chain\": \"{segment}\"}}, \n",
376
- " {{ cartoon: {{ colorscheme:\"whiteCarbon\" }} }}\n",
377
- " );\n",
378
- " \n",
379
- " {high_score_script}\n",
380
- " \n",
381
- " // Add hover functionality\n",
382
- " viewer.setHoverable(\n",
383
- " {{}}, \n",
384
- " true, \n",
385
- " function(atom, viewer, event, container) {{\n",
386
- " if (!atom.label) {{\n",
387
- " atom.label = viewer.addLabel(\n",
388
- " atom.resn + \":\" + atom.atom, \n",
389
- " {{\n",
390
- " position: atom, \n",
391
- " backgroundColor: 'mintcream', \n",
392
- " fontColor: 'black',\n",
393
- " fontSize: 12,\n",
394
- " padding: 2\n",
395
- " }}\n",
396
- " );\n",
397
- " }}\n",
398
- " }},\n",
399
- " function(atom, viewer) {{\n",
400
- " if (atom.label) {{\n",
401
- " viewer.removeLabel(atom.label);\n",
402
- " delete atom.label;\n",
403
- " }}\n",
404
- " }}\n",
405
- " );\n",
406
- " \n",
407
- " viewer.zoomTo();\n",
408
- " viewer.render();\n",
409
- " viewer.zoom(0.8, 2000);\n",
410
- " }});\n",
411
- " </script>\n",
412
- " </body>\n",
413
- " </html>\n",
414
- " \"\"\"\n",
415
- " \n",
416
- " # Return the HTML content within an iframe safely encoded for special characters\n",
417
- " return f'<iframe width=\"100%\" height=\"700\" srcdoc=\"{html_content.replace(chr(34), \"&quot;\").replace(chr(39), \"&#39;\")}\"></iframe>'\n",
418
- "\n",
419
- "reps = [\n",
420
- " {\n",
421
- " \"model\": 0,\n",
422
- " \"style\": \"cartoon\",\n",
423
- " \"color\": \"whiteCarbon\",\n",
424
- " \"residue_range\": \"\",\n",
425
- " \"around\": 0,\n",
426
- " \"byres\": False,\n",
427
- " }\n",
428
- " ]\n",
429
- "\n",
430
- "# Gradio UI\n",
431
- "with gr.Blocks() as demo:\n",
432
- " gr.Markdown(\"# Protein Binding Site Prediction (Random Scores)\")\n",
433
- " with gr.Row():\n",
434
- " pdb_input = gr.Textbox(value=\"2IWI\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
435
- " visualize_btn = gr.Button(\"Visualize Structure\")\n",
436
- "\n",
437
- " molecule_output2 = Molecule3D(label=\"Protein Structure\", reps=reps)\n",
438
- "\n",
439
- " with gr.Row():\n",
440
- " pdb_input = gr.Textbox(value=\"2IWI\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
441
- " segment_input = gr.Textbox(value=\"A\", label=\"Chain ID\", placeholder=\"Enter Chain ID here...\")\n",
442
- " prediction_btn = gr.Button(\"Predict Random Binding Site Scores\")\n",
443
- "\n",
444
- " molecule_output = gr.HTML(label=\"Protein Structure\")\n",
445
- " predictions_output = gr.Textbox(label=\"Binding Site Predictions\")\n",
446
- " download_output = gr.File(label=\"Download Predictions\")\n",
447
- " \n",
448
- " visualize_btn.click(fetch_pdb, inputs=[pdb_input], outputs=molecule_output2)\n",
449
- " \n",
450
- " prediction_btn.click(process_pdb, inputs=[pdb_input, segment_input], outputs=[predictions_output, molecule_output, download_output])\n",
451
- " \n",
452
- " gr.Markdown(\"## Examples\")\n",
453
- " gr.Examples(\n",
454
- " examples=[\n",
455
- " [\"2IWI\", \"A\"],\n",
456
- " [\"7RPZ\", \"B\"],\n",
457
- " [\"3TJN\", \"C\"]\n",
458
- " ],\n",
459
- " inputs=[pdb_input, segment_input],\n",
460
- " outputs=[predictions_output, molecule_output, download_output]\n",
461
- " )\n",
462
- "\n",
463
- "demo.launch()"
464
- ]
465
- },
466
- {
467
- "cell_type": "code",
468
- "execution_count": null,
469
- "id": "517a2fe7-419f-4d0b-a9ed-62a22c1c1284",
470
- "metadata": {},
471
- "outputs": [],
472
- "source": []
473
- },
474
- {
475
- "cell_type": "code",
476
- "execution_count": 1,
477
- "id": "d62be1b5-762e-4b69-aed4-e4ba2a44482f",
478
- "metadata": {},
479
- "outputs": [
480
- {
481
- "name": "stdout",
482
- "output_type": "stream",
483
- "text": [
484
- "* Running on local URL: http://127.0.0.1:7860\n",
485
- "\n",
486
- "To create a public link, set `share=True` in `launch()`.\n"
487
- ]
488
- },
489
- {
490
- "data": {
491
- "text/html": [
492
- "<div><iframe src=\"http://127.0.0.1:7860/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
493
- ],
494
- "text/plain": [
495
- "<IPython.core.display.HTML object>"
496
- ]
497
- },
498
- "metadata": {},
499
- "output_type": "display_data"
500
- },
501
- {
502
- "data": {
503
- "text/plain": []
504
- },
505
- "execution_count": 1,
506
- "metadata": {},
507
- "output_type": "execute_result"
508
- }
509
- ],
510
- "source": [
511
- "import gradio as gr\n",
512
- "import requests\n",
513
- "from Bio.PDB import PDBParser\n",
514
- "import numpy as np\n",
515
- "import os\n",
516
- "from gradio_molecule3d import Molecule3D\n",
517
- "\n",
518
- "def read_mol(pdb_path):\n",
519
- " \"\"\"Read PDB file and return its content as a string\"\"\"\n",
520
- " with open(pdb_path, 'r') as f:\n",
521
- " return f.read()\n",
522
- "\n",
523
- "def fetch_pdb(pdb_id):\n",
524
- " pdb_url = f'https://files.rcsb.org/download/{pdb_id}.pdb'\n",
525
- " pdb_path = f'{pdb_id}.pdb'\n",
526
- " response = requests.get(pdb_url)\n",
527
- " if response.status_code == 200:\n",
528
- " with open(pdb_path, 'wb') as f:\n",
529
- " f.write(response.content)\n",
530
- " return pdb_path\n",
531
- " else:\n",
532
- " return None\n",
533
- "\n",
534
- "def process_pdb(pdb_id, segment):\n",
535
- " pdb_path = fetch_pdb(pdb_id)\n",
536
- " if not pdb_path:\n",
537
- " return \"Failed to fetch PDB file\", None, None\n",
538
- " \n",
539
- " parser = PDBParser(QUIET=1)\n",
540
- " structure = parser.get_structure('protein', pdb_path)\n",
541
- " \n",
542
- " try:\n",
543
- " chain = structure[0][segment]\n",
544
- " except KeyError:\n",
545
- " return \"Invalid Chain ID\", None, None\n",
546
- " \n",
547
- " # Comprehensive amino acid mapping\n",
548
- " aa_dict = {\n",
549
- " 'ALA': 'A', 'CYS': 'C', 'ASP': 'D', 'GLU': 'E', 'PHE': 'F',\n",
550
- " 'GLY': 'G', 'HIS': 'H', 'ILE': 'I', 'LYS': 'K', 'LEU': 'L',\n",
551
- " 'MET': 'M', 'ASN': 'N', 'PRO': 'P', 'GLN': 'Q', 'ARG': 'R',\n",
552
- " 'SER': 'S', 'THR': 'T', 'VAL': 'V', 'TRP': 'W', 'TYR': 'Y',\n",
553
- " 'MSE': 'M', 'SEP': 'S', 'TPO': 'T', 'CSO': 'C', 'PTR': 'Y', 'HYP': 'P'\n",
554
- " }\n",
555
- " \n",
556
- " # Exclude non-amino acid residues\n",
557
- " sequence = [\n",
558
- " residue for residue in chain \n",
559
- " if residue.get_resname().strip() in aa_dict\n",
560
- " ]\n",
561
- " \n",
562
- " random_scores = np.random.rand(len(sequence))\n",
563
- " result_str = \"\\n\".join(\n",
564
- " f\"{aa_dict[res.get_resname()]} {res.id[1]} {score:.2f}\" \n",
565
- " for res, score in zip(sequence, random_scores)\n",
566
- " )\n",
567
- " \n",
568
- " # Save the predictions to a file\n",
569
- " prediction_file = f\"{pdb_id}_predictions.txt\"\n",
570
- " with open(prediction_file, \"w\") as f:\n",
571
- " f.write(result_str)\n",
572
- " \n",
573
- " return result_str, molecule(pdb_path, random_scores, segment), prediction_file\n",
574
- "\n",
575
- "def molecule(input_pdb, scores=None, segment='A'):\n",
576
- " mol = read_mol(input_pdb) # Read PDB file content\n",
577
- " \n",
578
- " # Prepare high-scoring residues script if scores are provided\n",
579
- " high_score_script = \"\"\n",
580
- " if scores is not None:\n",
581
- " high_score_script = \"\"\"\n",
582
- " // Reset all styles first\n",
583
- " viewer.getModel(0).setStyle({}, {});\n",
584
- " \n",
585
- " // Show only the selected chain\n",
586
- " viewer.getModel(0).setStyle(\n",
587
- " {\"chain\": \"%s\"}, \n",
588
- " { cartoon: {colorscheme:\"whiteCarbon\"} }\n",
589
- " );\n",
590
- " \n",
591
- " // Highlight high-scoring residues only for the selected chain\n",
592
- " let highScoreResidues = [%s];\n",
593
- " viewer.getModel(0).setStyle(\n",
594
- " {\"chain\": \"%s\", \"resi\": highScoreResidues}, \n",
595
- " {\"stick\": {\"color\": \"red\"}}\n",
596
- " );\n",
597
- "\n",
598
- " // Highlight high-scoring residues only for the selected chain\n",
599
- " let highScoreResidues2 = [%s];\n",
600
- " viewer.getModel(0).setStyle(\n",
601
- " {\"chain\": \"%s\", \"resi\": highScoreResidues2}, \n",
602
- " {\"stick\": {\"color\": \"orange\"}}\n",
603
- " );\n",
604
- " \"\"\" % (segment, \n",
605
- " \", \".join(str(i+1) for i, score in enumerate(scores) if score > 0.8),\n",
606
- " segment,\n",
607
- " \", \".join(str(i+1) for i, score in enumerate(scores) if (score > 0.5) and (score < 0.8)),\n",
608
- " segment)\n",
609
- " \n",
610
- " html_content = f\"\"\"\n",
611
- " <!DOCTYPE html>\n",
612
- " <html>\n",
613
- " <head> \n",
614
- " <meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\" />\n",
615
- " <style>\n",
616
- " .mol-container {{\n",
617
- " width: 100%;\n",
618
- " height: 700px;\n",
619
- " position: relative;\n",
620
- " }}\n",
621
- " </style>\n",
622
- " <script src=\"https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.3/jquery.min.js\"></script>\n",
623
- " <script src=\"https://3Dmol.csb.pitt.edu/build/3Dmol-min.js\"></script>\n",
624
- " </head>\n",
625
- " <body>\n",
626
- " <div id=\"container\" class=\"mol-container\"></div>\n",
627
- " <script>\n",
628
- " let pdb = `{mol}`; // Use template literal to properly escape PDB content\n",
629
- " $(document).ready(function () {{\n",
630
- " let element = $(\"#container\");\n",
631
- " let config = {{ backgroundColor: \"white\" }};\n",
632
- " let viewer = $3Dmol.createViewer(element, config);\n",
633
- " viewer.addModel(pdb, \"pdb\");\n",
634
- " \n",
635
- " // Reset all styles and show only selected chain\n",
636
- " viewer.getModel(0).setStyle(\n",
637
- " {{\"chain\": \"{segment}\"}}, \n",
638
- " {{ cartoon: {{ colorscheme:\"whiteCarbon\" }} }}\n",
639
- " );\n",
640
- " \n",
641
- " {high_score_script}\n",
642
- " \n",
643
- " // Add hover functionality\n",
644
- " viewer.setHoverable(\n",
645
- " {{}}, \n",
646
- " true, \n",
647
- " function(atom, viewer, event, container) {{\n",
648
- " if (!atom.label) {{\n",
649
- " atom.label = viewer.addLabel(\n",
650
- " atom.resn + \":\" +atom.resi + \":\" + atom.atom, \n",
651
- " {{\n",
652
- " position: atom, \n",
653
- " backgroundColor: 'mintcream', \n",
654
- " fontColor: 'black',\n",
655
- " fontSize: 12,\n",
656
- " padding: 2\n",
657
- " }}\n",
658
- " );\n",
659
- " }}\n",
660
- " }},\n",
661
- " function(atom, viewer) {{\n",
662
- " if (atom.label) {{\n",
663
- " viewer.removeLabel(atom.label);\n",
664
- " delete atom.label;\n",
665
- " }}\n",
666
- " }}\n",
667
- " );\n",
668
- " \n",
669
- " viewer.zoomTo();\n",
670
- " viewer.render();\n",
671
- " viewer.zoom(0.8, 2000);\n",
672
- " }});\n",
673
- " </script>\n",
674
- " </body>\n",
675
- " </html>\n",
676
- " \"\"\"\n",
677
- " \n",
678
- " # Return the HTML content within an iframe safely encoded for special characters\n",
679
- " return f'<iframe width=\"100%\" height=\"700\" srcdoc=\"{html_content.replace(chr(34), \"&quot;\").replace(chr(39), \"&#39;\")}\"></iframe>'\n",
680
- "\n",
681
- "reps = [\n",
682
- " {\n",
683
- " \"model\": 0,\n",
684
- " \"style\": \"cartoon\",\n",
685
- " \"color\": \"whiteCarbon\",\n",
686
- " \"residue_range\": \"\",\n",
687
- " \"around\": 0,\n",
688
- " \"byres\": False,\n",
689
- " }\n",
690
- " ]\n",
691
- "\n",
692
- "# Gradio UI\n",
693
- "with gr.Blocks() as demo:\n",
694
- " gr.Markdown(\"# Protein Binding Site Prediction (Random Scores)\")\n",
695
- " with gr.Row():\n",
696
- " pdb_input = gr.Textbox(value=\"2IWI\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
697
- " visualize_btn = gr.Button(\"Visualize Structure\")\n",
698
- "\n",
699
- " molecule_output2 = Molecule3D(label=\"Protein Structure\", reps=reps)\n",
700
- "\n",
701
- " with gr.Row():\n",
702
- " pdb_input = gr.Textbox(value=\"2IWI\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
703
- " segment_input = gr.Textbox(value=\"A\", label=\"Chain ID\", placeholder=\"Enter Chain ID here...\")\n",
704
- " prediction_btn = gr.Button(\"Predict Random Binding Site Scores\")\n",
705
- "\n",
706
- " molecule_output = gr.HTML(label=\"Protein Structure\")\n",
707
- " predictions_output = gr.Textbox(label=\"Binding Site Predictions\")\n",
708
- " download_output = gr.File(label=\"Download Predictions\")\n",
709
- " \n",
710
- " visualize_btn.click(fetch_pdb, inputs=[pdb_input], outputs=molecule_output2)\n",
711
- " \n",
712
- " prediction_btn.click(process_pdb, inputs=[pdb_input, segment_input], outputs=[predictions_output, molecule_output, download_output])\n",
713
- " \n",
714
- " gr.Markdown(\"## Examples\")\n",
715
- " gr.Examples(\n",
716
- " examples=[\n",
717
- " [\"2IWI\", \"A\"],\n",
718
- " [\"7RPZ\", \"B\"],\n",
719
- " [\"3TJN\", \"C\"]\n",
720
- " ],\n",
721
- " inputs=[pdb_input, segment_input],\n",
722
- " outputs=[predictions_output, molecule_output, download_output]\n",
723
- " )\n",
724
- "\n",
725
- "demo.launch()"
726
- ]
727
- },
728
- {
729
- "cell_type": "code",
730
- "execution_count": 6,
731
- "id": "30f35243-852f-4771-9a4b-5cdd198552b5",
732
- "metadata": {},
733
- "outputs": [
734
- {
735
- "name": "stdout",
736
- "output_type": "stream",
737
- "text": [
738
- "* Running on local URL: http://127.0.0.1:7865\n",
739
- "\n",
740
- "To create a public link, set `share=True` in `launch()`.\n"
741
- ]
742
- },
743
- {
744
- "data": {
745
- "text/html": [
746
- "<div><iframe src=\"http://127.0.0.1:7865/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
747
- ],
748
- "text/plain": [
749
- "<IPython.core.display.HTML object>"
750
- ]
751
- },
752
- "metadata": {},
753
- "output_type": "display_data"
754
- },
755
- {
756
- "data": {
757
- "text/plain": []
758
- },
759
- "execution_count": 6,
760
- "metadata": {},
761
- "output_type": "execute_result"
762
- }
763
- ],
764
- "source": [
765
- "import gradio as gr\n",
766
- "import requests\n",
767
- "from Bio.PDB import PDBParser\n",
768
- "import numpy as np\n",
769
- "import os\n",
770
- "from gradio_molecule3d import Molecule3D\n",
771
- "\n",
772
- "def read_mol(pdb_path):\n",
773
- " \"\"\"Read PDB file and return its content as a string\"\"\"\n",
774
- " with open(pdb_path, 'r') as f:\n",
775
- " return f.read()\n",
776
- "\n",
777
- "def fetch_pdb(pdb_id):\n",
778
- " pdb_url = f'https://files.rcsb.org/download/{pdb_id}.pdb'\n",
779
- " pdb_path = f'{pdb_id}.pdb'\n",
780
- " response = requests.get(pdb_url)\n",
781
- " if response.status_code == 200:\n",
782
- " with open(pdb_path, 'wb') as f:\n",
783
- " f.write(response.content)\n",
784
- " return pdb_path\n",
785
- " else:\n",
786
- " return None\n",
787
- "\n",
788
- "def process_pdb(pdb_id, segment):\n",
789
- " pdb_path = fetch_pdb(pdb_id)\n",
790
- " if not pdb_path:\n",
791
- " return \"Failed to fetch PDB file\", None, None\n",
792
- " \n",
793
- " parser = PDBParser(QUIET=1)\n",
794
- " structure = parser.get_structure('protein', pdb_path)\n",
795
- " \n",
796
- " try:\n",
797
- " chain = structure[0][segment]\n",
798
- " except KeyError:\n",
799
- " return \"Invalid Chain ID\", None, None\n",
800
- " \n",
801
- " # Comprehensive amino acid mapping\n",
802
- " aa_dict = {\n",
803
- " 'ALA': 'A', 'CYS': 'C', 'ASP': 'D', 'GLU': 'E', 'PHE': 'F',\n",
804
- " 'GLY': 'G', 'HIS': 'H', 'ILE': 'I', 'LYS': 'K', 'LEU': 'L',\n",
805
- " 'MET': 'M', 'ASN': 'N', 'PRO': 'P', 'GLN': 'Q', 'ARG': 'R',\n",
806
- " 'SER': 'S', 'THR': 'T', 'VAL': 'V', 'TRP': 'W', 'TYR': 'Y',\n",
807
- " 'MSE': 'M', 'SEP': 'S', 'TPO': 'T', 'CSO': 'C', 'PTR': 'Y', 'HYP': 'P'\n",
808
- " }\n",
809
- " \n",
810
- " # Exclude non-amino acid residues and create a list of (resi, score) pairs\n",
811
- " sequence = [\n",
812
- " (res.id[1], res) for res in chain\n",
813
- " if res.get_resname().strip() in aa_dict\n",
814
- " ]\n",
815
- "\n",
816
- " random_scores = np.random.rand(len(sequence))\n",
817
- " \n",
818
- " # Zip residues with scores to track the residue ID and score\n",
819
- " residue_scores = [(resi, score) for (resi, _), score in zip(sequence, random_scores)]\n",
820
- " \n",
821
- " result_str = \"\\n\".join(\n",
822
- " f\"{aa_dict[chain[resi].get_resname()]} {resi} {score:.2f}\"\n",
823
- " for resi, score in residue_scores\n",
824
- " )\n",
825
- " \n",
826
- " # Save the predictions to a file\n",
827
- " prediction_file = f\"{pdb_id}_predictions.txt\"\n",
828
- " with open(prediction_file, \"w\") as f:\n",
829
- " f.write(result_str)\n",
830
- " \n",
831
- " return result_str, molecule(pdb_path, residue_scores, segment), prediction_file\n",
832
- "\n",
833
- "def molecule(input_pdb, residue_scores=None, segment='A'):\n",
834
- " mol = read_mol(input_pdb) # Read PDB file content\n",
835
- " \n",
836
- " # Prepare high-scoring residues script if scores are provided\n",
837
- " high_score_script = \"\"\n",
838
- " if residue_scores is not None:\n",
839
- " # Sort residues based on their scores\n",
840
- " high_score_residues = [resi for resi, score in residue_scores if score > 0.9]\n",
841
- " mid_score_residues = [resi for resi, score in residue_scores if 0.8 < score <= 0.9]\n",
842
- " \n",
843
- " high_score_script = \"\"\"\n",
844
- " // Reset all styles first\n",
845
- " viewer.getModel(0).setStyle({}, {});\n",
846
- " \n",
847
- " // Show only the selected chain\n",
848
- " viewer.getModel(0).setStyle(\n",
849
- " {\"chain\": \"%s\"}, \n",
850
- " { cartoon: {colorscheme:\"whiteCarbon\"} }\n",
851
- " );\n",
852
- " \n",
853
- " // Highlight high-scoring residues only for the selected chain\n",
854
- " let highScoreResidues = [%s];\n",
855
- " viewer.getModel(0).setStyle(\n",
856
- " {\"chain\": \"%s\", \"resi\": highScoreResidues}, \n",
857
- " {\"stick\": {\"color\": \"red\"}}\n",
858
- " );\n",
859
- "\n",
860
- " // Highlight medium-scoring residues only for the selected chain\n",
861
- " let midScoreResidues = [%s];\n",
862
- " viewer.getModel(0).setStyle(\n",
863
- " {\"chain\": \"%s\", \"resi\": midScoreResidues}, \n",
864
- " {\"stick\": {\"color\": \"orange\"}}\n",
865
- " );\n",
866
- " \"\"\" % (segment, \n",
867
- " \", \".join(str(resi) for resi in high_score_residues),\n",
868
- " segment,\n",
869
- " \", \".join(str(resi) for resi in mid_score_residues),\n",
870
- " segment)\n",
871
- " \n",
872
- " html_content = f\"\"\"\n",
873
- " <!DOCTYPE html>\n",
874
- " <html>\n",
875
- " <head> \n",
876
- " <meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\" />\n",
877
- " <style>\n",
878
- " .mol-container {{\n",
879
- " width: 100%;\n",
880
- " height: 700px;\n",
881
- " position: relative;\n",
882
- " }}\n",
883
- " </style>\n",
884
- " <script src=\"https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.3/jquery.min.js\"></script>\n",
885
- " <script src=\"https://3Dmol.csb.pitt.edu/build/3Dmol-min.js\"></script>\n",
886
- " </head>\n",
887
- " <body>\n",
888
- " <div id=\"container\" class=\"mol-container\"></div>\n",
889
- " <script>\n",
890
- " let pdb = `{mol}`; // Use template literal to properly escape PDB content\n",
891
- " $(document).ready(function () {{\n",
892
- " let element = $(\"#container\");\n",
893
- " let config = {{ backgroundColor: \"white\" }};\n",
894
- " let viewer = $3Dmol.createViewer(element, config);\n",
895
- " viewer.addModel(pdb, \"pdb\");\n",
896
- " \n",
897
- " // Reset all styles and show only selected chain\n",
898
- " viewer.getModel(0).setStyle(\n",
899
- " {{\"chain\": \"{segment}\"}}, \n",
900
- " {{ cartoon: {{ colorscheme:\"whiteCarbon\" }} }}\n",
901
- " );\n",
902
- " \n",
903
- " {high_score_script}\n",
904
- " \n",
905
- " // Add hover functionality\n",
906
- " viewer.setHoverable(\n",
907
- " {{}}, \n",
908
- " true, \n",
909
- " function(atom, viewer, event, container) {{\n",
910
- " if (!atom.label) {{\n",
911
- " atom.label = viewer.addLabel(\n",
912
- " atom.resn + \":\" +atom.resi + \":\" + atom.atom, \n",
913
- " {{\n",
914
- " position: atom, \n",
915
- " backgroundColor: 'mintcream', \n",
916
- " fontColor: 'black',\n",
917
- " fontSize: 12,\n",
918
- " padding: 2\n",
919
- " }}\n",
920
- " );\n",
921
- " }}\n",
922
- " }},\n",
923
- " function(atom, viewer) {{\n",
924
- " if (atom.label) {{\n",
925
- " viewer.removeLabel(atom.label);\n",
926
- " delete atom.label;\n",
927
- " }}\n",
928
- " }}\n",
929
- " );\n",
930
- " \n",
931
- " viewer.zoomTo();\n",
932
- " viewer.render();\n",
933
- " viewer.zoom(0.8, 2000);\n",
934
- " }});\n",
935
- " </script>\n",
936
- " </body>\n",
937
- " </html>\n",
938
- " \"\"\"\n",
939
- " \n",
940
- " # Return the HTML content within an iframe safely encoded for special characters\n",
941
- " return f'<iframe width=\"100%\" height=\"700\" srcdoc=\"{html_content.replace(chr(34), \"&quot;\").replace(chr(39), \"&#39;\")}\"></iframe>'\n",
942
- "\n",
943
- "reps = [\n",
944
- " {\n",
945
- " \"model\": 0,\n",
946
- " \"style\": \"cartoon\",\n",
947
- " \"color\": \"whiteCarbon\",\n",
948
- " \"residue_range\": \"\",\n",
949
- " \"around\": 0,\n",
950
- " \"byres\": False,\n",
951
- " }\n",
952
- " ]\n",
953
- "\n",
954
- "# Gradio UI\n",
955
- "with gr.Blocks() as demo:\n",
956
- " gr.Markdown(\"# Protein Binding Site Prediction (Random Scores)\")\n",
957
- " with gr.Row():\n",
958
- " pdb_input = gr.Textbox(value=\"2IWI\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
959
- " visualize_btn = gr.Button(\"Visualize Structure\")\n",
960
- "\n",
961
- " molecule_output2 = Molecule3D(label=\"Protein Structure\", reps=reps)\n",
962
- "\n",
963
- " with gr.Row():\n",
964
- " #pdb_input = gr.Textbox(value=\"2IWI\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
965
- " segment_input = gr.Textbox(value=\"A\", label=\"Chain ID\", placeholder=\"Enter Chain ID here...\")\n",
966
- " prediction_btn = gr.Button(\"Predict Random Binding Site Scores\")\n",
967
- "\n",
968
- " molecule_output = gr.HTML(label=\"Protein Structure\")\n",
969
- " predictions_output = gr.Textbox(label=\"Binding Site Predictions\")\n",
970
- " download_output = gr.File(label=\"Download Predictions\")\n",
971
- " \n",
972
- " visualize_btn.click(fetch_pdb, inputs=[pdb_input], outputs=molecule_output2)\n",
973
- " \n",
974
- " prediction_btn.click(process_pdb, inputs=[pdb_input, segment_input], outputs=[predictions_output, molecule_output, download_output])\n",
975
- " \n",
976
- " gr.Markdown(\"## Examples\")\n",
977
- " gr.Examples(\n",
978
- " examples=[\n",
979
- " [\"7RPZ\", \"A\"],\n",
980
- " [\"2IWI\", \"B\"],\n",
981
- " [\"2F6V\", \"A\"]\n",
982
- " ],\n",
983
- " inputs=[pdb_input, segment_input],\n",
984
- " outputs=[predictions_output, molecule_output, download_output]\n",
985
- " )\n",
986
- "\n",
987
- "demo.launch()"
988
- ]
989
- },
990
- {
991
- "cell_type": "code",
992
- "execution_count": null,
993
- "id": "6f17feec-0347-4f9d-acd4-ae681c3ed425",
994
- "metadata": {},
995
- "outputs": [],
996
- "source": []
997
- },
998
- {
999
- "cell_type": "code",
1000
- "execution_count": null,
1001
- "id": "63201f38-adde-4b12-a8d3-f23474d045cf",
1002
- "metadata": {},
1003
- "outputs": [],
1004
- "source": []
1005
- },
1006
- {
1007
- "cell_type": "code",
1008
- "execution_count": null,
1009
- "id": "5ccbf398-5ef2-4955-98db-99f904f8daa4",
1010
- "metadata": {},
1011
- "outputs": [],
1012
- "source": []
1013
- },
1014
- {
1015
- "cell_type": "code",
1016
- "execution_count": null,
1017
- "id": "4c61bac4-4f2e-4f4a-aa1f-30dca209747c",
1018
- "metadata": {},
1019
- "outputs": [],
1020
- "source": [
1021
- "import gradio as gr\n",
1022
- "import requests\n",
1023
- "from Bio.PDB import PDBParser\n",
1024
- "import numpy as np\n",
1025
- "import os\n",
1026
- "from gradio_molecule3d import Molecule3D\n",
1027
- "\n",
1028
- "\n",
1029
- "from model_loader import load_model\n",
1030
- "\n",
1031
- "import torch\n",
1032
- "import torch.nn as nn\n",
1033
- "import torch.nn.functional as F\n",
1034
- "from torch.utils.data import DataLoader\n",
1035
- "\n",
1036
- "import re\n",
1037
- "import pandas as pd\n",
1038
- "import copy\n",
1039
- "\n",
1040
- "import transformers, datasets\n",
1041
- "from transformers import AutoTokenizer\n",
1042
- "from transformers import DataCollatorForTokenClassification\n",
1043
- "\n",
1044
- "from datasets import Dataset\n",
1045
- "\n",
1046
- "from scipy.special import expit\n",
1047
- "\n",
1048
- "# Load model and move to device\n",
1049
- "checkpoint = 'ThorbenF/prot_t5_xl_uniref50'\n",
1050
- "max_length = 1500\n",
1051
- "model, tokenizer = load_model(checkpoint, max_length)\n",
1052
- "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
1053
- "model.to(device)\n",
1054
- "model.eval()\n",
1055
- "\n",
1056
- "def normalize_scores(scores):\n",
1057
- " min_score = np.min(scores)\n",
1058
- " max_score = np.max(scores)\n",
1059
- " return (scores - min_score) / (max_score - min_score) if max_score > min_score else scores\n",
1060
- " \n",
1061
- "def read_mol(pdb_path):\n",
1062
- " \"\"\"Read PDB file and return its content as a string\"\"\"\n",
1063
- " with open(pdb_path, 'r') as f:\n",
1064
- " return f.read()\n",
1065
- "\n",
1066
- "def fetch_pdb(pdb_id):\n",
1067
- " pdb_url = f'https://files.rcsb.org/download/{pdb_id}.pdb'\n",
1068
- " pdb_path = f'{pdb_id}.pdb'\n",
1069
- " response = requests.get(pdb_url)\n",
1070
- " if response.status_code == 200:\n",
1071
- " with open(pdb_path, 'wb') as f:\n",
1072
- " f.write(response.content)\n",
1073
- " return pdb_path\n",
1074
- " else:\n",
1075
- " return None\n",
1076
- "\n",
1077
- "def process_pdb(pdb_id, segment):\n",
1078
- " pdb_path = fetch_pdb(pdb_id)\n",
1079
- " if not pdb_path:\n",
1080
- " return \"Failed to fetch PDB file\", None, None\n",
1081
- " \n",
1082
- " parser = PDBParser(QUIET=1)\n",
1083
- " structure = parser.get_structure('protein', pdb_path)\n",
1084
- " \n",
1085
- " try:\n",
1086
- " chain = structure[0][segment]\n",
1087
- " except KeyError:\n",
1088
- " return \"Invalid Chain ID\", None, None\n",
1089
- " \n",
1090
- " \n",
1091
- " aa_dict = {\n",
1092
- " 'ALA': 'A', 'CYS': 'C', 'ASP': 'D', 'GLU': 'E', 'PHE': 'F',\n",
1093
- " 'GLY': 'G', 'HIS': 'H', 'ILE': 'I', 'LYS': 'K', 'LEU': 'L',\n",
1094
- " 'MET': 'M', 'ASN': 'N', 'PRO': 'P', 'GLN': 'Q', 'ARG': 'R',\n",
1095
- " 'SER': 'S', 'THR': 'T', 'VAL': 'V', 'TRP': 'W', 'TYR': 'Y',\n",
1096
- " 'MSE': 'M', 'SEP': 'S', 'TPO': 'T', 'CSO': 'C', 'PTR': 'Y', 'HYP': 'P'\n",
1097
- " }\n",
1098
- " \n",
1099
- " # Exclude non-amino acid residues\n",
1100
- " sequence = \"\".join(\n",
1101
- " aa_dict[residue.get_resname().strip()] \n",
1102
- " for residue in chain \n",
1103
- " if residue.get_resname().strip() in aa_dict\n",
1104
- " )\n",
1105
- " sequence2 = [\n",
1106
- " (res.id[1], res) for res in chain\n",
1107
- " if res.get_resname().strip() in aa_dict\n",
1108
- " ]\n",
1109
- " \n",
1110
- " # Prepare input for model prediction\n",
1111
- " input_ids = tokenizer(\" \".join(sequence), return_tensors=\"pt\").input_ids.to(device)\n",
1112
- " with torch.no_grad():\n",
1113
- " outputs = model(input_ids).logits.detach().cpu().numpy().squeeze()\n",
1114
- "\n",
1115
- " # Calculate scores and normalize them\n",
1116
- " scores = expit(outputs[:, 1] - outputs[:, 0])\n",
1117
- " normalized_scores = normalize_scores(scores)\n",
1118
- "\n",
1119
- " # Zip residues with scores to track the residue ID and score\n",
1120
- " residue_scores = [(resi, score) for (resi, _), score in zip(sequence2, normalized_scores)]\n",
1121
- " \n",
1122
- " result_str = \"\\n\".join([\n",
1123
- " f\"{res.get_resname()} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}\" \n",
1124
- " for i, res in enumerate(chain) if res.get_resname().strip() in aa_dict\n",
1125
- " ])\n",
1126
- " \n",
1127
- " # Save the predictions to a file\n",
1128
- " prediction_file = f\"{pdb_id}_predictions.txt\"\n",
1129
- " with open(prediction_file, \"w\") as f:\n",
1130
- " f.write(result_str)\n",
1131
- " \n",
1132
- " return result_str, molecule(pdb_path, residue_scores, segment), prediction_file\n",
1133
- "\n",
1134
- "def molecule(input_pdb, residue_scores=None, segment='A'):\n",
1135
- " mol = read_mol(input_pdb) # Read PDB file content\n",
1136
- " \n",
1137
- " # Prepare high-scoring residues script if scores are provided\n",
1138
- " high_score_script = \"\"\n",
1139
- " if residue_scores is not None:\n",
1140
- " # Sort residues based on their scores\n",
1141
- " high_score_residues = [resi for resi, score in residue_scores if score > 0.75]\n",
1142
- " mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]\n",
1143
- " \n",
1144
- " high_score_script = \"\"\"\n",
1145
- " // Reset all styles first\n",
1146
- " viewer.getModel(0).setStyle({}, {});\n",
1147
- " \n",
1148
- " // Show only the selected chain\n",
1149
- " viewer.getModel(0).setStyle(\n",
1150
- " {\"chain\": \"%s\"}, \n",
1151
- " { cartoon: {colorscheme:\"whiteCarbon\"} }\n",
1152
- " );\n",
1153
- " \n",
1154
- " // Highlight high-scoring residues only for the selected chain\n",
1155
- " let highScoreResidues = [%s];\n",
1156
- " viewer.getModel(0).setStyle(\n",
1157
- " {\"chain\": \"%s\", \"resi\": highScoreResidues}, \n",
1158
- " {\"stick\": {\"color\": \"red\"}}\n",
1159
- " );\n",
1160
- "\n",
1161
- " // Highlight medium-scoring residues only for the selected chain\n",
1162
- " let midScoreResidues = [%s];\n",
1163
- " viewer.getModel(0).setStyle(\n",
1164
- " {\"chain\": \"%s\", \"resi\": midScoreResidues}, \n",
1165
- " {\"stick\": {\"color\": \"orange\"}}\n",
1166
- " );\n",
1167
- " \"\"\" % (segment, \n",
1168
- " \", \".join(str(resi) for resi in high_score_residues),\n",
1169
- " segment,\n",
1170
- " \", \".join(str(resi) for resi in mid_score_residues),\n",
1171
- " segment)\n",
1172
- " \n",
1173
- " html_content = f\"\"\"\n",
1174
- " <!DOCTYPE html>\n",
1175
- " <html>\n",
1176
- " <head> \n",
1177
- " <meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\" />\n",
1178
- " <style>\n",
1179
- " .mol-container {{\n",
1180
- " width: 100%;\n",
1181
- " height: 700px;\n",
1182
- " position: relative;\n",
1183
- " }}\n",
1184
- " </style>\n",
1185
- " <script src=\"https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.3/jquery.min.js\"></script>\n",
1186
- " <script src=\"https://3Dmol.csb.pitt.edu/build/3Dmol-min.js\"></script>\n",
1187
- " </head>\n",
1188
- " <body>\n",
1189
- " <div id=\"container\" class=\"mol-container\"></div>\n",
1190
- " <script>\n",
1191
- " let pdb = `{mol}`; // Use template literal to properly escape PDB content\n",
1192
- " $(document).ready(function () {{\n",
1193
- " let element = $(\"#container\");\n",
1194
- " let config = {{ backgroundColor: \"white\" }};\n",
1195
- " let viewer = $3Dmol.createViewer(element, config);\n",
1196
- " viewer.addModel(pdb, \"pdb\");\n",
1197
- " \n",
1198
- " // Reset all styles and show only selected chain\n",
1199
- " viewer.getModel(0).setStyle(\n",
1200
- " {{\"chain\": \"{segment}\"}}, \n",
1201
- " {{ cartoon: {{ colorscheme:\"whiteCarbon\" }} }}\n",
1202
- " );\n",
1203
- " \n",
1204
- " {high_score_script}\n",
1205
- " \n",
1206
- " // Add hover functionality\n",
1207
- " viewer.setHoverable(\n",
1208
- " {{}}, \n",
1209
- " true, \n",
1210
- " function(atom, viewer, event, container) {{\n",
1211
- " if (!atom.label) {{\n",
1212
- " atom.label = viewer.addLabel(\n",
1213
- " atom.resn + \":\" +atom.resi + \":\" + atom.atom, \n",
1214
- " {{\n",
1215
- " position: atom, \n",
1216
- " backgroundColor: 'mintcream', \n",
1217
- " fontColor: 'black',\n",
1218
- " fontSize: 12,\n",
1219
- " padding: 2\n",
1220
- " }}\n",
1221
- " );\n",
1222
- " }}\n",
1223
- " }},\n",
1224
- " function(atom, viewer) {{\n",
1225
- " if (atom.label) {{\n",
1226
- " viewer.removeLabel(atom.label);\n",
1227
- " delete atom.label;\n",
1228
- " }}\n",
1229
- " }}\n",
1230
- " );\n",
1231
- " \n",
1232
- " viewer.zoomTo();\n",
1233
- " viewer.render();\n",
1234
- " viewer.zoom(0.8, 2000);\n",
1235
- " }});\n",
1236
- " </script>\n",
1237
- " </body>\n",
1238
- " </html>\n",
1239
- " \"\"\"\n",
1240
- " \n",
1241
- " # Return the HTML content within an iframe safely encoded for special characters\n",
1242
- " return f'<iframe width=\"100%\" height=\"700\" srcdoc=\"{html_content.replace(chr(34), \"&quot;\").replace(chr(39), \"&#39;\")}\"></iframe>'\n",
1243
- "\n",
1244
- "reps = [\n",
1245
- " {\n",
1246
- " \"model\": 0,\n",
1247
- " \"style\": \"cartoon\",\n",
1248
- " \"color\": \"whiteCarbon\",\n",
1249
- " \"residue_range\": \"\",\n",
1250
- " \"around\": 0,\n",
1251
- " \"byres\": False,\n",
1252
- " }\n",
1253
- " ]\n",
1254
- "\n",
1255
- "# Gradio UI\n",
1256
- "with gr.Blocks() as demo:\n",
1257
- " gr.Markdown(\"# Protein Binding Site Prediction (Random Scores)\")\n",
1258
- " with gr.Row():\n",
1259
- " pdb_input = gr.Textbox(value=\"2IWI\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
1260
- " visualize_btn = gr.Button(\"Visualize Structure\")\n",
1261
- "\n",
1262
- " molecule_output2 = Molecule3D(label=\"Protein Structure\", reps=reps)\n",
1263
- "\n",
1264
- " with gr.Row():\n",
1265
- " #pdb_input = gr.Textbox(value=\"2IWI\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
1266
- " segment_input = gr.Textbox(value=\"A\", label=\"Chain ID\", placeholder=\"Enter Chain ID here...\")\n",
1267
- " prediction_btn = gr.Button(\"Predict Random Binding Site Scores\")\n",
1268
- "\n",
1269
- " molecule_output = gr.HTML(label=\"Protein Structure\")\n",
1270
- " predictions_output = gr.Textbox(label=\"Binding Site Predictions\")\n",
1271
- " download_output = gr.File(label=\"Download Predictions\")\n",
1272
- " \n",
1273
- " visualize_btn.click(fetch_pdb, inputs=[pdb_input], outputs=molecule_output2)\n",
1274
- " \n",
1275
- " prediction_btn.click(process_pdb, inputs=[pdb_input, segment_input], outputs=[predictions_output, molecule_output, download_output])\n",
1276
- " \n",
1277
- " gr.Markdown(\"## Examples\")\n",
1278
- " gr.Examples(\n",
1279
- " examples=[\n",
1280
- " [\"7RPZ\", \"A\"],\n",
1281
- " [\"2IWI\", \"B\"],\n",
1282
- " [\"2F6V\", \"A\"]\n",
1283
- " ],\n",
1284
- " inputs=[pdb_input, segment_input],\n",
1285
- " outputs=[predictions_output, molecule_output, download_output]\n",
1286
- " )\n",
1287
- "\n",
1288
- "demo.launch(share=True)"
1289
- ]
1290
- },
1291
- {
1292
- "cell_type": "code",
1293
- "execution_count": null,
1294
- "id": "b61d06ec-a4ee-4f65-925f-d2688730416a",
1295
- "metadata": {},
1296
- "outputs": [],
1297
- "source": []
1298
- },
1299
- {
1300
- "cell_type": "code",
1301
- "execution_count": null,
1302
- "id": "4d67d69f-1f53-4bcc-8905-8d29384c4e20",
1303
- "metadata": {},
1304
- "outputs": [],
1305
- "source": [
1306
- "import gradio as gr\n",
1307
- "import requests\n",
1308
- "from Bio.PDB import PDBParser\n",
1309
- "import numpy as np\n",
1310
- "import os\n",
1311
- "from gradio_molecule3d import Molecule3D\n",
1312
- "\n",
1313
- "\n",
1314
- "from model_loader import load_model\n",
1315
- "\n",
1316
- "import torch\n",
1317
- "import torch.nn as nn\n",
1318
- "import torch.nn.functional as F\n",
1319
- "from torch.utils.data import DataLoader\n",
1320
- "\n",
1321
- "import re\n",
1322
- "import pandas as pd\n",
1323
- "import copy\n",
1324
- "\n",
1325
- "import transformers, datasets\n",
1326
- "from transformers import AutoTokenizer\n",
1327
- "from transformers import DataCollatorForTokenClassification\n",
1328
- "\n",
1329
- "from datasets import Dataset\n",
1330
- "\n",
1331
- "from scipy.special import expit\n",
1332
- "\n",
1333
- "# Load model and move to device\n",
1334
- "checkpoint = 'ThorbenF/prot_t5_xl_uniref50'\n",
1335
- "max_length = 1500\n",
1336
- "model, tokenizer = load_model(checkpoint, max_length)\n",
1337
- "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
1338
- "model.to(device)\n",
1339
- "model.eval()\n",
1340
- "\n",
1341
- "def normalize_scores(scores):\n",
1342
- " min_score = np.min(scores)\n",
1343
- " max_score = np.max(scores)\n",
1344
- " return (scores - min_score) / (max_score - min_score) if max_score > min_score else scores\n",
1345
- " \n",
1346
- "def read_mol(pdb_path):\n",
1347
- " \"\"\"Read PDB file and return its content as a string\"\"\"\n",
1348
- " with open(pdb_path, 'r') as f:\n",
1349
- " return f.read()\n",
1350
- "\n",
1351
- "def fetch_pdb(pdb_id):\n",
1352
- " pdb_url = f'https://files.rcsb.org/download/{pdb_id}.pdb'\n",
1353
- " pdb_path = f'{pdb_id}.pdb'\n",
1354
- " response = requests.get(pdb_url)\n",
1355
- " if response.status_code == 200:\n",
1356
- " with open(pdb_path, 'wb') as f:\n",
1357
- " f.write(response.content)\n",
1358
- " return pdb_path\n",
1359
- " else:\n",
1360
- " return None\n",
1361
- "\n",
1362
- "def process_pdb(pdb_id, segment):\n",
1363
- " pdb_path = fetch_pdb(pdb_id)\n",
1364
- " if not pdb_path:\n",
1365
- " return \"Failed to fetch PDB file\", None, None\n",
1366
- " \n",
1367
- " parser = PDBParser(QUIET=1)\n",
1368
- " structure = parser.get_structure('protein', pdb_path)\n",
1369
- " \n",
1370
- " try:\n",
1371
- " chain = structure[0][segment]\n",
1372
- " except KeyError:\n",
1373
- " return \"Invalid Chain ID\", None, None\n",
1374
- " \n",
1375
- " \n",
1376
- " aa_dict = {\n",
1377
- " 'ALA': 'A', 'CYS': 'C', 'ASP': 'D', 'GLU': 'E', 'PHE': 'F',\n",
1378
- " 'GLY': 'G', 'HIS': 'H', 'ILE': 'I', 'LYS': 'K', 'LEU': 'L',\n",
1379
- " 'MET': 'M', 'ASN': 'N', 'PRO': 'P', 'GLN': 'Q', 'ARG': 'R',\n",
1380
- " 'SER': 'S', 'THR': 'T', 'VAL': 'V', 'TRP': 'W', 'TYR': 'Y',\n",
1381
- " 'MSE': 'M', 'SEP': 'S', 'TPO': 'T', 'CSO': 'C', 'PTR': 'Y', 'HYP': 'P'\n",
1382
- " }\n",
1383
- " \n",
1384
- " # Exclude non-amino acid residues\n",
1385
- " sequence = \"\".join(\n",
1386
- " aa_dict[residue.get_resname().strip()] \n",
1387
- " for residue in chain \n",
1388
- " if residue.get_resname().strip() in aa_dict\n",
1389
- " )\n",
1390
- " sequence2 = [\n",
1391
- " (res.id[1], res) for res in chain\n",
1392
- " if res.get_resname().strip() in aa_dict\n",
1393
- " ]\n",
1394
- " \n",
1395
- " # Prepare input for model prediction\n",
1396
- " input_ids = tokenizer(\" \".join(sequence), return_tensors=\"pt\").input_ids.to(device)\n",
1397
- " with torch.no_grad():\n",
1398
- " outputs = model(input_ids).logits.detach().cpu().numpy().squeeze()\n",
1399
- "\n",
1400
- " # Calculate scores and normalize them\n",
1401
- " scores = expit(outputs[:, 1] - outputs[:, 0])\n",
1402
- " normalized_scores = normalize_scores(scores)\n",
1403
- "\n",
1404
- " # Zip residues with scores to track the residue ID and score\n",
1405
- " residue_scores = [(resi, score) for (resi, _), score in zip(sequence2, normalized_scores)]\n",
1406
- " \n",
1407
- " result_str = \"\\n\".join([\n",
1408
- " f\"{res.get_resname()} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}\" \n",
1409
- " for i, res in enumerate(chain) if res.get_resname().strip() in aa_dict\n",
1410
- " ])\n",
1411
- " \n",
1412
- " # Save the predictions to a file\n",
1413
- " prediction_file = f\"{pdb_id}_predictions.txt\"\n",
1414
- " with open(prediction_file, \"w\") as f:\n",
1415
- " f.write(result_str)\n",
1416
- " \n",
1417
- " return result_str, molecule(pdb_path, residue_scores, segment), prediction_file\n",
1418
- "\n",
1419
- "def molecule(input_pdb, residue_scores=None, segment='A'):\n",
1420
- " mol = read_mol(input_pdb) # Read PDB file content\n",
1421
- " \n",
1422
- " # Prepare high-scoring residues script if scores are provided\n",
1423
- " high_score_script = \"\"\n",
1424
- " if residue_scores is not None:\n",
1425
- " # Sort residues based on their scores\n",
1426
- " high_score_residues = [resi for resi, score in residue_scores if score > 0.75]\n",
1427
- " mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]\n",
1428
- " \n",
1429
- " high_score_script = \"\"\"\n",
1430
- " // Reset all styles first\n",
1431
- " viewer.getModel(0).setStyle({}, {});\n",
1432
- " \n",
1433
- " // Show only the selected chain\n",
1434
- " viewer.getModel(0).setStyle(\n",
1435
- " {\"chain\": \"%s\"}, \n",
1436
- " { cartoon: {colorscheme:\"whiteCarbon\"} }\n",
1437
- " );\n",
1438
- " \n",
1439
- " // Highlight high-scoring residues only for the selected chain\n",
1440
- " let highScoreResidues = [%s];\n",
1441
- " viewer.getModel(0).setStyle(\n",
1442
- " {\"chain\": \"%s\", \"resi\": highScoreResidues}, \n",
1443
- " {\"stick\": {\"color\": \"red\"}}\n",
1444
- " );\n",
1445
- "\n",
1446
- " // Highlight medium-scoring residues only for the selected chain\n",
1447
- " let midScoreResidues = [%s];\n",
1448
- " viewer.getModel(0).setStyle(\n",
1449
- " {\"chain\": \"%s\", \"resi\": midScoreResidues}, \n",
1450
- " {\"stick\": {\"color\": \"orange\"}}\n",
1451
- " );\n",
1452
- " \"\"\" % (segment, \n",
1453
- " \", \".join(str(resi) for resi in high_score_residues),\n",
1454
- " segment,\n",
1455
- " \", \".join(str(resi) for resi in mid_score_residues),\n",
1456
- " segment)\n",
1457
- " \n",
1458
- " html_content = f\"\"\"\n",
1459
- " <!DOCTYPE html>\n",
1460
- " <html>\n",
1461
- " <head> \n",
1462
- " <meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\" />\n",
1463
- " <style>\n",
1464
- " .mol-container {{\n",
1465
- " width: 100%;\n",
1466
- " height: 700px;\n",
1467
- " position: relative;\n",
1468
- " }}\n",
1469
- " </style>\n",
1470
- " <script src=\"https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.3/jquery.min.js\"></script>\n",
1471
- " <script src=\"https://3Dmol.csb.pitt.edu/build/3Dmol-min.js\"></script>\n",
1472
- " </head>\n",
1473
- " <body>\n",
1474
- " <div id=\"container\" class=\"mol-container\"></div>\n",
1475
- " <script>\n",
1476
- " let pdb = `{mol}`; // Use template literal to properly escape PDB content\n",
1477
- " $(document).ready(function () {{\n",
1478
- " let element = $(\"#container\");\n",
1479
- " let config = {{ backgroundColor: \"white\" }};\n",
1480
- " let viewer = $3Dmol.createViewer(element, config);\n",
1481
- " viewer.addModel(pdb, \"pdb\");\n",
1482
- " \n",
1483
- " // Reset all styles and show only selected chain\n",
1484
- " viewer.getModel(0).setStyle(\n",
1485
- " {{\"chain\": \"{segment}\"}}, \n",
1486
- " {{ cartoon: {{ colorscheme:\"whiteCarbon\" }} }}\n",
1487
- " );\n",
1488
- " \n",
1489
- " {high_score_script}\n",
1490
- " \n",
1491
- " // Add hover functionality\n",
1492
- " viewer.setHoverable(\n",
1493
- " {{}}, \n",
1494
- " true, \n",
1495
- " function(atom, viewer, event, container) {{\n",
1496
- " if (!atom.label) {{\n",
1497
- " atom.label = viewer.addLabel(\n",
1498
- " atom.resn + \":\" +atom.resi + \":\" + atom.atom, \n",
1499
- " {{\n",
1500
- " position: atom, \n",
1501
- " backgroundColor: 'mintcream', \n",
1502
- " fontColor: 'black',\n",
1503
- " fontSize: 12,\n",
1504
- " padding: 2\n",
1505
- " }}\n",
1506
- " );\n",
1507
- " }}\n",
1508
- " }},\n",
1509
- " function(atom, viewer) {{\n",
1510
- " if (atom.label) {{\n",
1511
- " viewer.removeLabel(atom.label);\n",
1512
- " delete atom.label;\n",
1513
- " }}\n",
1514
- " }}\n",
1515
- " );\n",
1516
- " \n",
1517
- " viewer.zoomTo();\n",
1518
- " viewer.render();\n",
1519
- " viewer.zoom(0.8, 2000);\n",
1520
- " }});\n",
1521
- " </script>\n",
1522
- " </body>\n",
1523
- " </html>\n",
1524
- " \"\"\"\n",
1525
- " \n",
1526
- " # Return the HTML content within an iframe safely encoded for special characters\n",
1527
- " return f'<iframe width=\"100%\" height=\"700\" srcdoc=\"{html_content.replace(chr(34), \"&quot;\").replace(chr(39), \"&#39;\")}\"></iframe>'\n",
1528
- "\n",
1529
- "reps = [\n",
1530
- " {\n",
1531
- " \"model\": 0,\n",
1532
- " \"style\": \"cartoon\",\n",
1533
- " \"color\": \"whiteCarbon\",\n",
1534
- " \"residue_range\": \"\",\n",
1535
- " \"around\": 0,\n",
1536
- " \"byres\": False,\n",
1537
- " }\n",
1538
- " ]\n",
1539
- "\n",
1540
- "# Gradio UI\n",
1541
- "with gr.Blocks() as demo:\n",
1542
- " gr.Markdown(\"# Protein Binding Site Prediction\")\n",
1543
- " with gr.Row():\n",
1544
- " pdb_input = gr.Textbox(value=\"2IWI\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
1545
- " visualize_btn = gr.Button(\"Visualize Structure\")\n",
1546
- "\n",
1547
- " molecule_output2 = Molecule3D(label=\"Protein Structure\", reps=reps)\n",
1548
- "\n",
1549
- " with gr.Row():\n",
1550
- " #pdb_input = gr.Textbox(value=\"2IWI\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
1551
- " segment_input = gr.Textbox(value=\"A\", label=\"Chain ID\", placeholder=\"Enter Chain ID here...\")\n",
1552
- " prediction_btn = gr.Button(\"Predict Binding Site\")\n",
1553
- "\n",
1554
- " molecule_output = gr.HTML(label=\"Protein Structure\")\n",
1555
- " predictions_output = gr.Textbox(label=\"Binding Site Predictions\")\n",
1556
- " download_output = gr.File(label=\"Download Predictions\")\n",
1557
- " \n",
1558
- " visualize_btn.click(fetch_pdb, inputs=[pdb_input], outputs=molecule_output2)\n",
1559
- " \n",
1560
- " prediction_btn.click(process_pdb, inputs=[pdb_input, segment_input], outputs=[predictions_output, molecule_output, download_output])\n",
1561
- " \n",
1562
- " gr.Markdown(\"## Examples\")\n",
1563
- " gr.Examples(\n",
1564
- " examples=[\n",
1565
- " [\"7RPZ\", \"A\"],\n",
1566
- " [\"2IWI\", \"B\"],\n",
1567
- " [\"2F6V\", \"A\"]\n",
1568
- " ],\n",
1569
- " inputs=[pdb_input, segment_input],\n",
1570
- " outputs=[predictions_output, molecule_output, download_output]\n",
1571
- " )\n",
1572
- "\n",
1573
- "demo.launch(share=True)"
1574
- ]
1575
- }
1576
- ],
1577
- "metadata": {
1578
- "kernelspec": {
1579
- "display_name": "Python (LLM)",
1580
- "language": "python",
1581
- "name": "llm"
1582
- },
1583
- "language_info": {
1584
- "codemirror_mode": {
1585
- "name": "ipython",
1586
- "version": 3
1587
- },
1588
- "file_extension": ".py",
1589
- "mimetype": "text/x-python",
1590
- "name": "python",
1591
- "nbconvert_exporter": "python",
1592
- "pygments_lexer": "ipython3",
1593
- "version": "3.12.7"
1594
- }
1595
- },
1596
- "nbformat": 4,
1597
- "nbformat_minor": 5
1598
- }