ThorbenF committed on
Commit
8bd6bbb
·
1 Parent(s): a28eeb5
.ipynb_checkpoints/app-checkpoint.py CHANGED
@@ -22,11 +22,16 @@ from scipy.special import expit
22
 
23
  import requests
24
 
 
 
25
  # Biopython imports
26
  from Bio.PDB import PDBParser, Select, PDBIO
27
  from Bio.PDB.DSSP import DSSP
28
  import Bio.PDB.PDBList as PDBList
29
 
 
 
 
30
  # Configuration
31
  checkpoint = 'ThorbenF/prot_t5_xl_uniref50'
32
  max_length = 1500
@@ -210,64 +215,77 @@ def fetch_pdb(pdb_id):
210
  print(f"Error fetching PDB: {e}")
211
  return None
212
 
 
 
 
 
 
 
 
 
213
  def process_pdb(pdb_id):
214
  # Fetch PDB file
215
- # Use PDBList to download the file if it doesn't exist locally
216
  pdbl = PDBList.PDBList()
217
  pdb_path = pdbl.retrieve_pdb_file(pdb_id, pdir='pdb_files', file_format='pdb')
218
-
219
  if not pdb_path or not os.path.exists(pdb_path):
220
  return "Failed to fetch PDB file", None
221
 
222
  # Extract protein sequence and chain
223
  protein_sequence, chain, filtered_pdb_path = extract_protein_sequence(pdb_path)
224
-
225
  if not protein_sequence:
226
  return "No suitable protein sequence found", None
227
 
228
  # Predict binding sites
229
  sequence, normalized_scores = predict_protein_sequence(protein_sequence)
230
-
231
  # Prepare result string
232
  result_str = "\n".join([f"{aa}: {score:.2f}" for aa, score in zip(sequence, normalized_scores)])
233
-
234
- return result_str, filtered_pdb_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
 
236
  # Create Gradio interface
237
  with gr.Blocks() as demo:
238
  gr.Markdown("# Protein Binding Site Prediction")
239
-
240
  with gr.Row():
241
  with gr.Column():
242
- # PDB ID input with default suggestion
243
  pdb_input = gr.Textbox(
244
- value="2IWI",
245
  label="PDB ID",
246
  placeholder="Enter PDB ID here..."
247
  )
248
-
249
- # Predict button
250
  predict_btn = gr.Button("Predict Binding Sites")
251
-
252
  with gr.Column():
253
- # Binding site predictions output
254
  predictions_output = gr.Textbox(
255
  label="Binding Site Predictions"
256
  )
257
-
258
- # 3D Molecule visualization
259
- molecule_output = Molecule3D(
260
- label="Protein Structure"
261
- )
262
-
263
  # Prediction logic
264
  predict_btn.click(
265
- process_pdb,
266
- inputs=[pdb_input],
267
  outputs=[predictions_output, molecule_output]
268
  )
269
 
270
- # Add some example inputs
271
  gr.Markdown("## Examples")
272
  gr.Examples(
273
  examples=[
 
22
 
23
  import requests
24
 
25
+ from gradio_molecule3d import Molecule3D
26
+
27
  # Biopython imports
28
  from Bio.PDB import PDBParser, Select, PDBIO
29
  from Bio.PDB.DSSP import DSSP
30
  import Bio.PDB.PDBList as PDBList
31
 
32
+ from matplotlib import cm # For color mapping
33
+ from matplotlib.colors import Normalize
34
+
35
  # Configuration
36
  checkpoint = 'ThorbenF/prot_t5_xl_uniref50'
37
  max_length = 1500
 
215
  print(f"Error fetching PDB: {e}")
216
  return None
217
 
218
+ # Function to map scores to colors (blue for low scores, red for high scores)
219
+ def score_to_color(score):
220
+ norm = Normalize(vmin=0, vmax=1) # Assuming scores are normalized between 0 and 1
221
+ color_map = cm.get_cmap('coolwarm') # Use a blue-to-red colormap
222
+ rgba = color_map(norm(score)) # Get RGBA values
223
+ hex_color = '#{:02x}{:02x}{:02x}'.format(int(rgba[0] * 255), int(rgba[1] * 255), int(rgba[2] * 255))
224
+ return hex_color
225
+
226
  def process_pdb(pdb_id):
227
  # Fetch PDB file
 
228
  pdbl = PDBList.PDBList()
229
  pdb_path = pdbl.retrieve_pdb_file(pdb_id, pdir='pdb_files', file_format='pdb')
230
+
231
  if not pdb_path or not os.path.exists(pdb_path):
232
  return "Failed to fetch PDB file", None
233
 
234
  # Extract protein sequence and chain
235
  protein_sequence, chain, filtered_pdb_path = extract_protein_sequence(pdb_path)
236
+
237
  if not protein_sequence:
238
  return "No suitable protein sequence found", None
239
 
240
  # Predict binding sites
241
  sequence, normalized_scores = predict_protein_sequence(protein_sequence)
242
+
243
  # Prepare result string
244
  result_str = "\n".join([f"{aa}: {score:.2f}" for aa, score in zip(sequence, normalized_scores)])
245
+
246
+ # Prepare residue-based coloring for Molecule3D
247
+ reps = []
248
+ for i, score in enumerate(normalized_scores):
249
+ reps.append({
250
+ "model": 0,
251
+ "chain": chain.get_id(),
252
+ "residue_range": f"{i}-{i}",
253
+ "style": "stick",
254
+ "color": score_to_color(score),
255
+ "byres": True,
256
+ "visible": True
257
+ })
258
+
259
+ molecule_viewer = Molecule3D(reps=reps)
260
+
261
+ return result_str, molecule_viewer
262
 
263
  # Create Gradio interface
264
  with gr.Blocks() as demo:
265
  gr.Markdown("# Protein Binding Site Prediction")
266
+
267
  with gr.Row():
268
  with gr.Column():
 
269
  pdb_input = gr.Textbox(
270
+ value="2IWI",
271
  label="PDB ID",
272
  placeholder="Enter PDB ID here..."
273
  )
 
 
274
  predict_btn = gr.Button("Predict Binding Sites")
275
+
276
  with gr.Column():
 
277
  predictions_output = gr.Textbox(
278
  label="Binding Site Predictions"
279
  )
280
+ molecule_output = Molecule3D(label="Protein Structure")
281
+
 
 
 
 
282
  # Prediction logic
283
  predict_btn.click(
284
+ process_pdb,
285
+ inputs=[pdb_input],
286
  outputs=[predictions_output, molecule_output]
287
  )
288
 
 
289
  gr.Markdown("## Examples")
290
  gr.Examples(
291
  examples=[
.ipynb_checkpoints/requirements-checkpoint.txt CHANGED
@@ -10,4 +10,5 @@ sentencepiece
10
  huggingface_hub>=0.15.0
11
  requests
12
  gradio_molecule3d
13
- biopython>=1.81
 
 
10
  huggingface_hub>=0.15.0
11
  requests
12
  gradio_molecule3d
13
+ biopython>=1.81
14
+ matplotlib
app.py CHANGED
@@ -22,11 +22,16 @@ from scipy.special import expit
22
 
23
  import requests
24
 
 
 
25
  # Biopython imports
26
  from Bio.PDB import PDBParser, Select, PDBIO
27
  from Bio.PDB.DSSP import DSSP
28
  import Bio.PDB.PDBList as PDBList
29
 
 
 
 
30
  # Configuration
31
  checkpoint = 'ThorbenF/prot_t5_xl_uniref50'
32
  max_length = 1500
@@ -210,64 +215,77 @@ def fetch_pdb(pdb_id):
210
  print(f"Error fetching PDB: {e}")
211
  return None
212
 
 
 
 
 
 
 
 
 
213
  def process_pdb(pdb_id):
214
  # Fetch PDB file
215
- # Use PDBList to download the file if it doesn't exist locally
216
  pdbl = PDBList.PDBList()
217
  pdb_path = pdbl.retrieve_pdb_file(pdb_id, pdir='pdb_files', file_format='pdb')
218
-
219
  if not pdb_path or not os.path.exists(pdb_path):
220
  return "Failed to fetch PDB file", None
221
 
222
  # Extract protein sequence and chain
223
  protein_sequence, chain, filtered_pdb_path = extract_protein_sequence(pdb_path)
224
-
225
  if not protein_sequence:
226
  return "No suitable protein sequence found", None
227
 
228
  # Predict binding sites
229
  sequence, normalized_scores = predict_protein_sequence(protein_sequence)
230
-
231
  # Prepare result string
232
  result_str = "\n".join([f"{aa}: {score:.2f}" for aa, score in zip(sequence, normalized_scores)])
233
-
234
- return result_str, filtered_pdb_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
 
236
  # Create Gradio interface
237
  with gr.Blocks() as demo:
238
  gr.Markdown("# Protein Binding Site Prediction")
239
-
240
  with gr.Row():
241
  with gr.Column():
242
- # PDB ID input with default suggestion
243
  pdb_input = gr.Textbox(
244
- value="2IWI",
245
  label="PDB ID",
246
  placeholder="Enter PDB ID here..."
247
  )
248
-
249
- # Predict button
250
  predict_btn = gr.Button("Predict Binding Sites")
251
-
252
  with gr.Column():
253
- # Binding site predictions output
254
  predictions_output = gr.Textbox(
255
  label="Binding Site Predictions"
256
  )
257
-
258
- # 3D Molecule visualization
259
- molecule_output = Molecule3D(
260
- label="Protein Structure"
261
- )
262
-
263
  # Prediction logic
264
  predict_btn.click(
265
- process_pdb,
266
- inputs=[pdb_input],
267
  outputs=[predictions_output, molecule_output]
268
  )
269
 
270
- # Add some example inputs
271
  gr.Markdown("## Examples")
272
  gr.Examples(
273
  examples=[
 
22
 
23
  import requests
24
 
25
+ from gradio_molecule3d import Molecule3D
26
+
27
  # Biopython imports
28
  from Bio.PDB import PDBParser, Select, PDBIO
29
  from Bio.PDB.DSSP import DSSP
30
  import Bio.PDB.PDBList as PDBList
31
 
32
+ from matplotlib import cm # For color mapping
33
+ from matplotlib.colors import Normalize
34
+
35
  # Configuration
36
  checkpoint = 'ThorbenF/prot_t5_xl_uniref50'
37
  max_length = 1500
 
215
  print(f"Error fetching PDB: {e}")
216
  return None
217
 
218
+ # Function to map scores to colors (blue for low scores, red for high scores)
219
+ def score_to_color(score):
220
+ norm = Normalize(vmin=0, vmax=1) # Assuming scores are normalized between 0 and 1
221
+ color_map = cm.get_cmap('coolwarm') # Use a blue-to-red colormap
222
+ rgba = color_map(norm(score)) # Get RGBA values
223
+ hex_color = '#{:02x}{:02x}{:02x}'.format(int(rgba[0] * 255), int(rgba[1] * 255), int(rgba[2] * 255))
224
+ return hex_color
225
+
226
  def process_pdb(pdb_id):
227
  # Fetch PDB file
 
228
  pdbl = PDBList.PDBList()
229
  pdb_path = pdbl.retrieve_pdb_file(pdb_id, pdir='pdb_files', file_format='pdb')
230
+
231
  if not pdb_path or not os.path.exists(pdb_path):
232
  return "Failed to fetch PDB file", None
233
 
234
  # Extract protein sequence and chain
235
  protein_sequence, chain, filtered_pdb_path = extract_protein_sequence(pdb_path)
236
+
237
  if not protein_sequence:
238
  return "No suitable protein sequence found", None
239
 
240
  # Predict binding sites
241
  sequence, normalized_scores = predict_protein_sequence(protein_sequence)
242
+
243
  # Prepare result string
244
  result_str = "\n".join([f"{aa}: {score:.2f}" for aa, score in zip(sequence, normalized_scores)])
245
+
246
+ # Prepare residue-based coloring for Molecule3D
247
+ reps = []
248
+ for i, score in enumerate(normalized_scores):
249
+ reps.append({
250
+ "model": 0,
251
+ "chain": chain.get_id(),
252
+ "residue_range": f"{i}-{i}",
253
+ "style": "stick",
254
+ "color": score_to_color(score),
255
+ "byres": True,
256
+ "visible": True
257
+ })
258
+
259
+ molecule_viewer = Molecule3D(reps=reps)
260
+
261
+ return result_str, molecule_viewer
262
 
263
  # Create Gradio interface
264
  with gr.Blocks() as demo:
265
  gr.Markdown("# Protein Binding Site Prediction")
266
+
267
  with gr.Row():
268
  with gr.Column():
 
269
  pdb_input = gr.Textbox(
270
+ value="2IWI",
271
  label="PDB ID",
272
  placeholder="Enter PDB ID here..."
273
  )
 
 
274
  predict_btn = gr.Button("Predict Binding Sites")
275
+
276
  with gr.Column():
 
277
  predictions_output = gr.Textbox(
278
  label="Binding Site Predictions"
279
  )
280
+ molecule_output = Molecule3D(label="Protein Structure")
281
+
 
 
 
 
282
  # Prediction logic
283
  predict_btn.click(
284
+ process_pdb,
285
+ inputs=[pdb_input],
286
  outputs=[predictions_output, molecule_output]
287
  )
288
 
 
289
  gr.Markdown("## Examples")
290
  gr.Examples(
291
  examples=[
requirements.txt CHANGED
@@ -10,4 +10,5 @@ sentencepiece
10
  huggingface_hub>=0.15.0
11
  requests
12
  gradio_molecule3d
13
- biopython>=1.81
 
 
10
  huggingface_hub>=0.15.0
11
  requests
12
  gradio_molecule3d
13
+ biopython>=1.81
14
+ matplotlib