ThorbenFroehlking committed on
Commit
160210a
·
1 Parent(s): 5ff1286
Files changed (2) hide show
  1. .ipynb_checkpoints/app-checkpoint.py +13 -44
  2. app.py +6 -43
.ipynb_checkpoints/app-checkpoint.py CHANGED
@@ -1,7 +1,7 @@
1
  from datetime import datetime
2
  import gradio as gr
3
  import requests
4
- from Bio.PDB import PDBParser, MMCIFParser, PDBIO
5
  from Bio.PDB.Polypeptide import is_aa
6
  from Bio.SeqUtils import seq1
7
  from typing import Optional, Tuple
@@ -9,7 +9,6 @@ import numpy as np
9
  import os
10
  from gradio_molecule3d import Molecule3D
11
 
12
-
13
  from model_loader import load_model
14
 
15
  import torch
@@ -21,31 +20,13 @@ import re
21
  import pandas as pd
22
  import copy
23
 
24
- import transformers, datasets
25
- from transformers import AutoTokenizer
26
- from transformers import DataCollatorForTokenClassification
27
 
28
  from datasets import Dataset
29
 
30
  from scipy.special import expit
31
 
32
- from datetime import datetime
33
- import gradio as gr
34
- import requests
35
- from Bio.PDB import PDBParser, MMCIFParser, PDBIO
36
- from Bio.PDB.Polypeptide import is_aa
37
- from Bio.SeqUtils import seq1
38
- from typing import Optional, Tuple
39
- import numpy as np
40
- import os
41
- from gradio_molecule3d import Molecule3D
42
-
43
- import re
44
- import pandas as pd
45
- import copy
46
-
47
- from scipy.special import expit
48
-
49
 
50
  # Load model and move to device
51
  checkpoint = 'ThorbenF/prot_t5_xl_uniref50'
@@ -55,24 +36,6 @@ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
55
  model.to(device)
56
  model.eval()
57
 
58
- from datetime import datetime
59
- import gradio as gr
60
- import requests
61
- from Bio.PDB import PDBParser, MMCIFParser, PDBIO
62
- from Bio.PDB.Polypeptide import is_aa
63
- from Bio.SeqUtils import seq1
64
- from Bio.PDB import Select
65
- from typing import Optional, Tuple
66
- import numpy as np
67
- import os
68
- from gradio_molecule3d import Molecule3D
69
-
70
- import re
71
- import pandas as pd
72
- import copy
73
-
74
- from scipy.special import expit
75
-
76
  def normalize_scores(scores):
77
  min_score = np.min(scores)
78
  max_score = np.max(scores)
@@ -232,7 +195,13 @@ def process_pdb(pdb_id_or_file, segment):
232
  sequence = "".join(seq1(res.resname) for res in protein_residues)
233
  sequence_id = [res.id[1] for res in protein_residues]
234
 
235
- scores = np.random.rand(len(sequence))
 
 
 
 
 
 
236
  normalized_scores = normalize_scores(scores)
237
 
238
  # Zip residues with scores to track the residue ID and score
@@ -404,8 +373,8 @@ def molecule(input_pdb, residue_scores=None, segment='A'):
404
  position: atom,
405
  backgroundColor: 'mintcream',
406
  fontColor: 'black',
407
- fontSize: 12,
408
- padding: 2
409
  }}
410
  );
411
  }}
@@ -464,7 +433,7 @@ with gr.Blocks() as demo:
464
 
465
  molecule_output = gr.HTML(label="Protein Structure")
466
  explanation_vis = gr.Markdown("""
467
- Residues with a score > 0.5 are considered binding sites and represented as sticks with the score dependent colorcoding:
468
  - 0.5-0.6: blue
469
  - 0.6–0.7: light blue
470
  - 0.7–0.8: white
 
1
  from datetime import datetime
2
  import gradio as gr
3
  import requests
4
+ from Bio.PDB import PDBParser, MMCIFParser, PDBIO, Select
5
  from Bio.PDB.Polypeptide import is_aa
6
  from Bio.SeqUtils import seq1
7
  from typing import Optional, Tuple
 
9
  import os
10
  from gradio_molecule3d import Molecule3D
11
 
 
12
  from model_loader import load_model
13
 
14
  import torch
 
20
  import pandas as pd
21
  import copy
22
 
23
+ import transformers
24
+ from transformers import AutoTokenizer, DataCollatorForTokenClassification
 
25
 
26
  from datasets import Dataset
27
 
28
  from scipy.special import expit
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
  # Load model and move to device
32
  checkpoint = 'ThorbenF/prot_t5_xl_uniref50'
 
36
  model.to(device)
37
  model.eval()
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  def normalize_scores(scores):
40
  min_score = np.min(scores)
41
  max_score = np.max(scores)
 
195
  sequence = "".join(seq1(res.resname) for res in protein_residues)
196
  sequence_id = [res.id[1] for res in protein_residues]
197
 
198
+ input_ids = tokenizer(" ".join(sequence), return_tensors="pt").input_ids.to(device)
199
+ with torch.no_grad():
200
+ outputs = model(input_ids).logits.detach().cpu().numpy().squeeze()
201
+
202
+ # Calculate scores and normalize them
203
+ scores = expit(outputs[:, 1] - outputs[:, 0])
204
+
205
  normalized_scores = normalize_scores(scores)
206
 
207
  # Zip residues with scores to track the residue ID and score
 
373
  position: atom,
374
  backgroundColor: 'mintcream',
375
  fontColor: 'black',
376
+ fontSize: 18,
377
+ padding: 4
378
  }}
379
  );
380
  }}
 
433
 
434
  molecule_output = gr.HTML(label="Protein Structure")
435
  explanation_vis = gr.Markdown("""
436
+ Residues with a score > 0.5 are represented as sticks with a score dependent colorcoding:
437
  - 0.5-0.6: blue
438
  - 0.6–0.7: light blue
439
  - 0.7–0.8: white
app.py CHANGED
@@ -1,7 +1,7 @@
1
  from datetime import datetime
2
  import gradio as gr
3
  import requests
4
- from Bio.PDB import PDBParser, MMCIFParser, PDBIO
5
  from Bio.PDB.Polypeptide import is_aa
6
  from Bio.SeqUtils import seq1
7
  from typing import Optional, Tuple
@@ -9,7 +9,6 @@ import numpy as np
9
  import os
10
  from gradio_molecule3d import Molecule3D
11
 
12
-
13
  from model_loader import load_model
14
 
15
  import torch
@@ -21,31 +20,13 @@ import re
21
  import pandas as pd
22
  import copy
23
 
24
- import transformers, datasets
25
- from transformers import AutoTokenizer
26
- from transformers import DataCollatorForTokenClassification
27
 
28
  from datasets import Dataset
29
 
30
  from scipy.special import expit
31
 
32
- from datetime import datetime
33
- import gradio as gr
34
- import requests
35
- from Bio.PDB import PDBParser, MMCIFParser, PDBIO
36
- from Bio.PDB.Polypeptide import is_aa
37
- from Bio.SeqUtils import seq1
38
- from typing import Optional, Tuple
39
- import numpy as np
40
- import os
41
- from gradio_molecule3d import Molecule3D
42
-
43
- import re
44
- import pandas as pd
45
- import copy
46
-
47
- from scipy.special import expit
48
-
49
 
50
  # Load model and move to device
51
  checkpoint = 'ThorbenF/prot_t5_xl_uniref50'
@@ -55,24 +36,6 @@ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
55
  model.to(device)
56
  model.eval()
57
 
58
- from datetime import datetime
59
- import gradio as gr
60
- import requests
61
- from Bio.PDB import PDBParser, MMCIFParser, PDBIO
62
- from Bio.PDB.Polypeptide import is_aa
63
- from Bio.SeqUtils import seq1
64
- from Bio.PDB import Select
65
- from typing import Optional, Tuple
66
- import numpy as np
67
- import os
68
- from gradio_molecule3d import Molecule3D
69
-
70
- import re
71
- import pandas as pd
72
- import copy
73
-
74
- from scipy.special import expit
75
-
76
  def normalize_scores(scores):
77
  min_score = np.min(scores)
78
  max_score = np.max(scores)
@@ -410,8 +373,8 @@ def molecule(input_pdb, residue_scores=None, segment='A'):
410
  position: atom,
411
  backgroundColor: 'mintcream',
412
  fontColor: 'black',
413
- fontSize: 12,
414
- padding: 2
415
  }}
416
  );
417
  }}
@@ -470,7 +433,7 @@ with gr.Blocks() as demo:
470
 
471
  molecule_output = gr.HTML(label="Protein Structure")
472
  explanation_vis = gr.Markdown("""
473
- Residues with a score > 0.5 are considered binding sites and represented as sticks with the score dependent colorcoding:
474
  - 0.5-0.6: blue
475
  - 0.6–0.7: light blue
476
  - 0.7–0.8: white
 
1
  from datetime import datetime
2
  import gradio as gr
3
  import requests
4
+ from Bio.PDB import PDBParser, MMCIFParser, PDBIO, Select
5
  from Bio.PDB.Polypeptide import is_aa
6
  from Bio.SeqUtils import seq1
7
  from typing import Optional, Tuple
 
9
  import os
10
  from gradio_molecule3d import Molecule3D
11
 
 
12
  from model_loader import load_model
13
 
14
  import torch
 
20
  import pandas as pd
21
  import copy
22
 
23
+ import transformers
24
+ from transformers import AutoTokenizer, DataCollatorForTokenClassification
 
25
 
26
  from datasets import Dataset
27
 
28
  from scipy.special import expit
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
  # Load model and move to device
32
  checkpoint = 'ThorbenF/prot_t5_xl_uniref50'
 
36
  model.to(device)
37
  model.eval()
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  def normalize_scores(scores):
40
  min_score = np.min(scores)
41
  max_score = np.max(scores)
 
373
  position: atom,
374
  backgroundColor: 'mintcream',
375
  fontColor: 'black',
376
+ fontSize: 18,
377
+ padding: 4
378
  }}
379
  );
380
  }}
 
433
 
434
  molecule_output = gr.HTML(label="Protein Structure")
435
  explanation_vis = gr.Markdown("""
436
+ Residues with a score > 0.5 are represented as sticks with a score dependent colorcoding:
437
  - 0.5-0.6: blue
438
  - 0.6–0.7: light blue
439
  - 0.7–0.8: white