Spaces:
Sleeping
Sleeping
ThorbenFroehlking
committed on
Commit
·
160210a
1
Parent(s):
5ff1286
Update
Browse files
- .ipynb_checkpoints/app-checkpoint.py +13 -44
- app.py +6 -43
.ipynb_checkpoints/app-checkpoint.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
from datetime import datetime
|
2 |
import gradio as gr
|
3 |
import requests
|
4 |
-
from Bio.PDB import PDBParser, MMCIFParser, PDBIO
|
5 |
from Bio.PDB.Polypeptide import is_aa
|
6 |
from Bio.SeqUtils import seq1
|
7 |
from typing import Optional, Tuple
|
@@ -9,7 +9,6 @@ import numpy as np
|
|
9 |
import os
|
10 |
from gradio_molecule3d import Molecule3D
|
11 |
|
12 |
-
|
13 |
from model_loader import load_model
|
14 |
|
15 |
import torch
|
@@ -21,31 +20,13 @@ import re
|
|
21 |
import pandas as pd
|
22 |
import copy
|
23 |
|
24 |
-
import transformers
|
25 |
-
from transformers import AutoTokenizer
|
26 |
-
from transformers import DataCollatorForTokenClassification
|
27 |
|
28 |
from datasets import Dataset
|
29 |
|
30 |
from scipy.special import expit
|
31 |
|
32 |
-
from datetime import datetime
|
33 |
-
import gradio as gr
|
34 |
-
import requests
|
35 |
-
from Bio.PDB import PDBParser, MMCIFParser, PDBIO
|
36 |
-
from Bio.PDB.Polypeptide import is_aa
|
37 |
-
from Bio.SeqUtils import seq1
|
38 |
-
from typing import Optional, Tuple
|
39 |
-
import numpy as np
|
40 |
-
import os
|
41 |
-
from gradio_molecule3d import Molecule3D
|
42 |
-
|
43 |
-
import re
|
44 |
-
import pandas as pd
|
45 |
-
import copy
|
46 |
-
|
47 |
-
from scipy.special import expit
|
48 |
-
|
49 |
|
50 |
# Load model and move to device
|
51 |
checkpoint = 'ThorbenF/prot_t5_xl_uniref50'
|
@@ -55,24 +36,6 @@ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
|
55 |
model.to(device)
|
56 |
model.eval()
|
57 |
|
58 |
-
from datetime import datetime
|
59 |
-
import gradio as gr
|
60 |
-
import requests
|
61 |
-
from Bio.PDB import PDBParser, MMCIFParser, PDBIO
|
62 |
-
from Bio.PDB.Polypeptide import is_aa
|
63 |
-
from Bio.SeqUtils import seq1
|
64 |
-
from Bio.PDB import Select
|
65 |
-
from typing import Optional, Tuple
|
66 |
-
import numpy as np
|
67 |
-
import os
|
68 |
-
from gradio_molecule3d import Molecule3D
|
69 |
-
|
70 |
-
import re
|
71 |
-
import pandas as pd
|
72 |
-
import copy
|
73 |
-
|
74 |
-
from scipy.special import expit
|
75 |
-
|
76 |
def normalize_scores(scores):
|
77 |
min_score = np.min(scores)
|
78 |
max_score = np.max(scores)
|
@@ -232,7 +195,13 @@ def process_pdb(pdb_id_or_file, segment):
|
|
232 |
sequence = "".join(seq1(res.resname) for res in protein_residues)
|
233 |
sequence_id = [res.id[1] for res in protein_residues]
|
234 |
|
235 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
236 |
normalized_scores = normalize_scores(scores)
|
237 |
|
238 |
# Zip residues with scores to track the residue ID and score
|
@@ -404,8 +373,8 @@ def molecule(input_pdb, residue_scores=None, segment='A'):
|
|
404 |
position: atom,
|
405 |
backgroundColor: 'mintcream',
|
406 |
fontColor: 'black',
|
407 |
-
fontSize:
|
408 |
-
padding:
|
409 |
}}
|
410 |
);
|
411 |
}}
|
@@ -464,7 +433,7 @@ with gr.Blocks() as demo:
|
|
464 |
|
465 |
molecule_output = gr.HTML(label="Protein Structure")
|
466 |
explanation_vis = gr.Markdown("""
|
467 |
-
Residues with a score > 0.5 are
|
468 |
- 0.5-0.6: blue
|
469 |
- 0.6–0.7: light blue
|
470 |
- 0.7–0.8: white
|
|
|
1 |
from datetime import datetime
|
2 |
import gradio as gr
|
3 |
import requests
|
4 |
+
from Bio.PDB import PDBParser, MMCIFParser, PDBIO, Select
|
5 |
from Bio.PDB.Polypeptide import is_aa
|
6 |
from Bio.SeqUtils import seq1
|
7 |
from typing import Optional, Tuple
|
|
|
9 |
import os
|
10 |
from gradio_molecule3d import Molecule3D
|
11 |
|
|
|
12 |
from model_loader import load_model
|
13 |
|
14 |
import torch
|
|
|
20 |
import pandas as pd
|
21 |
import copy
|
22 |
|
23 |
+
import transformers
|
24 |
+
from transformers import AutoTokenizer, DataCollatorForTokenClassification
|
|
|
25 |
|
26 |
from datasets import Dataset
|
27 |
|
28 |
from scipy.special import expit
|
29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
|
31 |
# Load model and move to device
|
32 |
checkpoint = 'ThorbenF/prot_t5_xl_uniref50'
|
|
|
36 |
model.to(device)
|
37 |
model.eval()
|
38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
def normalize_scores(scores):
|
40 |
min_score = np.min(scores)
|
41 |
max_score = np.max(scores)
|
|
|
195 |
sequence = "".join(seq1(res.resname) for res in protein_residues)
|
196 |
sequence_id = [res.id[1] for res in protein_residues]
|
197 |
|
198 |
+
input_ids = tokenizer(" ".join(sequence), return_tensors="pt").input_ids.to(device)
|
199 |
+
with torch.no_grad():
|
200 |
+
outputs = model(input_ids).logits.detach().cpu().numpy().squeeze()
|
201 |
+
|
202 |
+
# Calculate scores and normalize them
|
203 |
+
scores = expit(outputs[:, 1] - outputs[:, 0])
|
204 |
+
|
205 |
normalized_scores = normalize_scores(scores)
|
206 |
|
207 |
# Zip residues with scores to track the residue ID and score
|
|
|
373 |
position: atom,
|
374 |
backgroundColor: 'mintcream',
|
375 |
fontColor: 'black',
|
376 |
+
fontSize: 18,
|
377 |
+
padding: 4
|
378 |
}}
|
379 |
);
|
380 |
}}
|
|
|
433 |
|
434 |
molecule_output = gr.HTML(label="Protein Structure")
|
435 |
explanation_vis = gr.Markdown("""
|
436 |
+
Residues with a score > 0.5 are represented as sticks with a score dependent colorcoding:
|
437 |
- 0.5-0.6: blue
|
438 |
- 0.6–0.7: light blue
|
439 |
- 0.7–0.8: white
|
app.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
from datetime import datetime
|
2 |
import gradio as gr
|
3 |
import requests
|
4 |
-
from Bio.PDB import PDBParser, MMCIFParser, PDBIO
|
5 |
from Bio.PDB.Polypeptide import is_aa
|
6 |
from Bio.SeqUtils import seq1
|
7 |
from typing import Optional, Tuple
|
@@ -9,7 +9,6 @@ import numpy as np
|
|
9 |
import os
|
10 |
from gradio_molecule3d import Molecule3D
|
11 |
|
12 |
-
|
13 |
from model_loader import load_model
|
14 |
|
15 |
import torch
|
@@ -21,31 +20,13 @@ import re
|
|
21 |
import pandas as pd
|
22 |
import copy
|
23 |
|
24 |
-
import transformers
|
25 |
-
from transformers import AutoTokenizer
|
26 |
-
from transformers import DataCollatorForTokenClassification
|
27 |
|
28 |
from datasets import Dataset
|
29 |
|
30 |
from scipy.special import expit
|
31 |
|
32 |
-
from datetime import datetime
|
33 |
-
import gradio as gr
|
34 |
-
import requests
|
35 |
-
from Bio.PDB import PDBParser, MMCIFParser, PDBIO
|
36 |
-
from Bio.PDB.Polypeptide import is_aa
|
37 |
-
from Bio.SeqUtils import seq1
|
38 |
-
from typing import Optional, Tuple
|
39 |
-
import numpy as np
|
40 |
-
import os
|
41 |
-
from gradio_molecule3d import Molecule3D
|
42 |
-
|
43 |
-
import re
|
44 |
-
import pandas as pd
|
45 |
-
import copy
|
46 |
-
|
47 |
-
from scipy.special import expit
|
48 |
-
|
49 |
|
50 |
# Load model and move to device
|
51 |
checkpoint = 'ThorbenF/prot_t5_xl_uniref50'
|
@@ -55,24 +36,6 @@ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
|
55 |
model.to(device)
|
56 |
model.eval()
|
57 |
|
58 |
-
from datetime import datetime
|
59 |
-
import gradio as gr
|
60 |
-
import requests
|
61 |
-
from Bio.PDB import PDBParser, MMCIFParser, PDBIO
|
62 |
-
from Bio.PDB.Polypeptide import is_aa
|
63 |
-
from Bio.SeqUtils import seq1
|
64 |
-
from Bio.PDB import Select
|
65 |
-
from typing import Optional, Tuple
|
66 |
-
import numpy as np
|
67 |
-
import os
|
68 |
-
from gradio_molecule3d import Molecule3D
|
69 |
-
|
70 |
-
import re
|
71 |
-
import pandas as pd
|
72 |
-
import copy
|
73 |
-
|
74 |
-
from scipy.special import expit
|
75 |
-
|
76 |
def normalize_scores(scores):
|
77 |
min_score = np.min(scores)
|
78 |
max_score = np.max(scores)
|
@@ -410,8 +373,8 @@ def molecule(input_pdb, residue_scores=None, segment='A'):
|
|
410 |
position: atom,
|
411 |
backgroundColor: 'mintcream',
|
412 |
fontColor: 'black',
|
413 |
-
fontSize:
|
414 |
-
padding:
|
415 |
}}
|
416 |
);
|
417 |
}}
|
@@ -470,7 +433,7 @@ with gr.Blocks() as demo:
|
|
470 |
|
471 |
molecule_output = gr.HTML(label="Protein Structure")
|
472 |
explanation_vis = gr.Markdown("""
|
473 |
-
Residues with a score > 0.5 are
|
474 |
- 0.5-0.6: blue
|
475 |
- 0.6–0.7: light blue
|
476 |
- 0.7–0.8: white
|
|
|
1 |
from datetime import datetime
|
2 |
import gradio as gr
|
3 |
import requests
|
4 |
+
from Bio.PDB import PDBParser, MMCIFParser, PDBIO, Select
|
5 |
from Bio.PDB.Polypeptide import is_aa
|
6 |
from Bio.SeqUtils import seq1
|
7 |
from typing import Optional, Tuple
|
|
|
9 |
import os
|
10 |
from gradio_molecule3d import Molecule3D
|
11 |
|
|
|
12 |
from model_loader import load_model
|
13 |
|
14 |
import torch
|
|
|
20 |
import pandas as pd
|
21 |
import copy
|
22 |
|
23 |
+
import transformers
|
24 |
+
from transformers import AutoTokenizer, DataCollatorForTokenClassification
|
|
|
25 |
|
26 |
from datasets import Dataset
|
27 |
|
28 |
from scipy.special import expit
|
29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
|
31 |
# Load model and move to device
|
32 |
checkpoint = 'ThorbenF/prot_t5_xl_uniref50'
|
|
|
36 |
model.to(device)
|
37 |
model.eval()
|
38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
def normalize_scores(scores):
|
40 |
min_score = np.min(scores)
|
41 |
max_score = np.max(scores)
|
|
|
373 |
position: atom,
|
374 |
backgroundColor: 'mintcream',
|
375 |
fontColor: 'black',
|
376 |
+
fontSize: 18,
|
377 |
+
padding: 4
|
378 |
}}
|
379 |
);
|
380 |
}}
|
|
|
433 |
|
434 |
molecule_output = gr.HTML(label="Protein Structure")
|
435 |
explanation_vis = gr.Markdown("""
|
436 |
+
Residues with a score > 0.5 are represented as sticks with a score dependent colorcoding:
|
437 |
- 0.5-0.6: blue
|
438 |
- 0.6–0.7: light blue
|
439 |
- 0.7–0.8: white
|