Spaces:
Sleeping
Sleeping
Update
Browse files- .ipynb_checkpoints/app-checkpoint.py +22 -5
- app.py +22 -5
.ipynb_checkpoints/app-checkpoint.py
CHANGED
@@ -40,8 +40,6 @@ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
|
40 |
model.to(device)
|
41 |
model.eval()
|
42 |
|
43 |
-
reps = [{"model": 0, "style": "cartoon", "color": "spectrum"}]
|
44 |
-
|
45 |
# Function to fetch a PDB file
|
46 |
def fetch_pdb(pdb_id):
|
47 |
pdb_url = f'https://files.rcsb.org/download/{pdb_id}.pdb'
|
@@ -60,7 +58,6 @@ def normalize_scores(scores):
|
|
60 |
max_score = np.max(scores)
|
61 |
return (scores - min_score) / (max_score - min_score) if max_score > min_score else scores
|
62 |
|
63 |
-
# Extract sequence and predict binding scores
|
64 |
def process_pdb(pdb_id, segment):
|
65 |
pdb_path = fetch_pdb(pdb_id)
|
66 |
if not pdb_path:
|
@@ -70,25 +67,45 @@ def process_pdb(pdb_id, segment):
|
|
70 |
structure = parser.get_structure('protein', pdb_path)
|
71 |
chain = structure[0][segment]
|
72 |
|
73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
|
|
|
75 |
input_ids = tokenizer(" ".join(sequence), return_tensors="pt").input_ids.to(device)
|
76 |
with torch.no_grad():
|
77 |
outputs = model(input_ids).logits.detach().cpu().numpy().squeeze()
|
78 |
|
|
|
79 |
scores = expit(outputs[:, 1] - outputs[:, 0])
|
80 |
normalized_scores = normalize_scores(scores)
|
81 |
|
|
|
82 |
result_str = "\n".join([
|
83 |
f"{res.get_resname()} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}"
|
84 |
-
for i, res in enumerate(chain)
|
85 |
])
|
86 |
|
|
|
87 |
with open(f"{pdb_id}_predictions.txt", "w") as f:
|
88 |
f.write(result_str)
|
89 |
|
90 |
return result_str, pdb_path, f"{pdb_id}_predictions.txt"
|
91 |
|
|
|
|
|
92 |
# Gradio UI
|
93 |
with gr.Blocks() as demo:
|
94 |
gr.Markdown("# Protein Binding Site Prediction")
|
|
|
40 |
model.to(device)
|
41 |
model.eval()
|
42 |
|
|
|
|
|
43 |
# Function to fetch a PDB file
|
44 |
def fetch_pdb(pdb_id):
|
45 |
pdb_url = f'https://files.rcsb.org/download/{pdb_id}.pdb'
|
|
|
58 |
max_score = np.max(scores)
|
59 |
return (scores - min_score) / (max_score - min_score) if max_score > min_score else scores
|
60 |
|
|
|
61 |
def process_pdb(pdb_id, segment):
|
62 |
pdb_path = fetch_pdb(pdb_id)
|
63 |
if not pdb_path:
|
|
|
67 |
structure = parser.get_structure('protein', pdb_path)
|
68 |
chain = structure[0][segment]
|
69 |
|
70 |
+
# Comprehensive amino acid mapping
|
71 |
+
aa_dict = {
|
72 |
+
'ALA': 'A', 'CYS': 'C', 'ASP': 'D', 'GLU': 'E', 'PHE': 'F',
|
73 |
+
'GLY': 'G', 'HIS': 'H', 'ILE': 'I', 'LYS': 'K', 'LEU': 'L',
|
74 |
+
'MET': 'M', 'ASN': 'N', 'PRO': 'P', 'GLN': 'Q', 'ARG': 'R',
|
75 |
+
'SER': 'S', 'THR': 'T', 'VAL': 'V', 'TRP': 'W', 'TYR': 'Y',
|
76 |
+
'MSE': 'M', 'SEP': 'S', 'TPO': 'T', 'CSO': 'C', 'PTR': 'Y', 'HYP': 'P'
|
77 |
+
}
|
78 |
+
|
79 |
+
# Exclude non-amino acid residues
|
80 |
+
sequence = "".join(
|
81 |
+
aa_dict[residue.get_resname().strip()]
|
82 |
+
for residue in chain
|
83 |
+
if residue.get_resname().strip() in aa_dict
|
84 |
+
)
|
85 |
|
86 |
+
# Prepare input for model prediction
|
87 |
input_ids = tokenizer(" ".join(sequence), return_tensors="pt").input_ids.to(device)
|
88 |
with torch.no_grad():
|
89 |
outputs = model(input_ids).logits.detach().cpu().numpy().squeeze()
|
90 |
|
91 |
+
# Calculate scores and normalize them
|
92 |
scores = expit(outputs[:, 1] - outputs[:, 0])
|
93 |
normalized_scores = normalize_scores(scores)
|
94 |
|
95 |
+
# Prepare the result string, including only amino acid residues
|
96 |
result_str = "\n".join([
|
97 |
f"{res.get_resname()} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}"
|
98 |
+
for i, res in enumerate(chain) if res.get_resname().strip() in aa_dict
|
99 |
])
|
100 |
|
101 |
+
# Save predictions to file
|
102 |
with open(f"{pdb_id}_predictions.txt", "w") as f:
|
103 |
f.write(result_str)
|
104 |
|
105 |
return result_str, pdb_path, f"{pdb_id}_predictions.txt"
|
106 |
|
107 |
+
reps = [{"model": 0, "style": "cartoon", "color": "spectrum"}]
|
108 |
+
|
109 |
# Gradio UI
|
110 |
with gr.Blocks() as demo:
|
111 |
gr.Markdown("# Protein Binding Site Prediction")
|
app.py
CHANGED
@@ -40,8 +40,6 @@ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
|
40 |
model.to(device)
|
41 |
model.eval()
|
42 |
|
43 |
-
reps = [{"model": 0, "style": "cartoon", "color": "spectrum"}]
|
44 |
-
|
45 |
# Function to fetch a PDB file
|
46 |
def fetch_pdb(pdb_id):
|
47 |
pdb_url = f'https://files.rcsb.org/download/{pdb_id}.pdb'
|
@@ -60,7 +58,6 @@ def normalize_scores(scores):
|
|
60 |
max_score = np.max(scores)
|
61 |
return (scores - min_score) / (max_score - min_score) if max_score > min_score else scores
|
62 |
|
63 |
-
# Extract sequence and predict binding scores
|
64 |
def process_pdb(pdb_id, segment):
|
65 |
pdb_path = fetch_pdb(pdb_id)
|
66 |
if not pdb_path:
|
@@ -70,25 +67,45 @@ def process_pdb(pdb_id, segment):
|
|
70 |
structure = parser.get_structure('protein', pdb_path)
|
71 |
chain = structure[0][segment]
|
72 |
|
73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
|
|
|
75 |
input_ids = tokenizer(" ".join(sequence), return_tensors="pt").input_ids.to(device)
|
76 |
with torch.no_grad():
|
77 |
outputs = model(input_ids).logits.detach().cpu().numpy().squeeze()
|
78 |
|
|
|
79 |
scores = expit(outputs[:, 1] - outputs[:, 0])
|
80 |
normalized_scores = normalize_scores(scores)
|
81 |
|
|
|
82 |
result_str = "\n".join([
|
83 |
f"{res.get_resname()} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}"
|
84 |
-
for i, res in enumerate(chain)
|
85 |
])
|
86 |
|
|
|
87 |
with open(f"{pdb_id}_predictions.txt", "w") as f:
|
88 |
f.write(result_str)
|
89 |
|
90 |
return result_str, pdb_path, f"{pdb_id}_predictions.txt"
|
91 |
|
|
|
|
|
92 |
# Gradio UI
|
93 |
with gr.Blocks() as demo:
|
94 |
gr.Markdown("# Protein Binding Site Prediction")
|
|
|
40 |
model.to(device)
|
41 |
model.eval()
|
42 |
|
|
|
|
|
43 |
# Function to fetch a PDB file
|
44 |
def fetch_pdb(pdb_id):
|
45 |
pdb_url = f'https://files.rcsb.org/download/{pdb_id}.pdb'
|
|
|
58 |
max_score = np.max(scores)
|
59 |
return (scores - min_score) / (max_score - min_score) if max_score > min_score else scores
|
60 |
|
|
|
61 |
def process_pdb(pdb_id, segment):
|
62 |
pdb_path = fetch_pdb(pdb_id)
|
63 |
if not pdb_path:
|
|
|
67 |
structure = parser.get_structure('protein', pdb_path)
|
68 |
chain = structure[0][segment]
|
69 |
|
70 |
+
# Comprehensive amino acid mapping
|
71 |
+
aa_dict = {
|
72 |
+
'ALA': 'A', 'CYS': 'C', 'ASP': 'D', 'GLU': 'E', 'PHE': 'F',
|
73 |
+
'GLY': 'G', 'HIS': 'H', 'ILE': 'I', 'LYS': 'K', 'LEU': 'L',
|
74 |
+
'MET': 'M', 'ASN': 'N', 'PRO': 'P', 'GLN': 'Q', 'ARG': 'R',
|
75 |
+
'SER': 'S', 'THR': 'T', 'VAL': 'V', 'TRP': 'W', 'TYR': 'Y',
|
76 |
+
'MSE': 'M', 'SEP': 'S', 'TPO': 'T', 'CSO': 'C', 'PTR': 'Y', 'HYP': 'P'
|
77 |
+
}
|
78 |
+
|
79 |
+
# Exclude non-amino acid residues
|
80 |
+
sequence = "".join(
|
81 |
+
aa_dict[residue.get_resname().strip()]
|
82 |
+
for residue in chain
|
83 |
+
if residue.get_resname().strip() in aa_dict
|
84 |
+
)
|
85 |
|
86 |
+
# Prepare input for model prediction
|
87 |
input_ids = tokenizer(" ".join(sequence), return_tensors="pt").input_ids.to(device)
|
88 |
with torch.no_grad():
|
89 |
outputs = model(input_ids).logits.detach().cpu().numpy().squeeze()
|
90 |
|
91 |
+
# Calculate scores and normalize them
|
92 |
scores = expit(outputs[:, 1] - outputs[:, 0])
|
93 |
normalized_scores = normalize_scores(scores)
|
94 |
|
95 |
+
# Prepare the result string, including only amino acid residues
|
96 |
result_str = "\n".join([
|
97 |
f"{res.get_resname()} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}"
|
98 |
+
for i, res in enumerate(chain) if res.get_resname().strip() in aa_dict
|
99 |
])
|
100 |
|
101 |
+
# Save predictions to file
|
102 |
with open(f"{pdb_id}_predictions.txt", "w") as f:
|
103 |
f.write(result_str)
|
104 |
|
105 |
return result_str, pdb_path, f"{pdb_id}_predictions.txt"
|
106 |
|
107 |
+
reps = [{"model": 0, "style": "cartoon", "color": "spectrum"}]
|
108 |
+
|
109 |
# Gradio UI
|
110 |
with gr.Blocks() as demo:
|
111 |
gr.Markdown("# Protein Binding Site Prediction")
|