Add Dockerfile and methods for secretion scores
Browse files
- Dockerfile.secretion +20 -0
- secretion_scores.py +88 -0
Dockerfile.secretion
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Base image: rosettacommons/rosetta, `latest` tag (not pinned to a version).
FROM rosettacommons/rosetta:latest

# System packages: compiler toolchain (build-essential) and wget.
# The apt package lists are removed in the same layer so they are not kept
# in the image.
RUN apt-get update && apt-get install -y build-essential wget && rm -rf /var/lib/apt/lists/*

# All following instructions run relative to /app.
WORKDIR /app

# Copy the local `aggrescan3d` directory from the build context into the image.
COPY aggrescan3d /app/aggrescan3d

# Install the copied Aggrescan3D package (and whatever dependencies it
# declares) with pip, without keeping pip's download cache.
RUN pip install --no-cache-dir /app/aggrescan3d

# Start an interactive shell when no other command is given.
CMD ["/bin/bash"]
|
secretion_scores.py
ADDED
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from __future__ import annotations
|
2 |
+
import pathlib
|
3 |
+
import pyrosetta
|
4 |
+
from pyrosetta.rosetta.protocols.protein_interface_design import movers
|
5 |
+
|
6 |
+
import aggrescan
|
7 |
+
|
8 |
+
# Initialize PyRosetta once, at import time, with default options.
# NOTE(review): this is a module-level side effect — importing this module
# starts the Rosetta runtime. The functions below call into pyrosetta and
# presumably rely on this having run first; confirm before moving it.
pyrosetta.init()
|
9 |
+
|
10 |
+
def get_sap_scores(structure_path: str | pathlib.Path) -> list[float]:
    """Compute SAP scores for every residue of a structure.

    The structure is loaded into a pose and scored with Rosetta's
    ``calculate_per_res_sap``. The same select-everything residue selector
    is passed for each of the three selector arguments, so no residue is
    excluded from any part of the calculation.

    Args:
        structure_path: Path to the structure file.

    Returns:
        List of per-residue SAP scores, in the order Rosetta returns them.
    """
    loaded_pose = pyrosetta.pose_from_file(str(structure_path))

    core = pyrosetta.rosetta.core
    everything = core.select.residue_selector.TrueResidueSelector()

    sap_values = core.pack.guidance_scoreterms.sap.calculate_per_res_sap(
        loaded_pose, everything, everything, everything
    )
    # Convert the Rosetta container into a plain Python list.
    return list(sap_values)
|
32 |
+
|
33 |
+
|
34 |
+
def get_dG_ins_pred_scores(structure_path: str | pathlib.Path) -> list[float]:
    """Compute a dG_ins,pred score for every residue of a structure.

    Each residue is scored by passing the sub-sequence centred on it to
    ``SecretionOptimizationMover.dG_ins_for_window``. The window spans up to
    19 residues (9 on either side of the centre); close to either end of a
    chain it is truncated at the chain boundary, so it is shorter there.
    Windows never cross chain boundaries: every chain is processed on its
    own, and the results are concatenated in chain order.

    Args:
        structure_path: Path to the structure file.

    Returns:
        List of per-residue dG_ins scores, chain by chain.
    """
    pose = pyrosetta.pose_from_file(str(structure_path))
    chain_count = pose.num_chains()
    scorer = movers.SecretionOptimizationMover()

    full_window = 19
    flank = full_window // 2  # residues taken on each side of the centre

    scores = []
    # Rosetta chain numbering starts at 1.
    for chain_number in range(1, chain_count + 1):
        chain_seq = pose.chain_sequence(chain_number)
        chain_len = len(chain_seq)
        for center in range(chain_len):
            # Clamp the window to the chain so it shrinks near the termini.
            lo = max(0, center - flank)
            hi = min(chain_len, center + flank + 1)
            scores.append(scorer.dG_ins_for_window(chain_seq[lo:hi]))

    return scores
|
75 |
+
|
76 |
+
|
77 |
+
def get_aggrescan_scores(structure_path: str | pathlib.Path) -> list[float]:
    """Calculate Aggrescan scores for a structure.

    Runs ``aggrescan.run_aggrescan3d`` on the structure and collects the
    ``score`` attribute of every record it returns.

    Args:
        structure_path: Path to the structure file.

    Returns:
        List of Aggrescan scores, one per returned record (presumably one
        per residue), in the order ``run_aggrescan3d`` yields them.
    """
    agg_residues = aggrescan.run_aggrescan3d(str(structure_path))
    return [residue.score for residue in agg_residues]
|