l-mizrahi committed on
Commit
f431a67
·
1 Parent(s): 54915a9

add dockerfile and methods for secretion scores

Browse files
Files changed (2) hide show
  1. Dockerfile.secretion +20 -0
  2. secretion_scores.py +88 -0
Dockerfile.secretion ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Image for computing secretion-related scores: Rosetta plus Aggrescan3D.

# Use official Rosetta as base image.
# NOTE(review): `latest` is a floating tag, so rebuilds are not reproducible;
# consider pinning a specific tag or digest once a known-good one is chosen.
FROM rosettacommons/rosetta:latest

# Install system dependencies.
# --no-install-recommends keeps the layer small; ca-certificates is listed
# explicitly because it is normally pulled in only as a recommendation of wget
# and is needed for HTTPS downloads. The apt lists are removed in the same
# layer so they never end up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    ca-certificates \
    wget \
    && rm -rf /var/lib/apt/lists/*

# Set up working directory
WORKDIR /app

# Copy the Aggrescan3D package from the build context
COPY aggrescan3d /app/aggrescan3d

# Install Aggrescan3D and its dependencies (no pip cache kept in the image)
RUN pip install --no-cache-dir /app/aggrescan3d

# Default command: drop into an interactive shell
CMD ["/bin/bash"]
secretion_scores.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ import pathlib
3
+ import pyrosetta
4
+ from pyrosetta.rosetta.protocols.protein_interface_design import movers
5
+
6
+ import aggrescan
7
+
8
+ pyrosetta.init()
9
+
10
def get_sap_scores(structure_path: str | pathlib.Path) -> list[float]:
    """Compute a SAP score for each residue of a structure.

    The structure is loaded into a pose and handed to Rosetta's
    ``calculate_per_res_sap``. A single select-everything residue selector is
    supplied for all three selector arguments of that call, so no residue is
    excluded from any part of the calculation.

    Args:
        structure_path: Path to the structure file.

    Returns:
        The per-residue SAP scores as a list, in the order Rosetta yields them.
    """
    selectors = pyrosetta.rosetta.core.select.residue_selector
    sap = pyrosetta.rosetta.core.pack.guidance_scoreterms.sap

    loaded_pose = pyrosetta.pose_from_file(str(structure_path))
    # One selector instance is reused for every selector slot of the SAP call.
    everything = selectors.TrueResidueSelector()
    sap_by_residue = sap.calculate_per_res_sap(
        loaded_pose, everything, everything, everything
    )
    return list(sap_by_residue)
32
+
33
+
34
def get_dG_ins_pred_scores(structure_path: str | pathlib.Path) -> list[float]:
    """Compute a dG_ins_pred score for each residue of a structure.

    Every residue is scored by passing the sub-sequence centred on it to
    ``SecretionOptimizationMover.dG_ins_for_window``. The window reaches 9
    residues to either side (19 residues in total) and is truncated wherever
    it would run past the start or end of the chain, so residues near a
    terminus are scored on a shorter sub-sequence. Chains are processed one
    after another and windows never cross a chain boundary.

    Args:
        structure_path: Path to the structure file.

    Returns:
        List of per-residue dG_ins scores, chain by chain in pose order.
    """
    pose = pyrosetta.pose_from_file(str(structure_path))
    chain_total = pose.num_chains()
    window_scorer = movers.SecretionOptimizationMover()
    half_window = 19 // 2  # residues on each side of the centre residue
    scores = []

    # Rosetta chain numbering starts at 1.
    for chain_index in range(1, chain_total + 1):
        chain_seq = pose.chain_sequence(chain_index)
        chain_len = len(chain_seq)
        for centre in range(chain_len):
            # Clamp the window to the chain; this yields the full 19-mer in
            # the interior and a shortened window near either terminus.
            lower = max(0, centre - half_window)
            upper = min(chain_len, centre + half_window + 1)
            scores.append(window_scorer.dG_ins_for_window(chain_seq[lower:upper]))

    return scores
75
+
76
+
77
def get_aggrescan_scores(structure_path: str | pathlib.Path) -> list[float]:
    """Calculate Aggrescan scores for a structure.

    Runs ``aggrescan.run_aggrescan3d`` on the structure and collects the
    ``score`` attribute of every item it returns.

    Args:
        structure_path: Path to the structure file.

    Returns:
        List of Aggrescan scores, one per item returned by
        ``run_aggrescan3d`` and in the same order (presumably one per
        residue; confirm against the aggrescan package).
    """
    # NOTE(review): scores are assumed to be floats; the element type is not
    # visible from this module.
    agg_residues = aggrescan.run_aggrescan3d(str(structure_path))
    return [residue.score for residue in agg_residues]