File size: 2,869 Bytes
a3f3d91 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
from __future__ import annotations
import pathlib
import pyrosetta
from pyrosetta.rosetta.protocols.protein_interface_design import movers
import aggrescan
# Start the PyRosetta runtime once at import time; the scoring functions in
# this module call pyrosetta.pose_from_file and rely on it being initialised.
pyrosetta.init()
def get_sap_scores(structure_path: str | pathlib.Path) -> list[float]:
    """Compute the SAP score of every residue in a structure.

    The structure is loaded as a PyRosetta pose and scored with
    ``calculate_per_res_sap``, using one select-everything residue selector
    for all three selector arguments.

    Args:
        structure_path: Location of the structure file to load.

    Returns:
        The per-residue SAP scores, converted to a plain Python list.
    """
    loaded_pose = pyrosetta.pose_from_file(str(structure_path))
    # A single TrueResidueSelector instance is reused for every selector slot.
    select_all = pyrosetta.rosetta.core.select.residue_selector.TrueResidueSelector()
    sap_module = pyrosetta.rosetta.core.pack.guidance_scoreterms.sap
    return list(
        sap_module.calculate_per_res_sap(
            loaded_pose, select_all, select_all, select_all
        )
    )
def get_dG_ins_pred_scores(structure_path: str | pathlib.Path) -> list[float]:
    """Predict a dG_ins value for every residue of every chain.

    Each residue is scored by passing the sub-sequence centred on it to
    ``SecretionOptimizationMover.dG_ins_for_window``. The window reaches up to
    nine residues to either side (19 residues in total) and is truncated
    wherever it would extend past the start or the end of the chain.

    Args:
        structure_path: Location of the structure file to load.

    Returns:
        One dG_ins score per residue, with chains concatenated in chain order.
    """
    pose = pyrosetta.pose_from_file(str(structure_path))
    chain_count = pose.num_chains()
    window_scorer = movers.SecretionOptimizationMover()
    flank = 19 // 2  # residues taken on each side of the centre residue
    scores = []
    # Rosetta chain ids are 1-based.
    for chain_number in range(1, chain_count + 1):
        chain_seq = pose.chain_sequence(chain_number)
        for centre in range(len(chain_seq)):
            # The left edge is clamped explicitly; slicing already clamps the
            # right edge to the end of the chain.
            window = chain_seq[max(0, centre - flank):centre + flank + 1]
            scores.append(window_scorer.dG_ins_for_window(window))
    return scores
def get_aggrescan_scores(structure_path: str | pathlib.Path) -> list[float]:
    """Calculate Aggrescan scores for a structure.

    Args:
        structure_path: Path to the structure file.

    Returns:
        List holding the ``score`` attribute of each entry returned by
        ``aggrescan.run_aggrescan3d``, in the order the entries were returned.
    """
    # The result is a list, not a dict: only the scores are kept, without keys.
    return [
        residue.score
        for residue in aggrescan.run_aggrescan3d(str(structure_path))
    ]
|