folding-studio-demo / secretion_scores.py
jfaustin's picture
secretion-scores (#4)
a3f3d91 verified
from __future__ import annotations
import pathlib
import pyrosetta
from pyrosetta.rosetta.protocols.protein_interface_design import movers
import aggrescan
# Initialize the PyRosetta runtime with default options (no arguments passed).
# NOTE(review): this runs at import time, so merely importing this module
# triggers the initialization as a side effect.
pyrosetta.init()
def get_sap_scores(structure_path: str | pathlib.Path) -> list[float]:
    """Compute the SAP score of every residue in a structure.

    The structure is loaded into a pose and scored with Rosetta's
    ``calculate_per_res_sap``. A single select-everything residue selector is
    passed for all three selector arguments of that call.

    Args:
        structure_path: Path to the structure file.

    Returns:
        List of per-residue SAP scores, in the order produced by Rosetta.
    """
    rosetta_core = pyrosetta.rosetta.core
    loaded_pose = pyrosetta.pose_from_file(str(structure_path))

    # One selector matching every residue, reused for each selector argument.
    select_all = rosetta_core.select.residue_selector.TrueResidueSelector()

    sap_values = rosetta_core.pack.guidance_scoreterms.sap.calculate_per_res_sap(
        loaded_pose,
        select_all,
        select_all,
        select_all,
    )
    # Convert the Rosetta container into a plain Python list.
    return list(sap_values)
def get_dG_ins_pred_scores(structure_path: str | pathlib.Path) -> list[float]:
    """Compute a dG_ins_pred score for every residue of every chain.

    Each residue is scored by passing a sub-sequence centered on it to
    ``SecretionOptimizationMover.dG_ins_for_window``. The window spans up to
    9 residues on each side (19 in total); near either end of a chain the
    window is truncated at the chain boundary, so it is shorter there.

    Args:
        structure_path: Path to the structure file.

    Returns:
        List of per-residue dG_ins scores, chain after chain in pose order.
    """
    pose = pyrosetta.pose_from_file(str(structure_path))
    num_chains = pose.num_chains()
    secoptmover = movers.SecretionOptimizationMover()

    max_window_size = 19
    half_window = max_window_size // 2  # residues taken on each side

    per_res_scores = []
    # Chain ids are 1-based, hence the range from 1 to num_chains inclusive.
    for chain_id in range(1, num_chains + 1):
        sequence = pose.chain_sequence(chain_id)
        seq_len = len(sequence)
        for center in range(seq_len):
            # Clamp the window to the chain so that residues close to an end
            # get a truncated (smaller) window instead of an out-of-range one.
            lo = max(0, center - half_window)
            hi = min(seq_len, center + half_window + 1)
            window = sequence[lo:hi]
            per_res_scores.append(secoptmover.dG_ins_for_window(window))
    return per_res_scores
def get_aggrescan_scores(structure_path: str | pathlib.Path) -> list[float]:
    """Calculate Aggrescan scores for a structure.

    Runs ``aggrescan.run_aggrescan3d`` on the structure file and collects the
    ``score`` attribute of every item it returns.

    Args:
        structure_path: Path to the structure file.

    Returns:
        List of Aggrescan scores, one per item returned by
        ``aggrescan.run_aggrescan3d`` and in the same order.
    """
    # NOTE(review): the items are presumably per-residue results — confirm
    # against the aggrescan module.
    agg_residues = aggrescan.run_aggrescan3d(str(structure_path))
    return [residue.score for residue in agg_residues]