|
from __future__ import annotations |
|
import pathlib |
|
import pyrosetta |
|
from pyrosetta.rosetta.protocols.protein_interface_design import movers |
|
|
|
import aggrescan |
|
|
|
# Initialise PyRosetta once, as an import-time side effect, before any of the
# pyrosetta-based functions in this module are called.
pyrosetta.init()
|
|
|
def get_sap_scores(structure_path: str | pathlib.Path) -> list[float]:
    """Compute a SAP score for every residue of a structure.

    The structure is loaded into a pose and scored with Rosetta's
    ``calculate_per_res_sap``. A single ``TrueResidueSelector`` instance is
    passed for all three selector arguments, so no residue is excluded from
    any part of the calculation.

    Args:
        structure_path: Path to the structure file.

    Returns:
        List of per-residue SAP scores, in the order Rosetta returns them.
    """
    core = pyrosetta.rosetta.core
    loaded_pose = pyrosetta.pose_from_file(str(structure_path))

    # One selector that matches every residue, reused for each selector slot.
    select_all = core.select.residue_selector.TrueResidueSelector()

    sap_values = core.pack.guidance_scoreterms.sap.calculate_per_res_sap(
        loaded_pose, select_all, select_all, select_all
    )
    # Convert the Rosetta container into a plain Python list.
    return list(sap_values)
|
|
|
|
|
def get_dG_ins_pred_scores(structure_path: str | pathlib.Path) -> list[float]:
    """Compute a dG_ins_pred score for every residue of a structure.

    Each residue is scored by passing a sub-sequence centred on it to
    ``SecretionOptimizationMover.dG_ins_for_window``. The window spans up to
    19 residues (9 on either side of the centre). Close to either end of a
    chain the window is truncated at the chain boundary, so it is shorter
    than 19. Windows never cross chain boundaries: each chain's sequence is
    processed on its own.

    Args:
        structure_path: Path to the structure file.

    Returns:
        List of per-residue dG_ins scores, concatenated chain by chain in
        chain order (chain 1 first).
    """
    pose = pyrosetta.pose_from_file(str(structure_path))
    chain_count = pose.num_chains()
    window_scorer = movers.SecretionOptimizationMover()

    full_window = 19
    flank = full_window // 2  # residues taken on each side of the centre

    scores = []
    # Rosetta chain numbering starts at 1.
    for chain_number in range(1, chain_count + 1):
        chain_seq = pose.chain_sequence(chain_number)
        # Clamping the left edge at 0 and letting the slice clip the right
        # edge at the chain end yields the truncated windows at the termini.
        scores.extend(
            window_scorer.dG_ins_for_window(
                chain_seq[max(0, centre - flank) : centre + flank + 1]
            )
            for centre in range(len(chain_seq))
        )

    return scores
|
|
|
|
|
def get_aggrescan_scores(structure_path: str | pathlib.Path) -> list[float]:
    """Collect the Aggrescan score of every residue of a structure.

    Runs ``aggrescan.run_aggrescan3d`` on the structure file and extracts the
    ``score`` attribute of each residue record it returns.

    Args:
        structure_path: Path to the structure file.

    Returns:
        List of Aggrescan scores, one per residue record, in the order
        returned by ``aggrescan.run_aggrescan3d``. (The scores are presumably
        floats; confirm against the ``aggrescan`` module.)
    """
    # The path is passed as a string, mirroring how the other scorers in this
    # module hand paths to their back ends.
    agg_residues = aggrescan.run_aggrescan3d(str(structure_path))
    return [residue.score for residue in agg_residues]
|
|