File size: 2,869 Bytes
a3f3d91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
from __future__ import annotations
import pathlib
import pyrosetta
from pyrosetta.rosetta.protocols.protein_interface_design import movers

import aggrescan

# Initialise PyRosetta once, at import time, with its default options.
# NOTE(review): the functions below call into pyrosetta and presumably rely on
# this having run first -- confirm before moving it behind a main guard.
pyrosetta.init()

def get_sap_scores(structure_path: str | pathlib.Path) -> list[float]:
    """Calculate per-residue SAP scores for a structure.

    The structure is loaded into a PyRosetta pose and passed to
    ``calculate_per_res_sap`` together with a selector that matches every
    residue, which is used for all three selector arguments.

    Args:
        structure_path: Path to the structure file.

    Returns:
        List of the SAP scores produced by ``calculate_per_res_sap``
        (presumably one value per residue, in pose order -- confirm against
        the Rosetta documentation).
    """
    pose = pyrosetta.pose_from_file(str(structure_path))
    # A single "select everything" selector is reused for each of the three
    # selector arguments, so no residue is excluded from the calculation.
    all_residues_selector = (
        pyrosetta.rosetta.core.select.residue_selector.TrueResidueSelector()
    )
    per_res_sap_scores = (
        pyrosetta.rosetta.core.pack.guidance_scoreterms.sap.calculate_per_res_sap(
            pose,
            all_residues_selector,
            all_residues_selector,
            all_residues_selector,
        )
    )
    # Convert the Rosetta container into a plain Python list for callers.
    return list(per_res_sap_scores)


def get_dG_ins_pred_scores(structure_path: str | pathlib.Path) -> list[float]:
    """Calculate per-residue dG_ins_pred scores.

    Every residue of every chain is scored by handing a sub-sequence centred
    on that residue to ``SecretionOptimizationMover.dG_ins_for_window``. The
    window reaches up to 9 residues to either side (19 residues in total) and
    is truncated wherever it would run past the start or end of the chain, so
    residues near the chain ends are scored over shorter sub-sequences.

    Args:
        structure_path: Path to the structure file.

    Returns:
        List of dG_ins values, one per residue, ordered chain by chain.
    """
    pose = pyrosetta.pose_from_file(str(structure_path))
    chain_count = pose.num_chains()
    window_scorer = movers.SecretionOptimizationMover()
    max_window_size = 19
    half_window = max_window_size // 2

    scores = []
    # Chains are numbered from 1.
    for chain_number in range(1, chain_count + 1):
        chain_seq = pose.chain_sequence(chain_number)
        # The lower bound is clamped at 0 explicitly; the upper bound needs no
        # clamp because slicing stops at the end of the sequence by itself.
        scores.extend(
            window_scorer.dG_ins_for_window(
                chain_seq[max(0, centre - half_window):centre + half_window + 1]
            )
            for centre in range(len(chain_seq))
        )

    return scores


def get_aggrescan_scores(structure_path: str | pathlib.Path) -> list[float]:
    """Calculate Aggrescan scores for a structure.

    Args:
        structure_path: Path to the structure file.

    Returns:
        List holding the ``score`` attribute of each item returned by
        ``aggrescan.run_aggrescan3d``, in the order they are returned
        (presumably one item per residue -- confirm against the aggrescan
        wrapper).
    """
    agg_residues = aggrescan.run_aggrescan3d(str(structure_path))
    return [residue.score for residue in agg_residues]