File size: 4,964 Bytes
ce7bf5b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import copy
import tempfile
from pathlib import Path

import pytest

import chroma
from chroma.data.protein import Protein

# Directory that contains the ``chroma`` package directory.
BASE_PATH = str(Path(chroma.__file__).parent.parent)

# Structure files located under ``tests/resources`` relative to BASE_PATH.
PROTEIN_SINGLE_CHAIN = f"{BASE_PATH}/tests/resources/4kw4.cif"
PROTEIN_COMPLEX = f"{BASE_PATH}/tests/resources/3hn3.cif"
CIF_TRAJECTORY = f"{BASE_PATH}/tests/resources/chroma_trajectory.cif"

# Non-file inputs: a sequence string (routed to ``Protein.from_sequence`` by
# ``test_Protein``) and a 4-character ID (routed to ``Protein.from_PDBID``).
SEQUENCE = "MSKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTL"
PDB_ID = "1B9C"

# Inputs that ``test_Protein`` is parametrized over.
TESTS = [
    PROTEIN_SINGLE_CHAIN,
    PROTEIN_COMPLEX,
    SEQUENCE,
    PDB_ID,
]


@pytest.mark.parametrize("protein_path", TESTS)
def test_Protein(protein_path):
    """Smoke-test loading, basic methods and save/load round trips of ``Protein``.

    Args:
        protein_path: One entry of ``TESTS``. A string ending in ``.pdb`` or
            ``.cif`` is loaded as a file, a 4-character string is passed to
            ``Protein.from_PDBID``, and anything else is passed to
            ``Protein.from_sequence``.
    """
    # Loading Smoke Tests
    if protein_path.endswith(".pdb"):
        protein = Protein.from_PDB(protein_path)
    elif protein_path.endswith(".cif"):
        protein = Protein.from_CIF(protein_path)
    elif len(protein_path) == 4:
        protein = Protein.from_PDBID(protein_path)
    else:  # Protein Sequence Input
        protein = Protein.from_sequence(protein_path)

    # Selection Smoke Test
    # Select all structured residues; only checks that the call succeeds,
    # so the resulting mask is intentionally not kept.
    protein.get_mask("all").bool()

    # Method Smoke Tests
    protein.canonicalize()
    protein.sequence()
    len(protein)
    protein.display()

    # Cycles save / load / validate
    X, C, S = protein.to_XCS()

    # XCS
    xcs_cycle_protein = Protein.from_XCS(X, C, S)
    Xt, Ct, St = xcs_cycle_protein.to_XCS()
    assert (Xt == X).all() and (Ct == C).all() and (St == S).all()

    # File round trips are written inside a temporary directory rather than to
    # an open NamedTemporaryFile: reopening such a file by name while it is
    # still open is platform dependent (it is not possible on Windows).
    with tempfile.TemporaryDirectory() as temp_dir:
        # CIF
        cif_path = str(Path(temp_dir) / "round_trip.cif")
        protein.to_CIF(cif_path)
        Xt, Ct, St = Protein.from_CIF(cif_path).to_XCS()
        assert (Xt == X).all() and (Ct == C).all() and (St == S).all()

        # PDB: only the residue count is compared after the round trip.
        pdb_path = str(Path(temp_dir) / "round_trip.pdb")
        protein.to_PDB(pdb_path)
        structured_residues = protein.sys.num_structured_residues()
        round_trip_protein = Protein.from_PDB(pdb_path)
        assert len(round_trip_protein) == structured_residues

    # smoke test copy behavior
    copy.copy(protein)
    copy.deepcopy(protein)


def compare_proteins(A, B):
    """Return True when two proteins have matching underlying systems.

    The comparison walks ``A.sys`` and ``B.sys`` in lockstep: overall counts
    first, then every chain, residue, atom and atom location. A short
    diagnostic is printed for the first chain/residue/atom/location mismatch.

    Args:
        A: Object exposing a ``sys`` attribute to compare.
        B: Object exposing a ``sys`` attribute to compare against ``A``.

    Returns:
        bool: ``False`` at the first difference found, ``True`` otherwise.
        Coordinates are considered equal when every component differs by at
        most 0.01; all other fields must compare equal exactly.
    """
    A, B = A.sys, B.sys

    # Global counts must agree before the element-wise walk; this also keeps
    # the ``zip`` calls below from silently truncating the chain list.
    if (
        A.num_chains() != B.num_chains()
        or A.num_residues() != B.num_residues()
        or A.num_atoms() != B.num_atoms()
        or A.num_atom_locations() != B.num_atom_locations()
        or A.num_structured_residues() != B.num_structured_residues()
    ):
        return False

    for cA, cB in zip(A.chains(), B.chains()):
        if (
            cA.num_residues() != cB.num_residues()
            or cA.cid != cB.cid
            or cA.segid != cB.segid
            or cA.authid != cB.authid
        ):
            print(f"chains {cA} and {cB} differ")
            return False
        for rA, rB in zip(cA.residues(), cB.residues()):
            if (
                rA.num_atoms() != rB.num_atoms()
                # Compare the residue names of A and B (previously rA was
                # compared with itself, so name mismatches went undetected).
                or rA.name != rB.name
                or rA.num != rB.num
                or rA.authid != rB.authid
                or rA.icode != rB.icode
            ):
                print(f"residues {rA} and {rB} differ")
                return False
            for aA, aB in zip(rA.atoms(), rB.atoms()):
                if (
                    aA.num_locations() != aB.num_locations()
                    or aA.name != aB.name
                    or aA.het != aB.het
                ):
                    print(f"atoms {aA} and {aB} differ")
                    return False
                for lA, lB in zip(aA.locations(), aB.locations()):
                    if (
                        # Element-wise absolute tolerance on the coordinates.
                        (abs(lA.coors - lB.coors) > 0.01).any()
                        or lA.occ != lB.occ
                        or lA.B != lB.B
                        or lA.alt != lB.alt
                    ):
                        print(f"locations {lA} and {lB} differ")
                        return False
    return True


def test_xcs_trajectory():
    """Export a CIF trajectory to XCS and rebuild it through ``Protein(...)``."""
    # Read the trajectory fixture through the generic constructor.
    trajectory = Protein(CIF_TRAJECTORY)

    # Export to XCS form and build a second protein from that export.
    X_seq, C, S = trajectory.to_XCS_trajectory()
    rebuilt = Protein(X_seq, C, S)
    assert compare_proteins(trajectory, rebuilt)

    # Smoke-test the string representation and the display call.
    print(trajectory)
    trajectory.display()


def test_trajectory_round_trip():
    """Round-trip a CIF trajectory through ``from_XCS_trajectory`` and back.

    Loads the trajectory fixture, exports it with ``to_XCS_trajectory``,
    rebuilds a protein with ``Protein.from_XCS_trajectory`` and checks that
    both the rebuilt protein and a second export match the originals.
    """
    # Load Trajectory
    protein = Protein(CIF_TRAJECTORY)

    # Save out Trajectory
    X_list, C, S = protein.to_XCS_trajectory()

    # Load back in via XCS
    protein_xcs_load = Protein.from_XCS_trajectory(X_list, C, S)
    assert compare_proteins(protein, protein_xcs_load)

    # Turn back into XCS
    X_list_1, C_1, S_1 = protein_xcs_load.to_XCS_trajectory()
    assert len(X_list) == len(X_list_1)
    # ``all(...)`` is required here: asserting the bare list would pass for any
    # non-empty list, even when individual frames differ.
    assert all((x1 == x2).all() for x1, x2 in zip(X_list, X_list_1))
    assert (C == C_1).all()
    assert (S == S_1).all()


@pytest.mark.parametrize("pdb_id", ["3bdi", "5sv5"])
def test_edge_cases(pdb_id):
    """Smoke-test that ``Protein`` can be built with ``canonicalize=True``.

    Args:
        pdb_id: Identifier string passed as the first constructor argument
            (presumably a PDB ID; confirm against ``Protein``).
    """
    # Construction alone is the check; the instance is not inspected.
    Protein(
        pdb_id,
        canonicalize=True,
    )