Spaces:
Sleeping
Sleeping
File size: 4,964 Bytes
ce7bf5b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 |
import copy
import tempfile
from pathlib import Path
import pytest
import chroma
from chroma.data.protein import Protein
# Directory two levels above the ``chroma`` package's ``__file__``.
BASE_PATH = str(Path(chroma.__file__).parent.parent)

# Structure files under ``tests/resources`` used by the tests below.
PROTEIN_SINGLE_CHAIN = f"{BASE_PATH}/tests/resources/4kw4.cif"
PROTEIN_COMPLEX = f"{BASE_PATH}/tests/resources/3hn3.cif"
CIF_TRAJECTORY = f"{BASE_PATH}/tests/resources/chroma_trajectory.cif"

# Non-file inputs: a raw amino-acid sequence and a four-character PDB ID.
SEQUENCE = "MSKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTL"
PDB_ID = "1B9C"

# Every kind of input that ``test_Protein`` is parametrized over.
TESTS = [
    PROTEIN_SINGLE_CHAIN,
    PROTEIN_COMPLEX,
    SEQUENCE,
    PDB_ID,
]
@pytest.mark.parametrize("protein_path", TESTS)
def test_Protein(protein_path):
    """Smoke-test loading, basic methods, round trips and copying of ``Protein``.

    ``protein_path`` is one entry of ``TESTS``. The constructor is chosen from
    the shape of the string: a ``.pdb`` or ``.cif`` suffix, a four-character
    PDB ID, or otherwise a raw sequence.
    """
    # Pick the constructor that matches the kind of input.
    if protein_path.endswith(".pdb"):
        loader = Protein.from_PDB
    elif protein_path.endswith(".cif"):
        loader = Protein.from_CIF
    elif len(protein_path) == 4:
        loader = Protein.from_PDBID
    else:
        # Anything else is treated as a protein sequence.
        loader = Protein.from_sequence
    protein = loader(protein_path)

    # Selection smoke test: the "all" mask only has to be computable.
    protein.get_mask("all").bool()

    # Method smoke tests: each call only has to complete without raising.
    protein.canonicalize()
    protein.sequence()
    len(protein)
    protein.display()

    # Reference tensors that the round trips below are compared against.
    X, C, S = protein.to_XCS()
    reference = (X, C, S)

    # XCS -> Protein -> XCS must reproduce the tensors exactly.
    xcs_cycle_protein = Protein.from_XCS(X, C, S)
    for actual, expected in zip(xcs_cycle_protein.to_XCS(), reference):
        assert (actual == expected).all()

    # CIF round trip must reproduce the tensors exactly as well.
    with tempfile.NamedTemporaryFile(suffix=".cif", delete=True) as cif_file:
        protein.to_CIF(cif_file.name)
        reloaded = protein.from_CIF(cif_file.name).to_XCS()
        for actual, expected in zip(reloaded, reference):
            assert (actual == expected).all()

    # PDB round trip: only the length is checked, against the number of
    # structured residues counted after writing the file.
    with tempfile.NamedTemporaryFile(suffix=".pdb", delete=True) as pdb_file:
        protein.to_PDB(pdb_file.name)
        expected_length = protein.sys.num_structured_residues()
        round_trip_protein = protein.from_PDB(pdb_file.name)
        assert len(round_trip_protein) == expected_length

    # Smoke test copy behavior.
    copy.copy(protein)
    copy.deepcopy(protein)
def compare_proteins(A, B):
    """Return True when two proteins agree at every level of their hierarchy.

    The ``sys`` attribute of both inputs is walked level by level (chains,
    residues, atoms, atom locations), comparing counts, identifiers and
    coordinates. A mismatch found below the top-level counts is printed
    before returning.

    Args:
        A: Object exposing a ``sys`` attribute (e.g. a ``Protein``).
        B: Object of the same kind to compare against ``A``.

    Returns:
        bool: True if every compared field matches and no coordinate
        component differs by more than 0.01; False otherwise.
    """
    A, B = A.sys, B.sys

    # Global counts first: cheap, and rejects grossly different systems.
    if (
        A.num_chains() != B.num_chains()
        or A.num_residues() != B.num_residues()
        or A.num_atoms() != B.num_atoms()
        or A.num_atom_locations() != B.num_atom_locations()
        or A.num_structured_residues() != B.num_structured_residues()
    ):
        return False

    # zip() is safe at each level because the child counts of the two
    # parents are compared before their children are iterated.
    for cA, cB in zip(A.chains(), B.chains()):
        if (
            cA.num_residues() != cB.num_residues()
            or cA.cid != cB.cid
            or cA.segid != cB.segid
            or cA.authid != cB.authid
        ):
            print(f"chains {cA} and {cB} differ")
            return False

        for rA, rB in zip(cA.residues(), cB.residues()):
            if (
                rA.num_atoms() != rB.num_atoms()
                # Compare against rB: the original compared rA.name with
                # itself, so residue-name mismatches were never detected.
                or rA.name != rB.name
                or rA.num != rB.num
                or rA.authid != rB.authid
                or rA.icode != rB.icode
            ):
                print(f"residues {rA} and {rB} differ")
                return False

            for aA, aB in zip(rA.atoms(), rB.atoms()):
                if (
                    aA.num_locations() != aB.num_locations()
                    or aA.name != aB.name
                    or aA.het != aB.het
                ):
                    print(f"atoms {aA} and {aB} differ")
                    return False

                for lA, lB in zip(aA.locations(), aB.locations()):
                    if (
                        # Coordinates are compared with an absolute
                        # tolerance of 0.01 per component.
                        (abs(lA.coors - lB.coors) > 0.01).any()
                        or lA.occ != lB.occ
                        or lA.B != lB.B
                        or lA.alt != lB.alt
                    ):
                        print(f"locations {lA} and {lB} differ")
                        return False

    return True
def test_xcs_trajectory():
    """Round-trip a CIF trajectory through XCS via the generic constructor.

    The trajectory is loaded with ``Protein(path)``, exported with
    ``to_XCS_trajectory`` and rebuilt with ``Protein(X_list, C, S)``; the
    rebuilt object must compare equal under ``compare_proteins``. Printing
    and displaying the trajectory are exercised as smoke tests.
    """
    trajectory = Protein(CIF_TRAJECTORY)

    # Export the trajectory, then rebuild it from the exported values.
    frames, chain_tensor, sequence_tensor = trajectory.to_XCS_trajectory()
    rebuilt = Protein(frames, chain_tensor, sequence_tensor)
    assert compare_proteins(trajectory, rebuilt)

    # Smoke tests: string conversion and display only have to run.
    print(trajectory)
    trajectory.display()
def test_trajectory_round_trip():
    """Check that a trajectory survives CIF -> XCS -> Protein -> XCS unchanged.

    The trajectory is loaded from ``CIF_TRAJECTORY``, exported with
    ``to_XCS_trajectory``, rebuilt with ``Protein.from_XCS_trajectory`` and
    exported again. The rebuilt protein must match the original under
    ``compare_proteins``, and the second export must equal the first.
    """
    # Load trajectory
    protein = Protein(CIF_TRAJECTORY)

    # Save out trajectory
    X_list, C, S = protein.to_XCS_trajectory()

    # Load back in via XCS
    protein_xcs_load = Protein.from_XCS_trajectory(X_list, C, S)
    assert compare_proteins(protein, protein_xcs_load)

    # Turn back into XCS
    X_list_1, C_1, S_1 = protein_xcs_load.to_XCS_trajectory()

    assert len(X_list) == len(X_list_1)
    # all(...) is required here: asserting the bare list of comparisons
    # passes for any non-empty list, regardless of what it contains.
    assert all((x1 == x2).all() for x1, x2 in zip(X_list, X_list_1))
    assert (C == C_1).all()
    assert (S == S_1).all()
@pytest.mark.parametrize("pdb_id", ["3bdi", "5sv5"])
def test_edge_cases(pdb_id):
    """Smoke test: building a canonicalized ``Protein`` from ``pdb_id`` must not raise."""
    _ = Protein(pdb_id, canonicalize=True)
|