"""Smoke and round-trip tests for ``chroma.data.protein.Protein``."""

import copy
import tempfile
from pathlib import Path

import pytest

import chroma
from chroma.data.protein import Protein

# Directory that contains the ``chroma`` package; the test resources below
# are resolved relative to it.
BASE_PATH = str(Path(chroma.__file__).parent.parent)
PROTEIN_SINGLE_CHAIN = BASE_PATH + "/tests/resources/4kw4.cif"
PROTEIN_COMPLEX = BASE_PATH + "/tests/resources/3hn3.cif"
CIF_TRAJECTORY = BASE_PATH + "/tests/resources/chroma_trajectory.cif"
SEQUENCE = "MSKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTL"
PDB_ID = "1B9C"

# Every entry is a string; ``test_Protein`` picks the loader from its shape
# (file suffix, four-character length, or anything else as a raw sequence).
TESTS = [PROTEIN_SINGLE_CHAIN, PROTEIN_COMPLEX, SEQUENCE, PDB_ID]


@pytest.mark.parametrize("protein_path", TESTS)
def test_Protein(protein_path):
    """Load a protein from one input and exercise loading, methods and I/O.

    Args:
        protein_path: A ``.pdb`` or ``.cif`` file path, a four-character
            string (passed to ``Protein.from_PDBID``), or any other string
            (passed to ``Protein.from_sequence``).
    """
    # Loading smoke tests: choose the constructor from the input's shape.
    if protein_path.endswith(".pdb"):
        protein = Protein.from_PDB(protein_path)
    elif protein_path.endswith(".cif"):
        protein = Protein.from_CIF(protein_path)
    elif len(protein_path) == 4:
        protein = Protein.from_PDBID(protein_path)
    else:
        # Protein sequence input
        protein = Protein.from_sequence(protein_path)

    # Selection smoke test: select all structured residues.  Only the call
    # itself is exercised, so the result is intentionally not bound.
    protein.get_mask("all").bool()

    # Method smoke tests
    protein.canonicalize()
    protein.sequence()
    len(protein)
    protein.display()

    # Save / load / validate cycles, all compared against this reference.
    X, C, S = protein.to_XCS()

    # XCS round trip
    xcs_cycle_protein = Protein.from_XCS(X, C, S)
    Xt, Ct, St = xcs_cycle_protein.to_XCS()
    assert (Xt == X).all() and (Ct == C).all() and (St == S).all()

    # CIF round trip
    with tempfile.NamedTemporaryFile(suffix=".cif", delete=True) as temp_file:
        protein.to_CIF(temp_file.name)
        Xt, Ct, St = Protein.from_CIF(temp_file.name).to_XCS()
        assert (Xt == X).all() and (Ct == C).all() and (St == S).all()

    # PDB round trip: the reloaded protein's length is checked against the
    # number of structured residues rather than against the XCS tensors.
    with tempfile.NamedTemporaryFile(suffix=".pdb", delete=True) as temp_file:
        protein.to_PDB(temp_file.name)
        structured_residues = protein.sys.num_structured_residues()
        round_trip_protein = Protein.from_PDB(temp_file.name)
        assert len(round_trip_protein) == structured_residues

    # Smoke test copy behavior
    copy.copy(protein)
    copy.deepcopy(protein)


def compare_proteins(A, B):
    """Return whether two proteins have matching underlying systems.

    The ``sys`` attributes of ``A`` and ``B`` are compared level by level:
    system-wide counts, then chains, residues, atoms and atom locations.
    Location coordinates are compared with an absolute tolerance of 0.01;
    every other field must be exactly equal.

    Args:
        A: First protein; must expose a ``sys`` attribute.
        B: Second protein; must expose a ``sys`` attribute.

    Returns:
        ``True`` if every compared field matches, ``False`` otherwise.  For
        mismatches below the system level the first differing pair is
        printed before returning.
    """
    A, B = A.sys, B.sys
    if (
        A.num_chains() != B.num_chains()
        or A.num_residues() != B.num_residues()
        or A.num_atoms() != B.num_atoms()
        or A.num_atom_locations() != B.num_atom_locations()
        or A.num_structured_residues() != B.num_structured_residues()
    ):
        return False

    for cA, cB in zip(A.chains(), B.chains()):
        if (
            cA.num_residues() != cB.num_residues()
            or cA.cid != cB.cid
            or cA.segid != cB.segid
            or cA.authid != cB.authid
        ):
            print(f"chains {cA} and {cB} differ")
            return False

        for rA, rB in zip(cA.residues(), cB.residues()):
            if (
                rA.num_atoms() != rB.num_atoms()
                # Compare against rB: the previous self-comparison
                # (rA.name != rA.name) could never detect a mismatch.
                or rA.name != rB.name
                or rA.num != rB.num
                or rA.authid != rB.authid
                or rA.icode != rB.icode
            ):
                print(f"residues {rA} and {rB} differ")
                return False

            for aA, aB in zip(rA.atoms(), rB.atoms()):
                if (
                    aA.num_locations() != aB.num_locations()
                    or aA.name != aB.name
                    or aA.het != aB.het
                ):
                    print(f"atoms {aA} and {aB} differ")
                    return False

                for lA, lB in zip(aA.locations(), aB.locations()):
                    if (
                        (abs(lA.coors - lB.coors) > 0.01).any()
                        or lA.occ != lB.occ
                        or lA.B != lB.B
                        or lA.alt != lB.alt
                    ):
                        print(f"locations {lA} and {lB} differ")
                        return False
    return True


def test_xcs_trajectory():
    """Round-trip a trajectory through the generic ``Protein`` constructor."""
    # Load trajectory
    protein = Protein(CIF_TRAJECTORY)

    # Save out trajectory
    X_list, C, S = protein.to_XCS_trajectory()

    # Load back in via XCS
    protein_xcs_load = Protein(X_list, C, S)
    assert compare_proteins(protein, protein_xcs_load)

    # Print trajectory
    print(protein)

    # Display trajectory
    protein.display()


def test_trajectory_round_trip():
    """Round-trip a trajectory through ``from_XCS_trajectory`` and back."""
    # Load trajectory
    protein = Protein(CIF_TRAJECTORY)

    # Save out trajectory
    X_list, C, S = protein.to_XCS_trajectory()

    # Load back in via XCS
    protein_xcs_load = Protein.from_XCS_trajectory(X_list, C, S)
    assert compare_proteins(protein, protein_xcs_load)

    # Turn back into XCS
    X_list_1, C_1, S_1 = protein_xcs_load.to_XCS_trajectory()
    assert len(X_list) == len(X_list_1)
    # all(...) is required here: asserting the bare list would pass for any
    # non-empty trajectory, whether or not the frames match.
    assert all((x1 == x2).all() for x1, x2 in zip(X_list, X_list_1))
    assert (C == C_1).all()
    assert (S == S_1).all()


@pytest.mark.parametrize("pdb_id", ["3bdi", "5sv5"])
def test_edge_cases(pdb_id):
    """Check that these PDB entries load with ``canonicalize=True``."""
    Protein(pdb_id, canonicalize=True)