File size: 8,949 Bytes
b7731cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
# Copyright 2016 Anthony Bradley.  All rights reserved.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Code handle loading mmtf-python into Biopython's structures."""

from Bio.PDB.StructureBuilder import StructureBuilder
import numpy


class StructureDecoder:
    """Class to pass the data from mmtf-python into a Biopython data structure."""

    def __init__(self):
        """Initialize the class."""
        self.this_type = ""

    def init_structure(
        self,
        total_num_bonds,
        total_num_atoms,
        total_num_groups,
        total_num_chains,
        total_num_models,
        structure_id,
    ):
        """Initialize the structure object.

        :param total_num_bonds: the number of bonds in the structure
        :param total_num_atoms: the number of atoms in the structure
        :param total_num_groups: the number of groups in the structure
        :param total_num_chains: the number of chains in the structure
        :param total_num_models: the number of models in the structure
        :param structure_id: the id of the structure (e.g. PDB id)

        """
        self.structure_builder = StructureBuilder()
        self.structure_builder.init_structure(structure_id=structure_id)
        self.chain_index_to_type_map = {}
        self.chain_index_to_seq_map = {}
        self.chain_index_to_description_map = {}
        self.chain_counter = 0

    def set_atom_info(
        self,
        atom_name,
        serial_number,
        alternative_location_id,
        x,
        y,
        z,
        occupancy,
        temperature_factor,
        element,
        charge,
    ):
        """Create an atom object an set the information.

        :param atom_name: the atom name, e.g. CA for this atom
        :param serial_number: the serial id of the atom (e.g. 1)
        :param alternative_location_id: the alternative location id for the atom, if present
        :param x: the x coordinate of the atom
        :param y: the y coordinate of the atom
        :param z: the z coordinate of the atom
        :param occupancy: the occupancy of the atom
        :param temperature_factor: the temperature factor of the atom
        :param element: the element of the atom, e.g. C for carbon. According to IUPAC. Calcium  is Ca
        :param charge: the formal atomic charge of the atom

        """
        # MMTF uses "\x00" (the NUL character) to indicate to altloc, so convert
        # that to the space required by StructureBuilder
        if alternative_location_id == "\x00":
            alternative_location_id = " "

        # Atom_name is in twice - the full_name is with spaces
        self.structure_builder.init_atom(
            str(atom_name),
            numpy.array((x, y, z), "f"),
            temperature_factor,
            occupancy,
            alternative_location_id,
            str(atom_name),
            serial_number=serial_number,
            element=str(element).upper(),
        )

    def set_chain_info(self, chain_id, chain_name, num_groups):
        """Set the chain information.

        :param chain_id: the asym chain id from mmCIF
        :param chain_name: the auth chain id from mmCIF
        :param num_groups: the number of groups this chain has

        """
        # A Bradley - chose to use chain_name (auth_id) as it complies
        # with current Biopython. Chain_id might be better.
        self.structure_builder.init_chain(chain_id=chain_name)
        if self.chain_index_to_type_map[self.chain_counter] == "polymer":
            self.this_type = " "
        elif self.chain_index_to_type_map[self.chain_counter] == "non-polymer":
            self.this_type = "H"
        elif self.chain_index_to_type_map[self.chain_counter] == "water":
            self.this_type = "W"
        self.chain_counter += 1

    def set_entity_info(self, chain_indices, sequence, description, entity_type):
        """Set the entity level information for the structure.

        :param chain_indices: the indices of the chains for this entity
        :param sequence: the one letter code sequence for this entity
        :param description: the description for this entity
        :param entity_type: the entity type (polymer,non-polymer,water)

        """
        for chain_ind in chain_indices:
            self.chain_index_to_type_map[chain_ind] = entity_type
            self.chain_index_to_seq_map[chain_ind] = sequence
            self.chain_index_to_description_map[chain_ind] = description

    def set_group_info(
        self,
        group_name,
        group_number,
        insertion_code,
        group_type,
        atom_count,
        bond_count,
        single_letter_code,
        sequence_index,
        secondary_structure_type,
    ):
        """Set the information for a group.

        :param group_name: the name of this group, e.g. LYS
        :param group_number: the residue number of this group
        :param insertion_code: the insertion code for this group
        :param group_type: a string indicating the type of group (as found in the chemcomp dictionary.
            Empty string if none available.
        :param atom_count: the number of atoms in the group
        :param bond_count: the number of unique bonds in the group
        :param single_letter_code: the single letter code of the group
        :param sequence_index: the index of this group in the sequence defined by the entity
        :param secondary_structure_type: the type of secondary structure used
            (types are according to DSSP and number to type mappings are defined in the specification)

        """
        # MMTF uses a NUL character to indicate a blank insertion code, but
        # StructureBuilder expects a space instead.
        if insertion_code == "\x00":
            insertion_code = " "

        self.structure_builder.init_seg(" ")
        self.structure_builder.init_residue(
            group_name, self.this_type, group_number, insertion_code
        )

    def set_model_info(self, model_id, chain_count):
        """Set the information for a model.

        :param model_id: the index for the model
        :param chain_count: the number of chains in the model

        """
        self.structure_builder.init_model(model_id)

    def set_xtal_info(self, space_group, unit_cell):
        """Set the crystallographic information for the structure.

        :param space_group: the space group name, e.g. "P 21 21 21"
        :param unit_cell: an array of length 6 with the unit cell parameters in order: a, b, c, alpha, beta, gamma

        """
        self.structure_builder.set_symmetry(space_group, unit_cell)

    def set_header_info(
        self,
        r_free,
        r_work,
        resolution,
        title,
        deposition_date,
        release_date,
        experimnetal_methods,
    ):
        """Set the header information.

        :param r_free: the measured R-Free for the structure
        :param r_work: the measure R-Work for the structure
        :param resolution: the resolution of the structure
        :param title: the title of the structure
        :param deposition_date: the deposition date of the structure
        :param release_date: the release date of the structure
        :param experimnetal_methods: the list of experimental methods in the structure

        """
        pass

    def set_bio_assembly_trans(
        self, bio_assembly_index, input_chain_indices, input_transform
    ):
        """Set the Bioassembly transformation information. A single bioassembly can have multiple transforms.

        :param bio_assembly_index: the integer index of the bioassembly
        :param input_chain_indices: the list of integer indices for the chains of this bioassembly
        :param input_transform: the list of doubles for  the transform of this bioassmbly transform.

        """
        pass

    def finalize_structure(self):
        """Any functions needed to cleanup the structure."""
        pass

    def set_group_bond(self, atom_index_one, atom_index_two, bond_order):
        """Add bonds within a group.

        :param atom_index_one: the integer atom index (in the group) of the first partner in the bond
        :param atom_index_two: the integer atom index (in the group) of the second partner in the bond
        :param bond_order: the integer bond order

        """
        pass

    def set_inter_group_bond(self, atom_index_one, atom_index_two, bond_order):
        """Add bonds between groups.

        :param atom_index_one: the integer atom index (in the structure) of the first partner in the bond
        :param atom_index_two: the integer atom index (in the structure) of the second partner in the bond
        :param bond_order: the bond order

        """
        pass