File size: 1,969 Bytes
4636dc3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
import numpy as np
from biopandas.pdb import PandasPdb
# Column layout of a PandasPdb "ATOM"/"HETATM" DataFrame, left to right.
# biopandas_mmcif2pdb reorders its output frames to exactly this sequence.
# Must remain a list: it is used directly as a DataFrame column indexer.
pdb_order = [
    # record header
    "record_name", "atom_number", "blank_1",
    # atom / residue identity
    "atom_name", "alt_loc", "residue_name", "blank_2",
    "chain_id", "residue_number", "insertion", "blank_3",
    # coordinates
    "x_coord", "y_coord", "z_coord",
    # per-atom values
    "occupancy", "b_factor", "blank_4",
    "segment_id", "element_symbol", "charge",
    # bookkeeping
    "line_idx",
]
# mmCIF column name -> PandasPdb column name.
# biopandas_mmcif2pdb keeps only the mmCIF columns listed here (the keys)
# and renames them to the PandasPdb names (the values).
mmcif_read = {
    # record header
    "group_PDB": "record_name",
    "id": "atom_number",
    # atom / residue identity
    "auth_atom_id": "atom_name",
    "auth_comp_id": "residue_name",
    "auth_asym_id": "chain_id",
    "auth_seq_id": "residue_number",
    # coordinates
    "Cartn_x": "x_coord",
    "Cartn_y": "y_coord",
    "Cartn_z": "z_coord",
    # per-atom values
    "occupancy": "occupancy",
    "B_iso_or_equiv": "b_factor",
    "type_symbol": "element_symbol",
}
# PandasPdb columns that are not produced by the mmcif_read renaming.
# biopandas_mmcif2pdb adds each of them as an empty-string column; it then
# replaces "charge" with NaN and "line_idx" with the frame's index values.
nonefields = [
    "blank_1", "alt_loc", "blank_2", "insertion", "blank_3",
    "blank_4", "segment_id", "charge", "line_idx",
]
def biopandas_mmcif2pdb(pandasmmcif, model_index=1):
    """
    Convert the ATOM and HETATM dataframes of PandasMmcif() to PandasPdb() format.

    Parameters
    ----------
    pandasmmcif
        Object whose ``df`` mapping provides "ATOM" and "HETATM" DataFrames.
        Each frame must contain a ``pdbx_PDB_model_num`` column and every
        mmCIF column named by the keys of ``mmcif_read``.
    model_index : int, default 1
        Value of ``pdbx_PDB_model_num`` selecting the rows to convert.

    Returns
    -------
    PandasPdb
        A new object whose ``df["ATOM"]`` and ``df["HETATM"]`` frames carry
        the columns of ``pdb_order``, in that order. Columns listed in
        ``nonefields`` are empty strings, except ``charge`` (NaN) and
        ``line_idx`` (the row's index value in the source frame).

    Raises
    ------
    ValueError
        If the "ATOM" frame has no rows for ``model_index``.
    """
    pandaspdb = PandasPdb()

    # Placeholder value for every column mmCIF does not supply; "charge" and
    # "line_idx" are overwritten with their real values in the chain below.
    placeholders = {column: "" for column in nonefields}

    for record in ("ATOM", "HETATM"):
        frame = pandasmmcif.df[record]
        frame = frame.loc[frame.pdbx_PDB_model_num == model_index]

        # Only ATOM decides whether the model exists; an empty HETATM
        # selection is converted like any other.
        if record == "ATOM" and frame.empty:
            raise ValueError(f"No model found for index: {model_index}")

        # Every step returns a new DataFrame, so nothing is written into the
        # filtered selection of the caller's data.
        converted = (
            frame[list(mmcif_read)]          # keep only fields found in pdb
            .rename(columns=mmcif_read)      # mmCIF names -> PandasPdb names
            .assign(**placeholders)          # add empty fields
            .assign(
                charge=np.nan,
                # Row labels survive the model filter, so line_idx keeps the
                # row's position label from the source frame.
                line_idx=lambda df: df.index.values,
            )
        )

        # reorder columns to PandasPdb order
        pandaspdb.df[record] = converted[pdb_order]

    return pandaspdb