Spaces:

aakash0017
/

DrVai-Rag-Testing

No application file

App Files Files Community

DrVai-Rag-Testing / myenv /lib /python3.10 /site-packages /Bio /PDB /mmtf /mmtfio.py

aakash0017

Upload folder using huggingface_hub

b7731cd over 1 year ago

raw

history blame contribute delete

10.2 kB

	# Copyright 2019 Joe Greener. All rights reserved.
	#
	# This file is part of the Biopython distribution and governed by your
	# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
	# Please see the LICENSE file that should have been included as part of this
	# package.

	"""Write a MMTF file."""

	import itertools
	from collections import defaultdict
	from string import ascii_uppercase
	from Bio.PDB.StructureBuilder import StructureBuilder
	from Bio.PDB.PDBIO import Select, StructureIO
	from mmtf.api.mmtf_writer import MMTFEncoder
	from Bio.SeqUtils import seq1
	from Bio.Data.PDBData import protein_letters_3to1_extended

	_select = Select()


	class MMTFIO(StructureIO):
	"""Write a Structure object as a MMTF file.

	Examples
	--------
	>>> from Bio.PDB import MMCIFParser
	>>> from Bio.PDB.mmtf import MMTFIO
	>>> parser = MMCIFParser()
	>>> structure = parser.get_structure("1a8o", "PDB/1A8O.cif")
	>>> io=MMTFIO()
	>>> io.set_structure(structure)
	>>> io.save("bio-pdb-mmtf-out.mmtf")
	>>> import os
	>>> os.remove("bio-pdb-mmtf-out.mmtf") # tidy up

	"""

	def __init__(self):
	"""Initialise."""
	pass

	def save(self, filepath, select=_select):
	"""Save the structure to a file.

	:param filepath: output file
	:type filepath: string

	:param select: selects which entities will be written.
	:type select: object

	Typically select is a subclass of L{Select}, it should
	have the following methods:

	- accept_model(model)
	- accept_chain(chain)
	- accept_residue(residue)
	- accept_atom(atom)

	These methods should return 1 if the entity is to be
	written out, 0 otherwise.
	"""
	# Similar to the PDBIO save method, we check if the filepath is a
	# string for a filepath or an open file handle
	if not isinstance(filepath, str):
	raise ValueError(
	"Writing to a file handle is not supported for MMTF, filepath must be a string"
	)
	if hasattr(self, "structure"):
	self._save_structure(filepath, select)
	else:
	raise ValueError("Use set_structure to set a structure to write out")

	def _chain_id_iterator(self):
	"""Label chains sequentially: A, B, ..., Z, AA, AB etc."""
	for size in itertools.count(1):
	for s in itertools.product(ascii_uppercase, repeat=size):
	yield "".join(s)

	def _save_structure(self, filepath, select):
	count_models, count_chains, count_groups, count_atoms = 0, 0, 0, 0

	# If atom serials are missing, renumber atoms starting from 1
	atom_serials = [a.serial_number for a in self.structure.get_atoms()]
	renumber_atoms = None in atom_serials

	encoder = MMTFEncoder()
	# The counts are set to 0 here and changed later once we have the values
	encoder.init_structure(
	total_num_bonds=0,
	total_num_atoms=0,
	total_num_groups=0,
	total_num_chains=0,
	total_num_models=0,
	structure_id=self.structure.id,
	)

	encoder.set_xtal_info(space_group="", unit_cell=None)

	# The header information is missing for some structure objects
	header_dict = defaultdict(str, self.structure.header)
	if header_dict["resolution"] == "":
	header_dict["resolution"] = None
	if header_dict["structure_method"] == "":
	header_dict["structure_method"] = []
	else:
	header_dict["structure_method"] = [header_dict["structure_method"]]

	encoder.set_header_info(
	r_free=None,
	r_work=None,
	resolution=header_dict["resolution"],
	title=header_dict["name"],
	deposition_date=header_dict["deposition_date"],
	release_date=header_dict["release_date"],
	experimental_methods=header_dict["structure_method"],
	)

	# Tracks values to replace them at the end
	chains_per_model = []
	groups_per_chain = []

	for mi, model in enumerate(self.structure.get_models()):
	if not select.accept_model(model):
	continue

	chain_id_iterator = self._chain_id_iterator()

	count_models += 1
	encoder.set_model_info(
	model_id=mi, # According to mmtf-python this is meaningless
	chain_count=0, # Set to 0 here and changed later
	)
	for chain in model.get_chains():
	if not select.accept_chain(chain):
	continue

	seqs = []
	seq = ""
	prev_residue_type = ""
	prev_resname = ""
	first_chain = True

	for residue in chain.get_unpacked_list():
	if not select.accept_residue(residue):
	continue

	count_groups += 1
	hetfield, resseq, icode = residue.get_id()
	if hetfield == " ":
	residue_type = "ATOM"
	entity_type = "polymer"
	elif hetfield == "W":
	residue_type = "HETATM"
	entity_type = "water"
	else:
	residue_type = "HETATM"
	entity_type = "non-polymer"
	resname = residue.get_resname()

	# Check if the molecule changes within the chain
	# This will always increment for the first residue in a
	# chain due to the starting values above
	# Checking for similar entities is non-trivial from the
	# structure object so we treat each molecule as a separate
	# entity
	if residue_type != prev_residue_type or (
	residue_type == "HETATM" and resname != prev_resname
	):
	encoder.set_entity_info(
	chain_indices=[count_chains],
	sequence="", # Set to empty here and changed later
	description="",
	entity_type=entity_type,
	)
	encoder.set_chain_info(
	chain_id=next(chain_id_iterator),
	chain_name="\x00"
	if len(chain.get_id().strip()) == 0
	else chain.get_id(),
	num_groups=0, # Set to 0 here and changed later
	)
	if count_chains > 0:
	groups_per_chain.append(
	count_groups - sum(groups_per_chain) - 1
	)
	if not first_chain:
	seqs.append(seq)
	first_chain = False
	count_chains += 1
	seq = ""

	if entity_type == "polymer":
	seq += seq1(resname, custom_map=protein_letters_3to1_extended)

	prev_residue_type = residue_type
	prev_resname = resname

	encoder.set_group_info(
	group_name=resname,
	group_number=residue.id[1],
	insertion_code="\x00"
	if residue.id[2] == " "
	else residue.id[2],
	group_type="", # Value in the chemcomp dictionary, which is unknown here
	atom_count=sum(
	1
	for a in residue.get_unpacked_list()
	if select.accept_atom(a)
	),
	bond_count=0,
	single_letter_code=seq1(
	resname, custom_map=protein_letters_3to1_extended
	),
	sequence_index=len(seq) - 1 if entity_type == "polymer" else -1,
	secondary_structure_type=-1,
	)

	for atom in residue.get_unpacked_list():
	if select.accept_atom(atom):
	count_atoms += 1
	encoder.set_atom_info(
	atom_name=atom.name,
	serial_number=count_atoms
	if renumber_atoms
	else atom.serial_number,
	alternative_location_id="\x00"
	if atom.altloc == " "
	else atom.altloc,
	x=atom.coord[0],
	y=atom.coord[1],
	z=atom.coord[2],
	occupancy=atom.occupancy,
	temperature_factor=atom.bfactor,
	element=atom.element,
	charge=0,
	)

	seqs.append(seq)
	# Now that we have the sequences, edit the entities to add them
	start_ind = len(encoder.entity_list) - len(seqs)
	for i, seq in enumerate(seqs):
	encoder.entity_list[start_ind + i]["sequence"] = seq

	chains_per_model.append(count_chains - sum(chains_per_model))

	groups_per_chain.append(count_groups - sum(groups_per_chain))

	encoder.chains_per_model = chains_per_model
	encoder.groups_per_chain = groups_per_chain
	encoder.num_atoms = count_atoms
	encoder.num_groups = count_groups
	encoder.num_chains = count_chains
	encoder.num_models = count_models

	encoder.finalize_structure()
	encoder.write_file(filepath)